Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 169a4b2c14 | |||
| 4795b7a7f1 | |||
| d7fd75657d | |||
| cd1839fecb | |||
| eb71b0a8b4 | |||
| 636bd78854 | |||
| a326ef146c | |||
| c5130cb15c | |||
| b88f966f83 | |||
| 4010461300 | |||
| 1e54f04e24 | |||
| 8ca590004b | |||
| 9deaca7697 | |||
| 178fc5bb4e | |||
| 11f310061a | |||
| df476472e2 | |||
| 6ee2f474c7 | |||
| 4313c3f30d | |||
| 097ca81605 |
@@ -3317,6 +3317,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
|
||||
transpose_npp_filter_deps="ffnvcodec libnpp"
|
||||
overlay_cuda_filter_deps="ffnvcodec"
|
||||
overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
|
||||
cuda_grid_filter_deps="ffnvcodec"
|
||||
cuda_grid_filter_deps_any="cuda_nvcc cuda_llvm"
|
||||
sharpen_npp_filter_deps="ffnvcodec libnpp"
|
||||
|
||||
ddagrab_filter_deps="d3d11va IDXGIOutput1 DXGI_OUTDUPL_FRAME_INFO"
|
||||
|
||||
@@ -410,6 +410,8 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o
|
||||
OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o
|
||||
OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) += vf_overlay_cuda.o framesync.o vf_overlay_cuda.ptx.o \
|
||||
cuda/load_helper.o
|
||||
OBJS-$(CONFIG_CUDA_GRID_FILTER) += vf_cuda_grid.o framesync.o \
|
||||
vf_cuda_grid.ptx.o cuda/load_helper.o
|
||||
OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
|
||||
opencl/overlay.o framesync.o
|
||||
OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o
|
||||
|
||||
@@ -390,6 +390,7 @@ extern const AVFilter ff_vf_overlay_qsv;
|
||||
extern const AVFilter ff_vf_overlay_vaapi;
|
||||
extern const AVFilter ff_vf_overlay_vulkan;
|
||||
extern const AVFilter ff_vf_overlay_cuda;
|
||||
extern const AVFilter ff_vf_cuda_grid;
|
||||
extern const AVFilter ff_vf_owdenoise;
|
||||
extern const AVFilter ff_vf_pad;
|
||||
extern const AVFilter ff_vf_pad_opencl;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
* cuda_grid overlay CUDA kernels.
|
||||
*
|
||||
* Алфа-блендинг poверх NV12 frame:
|
||||
* - Alpha_Fill_Y / Alpha_Fill_UV: solid colour fill region (rect/dim)
|
||||
* - Alpha_Blit_RGBA_Y / Alpha_Blit_RGBA_UV: blit RGBA atlas → NV12
|
||||
* (для text Phase 4b-3 и icon Phase 4b-4)
|
||||
*
|
||||
* BT.709 limited-range conversion (HDTV). См. также vf_cuda_grid.c rgb_to_yuv709.
|
||||
*
|
||||
* Лицензия: LGPL-2.1+ (соответствует FFmpeg)
|
||||
*/
|
||||
|
||||
#include "cuda/vector_helpers.cuh"
|
||||
|
||||
extern "C" {
|
||||
|
||||
/* Solid colour α-blend на Y plane.
|
||||
* dst: pointer на Y plane base
|
||||
* dst_pitch: bytes per row
|
||||
* rx, ry, rw, rh: region rect (pixels, must be in-bounds)
|
||||
* fill: 0..255 (Y component to blend in)
|
||||
* alpha: 0..255 (255 = fully opaque)
|
||||
*/
|
||||
__global__ void Alpha_Fill_Y(unsigned char *dst, int dst_pitch,
|
||||
int rx, int ry, int rw, int rh,
|
||||
int fill, int alpha)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
if (x >= rw || y >= rh) return;
|
||||
unsigned char *p = dst + (ry + y) * dst_pitch + (rx + x);
|
||||
int cur = *p;
|
||||
*p = (unsigned char)((fill * alpha + cur * (255 - alpha)) / 255);
|
||||
}
|
||||
|
||||
/* Solid colour α-blend на UV plane (NV12 interleaved).
|
||||
* rx, ry: in chroma plane coords (= full-res Y x/2, y/2)
|
||||
* rw, rh: also в chroma coords
|
||||
*/
|
||||
__global__ void Alpha_Fill_UV(unsigned char *dst, int dst_pitch,
|
||||
int rx, int ry, int rw, int rh,
|
||||
int fill_u, int fill_v, int alpha)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
if (x >= rw || y >= rh) return;
|
||||
unsigned char *p = dst + (ry + y) * dst_pitch + (rx + x) * 2;
|
||||
int cu = p[0], cv = p[1];
|
||||
p[0] = (unsigned char)((fill_u * alpha + cu * (255 - alpha)) / 255);
|
||||
p[1] = (unsigned char)((fill_v * alpha + cv * (255 - alpha)) / 255);
|
||||
}
|
||||
|
||||
/* Blit RGBA atlas → Y plane с α-blending.
|
||||
* dst, dst_pitch: Y plane
|
||||
* dx, dy: destination pixel offset (full-res coords)
|
||||
* atlas, atlas_pitch: RGBA source (interleaved R,G,B,A bytes), pitch в bytes
|
||||
* w, h: atlas dimensions (in pixels)
|
||||
* extra_alpha: дополнительный множитель (0..255) — overlay-level opacity
|
||||
*/
|
||||
__global__ void Alpha_Blit_RGBA_Y(unsigned char *dst, int dst_pitch,
|
||||
int dx, int dy,
|
||||
const unsigned char *atlas, int atlas_pitch,
|
||||
int w, int h, int extra_alpha)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
if (x >= w || y >= h) return;
|
||||
const unsigned char *sp = atlas + y * atlas_pitch + x * 4;
|
||||
int r = sp[0], g = sp[1], b = sp[2], a = sp[3];
|
||||
a = a * extra_alpha / 255;
|
||||
if (a == 0) return;
|
||||
int Y = (int)(0.183f * r + 0.614f * g + 0.062f * b) + 16;
|
||||
Y = Y < 0 ? 0 : (Y > 255 ? 255 : Y);
|
||||
unsigned char *p = dst + (dy + y) * dst_pitch + (dx + x);
|
||||
int cur = *p;
|
||||
*p = (unsigned char)((Y * a + cur * (255 - a)) / 255);
|
||||
}
|
||||
|
||||
/* Blit RGBA atlas → UV plane с 4:2:0 chroma subsampling.
|
||||
* dst, dst_pitch: UV plane
|
||||
* dx, dy: destination pixel offset (full-res coords; обе должны быть кратны 2)
|
||||
* atlas, atlas_pitch: RGBA source
|
||||
* w, h: atlas dimensions (full-res; UV operates на w/2 × h/2)
|
||||
* extra_alpha: 0..255
|
||||
*/
|
||||
__global__ void Alpha_Blit_RGBA_UV(unsigned char *dst, int dst_pitch,
|
||||
int dx, int dy,
|
||||
const unsigned char *atlas, int atlas_pitch,
|
||||
int w, int h, int extra_alpha)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
int hw = w / 2, hh = h / 2;
|
||||
if (x >= hw || y >= hh) return;
|
||||
|
||||
int sx = x * 2, sy = y * 2;
|
||||
const unsigned char *row0 = atlas + sy * atlas_pitch;
|
||||
const unsigned char *row1 = atlas + (sy + 1) * atlas_pitch;
|
||||
|
||||
int r = (row0[sx*4+0] + row0[(sx+1)*4+0] + row1[sx*4+0] + row1[(sx+1)*4+0]) >> 2;
|
||||
int g = (row0[sx*4+1] + row0[(sx+1)*4+1] + row1[sx*4+1] + row1[(sx+1)*4+1]) >> 2;
|
||||
int b = (row0[sx*4+2] + row0[(sx+1)*4+2] + row1[sx*4+2] + row1[(sx+1)*4+2]) >> 2;
|
||||
int a = (row0[sx*4+3] + row0[(sx+1)*4+3] + row1[sx*4+3] + row1[(sx+1)*4+3]) >> 2;
|
||||
a = a * extra_alpha / 255;
|
||||
if (a == 0) return;
|
||||
|
||||
int U = (int)(-0.101f * r - 0.339f * g + 0.439f * b) + 128;
|
||||
int V = (int)( 0.439f * r - 0.399f * g - 0.040f * b) + 128;
|
||||
U = U < 0 ? 0 : (U > 255 ? 255 : U);
|
||||
V = V < 0 ? 0 : (V > 255 ? 255 : V);
|
||||
|
||||
int du = dx / 2 + x;
|
||||
int dv = dy / 2 + y;
|
||||
unsigned char *p = dst + dv * dst_pitch + du * 2;
|
||||
int cu = p[0], cv = p[1];
|
||||
p[0] = (unsigned char)((U * a + cu * (255 - a)) / 255);
|
||||
p[1] = (unsigned char)((V * a + cv * (255 - a)) / 255);
|
||||
}
|
||||
|
||||
/* NV12 Y plane bilinear resize → output rect (Phase 7 internal scaling).
|
||||
* dst is full output buffer; pixel at (dst_x..dst_x+dst_w-1, dst_y..dst_y+dst_h-1)
|
||||
* receives resampled src plane (size src_w × src_h, pitch src_pitch). */
|
||||
__global__ void NV12_Resize_Y(
|
||||
const unsigned char *src, int src_w, int src_h, int src_pitch,
|
||||
unsigned char *dst, int dst_x, int dst_y, int dst_w, int dst_h, int dst_pitch)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
if (x >= dst_w || y >= dst_h) return;
|
||||
|
||||
float fx = ((float)x + 0.5f) * src_w / dst_w - 0.5f;
|
||||
float fy = ((float)y + 0.5f) * src_h / dst_h - 0.5f;
|
||||
int x0 = max(0, (int)floorf(fx)), x1 = min(src_w - 1, x0 + 1);
|
||||
int y0 = max(0, (int)floorf(fy)), y1 = min(src_h - 1, y0 + 1);
|
||||
float wx = fx - x0, wy = fy - y0;
|
||||
|
||||
float p00 = (float)src[y0 * src_pitch + x0];
|
||||
float p01 = (float)src[y0 * src_pitch + x1];
|
||||
float p10 = (float)src[y1 * src_pitch + x0];
|
||||
float p11 = (float)src[y1 * src_pitch + x1];
|
||||
float v = (1 - wx) * (1 - wy) * p00 + wx * (1 - wy) * p01
|
||||
+ (1 - wx) * wy * p10 + wx * wy * p11;
|
||||
|
||||
dst[(dst_y + y) * dst_pitch + (dst_x + x)] = (unsigned char)v;
|
||||
}
|
||||
|
||||
/* NV12 UV plane bilinear resize → output rect.
|
||||
* UV interleaved: каждый "pixel" = 2 bytes (U, V). Size halved vs Y.
|
||||
* dst_x/y/w/h в chroma plane coords (half-res of full frame). */
|
||||
__global__ void NV12_Resize_UV(
|
||||
const unsigned char *src, int src_w, int src_h, int src_pitch,
|
||||
unsigned char *dst, int dst_x, int dst_y, int dst_w, int dst_h, int dst_pitch)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
if (x >= dst_w || y >= dst_h) return;
|
||||
|
||||
float fx = ((float)x + 0.5f) * src_w / dst_w - 0.5f;
|
||||
float fy = ((float)y + 0.5f) * src_h / dst_h - 0.5f;
|
||||
int x0 = max(0, (int)floorf(fx)), x1 = min(src_w - 1, x0 + 1);
|
||||
int y0 = max(0, (int)floorf(fy)), y1 = min(src_h - 1, y0 + 1);
|
||||
float wx = fx - x0, wy = fy - y0;
|
||||
|
||||
/* Each UV "pixel" is 2 bytes (U then V). */
|
||||
float u00 = (float)src[y0 * src_pitch + x0 * 2 + 0];
|
||||
float v00 = (float)src[y0 * src_pitch + x0 * 2 + 1];
|
||||
float u01 = (float)src[y0 * src_pitch + x1 * 2 + 0];
|
||||
float v01 = (float)src[y0 * src_pitch + x1 * 2 + 1];
|
||||
float u10 = (float)src[y1 * src_pitch + x0 * 2 + 0];
|
||||
float v10 = (float)src[y1 * src_pitch + x0 * 2 + 1];
|
||||
float u11 = (float)src[y1 * src_pitch + x1 * 2 + 0];
|
||||
float v11 = (float)src[y1 * src_pitch + x1 * 2 + 1];
|
||||
|
||||
float u = (1 - wx) * (1 - wy) * u00 + wx * (1 - wy) * u01
|
||||
+ (1 - wx) * wy * u10 + wx * wy * u11;
|
||||
float v = (1 - wx) * (1 - wy) * v00 + wx * (1 - wy) * v01
|
||||
+ (1 - wx) * wy * v10 + wx * wy * v11;
|
||||
|
||||
int idx = (dst_y + y) * dst_pitch + (dst_x + x) * 2;
|
||||
dst[idx + 0] = (unsigned char)u;
|
||||
dst[idx + 1] = (unsigned char)v;
|
||||
}
|
||||
|
||||
} /* extern "C" */
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "libavutil/avstring.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/time.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuframes/cuframes.h>
|
||||
@@ -42,6 +43,12 @@ typedef struct CuframesDemuxerContext {
|
||||
* сохраняем его для первого read_packet */
|
||||
int pending_first_frame;
|
||||
cuframes_frame_t *first_frame;
|
||||
/* Reconnect state — publisher container restart = new IPC namespace,
|
||||
* old subscriber становится DISCONNECTED. Без reconnect логики input
|
||||
* pad навсегда EOF → filter показывает placeholder forever даже когда
|
||||
* publisher восстановился. */
|
||||
char saved_key[80];
|
||||
int64_t last_reconnect_us;
|
||||
} CuframesDemuxerContext;
|
||||
|
||||
#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
|
||||
@@ -133,6 +140,8 @@ static int cuframes_read_header(AVFormatContext *s)
|
||||
key, cuframes_strerror(rc));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
/* Save key для reconnect attempts. */
|
||||
av_strlcpy(c->saved_key, key, sizeof(c->saved_key));
|
||||
|
||||
/* Получаем первый кадр чтобы узнать width/height. */
|
||||
cuframes_frame_t *frame = NULL;
|
||||
@@ -175,6 +184,34 @@ static int cuframes_read_header(AVFormatContext *s)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Attempts re-subscribe after publisher disconnect. Rate-limited к 1 try / 2 sec. */
|
||||
static void try_reconnect(AVFormatContext *s)
|
||||
{
|
||||
CuframesDemuxerContext *c = s->priv_data;
|
||||
int64_t now = av_gettime();
|
||||
if (now - c->last_reconnect_us < 2000000) return;
|
||||
c->last_reconnect_us = now;
|
||||
|
||||
if (c->sub) {
|
||||
cuframes_subscriber_destroy(c->sub);
|
||||
c->sub = NULL;
|
||||
}
|
||||
cuframes_subscriber_config_t rcfg = {0};
|
||||
rcfg.key = c->saved_key;
|
||||
rcfg.consumer_name = NULL;
|
||||
rcfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
|
||||
rcfg.cuda_device = c->cuda_device;
|
||||
rcfg.connect_timeout_ms = 1000;
|
||||
int rrc = cuframes_subscriber_create(&rcfg, &c->sub);
|
||||
if (rrc == CUFRAMES_OK) {
|
||||
av_log(s, AV_LOG_INFO, "cuframes: reconnected to '%s'\n", c->saved_key);
|
||||
} else {
|
||||
c->sub = NULL;
|
||||
av_log(s, AV_LOG_DEBUG, "cuframes: reconnect '%s' fail: %s\n",
|
||||
c->saved_key, cuframes_strerror(rrc));
|
||||
}
|
||||
}
|
||||
|
||||
static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||||
{
|
||||
CuframesDemuxerContext *c = s->priv_data;
|
||||
@@ -185,11 +222,42 @@ static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||||
c->first_frame = NULL;
|
||||
c->pending_first_frame = 0;
|
||||
} else {
|
||||
/* Guard — subscriber может быть NULL после failed reconnect attempt */
|
||||
if (!c->sub) {
|
||||
try_reconnect(s);
|
||||
return AVERROR(EAGAIN);
|
||||
}
|
||||
int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||||
if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
|
||||
return AVERROR(EAGAIN);
|
||||
if (rc == CUFRAMES_ERR_DISCONNECTED)
|
||||
return AVERROR_EOF;
|
||||
if (rc == CUFRAMES_ERR_DISCONNECTED) {
|
||||
/* Publisher container died / recreated. Try reconnect — rate-limited
|
||||
* к одному attempt каждые 2 sec чтобы не spam'ить unix socket.
|
||||
* Возвращаем EAGAIN (ffmpeg retries) вместо EOF (ffmpeg stops). */
|
||||
int64_t now = av_gettime();
|
||||
if (now - c->last_reconnect_us > 2000000) {
|
||||
c->last_reconnect_us = now;
|
||||
if (c->sub) {
|
||||
cuframes_subscriber_destroy(c->sub);
|
||||
c->sub = NULL;
|
||||
}
|
||||
cuframes_subscriber_config_t rcfg = {0};
|
||||
rcfg.key = c->saved_key;
|
||||
rcfg.consumer_name = NULL;
|
||||
rcfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
|
||||
rcfg.cuda_device = c->cuda_device;
|
||||
rcfg.connect_timeout_ms = 1000;
|
||||
int rrc = cuframes_subscriber_create(&rcfg, &c->sub);
|
||||
if (rrc == CUFRAMES_OK) {
|
||||
av_log(s, AV_LOG_INFO, "cuframes: reconnected to '%s'\n",
|
||||
c->saved_key);
|
||||
} else {
|
||||
av_log(s, AV_LOG_DEBUG, "cuframes: reconnect к '%s' fail: %s\n",
|
||||
c->saved_key, cuframes_strerror(rrc));
|
||||
}
|
||||
}
|
||||
return AVERROR(EAGAIN);
|
||||
}
|
||||
if (rc != CUFRAMES_OK || !frame) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
|
||||
return AVERROR_EXTERNAL;
|
||||
|
||||
Reference in New Issue
Block a user