vf_cuda_grid: Phase 4b-1 — rect overlay primitives (solid fill, no alpha)

Добавляет inner overlay state с mutex + process_command handler.
Rendering filled/border rects через cuMemsetD2D8Async/D2D16Async — без
custom kernel'а (Phase 4b-2 = alpha blend, требует .cu).

Commands:
  add_overlay    <id> rect cell=N x=.. y=.. w=.. h=.. r=.. g=.. b=.. thickness=.. opacity=..
  remove_overlay <id>
  clear_overlays

text/icon/dim — типы определены, render заглушен до Phase 4b-2/3/4.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
gx
2026-05-19 22:17:41 +01:00
parent 178fc5bb4e
commit 9deaca7697
+391 -11
View File
@@ -40,6 +40,7 @@
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/thread.h"
#include "avfilter.h"
#include "filters.h"
@@ -48,7 +49,46 @@
#include "video.h"
#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
#define MAX_CELLS 16
#define MAX_CELLS 16
#define MAX_OVERLAYS 64
#define OVERLAY_ID_MAX 32
#define OVERLAY_TEXT_MAX 128
/* ─── Overlay primitives (Phase 4b-1: rect only, no alpha) ─────────────── */
/* Kind of overlay primitive, selecting which member of GridOverlay.u is
 * meaningful. The per-value comments name the phase in which rendering for
 * that type is planned. */
typedef enum {
OV_TYPE_RECT = 0,
OV_TYPE_TEXT, /* Phase 4b-3 */
OV_TYPE_ICON, /* Phase 4b-4 */
OV_TYPE_DIM, /* Phase 4b-2 (alpha kernel) */
} GridOverlayType;
/* One overlay primitive: placement and visibility fields shared by every
 * type, plus a per-type payload in `u` selected by `type`. */
typedef struct GridOverlay {
char id[OVERLAY_ID_MAX]; /* NUL-terminated identifier of the overlay */
GridOverlayType type; /* selects the active member of `u` */
int cell; /* -1 = absolute on the output, otherwise cell-relative */
float x, y, w, h; /* [0..1] normalized; relative to the cell or the output */
int z_order; /* lower values are drawn first */
uint8_t opacity; /* 0..255; only actually applied from Phase 4b-2 on */
int visible; /* non-zero = eligible for drawing */
union {
struct {
uint8_t r, g, b; /* fill / border colour, RGB */
int thickness; /* 0 = filled, >0 = border (border via 4 strips) */
} rect;
struct {
char text[OVERLAY_TEXT_MAX]; /* NUL-terminated string to draw */
int font_size;
uint8_t r, g, b; /* text colour, RGB */
} text;
struct {
char icon_name[32]; /* NUL-terminated icon identifier */
} icon;
struct {
uint8_t amount; /* 0..255, alpha for dim */
} dim;
} u;
} GridOverlay;
/* ─── Layout templates (normalized координаты 0.0–1.0) ─────────────────── */
@@ -157,6 +197,12 @@ typedef struct CudaGridContext {
struct {
int x, y, w, h;
} cell_px[MAX_CELLS];
/* Overlay state (Phase 4b) — mutex-guarded, process_command/render thread-safe */
GridOverlay overlays[MAX_OVERLAYS];
int nb_overlays;
pthread_mutex_t overlay_lock;
int overlay_lock_inited;
} CudaGridContext;
/* ─── Composition: copy одного input plane в target region output ──────── */
@@ -188,6 +234,272 @@ static int copy_input_plane(AVFilterContext *ctx,
return CHECK_CU(s->hwctx->internal->cuda_dl->cuMemcpy2DAsync(&cpy, s->cu_stream));
}
/* ─── Overlay parsing + rendering (Phase 4b-1: rect, solid fill, no alpha) ─ */
/* BT.709 limited-range RGB -> YUV for 8-bit samples. Good enough for HDTV
 * output (e.g. any 1920x1080 output).
 *
 * r, g, b: full-range 8-bit input colour.
 * Y, U, V: receive the converted components, clipped to 0..255.
 *
 * The range offset plus 0.5 is folded into the float sum before the cast.
 * Every sum is then strictly positive (the most negative chroma term is about
 * -112), so truncation by the int cast equals rounding to nearest. Without
 * this, pure blue/red produced chroma 239 instead of the nominal 240. */
static av_always_inline void rgb_to_yuv709(uint8_t r, uint8_t g, uint8_t b,
                                           uint8_t *Y, uint8_t *U, uint8_t *V)
{
    int y = (int)( 0.183f * r + 0.614f * g + 0.062f * b +  16.5f);
    int u = (int)(-0.101f * r - 0.339f * g + 0.439f * b + 128.5f);
    int v = (int)( 0.439f * r - 0.399f * g - 0.040f * b + 128.5f);

    *Y = av_clip_uint8(y);
    *U = av_clip_uint8(u);
    *V = av_clip_uint8(v);
}
/* Parse overlay arguments of the form
 *     "<id> <type> <key>=<val> <key>=<val> ..."
 * into *ov.
 *
 * *ov is zeroed, then filled with defaults (cell=-1, opacity=255, visible=1,
 * rect thickness=0, dim amount=128) and finally with every recognised
 * key=value pair. Keys that do not apply to the overlay type are ignored.
 *
 * Returns 0 on success, AVERROR(EINVAL) when args is NULL, when <id>/<type>
 * are missing, or when <type> is unknown. A malformed token after the header
 * stops parsing with a warning; pairs parsed before it are kept. */
static int parse_overlay_args(AVFilterContext *ctx, const char *args, GridOverlay *ov)
{
    char id[OVERLAY_ID_MAX], type_str[16];
    const char *p;
    int consumed = 0;

    if (!args)
        return AVERROR(EINVAL);

    /* %n records how much of args the header occupied, so the key=value scan
     * starts at the right place even with leading blanks or tab separators
     * (both of which "%s" accepts). */
    if (sscanf(args, "%31s %15s%n", id, type_str, &consumed) != 2) {
        av_log(ctx, AV_LOG_ERROR, "overlay args: expected '<id> <type> ...', got: %s\n", args);
        return AVERROR(EINVAL);
    }

    memset(ov, 0, sizeof(*ov));
    av_strlcpy(ov->id, id, sizeof(ov->id));

    if (!strcmp(type_str, "rect")) ov->type = OV_TYPE_RECT;
    else if (!strcmp(type_str, "text")) ov->type = OV_TYPE_TEXT;
    else if (!strcmp(type_str, "icon")) ov->type = OV_TYPE_ICON;
    else if (!strcmp(type_str, "dim")) ov->type = OV_TYPE_DIM;
    else {
        av_log(ctx, AV_LOG_ERROR, "unknown overlay type: %s\n", type_str);
        return AVERROR(EINVAL);
    }

    /* Defaults */
    ov->cell = -1;
    ov->opacity = 255;
    ov->visible = 1;
    if (ov->type == OV_TYPE_RECT)
        ov->u.rect.thickness = 0; /* filled */
    if (ov->type == OV_TYPE_DIM)
        ov->u.dim.amount = 128;

    /* Continue right after <id> and <type>. */
    p = args + consumed;
    for (;;) {
        char key[32], val[OVERLAY_TEXT_MAX];
        int n = 0;

        p += strspn(p, " \t\r\n");
        if (!*p)
            break;

        if (sscanf(p, "%31[^= \t\r\n]=%127s%n", key, val, &n) < 2) {
            /* Not a key=value token: report it instead of dropping the rest
             * of the argument string silently. */
            av_log(ctx, AV_LOG_WARNING,
                   "overlay %s: ignoring malformed arguments starting at: %s\n",
                   ov->id, p);
            break;
        }
        p += n;

        if (!strcmp(key, "cell")) ov->cell = atoi(val);
        else if (!strcmp(key, "x")) ov->x = (float)atof(val);
        else if (!strcmp(key, "y")) ov->y = (float)atof(val);
        else if (!strcmp(key, "w")) ov->w = (float)atof(val);
        else if (!strcmp(key, "h")) ov->h = (float)atof(val);
        else if (!strcmp(key, "z_order") || !strcmp(key, "z"))
            ov->z_order = atoi(val);
        else if (!strcmp(key, "opacity")) ov->opacity = av_clip(atoi(val), 0, 255);
        else if (!strcmp(key, "visible")) ov->visible = atoi(val) ? 1 : 0;
        else if (ov->type == OV_TYPE_RECT) {
            if (!strcmp(key, "r")) ov->u.rect.r = av_clip_uint8(atoi(val));
            else if (!strcmp(key, "g")) ov->u.rect.g = av_clip_uint8(atoi(val));
            else if (!strcmp(key, "b")) ov->u.rect.b = av_clip_uint8(atoi(val));
            else if (!strcmp(key, "thickness")) ov->u.rect.thickness = atoi(val);
        } else if (ov->type == OV_TYPE_TEXT) {
            if (!strcmp(key, "text")) av_strlcpy(ov->u.text.text, val, sizeof(ov->u.text.text));
            else if (!strcmp(key, "font_size")) ov->u.text.font_size = atoi(val);
            else if (!strcmp(key, "r")) ov->u.text.r = av_clip_uint8(atoi(val));
            else if (!strcmp(key, "g")) ov->u.text.g = av_clip_uint8(atoi(val));
            else if (!strcmp(key, "b")) ov->u.text.b = av_clip_uint8(atoi(val));
        } else if (ov->type == OV_TYPE_ICON) {
            if (!strcmp(key, "icon_name")) av_strlcpy(ov->u.icon.icon_name, val, sizeof(ov->u.icon.icon_name));
        } else if (ov->type == OV_TYPE_DIM) {
            if (!strcmp(key, "amount")) ov->u.dim.amount = av_clip_uint8(atoi(val));
        }
    }
    return 0;
}
/* Insert *ov into the overlay table, or overwrite the entry that already
 * carries the same id. The caller must hold overlay_lock.
 * Returns 0 on success, AVERROR(ENOSPC) when the id is new and the table
 * already holds MAX_OVERLAYS entries. */
static int overlay_upsert_locked(CudaGridContext *s, const GridOverlay *ov)
{
    GridOverlay *slot = NULL;
    int idx;

    /* Look for an existing entry with this id; first match wins. */
    for (idx = 0; idx < s->nb_overlays && !slot; idx++) {
        if (strcmp(s->overlays[idx].id, ov->id) == 0)
            slot = &s->overlays[idx];
    }

    if (!slot) {
        if (s->nb_overlays >= MAX_OVERLAYS)
            return AVERROR(ENOSPC);
        slot = &s->overlays[s->nb_overlays++];
    }

    *slot = *ov;
    return 0;
}
/* Remove the overlay whose id equals `id`, closing the gap so the remaining
 * entries keep their relative order. The caller must hold overlay_lock.
 * Returns 0 on success, AVERROR(ENOENT) when no entry has that id. */
static int overlay_remove_locked(CudaGridContext *s, const char *id)
{
    int pos = 0;

    while (pos < s->nb_overlays && strcmp(s->overlays[pos].id, id) != 0)
        pos++;

    if (pos >= s->nb_overlays)
        return AVERROR(ENOENT);

    /* Shift the tail down by one slot (zero bytes when removing the last). */
    memmove(s->overlays + pos, s->overlays + pos + 1,
            (s->nb_overlays - pos - 1) * sizeof(GridOverlay));
    s->nb_overlays--;
    return 0;
}
/* Scale a normalized fraction by a pixel extent, truncating toward zero like
 * a plain int cast but saturating at +/-2^29. Overlay geometry comes from
 * command text, so the product may be infinite or far outside int range,
 * where a bare float->int conversion is undefined. The bound also keeps the
 * sums formed by the caller from overflowing. NaN yields 0. */
static int overlay_scale_px(float frac, int extent)
{
    const float limit = 536870912.0f; /* 2^29, exactly representable */
    float px = frac * extent;

    if (px != px) /* NaN */
        return 0;
    if (px > limit)
        return 536870912;
    if (px < -limit)
        return -536870912;
    return (int)px;
}

/* Compute the pixel rectangle of an overlay: cell-relative when ov->cell >= 0,
 * otherwise relative to the whole output. The result is clipped against the
 * output bounds and aligned down to 2 px for NV12 chroma.
 *
 * Returns 0 when the rectangle is drawable, 1 when it is empty and should be
 * skipped, AVERROR(EINVAL) when ov->cell is not a cell of the current layout. */
static int overlay_pixel_rect(CudaGridContext *s, const GridOverlay *ov,
                              int *out_x, int *out_y, int *out_w, int *out_h)
{
    int rx, ry, rw, rh;
    int base_x, base_y, base_w, base_h;

    if (ov->cell < 0) {
        base_x = 0;
        base_y = 0;
        base_w = s->out_width;
        base_h = s->out_height;
    } else if (ov->cell < s->layout->nb_cells) {
        base_x = s->cell_px[ov->cell].x;
        base_y = s->cell_px[ov->cell].y;
        base_w = s->cell_px[ov->cell].w;
        base_h = s->cell_px[ov->cell].h;
    } else {
        return AVERROR(EINVAL);
    }

    /* NaN geometry cannot be placed anywhere; report it as an empty rect. */
    if (ov->x != ov->x || ov->y != ov->y || ov->w != ov->w || ov->h != ov->h) {
        *out_x = 0; *out_y = 0; *out_w = 0; *out_h = 0;
        return 1;
    }

    rx = base_x + overlay_scale_px(ov->x, base_w);
    ry = base_y + overlay_scale_px(ov->y, base_h);
    rw = overlay_scale_px(ov->w, base_w);
    rh = overlay_scale_px(ov->h, base_h);

    /* Clip against the output frame. */
    if (rx < 0) { rw += rx; rx = 0; }
    if (ry < 0) { rh += ry; ry = 0; }
    if (rx + rw > s->out_width)  rw = s->out_width - rx;
    if (ry + rh > s->out_height) rh = s->out_height - ry;

    /* Even origin and size so the half-resolution chroma plane lines up. */
    rx &= ~1; ry &= ~1; rw &= ~1; rh &= ~1;

    *out_x = rx; *out_y = ry; *out_w = rw; *out_h = rh;
    return (rw > 0 && rh > 0) ? 0 : 1; /* 1 -> empty, skip */
}
/* Fill one rectangle of an NV12 frame with a solid colour (no alpha) using
 * asynchronous 2D memsets on s->cu_stream. The CUDA context must already be
 * pushed by the caller.
 *
 * x, y, w, h are aligned down to even values first; a non-positive width or
 * height makes the call a no-op. Returns 0 on success or the negative error
 * produced by CHECK_CU. */
static int render_strip_solid(AVFilterContext *ctx, AVFrame *out,
                              int x, int y, int w, int h,
                              uint8_t Y, uint8_t U, uint8_t V)
{
    CudaGridContext *s = ctx->priv;
    CUdeviceptr luma_dst, chroma_dst;
    unsigned short uv_pair;
    int err;

    x &= ~1;
    y &= ~1;
    w &= ~1;
    h &= ~1;
    if (w <= 0 || h <= 0)
        return 0;

    /* Luma plane: one byte per pixel. */
    luma_dst = (CUdeviceptr)(out->data[0] + (size_t)y * out->linesize[0] + x);
    err = CHECK_CU(s->hwctx->internal->cuda_dl->cuMemsetD2D8Async(
        luma_dst, out->linesize[0], Y, (size_t)w, (size_t)h, s->cu_stream));
    if (err < 0)
        return err;

    /* NV12 chroma is interleaved UV at half resolution, so one 16-bit element
     * is one UV pair. Little-endian: low byte = U, high byte = V. */
    uv_pair = (unsigned short)(U | (V << 8));
    chroma_dst = (CUdeviceptr)(out->data[1] + (size_t)(y / 2) * out->linesize[1] + x);
    return CHECK_CU(s->hwctx->internal->cuda_dl->cuMemsetD2D16Async(
        chroma_dst, out->linesize[1], uv_pair, (size_t)(w / 2), (size_t)(h / 2), s->cu_stream));
}
/* Draw one rect overlay into `out`: a solid fill when thickness <= 0,
 * otherwise a border made of four solid strips (top, bottom, left, right).
 * An empty pixel rectangle is silently skipped. Returns 0 on success or a
 * negative error from the geometry or strip helpers. */
static int render_overlay_rect(AVFilterContext *ctx, AVFrame *out, const GridOverlay *ov)
{
    CudaGridContext *s = ctx->priv;
    int strips[4][4];
    int left, top, width, height;
    int border, k, ret;
    uint8_t Y, U, V;

    ret = overlay_pixel_rect(s, ov, &left, &top, &width, &height);
    if (ret < 0)
        return ret;
    if (ret > 0)
        return 0; /* nothing visible */

    rgb_to_yuv709(ov->u.rect.r, ov->u.rect.g, ov->u.rect.b, &Y, &U, &V);

    if (ov->u.rect.thickness <= 0)
        return render_strip_solid(ctx, out, left, top, width, height, Y, U, V);

    /* Border width: at most half of the shorter side, at least 2 px, even. */
    border = ov->u.rect.thickness;
    border = FFMIN(border, FFMIN(width / 2, height / 2));
    if (border < 2)
        border = 2;
    border &= ~1;

    /* Top */
    strips[0][0] = left;
    strips[0][1] = top;
    strips[0][2] = width;
    strips[0][3] = border;
    /* Bottom */
    strips[1][0] = left;
    strips[1][1] = top + height - border;
    strips[1][2] = width;
    strips[1][3] = border;
    /* Left */
    strips[2][0] = left;
    strips[2][1] = top + border;
    strips[2][2] = border;
    strips[2][3] = height - 2 * border;
    /* Right */
    strips[3][0] = left + width - border;
    strips[3][1] = top + border;
    strips[3][2] = border;
    strips[3][3] = height - 2 * border;

    for (k = 0; k < 4; k++) {
        ret = render_strip_solid(ctx, out, strips[k][0], strips[k][1],
                                 strips[k][2], strips[k][3], Y, U, V);
        if (ret < 0)
            return ret;
    }
    return ret;
}
/* Draw every visible overlay onto `out` in ascending z_order. The CUDA
 * context must be pushed by the caller.
 *
 * The overlay table is copied under overlay_lock and the lock is released
 * before any drawing, so rendering works on a private snapshot. Only rect
 * overlays are drawn; the other types just emit a trace message. Returns 0,
 * or the first negative error from rect rendering. */
static int render_overlays(AVFilterContext *ctx, AVFrame *out)
{
    CudaGridContext *s = ctx->priv;
    GridOverlay snapshot[MAX_OVERLAYS];
    int count, pos, k, err;

    pthread_mutex_lock(&s->overlay_lock);
    count = s->nb_overlays;
    memcpy(snapshot, s->overlays, (size_t)count * sizeof(GridOverlay));
    pthread_mutex_unlock(&s->overlay_lock);

    /* Stable insertion sort by z_order; at most MAX_OVERLAYS entries. */
    for (pos = 1; pos < count; pos++) {
        GridOverlay moving = snapshot[pos];

        k = pos;
        while (k > 0 && snapshot[k - 1].z_order > moving.z_order) {
            snapshot[k] = snapshot[k - 1];
            k--;
        }
        snapshot[k] = moving;
    }

    for (pos = 0; pos < count; pos++) {
        const GridOverlay *ov = &snapshot[pos];

        if (!ov->visible)
            continue;

        if (ov->type == OV_TYPE_RECT) {
            err = render_overlay_rect(ctx, out, ov);
            if (err < 0)
                return err;
        } else if (ov->type == OV_TYPE_DIM) {
            av_log(ctx, AV_LOG_TRACE, "overlay %s: dim type — Phase 4b-2 (alpha kernel)\n", ov->id);
        } else if (ov->type == OV_TYPE_TEXT) {
            av_log(ctx, AV_LOG_TRACE, "overlay %s: text — Phase 4b-3 (freetype)\n", ov->id);
        } else if (ov->type == OV_TYPE_ICON) {
            av_log(ctx, AV_LOG_TRACE, "overlay %s: icon — Phase 4b-4 (sprite)\n", ov->id);
        }
    }
    return 0;
}
/* ─── Framesync callback ──────────────────────────────────────────────── */
static int cuda_grid_compose(FFFrameSync *fs)
@@ -267,6 +579,13 @@ static int cuda_grid_compose(FFFrameSync *fs)
}
}
/* Overlay pass (Phase 4b) */
ret = render_overlays(ctx, out);
if (ret < 0) {
CHECK_CU(s->hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
goto fail;
}
CHECK_CU(s->hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
return ff_filter_frame(outlink, out);
@@ -305,7 +624,15 @@ static av_cold int cuda_grid_init(AVFilterContext *ctx)
return ret;
}
av_log(ctx, AV_LOG_INFO, "cuda_grid layout=%s cells=%d output=%dx%d\n",
/* Overlay mutex */
ret = pthread_mutex_init(&s->overlay_lock, NULL);
if (ret) {
av_log(ctx, AV_LOG_ERROR, "overlay_lock init failed: %d\n", ret);
return AVERROR(ENOMEM);
}
s->overlay_lock_inited = 1;
av_log(ctx, AV_LOG_INFO, "cuda_grid layout=%s cells=%d output=%dx%d (overlays: rect)\n",
lt->name, lt->nb_cells, s->out_width, s->out_height);
return 0;
}
@@ -314,6 +641,58 @@ static av_cold void cuda_grid_uninit(AVFilterContext *ctx)
{
CudaGridContext *s = ctx->priv;
ff_framesync_uninit(&s->fs);
if (s->overlay_lock_inited) {
pthread_mutex_destroy(&s->overlay_lock);
s->overlay_lock_inited = 0;
}
}
/* Runtime command handler for the overlay table.
 *
 *   add_overlay    <id> <type> key=val ...  insert or replace an overlay
 *   remove_overlay <id>                     delete one overlay
 *   clear_overlays                          delete all overlays
 *
 * Any other command is forwarded to ff_filter_process_command(). When `res`
 * is a usable buffer, a short status string is written into it. Returns 0 on
 * success or a negative AVERROR code (parse failure, table full, id not found).
 *
 * All accesses to the overlay table, including the entry count reported in
 * the reply, happen while overlay_lock is held. */
static int cuda_grid_process_command(AVFilterContext *ctx, const char *cmd,
                                     const char *arg, char *res, int res_len, int flags)
{
    CudaGridContext *s = ctx->priv;
    /* Only write a reply into a buffer that can actually hold one. */
    const int want_res = res && res_len > 0;
    int ret;

    if (!strcmp(cmd, "add_overlay")) {
        GridOverlay ov;
        int count;

        ret = parse_overlay_args(ctx, arg, &ov);
        if (ret < 0) {
            if (want_res) av_strlcpy(res, "err parse", res_len);
            return ret;
        }

        pthread_mutex_lock(&s->overlay_lock);
        ret = overlay_upsert_locked(s, &ov);
        count = s->nb_overlays; /* snapshot under the lock for the reply */
        pthread_mutex_unlock(&s->overlay_lock);

        if (want_res) {
            if (ret == 0)
                snprintf(res, res_len, "ok id=%s n=%d", ov.id, count);
            else
                snprintf(res, res_len, "err full id=%s", ov.id);
        }
        return ret;
    }

    if (!strcmp(cmd, "remove_overlay")) {
        char id[OVERLAY_ID_MAX];

        if (!arg || sscanf(arg, "%31s", id) != 1) {
            if (want_res) av_strlcpy(res, "err parse", res_len);
            return AVERROR(EINVAL);
        }

        pthread_mutex_lock(&s->overlay_lock);
        ret = overlay_remove_locked(s, id);
        pthread_mutex_unlock(&s->overlay_lock);

        if (want_res) {
            if (ret == 0)
                snprintf(res, res_len, "ok id=%s", id);
            else
                snprintf(res, res_len, "not_found id=%s", id);
        }
        return ret;
    }

    if (!strcmp(cmd, "clear_overlays")) {
        pthread_mutex_lock(&s->overlay_lock);
        s->nb_overlays = 0;
        pthread_mutex_unlock(&s->overlay_lock);
        if (want_res) av_strlcpy(res, "ok", res_len);
        return 0;
    }

    /* Future: set_layout (Phase 3 on the filter side; for now nb_inputs is
     * hard-coded in init). */

    /* Fall back to standard option/command handling */
    return ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
}
static int cuda_grid_config_input(AVFilterLink *inlink)
@@ -465,16 +844,17 @@ static const AVFilterPad cuda_grid_outputs[] = {
};
/* Filter definition for "cuda_grid": wires the init/uninit/activate callbacks
 * and the runtime command handler into the AVFilter table.
 * NOTE(review): most initializers below appear twice with identical values,
 * and .process_command only once — this looks like the removed and added
 * lines of a diff shown together; confirm against the actual file. */
const AVFilter ff_vf_cuda_grid = {
.name = "cuda_grid",
.description = NULL_IF_CONFIG_SMALL("GPU-native video grid composer (CUDA)."),
.priv_class = &cuda_grid_class,
.priv_size = sizeof(CudaGridContext),
.init = cuda_grid_init,
.uninit = cuda_grid_uninit,
.activate = cuda_grid_activate,
.name = "cuda_grid",
.description = NULL_IF_CONFIG_SMALL("GPU-native video grid composer (CUDA)."),
.priv_class = &cuda_grid_class,
.priv_size = sizeof(CudaGridContext),
.init = cuda_grid_init,
.uninit = cuda_grid_uninit,
.activate = cuda_grid_activate,
.process_command = cuda_grid_process_command,
/* No FILTER_INPUTS — pads are added dynamically in init() per layout. */
FILTER_OUTPUTS(cuda_grid_outputs),
FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA),
.flags = AVFILTER_FLAG_HWDEVICE | AVFILTER_FLAG_DYNAMIC_INPUTS,
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
.flags = AVFILTER_FLAG_HWDEVICE | AVFILTER_FLAG_DYNAMIC_INPUTS,
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};