diff --git a/libavfilter/vf_cuda_grid.c b/libavfilter/vf_cuda_grid.c index 60a5ccd..9537b54 100644 --- a/libavfilter/vf_cuda_grid.c +++ b/libavfilter/vf_cuda_grid.c @@ -4,12 +4,13 @@ * Принимает N CUDA-frames на входе, выдаёт один composed frame с N cells * в layout. End-to-end CUDA (без CPU round-trip). * - * Phase 1 (MVP): fixed quad layout 2×2, 4 NV12-inputs одинакового размера, - * output size = 2W × 2H, без scaling. Композиция через cuMemcpy2DAsync per - * Y/UV plane на каждый input → soответствующую quadrant'у output. + * Phase 2a: layout templates (single/dual_h/dual_v/quad/main_plus_preview/ + * six_grid/nine_grid/sixteen_grid/panoramic), dynamic nb_inputs, output size + * через option (default 1920×1080). Cell rects = normalized × output size. + * **Scaling пока нет** — каждый input должен быть точно cell size (Phase 2b NPP). * * Future phases (см. gx/vf-cuda-grid#1): - * - Phase 2: dynamic layouts + per-cell scaling + * - Phase 2b: per-cell scaling через libnpp (mixed-size inputs) * - Phase 3: runtime layout switching через process_command (ZMQ) * - Phase 4+: overlay primitives (rect/text/icon/image/dim/graph/chat) * @@ -34,28 +35,115 @@ #include "video.h" #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) +#define MAX_CELLS 16 -#define CUDA_GRID_INPUTS 4 /* Phase 1: fixed quad */ +/* ─── Layout templates (normalized координаты 0.0–1.0) ─────────────────── */ + +typedef struct LayoutCell { + float x, y, w, h; /* normalized fraction of output size */ +} LayoutCell; + +typedef struct LayoutTemplate { + const char *name; + int nb_cells; + LayoutCell cells[MAX_CELLS]; +} LayoutTemplate; + +/* Layouts copy-paste'нуты по структуре cctv-processor config/grids.json. */ +static const LayoutTemplate layouts[] = { + { + "single", 1, + { {0.0f, 0.0f, 1.0f, 1.0f} } + }, + { + "dual_horizontal", 2, + { {0.0f, 0.0f, 0.5f, 1.0f}, {0.5f, 0.0f, 0.5f, 1.0f} } + }, + { + "dual_vertical", 2, + { {0.0f, 0.0f, 1.0f, 0.5f}, {0.0f, 0.5f, 1.0f, 0.5f} } + }, + { + "quad", 4, + { + {0.0f, 0.0f, 0.5f, 0.5f}, {0.5f, 0.0f, 0.5f, 0.5f}, + {0.0f, 0.5f, 0.5f, 0.5f}, {0.5f, 0.5f, 0.5f, 0.5f}, + } + }, + { + /* Main camera 2/3 width, 3 small cameras stacked справа сверху вниз */ + "main_plus_preview", 4, + { + {0.0f, 0.0f, 2.0f/3, 1.0f}, + {2.0f/3, 0.0f, 1.0f/3, 1.0f/3}, + {2.0f/3, 1.0f/3, 1.0f/3, 1.0f/3}, + {2.0f/3, 2.0f/3, 1.0f/3, 1.0f/3}, + } + }, + { + "six_grid", 6, + { + {0.0f, 0.0f, 1.0f/3, 0.5f}, {1.0f/3, 0.0f, 1.0f/3, 0.5f}, {2.0f/3, 0.0f, 1.0f/3, 0.5f}, + {0.0f, 0.5f, 1.0f/3, 0.5f}, {1.0f/3, 0.5f, 1.0f/3, 0.5f}, {2.0f/3, 0.5f, 1.0f/3, 0.5f}, + } + }, + { + "nine_grid", 9, + { + {0.0f, 0.0f, 1.0f/3, 1.0f/3}, {1.0f/3, 0.0f, 1.0f/3, 1.0f/3}, {2.0f/3, 0.0f, 1.0f/3, 1.0f/3}, + {0.0f, 1.0f/3, 1.0f/3, 1.0f/3}, {1.0f/3, 1.0f/3, 1.0f/3, 1.0f/3}, {2.0f/3, 1.0f/3, 1.0f/3, 1.0f/3}, + {0.0f, 2.0f/3, 1.0f/3, 1.0f/3}, {1.0f/3, 2.0f/3, 1.0f/3, 1.0f/3}, {2.0f/3, 2.0f/3, 1.0f/3, 1.0f/3}, + } + }, + { + "sixteen_grid", 16, + { + {0.00f, 0.00f, 0.25f, 0.25f}, {0.25f, 0.00f, 0.25f, 0.25f}, {0.50f, 0.00f, 0.25f, 0.25f}, {0.75f, 0.00f, 0.25f, 0.25f}, + {0.00f, 0.25f, 0.25f, 0.25f}, {0.25f, 0.25f, 0.25f, 0.25f}, {0.50f, 0.25f, 0.25f, 0.25f}, {0.75f, 0.25f, 0.25f, 0.25f}, + {0.00f, 0.50f, 0.25f, 0.25f}, {0.25f, 0.50f, 0.25f, 0.25f}, {0.50f, 0.50f, 0.25f, 0.25f}, {0.75f, 0.50f, 0.25f, 0.25f}, + {0.00f, 0.75f, 0.25f, 0.25f}, {0.25f, 0.75f, 0.25f, 0.25f}, {0.50f, 0.75f, 0.25f, 0.25f}, {0.75f, 0.75f, 0.25f, 0.25f}, + } + }, + { + /* Один widescreen panoramic — 1 cell full width */ + "panoramic", 1, + { {0.0f, 0.0f, 1.0f, 1.0f} } + }, +}; + +static const LayoutTemplate *find_layout(const char *name) +{ + for (size_t i = 0; i < FF_ARRAY_ELEMS(layouts); i++) { + if (!strcmp(layouts[i].name, name)) + return &layouts[i]; + } + return NULL; +} + +/* ─── Filter state ─────────────────────────────────────────────────────── */ typedef struct CudaGridContext { const AVClass *class; - AVBufferRef *hw_device_ctx; + /* Options */ + char *layout_name; + int out_width; + int out_height; + + /* Resolved layout (после init) */ + const LayoutTemplate *layout; + + /* CUDA */ AVCUDADeviceContext *hwctx; CUcontext cu_ctx; CUstream cu_stream; FFFrameSync fs; - /* Output dimensions (computed in config_output) */ - int out_width; - int out_height; - - /* Per-cell target rectangles в output frame. - * Phase 1 hardcode: 4 ячейки 2×2 (top-left, top-right, bottom-left, bottom-right). */ + /* Per-cell pixel rects (computed в config_output из normalized × out size) */ struct { int x, y, w, h; - } cells[CUDA_GRID_INPUTS]; + } cell_px[MAX_CELLS]; } CudaGridContext; /* ─── Composition: copy одного input plane в target region output ──────── */ @@ -87,7 +175,7 @@ static int copy_input_plane(AVFilterContext *ctx, return CHECK_CU(s->hwctx->internal->cuda_dl->cuMemcpy2DAsync(&cpy, s->cu_stream)); } -/* ─── Framesync callback — N frames аre ready, compose ────────────────── */ +/* ─── Framesync callback ──────────────────────────────────────────────── */ static int cuda_grid_compose(FFFrameSync *fs) { @@ -95,12 +183,12 @@ static int cuda_grid_compose(FFFrameSync *fs) AVFilterLink *outlink = ctx->outputs[0]; CudaGridContext *s = ctx->priv; AVFrame *out = NULL; - AVFrame *in[CUDA_GRID_INPUTS] = {0}; + AVFrame *in[MAX_CELLS] = {0}; CUcontext dummy; - int ret; + int i, ret; + int nb = s->layout->nb_cells; - /* Сбор всех N input frames из framesync */ - for (int i = 0; i < CUDA_GRID_INPUTS; i++) { + for (i = 0; i < nb; i++) { ret = ff_framesync_get_frame(fs, i, &in[i], 0); if (ret < 0) return ret; @@ -110,43 +198,38 @@ static int cuda_grid_compose(FFFrameSync *fs) } } - /* Output frame из output's hw_frames_pool */ out = ff_get_video_buffer(outlink, s->out_width, s->out_height); if (!out) return AVERROR(ENOMEM); - /* Copy props (timestamps, color metadata) от первого input */ ret = av_frame_copy_props(out, in[0]); if (ret < 0) goto fail; out->width = s->out_width; out->height = s->out_height; - /* CUDA context push для всех cuMemcpy в этом filter call */ ret = CHECK_CU(s->hwctx->internal->cuda_dl->cuCtxPushCurrent(s->cu_ctx)); if (ret < 0) goto fail; - /* Для каждого input — copy Y plane + UV plane в свою quadrant. - * NV12 layout: data[0] = Y, data[1] = UV interleaved. linesize[0/1] = pitch. */ - for (int i = 0; i < CUDA_GRID_INPUTS; i++) { + for (i = 0; i < nb; i++) { AVFrame *src = in[i]; - int cx = s->cells[i].x; - int cy = s->cells[i].y; - int cw = s->cells[i].w; - int ch = s->cells[i].h; + int cx = s->cell_px[i].x; + int cy = s->cell_px[i].y; + int cw = s->cell_px[i].w; + int ch = s->cell_px[i].h; if (src->width != cw || src->height != ch) { av_log(ctx, AV_LOG_ERROR, - "input %d size %dx%d != expected cell size %dx%d " - "(Phase 1: no scaling, all inputs must match cell size)\n", + "input %d size %dx%d != cell size %dx%d " + "(Phase 2a: no scaling — Phase 2b добавит NPP resize)\n", i, src->width, src->height, cw, ch); CHECK_CU(s->hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); ret = AVERROR(EINVAL); goto fail; } - /* Y plane (full resolution, 1 byte per pixel) */ + /* Y plane */ ret = copy_input_plane(ctx, (CUdeviceptr)src->data[0], src->linesize[0], src->width, src->height, @@ -157,7 +240,7 @@ static int cuda_grid_compose(FFFrameSync *fs) goto fail; } - /* UV plane (half resolution для NV12, но 2 bytes per "pixel" — interleaved UV) */ + /* UV plane (NV12: половинное разрешение, 2 bytes per "pixel") */ ret = copy_input_plane(ctx, (CUdeviceptr)src->data[1], src->linesize[1], src->width / 2, src->height / 2, @@ -170,7 +253,6 @@ static int cuda_grid_compose(FFFrameSync *fs) } CHECK_CU(s->hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); - return ff_filter_frame(outlink, out); fail: @@ -178,14 +260,38 @@ fail: return ret; } -/* ─── Lifecycle: init / uninit / query_formats / config_input / config_output ─ */ +/* ─── Lifecycle ────────────────────────────────────────────────────────── */ static av_cold int cuda_grid_init(AVFilterContext *ctx) { CudaGridContext *s = ctx->priv; - /* Сами inputs регистрируем в filter struct (см. внизу — нельзя AVFILTER_INPUT_COUNT_MAX - * без явных AVFilterPad'ов). Phase 1 fix=4. */ - (void)s; + const LayoutTemplate *lt; + int i, ret; + + lt = find_layout(s->layout_name); + if (!lt) { + av_log(ctx, AV_LOG_ERROR, "unknown layout '%s'. Доступны: ", s->layout_name); + for (i = 0; i < (int)FF_ARRAY_ELEMS(layouts); i++) + av_log(ctx, AV_LOG_ERROR, "%s ", layouts[i].name); + av_log(ctx, AV_LOG_ERROR, "\n"); + return AVERROR(EINVAL); + } + s->layout = lt; + + /* Dynamic inputs — append pad per cell */ + for (i = 0; i < lt->nb_cells; i++) { + AVFilterPad pad = { 0 }; + pad.type = AVMEDIA_TYPE_VIDEO; + pad.name = av_asprintf("input%d", i); + if (!pad.name) + return AVERROR(ENOMEM); + ret = ff_append_inpad_free_name(ctx, &pad); + if (ret < 0) + return ret; + } + + av_log(ctx, AV_LOG_INFO, "cuda_grid layout=%s cells=%d output=%dx%d\n", + lt->name, lt->nb_cells, s->out_width, s->out_height); return 0; } @@ -193,7 +299,6 @@ static av_cold void cuda_grid_uninit(AVFilterContext *ctx) { CudaGridContext *s = ctx->priv; ff_framesync_uninit(&s->fs); - av_buffer_unref(&s->hw_device_ctx); } static int cuda_grid_config_input(AVFilterLink *inlink) @@ -220,7 +325,7 @@ static int cuda_grid_config_output(AVFilterLink *outlink) AVHWDeviceContext *hwdev; AVBufferRef *out_ref; AVHWFramesContext *out_hwfc; - int W, H, ret; + int i, ret; if (!inl0->hw_frames_ctx) return AVERROR(EINVAL); @@ -228,15 +333,28 @@ static int cuda_grid_config_output(AVFilterLink *outlink) if (hwfc0->sw_format != AV_PIX_FMT_NV12) { av_log(ctx, AV_LOG_ERROR, - "Phase 1 supports only NV12, got %s\n", + "Phase 1-2a поддерживают только NV12, got %s\n", av_get_pix_fmt_name(hwfc0->sw_format)); return AVERROR(EINVAL); } - /* Все inputs должны иметь одинаковый device и sw_format (Phase 1 also same size) */ - W = in0->w; - H = in0->h; - for (int i = 1; i < CUDA_GRID_INPUTS; i++) { + /* Compute pixel rects из normalized layout × output size */ + for (i = 0; i < s->layout->nb_cells; i++) { + s->cell_px[i].x = (int)(s->layout->cells[i].x * s->out_width); + s->cell_px[i].y = (int)(s->layout->cells[i].y * s->out_height); + s->cell_px[i].w = (int)(s->layout->cells[i].w * s->out_width); + s->cell_px[i].h = (int)(s->layout->cells[i].h * s->out_height); + /* Align до chroma boundary (NV12 → 2x2) */ + s->cell_px[i].x &= ~1; + s->cell_px[i].y &= ~1; + s->cell_px[i].w &= ~1; + s->cell_px[i].h &= ~1; + av_log(ctx, AV_LOG_VERBOSE, " cell[%d] = %dx%d @ (%d,%d)\n", + i, s->cell_px[i].w, s->cell_px[i].h, s->cell_px[i].x, s->cell_px[i].y); + } + + /* Validate все inputs: device match + sw_format match */ + for (i = 1; i < s->layout->nb_cells; i++) { AVFilterLink *inN = ctx->inputs[i]; FilterLink *ilN = ff_filter_link(inN); AVHWFramesContext *hN; @@ -251,34 +369,16 @@ static int cuda_grid_config_output(AVFilterLink *outlink) av_log(ctx, AV_LOG_ERROR, "input %d sw_format mismatch\n", i); return AVERROR(EINVAL); } - if (inN->w != W || inN->h != H) { - av_log(ctx, AV_LOG_ERROR, - "Phase 1: input %d size %dx%d != input 0 size %dx%d. " - "В этой фазе scaling не поддерживается, все inputs должны быть одного размера.\n", - i, inN->w, inN->h, W, H); - return AVERROR(EINVAL); - } } - /* Output = 2W × 2H для quad layout */ - s->out_width = 2 * W; - s->out_height = 2 * H; outlink->w = s->out_width; outlink->h = s->out_height; - /* Hardcoded quad cell positions */ - s->cells[0].x = 0; s->cells[0].y = 0; s->cells[0].w = W; s->cells[0].h = H; - s->cells[1].x = W; s->cells[1].y = 0; s->cells[1].w = W; s->cells[1].h = H; - s->cells[2].x = 0; s->cells[2].y = H; s->cells[2].w = W; s->cells[2].h = H; - s->cells[3].x = W; s->cells[3].y = H; s->cells[3].w = W; s->cells[3].h = H; - - /* Setup CUDA device + stream context из input 0 */ hwdev = hwfc0->device_ctx; s->hwctx = (AVCUDADeviceContext *)hwdev->hwctx; s->cu_ctx = s->hwctx->cuda_ctx; s->cu_stream = s->hwctx->stream; - /* Аллокация output hw_frames_ctx — copy от input #0 с обновлёнными размерами */ out_ref = av_hwframe_ctx_alloc(hwfc0->device_ref); if (!out_ref) return AVERROR(ENOMEM); @@ -296,14 +396,12 @@ static int cuda_grid_config_output(AVFilterLink *outlink) } outl->hw_frames_ctx = out_ref; - /* Setup framesync для lock-step pull от N inputs */ - ret = ff_framesync_init(&s->fs, ctx, CUDA_GRID_INPUTS); + ret = ff_framesync_init(&s->fs, ctx, s->layout->nb_cells); if (ret < 0) return ret; { - int i; FFFrameSyncIn *fs_in; - for (i = 0; i < CUDA_GRID_INPUTS; i++) { + for (i = 0; i < s->layout->nb_cells; i++) { fs_in = &s->fs.in[i]; fs_in->time_base = ctx->inputs[i]->time_base; fs_in->sync = 1; @@ -315,7 +413,6 @@ static int cuda_grid_config_output(AVFilterLink *outlink) s->fs.on_event = cuda_grid_compose; outlink->time_base = ctx->inputs[0]->time_base; - return ff_framesync_configure(&s->fs); } @@ -325,10 +422,18 @@ static int cuda_grid_activate(AVFilterContext *ctx) return ff_framesync_activate(&s->fs); } -/* ─── Filter registration ──────────────────────────────────────────────── */ +/* ─── Options + registration ───────────────────────────────────────────── */ + +#define OFFSET(x) offsetof(CudaGridContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) static const AVOption cuda_grid_options[] = { - /* Phase 1: no options. Phase 2 добавит `layout=`. */ + { "layout", "имя layout template", + OFFSET(layout_name), AV_OPT_TYPE_STRING, { .str = "quad" }, 0, 0, FLAGS }, + { "out_w", "ширина output frame в пикселях", + OFFSET(out_width), AV_OPT_TYPE_INT, { .i64 = 1920 }, 16, 16384, FLAGS }, + { "out_h", "высота output frame в пикселях", + OFFSET(out_height), AV_OPT_TYPE_INT, { .i64 = 1080 }, 16, 16384, FLAGS }, { NULL } }; @@ -340,13 +445,6 @@ static const AVClass cuda_grid_class = { .category = AV_CLASS_CATEGORY_FILTER, }; -static const AVFilterPad cuda_grid_inputs[] = { - { .name = "input0", .type = AVMEDIA_TYPE_VIDEO, .config_props = cuda_grid_config_input }, - { .name = "input1", .type = AVMEDIA_TYPE_VIDEO, .config_props = cuda_grid_config_input }, - { .name = "input2", .type = AVMEDIA_TYPE_VIDEO, .config_props = cuda_grid_config_input }, - { .name = "input3", .type = AVMEDIA_TYPE_VIDEO, .config_props = cuda_grid_config_input }, -}; - static const AVFilterPad cuda_grid_outputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .config_props = cuda_grid_config_output }, }; @@ -359,9 +457,9 @@ const AVFilter ff_vf_cuda_grid = { .init = cuda_grid_init, .uninit = cuda_grid_uninit, .activate = cuda_grid_activate, - FILTER_INPUTS(cuda_grid_inputs), + /* No FILTER_INPUTS — pads added dynamically в init() per layout. */ FILTER_OUTPUTS(cuda_grid_outputs), FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA), - .flags = AVFILTER_FLAG_HWDEVICE, + .flags = AVFILTER_FLAG_HWDEVICE | AVFILTER_FLAG_DYNAMIC_INPUTS, .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, };