filter + demuxer: per-cell placeholder + cuframes auto-reconnect

vf_cuda_grid: placeholder branch теперь ищет per-cell icon "<base>_<pad>.png" сначала, fallback к "<base>.png". Controller рендерит per-cell PNGs с camera labels из FrigateBridge config (placeholder_renderer.py). cuframesdec: + try_reconnect() — на CUFRAMES_ERR_DISCONNECTED не возвращаем EOF (которое kill'ит весь pipeline), а пытаемся re-subscribe каждые 2 sec. EAGAIN tells ffmpeg "try later". Когда publisher container recreate'нут (new IPC namespace), pipeline auto-reconnects к нему без ffmpeg restart. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
vf_cuda_grid: + placeholder_icon / placeholder_timeout_ms — Level 2 resilience
2026-05-25 12:00:24 +01:00 · 2026-05-25 11:24:12 +01:00 · 2026-05-22 09:31:23 +01:00 · 2026-05-21 23:34:40 +01:00 · 2026-05-21 20:02:57 +01:00 · 2026-05-21 07:08:23 +01:00
6 changed files with 2247 additions and 2 deletions
@@ -3317,6 +3317,8 @@ thumbnail_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
 transpose_npp_filter_deps="ffnvcodec libnpp"
 overlay_cuda_filter_deps="ffnvcodec"
 overlay_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
+cuda_grid_filter_deps="ffnvcodec"
+cuda_grid_filter_deps_any="cuda_nvcc cuda_llvm"
 sharpen_npp_filter_deps="ffnvcodec libnpp"

 ddagrab_filter_deps="d3d11va IDXGIOutput1 DXGI_OUTDUPL_FRAME_INFO"
@@ -410,6 +410,8 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)           += vf_datascope.o
 OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o framesync.o
 OBJS-$(CONFIG_OVERLAY_CUDA_FILTER)           += vf_overlay_cuda.o framesync.o vf_overlay_cuda.ptx.o \
                                                cuda/load_helper.o
+OBJS-$(CONFIG_CUDA_GRID_FILTER)              += vf_cuda_grid.o framesync.o \
+                                                vf_cuda_grid.ptx.o cuda/load_helper.o
 OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \
                                                opencl/overlay.o framesync.o
 OBJS-$(CONFIG_OVERLAY_QSV_FILTER)            += vf_overlay_qsv.o framesync.o
@@ -390,6 +390,7 @@ extern const AVFilter ff_vf_overlay_qsv;
 extern const AVFilter ff_vf_overlay_vaapi;
 extern const AVFilter ff_vf_overlay_vulkan;
 extern const AVFilter ff_vf_overlay_cuda;
+extern const AVFilter ff_vf_cuda_grid;
 extern const AVFilter ff_vf_owdenoise;
 extern const AVFilter ff_vf_pad;
 extern const AVFilter ff_vf_pad_opencl;
@@ -0,0 +1,185 @@
+/*
+ * cuda_grid overlay CUDA kernels.
+ *
+ * Алфа-блендинг poверх NV12 frame:
+ *   - Alpha_Fill_Y / Alpha_Fill_UV: solid colour fill region (rect/dim)
+ *   - Alpha_Blit_RGBA_Y / Alpha_Blit_RGBA_UV: blit RGBA atlas → NV12
+ *     (для text Phase 4b-3 и icon Phase 4b-4)
+ *
+ * BT.709 limited-range conversion (HDTV). См. также vf_cuda_grid.c rgb_to_yuv709.
+ *
+ * Лицензия: LGPL-2.1+ (соответствует FFmpeg)
+ */
+
+#include "cuda/vector_helpers.cuh"
+
+extern "C" {
+
+/* Solid colour α-blend на Y plane.
+ *   dst: pointer на Y plane base
+ *   dst_pitch: bytes per row
+ *   rx, ry, rw, rh: region rect (pixels, must be in-bounds)
+ *   fill: 0..255 (Y component to blend in)
+ *   alpha: 0..255 (255 = fully opaque)
+ */
+__global__ void Alpha_Fill_Y(unsigned char *dst, int dst_pitch,
+                             int rx, int ry, int rw, int rh,
+                             int fill, int alpha)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= rw || y >= rh) return;
+    unsigned char *p = dst + (ry + y) * dst_pitch + (rx + x);
+    int cur = *p;
+    *p = (unsigned char)((fill * alpha + cur * (255 - alpha)) / 255);
+}
+
+/* Solid colour α-blend на UV plane (NV12 interleaved).
+ *   rx, ry: in chroma plane coords (= full-res Y x/2, y/2)
+ *   rw, rh: also в chroma coords
+ */
+__global__ void Alpha_Fill_UV(unsigned char *dst, int dst_pitch,
+                              int rx, int ry, int rw, int rh,
+                              int fill_u, int fill_v, int alpha)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= rw || y >= rh) return;
+    unsigned char *p = dst + (ry + y) * dst_pitch + (rx + x) * 2;
+    int cu = p[0], cv = p[1];
+    p[0] = (unsigned char)((fill_u * alpha + cu * (255 - alpha)) / 255);
+    p[1] = (unsigned char)((fill_v * alpha + cv * (255 - alpha)) / 255);
+}
+
+/* Blit RGBA atlas → Y plane с α-blending.
+ *   dst, dst_pitch: Y plane
+ *   dx, dy: destination pixel offset (full-res coords)
+ *   atlas, atlas_pitch: RGBA source (interleaved R,G,B,A bytes), pitch в bytes
+ *   w, h: atlas dimensions (in pixels)
+ *   extra_alpha: дополнительный множитель (0..255) — overlay-level opacity
+ */
+__global__ void Alpha_Blit_RGBA_Y(unsigned char *dst, int dst_pitch,
+                                  int dx, int dy,
+                                  const unsigned char *atlas, int atlas_pitch,
+                                  int w, int h, int extra_alpha)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= w || y >= h) return;
+    const unsigned char *sp = atlas + y * atlas_pitch + x * 4;
+    int r = sp[0], g = sp[1], b = sp[2], a = sp[3];
+    a = a * extra_alpha / 255;
+    if (a == 0) return;
+    int Y = (int)(0.183f * r + 0.614f * g + 0.062f * b) + 16;
+    Y = Y < 0 ? 0 : (Y > 255 ? 255 : Y);
+    unsigned char *p = dst + (dy + y) * dst_pitch + (dx + x);
+    int cur = *p;
+    *p = (unsigned char)((Y * a + cur * (255 - a)) / 255);
+}
+
+/* Blit RGBA atlas → UV plane с 4:2:0 chroma subsampling.
+ *   dst, dst_pitch: UV plane
+ *   dx, dy: destination pixel offset (full-res coords; обе должны быть кратны 2)
+ *   atlas, atlas_pitch: RGBA source
+ *   w, h: atlas dimensions (full-res; UV operates на w/2 × h/2)
+ *   extra_alpha: 0..255
+ */
+__global__ void Alpha_Blit_RGBA_UV(unsigned char *dst, int dst_pitch,
+                                   int dx, int dy,
+                                   const unsigned char *atlas, int atlas_pitch,
+                                   int w, int h, int extra_alpha)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    int hw = w / 2, hh = h / 2;
+    if (x >= hw || y >= hh) return;
+
+    int sx = x * 2, sy = y * 2;
+    const unsigned char *row0 = atlas + sy * atlas_pitch;
+    const unsigned char *row1 = atlas + (sy + 1) * atlas_pitch;
+
+    int r = (row0[sx*4+0] + row0[(sx+1)*4+0] + row1[sx*4+0] + row1[(sx+1)*4+0]) >> 2;
+    int g = (row0[sx*4+1] + row0[(sx+1)*4+1] + row1[sx*4+1] + row1[(sx+1)*4+1]) >> 2;
+    int b = (row0[sx*4+2] + row0[(sx+1)*4+2] + row1[sx*4+2] + row1[(sx+1)*4+2]) >> 2;
+    int a = (row0[sx*4+3] + row0[(sx+1)*4+3] + row1[sx*4+3] + row1[(sx+1)*4+3]) >> 2;
+    a = a * extra_alpha / 255;
+    if (a == 0) return;
+
+    int U = (int)(-0.101f * r - 0.339f * g + 0.439f * b) + 128;
+    int V = (int)( 0.439f * r - 0.399f * g - 0.040f * b) + 128;
+    U = U < 0 ? 0 : (U > 255 ? 255 : U);
+    V = V < 0 ? 0 : (V > 255 ? 255 : V);
+
+    int du = dx / 2 + x;
+    int dv = dy / 2 + y;
+    unsigned char *p = dst + dv * dst_pitch + du * 2;
+    int cu = p[0], cv = p[1];
+    p[0] = (unsigned char)((U * a + cu * (255 - a)) / 255);
+    p[1] = (unsigned char)((V * a + cv * (255 - a)) / 255);
+}
+
+/* NV12 Y plane bilinear resize → output rect (Phase 7 internal scaling).
+ * dst is full output buffer; pixel at (dst_x..dst_x+dst_w-1, dst_y..dst_y+dst_h-1)
+ * receives resampled src plane (size src_w × src_h, pitch src_pitch). */
+__global__ void NV12_Resize_Y(
+    const unsigned char *src, int src_w, int src_h, int src_pitch,
+    unsigned char *dst, int dst_x, int dst_y, int dst_w, int dst_h, int dst_pitch)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= dst_w || y >= dst_h) return;
+
+    float fx = ((float)x + 0.5f) * src_w / dst_w - 0.5f;
+    float fy = ((float)y + 0.5f) * src_h / dst_h - 0.5f;
+    int x0 = max(0, (int)floorf(fx)), x1 = min(src_w - 1, x0 + 1);
+    int y0 = max(0, (int)floorf(fy)), y1 = min(src_h - 1, y0 + 1);
+    float wx = fx - x0, wy = fy - y0;
+
+    float p00 = (float)src[y0 * src_pitch + x0];
+    float p01 = (float)src[y0 * src_pitch + x1];
+    float p10 = (float)src[y1 * src_pitch + x0];
+    float p11 = (float)src[y1 * src_pitch + x1];
+    float v = (1 - wx) * (1 - wy) * p00 + wx * (1 - wy) * p01
+            + (1 - wx) * wy * p10 + wx * wy * p11;
+
+    dst[(dst_y + y) * dst_pitch + (dst_x + x)] = (unsigned char)v;
+}
+
+/* NV12 UV plane bilinear resize → output rect.
+ * UV interleaved: каждый "pixel" = 2 bytes (U, V). Size halved vs Y.
+ * dst_x/y/w/h в chroma plane coords (half-res of full frame). */
+__global__ void NV12_Resize_UV(
+    const unsigned char *src, int src_w, int src_h, int src_pitch,
+    unsigned char *dst, int dst_x, int dst_y, int dst_w, int dst_h, int dst_pitch)
+{
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= dst_w || y >= dst_h) return;
+
+    float fx = ((float)x + 0.5f) * src_w / dst_w - 0.5f;
+    float fy = ((float)y + 0.5f) * src_h / dst_h - 0.5f;
+    int x0 = max(0, (int)floorf(fx)), x1 = min(src_w - 1, x0 + 1);
+    int y0 = max(0, (int)floorf(fy)), y1 = min(src_h - 1, y0 + 1);
+    float wx = fx - x0, wy = fy - y0;
+
+    /* Each UV "pixel" is 2 bytes (U then V). */
+    float u00 = (float)src[y0 * src_pitch + x0 * 2 + 0];
+    float v00 = (float)src[y0 * src_pitch + x0 * 2 + 1];
+    float u01 = (float)src[y0 * src_pitch + x1 * 2 + 0];
+    float v01 = (float)src[y0 * src_pitch + x1 * 2 + 1];
+    float u10 = (float)src[y1 * src_pitch + x0 * 2 + 0];
+    float v10 = (float)src[y1 * src_pitch + x0 * 2 + 1];
+    float u11 = (float)src[y1 * src_pitch + x1 * 2 + 0];
+    float v11 = (float)src[y1 * src_pitch + x1 * 2 + 1];
+
+    float u = (1 - wx) * (1 - wy) * u00 + wx * (1 - wy) * u01
+            + (1 - wx) * wy * u10 + wx * wy * u11;
+    float v = (1 - wx) * (1 - wy) * v00 + wx * (1 - wy) * v01
+            + (1 - wx) * wy * v10 + wx * wy * v11;
+
+    int idx = (dst_y + y) * dst_pitch + (dst_x + x) * 2;
+    dst[idx + 0] = (unsigned char)u;
+    dst[idx + 1] = (unsigned char)v;
+}
+
+}  /* extern "C" */
@@ -20,6 +20,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
+#include "libavutil/time.h"

 #include <cuda_runtime.h>
 #include <cuframes/cuframes.h>
@@ -42,6 +43,12 @@ typedef struct CuframesDemuxerContext {
     * сохраняем его для первого read_packet */
    int pending_first_frame;
    cuframes_frame_t *first_frame;
+    /* Reconnect state — publisher container restart = new IPC namespace,
+     * old subscriber становится DISCONNECTED. Без reconnect логики input
+     * pad навсегда EOF → filter показывает placeholder forever даже когда
+     * publisher восстановился. */
+    char saved_key[80];
+    int64_t last_reconnect_us;
 } CuframesDemuxerContext;

 #define OFFSET(x) offsetof(CuframesDemuxerContext, x)
@@ -133,6 +140,8 @@ static int cuframes_read_header(AVFormatContext *s)
               key, cuframes_strerror(rc));
        return AVERROR_EXTERNAL;
    }
+    /* Save key для reconnect attempts. */
+    av_strlcpy(c->saved_key, key, sizeof(c->saved_key));

    /* Получаем первый кадр чтобы узнать width/height. */
    cuframes_frame_t *frame = NULL;
@@ -175,6 +184,34 @@ static int cuframes_read_header(AVFormatContext *s)
    return 0;
 }

+/* Attempts re-subscribe after publisher disconnect. Rate-limited к 1 try / 2 sec. */
+static void try_reconnect(AVFormatContext *s)
+{
+    CuframesDemuxerContext *c = s->priv_data;
+    int64_t now = av_gettime();
+    if (now - c->last_reconnect_us < 2000000) return;
+    c->last_reconnect_us = now;
+
+    if (c->sub) {
+        cuframes_subscriber_destroy(c->sub);
+        c->sub = NULL;
+    }
+    cuframes_subscriber_config_t rcfg = {0};
+    rcfg.key = c->saved_key;
+    rcfg.consumer_name = NULL;
+    rcfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
+    rcfg.cuda_device = c->cuda_device;
+    rcfg.connect_timeout_ms = 1000;
+    int rrc = cuframes_subscriber_create(&rcfg, &c->sub);
+    if (rrc == CUFRAMES_OK) {
+        av_log(s, AV_LOG_INFO, "cuframes: reconnected to '%s'\n", c->saved_key);
+    } else {
+        c->sub = NULL;
+        av_log(s, AV_LOG_DEBUG, "cuframes: reconnect '%s' fail: %s\n",
+               c->saved_key, cuframes_strerror(rrc));
+    }
+}
+
 static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
    CuframesDemuxerContext *c = s->priv_data;
@@ -185,11 +222,42 @@ static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
        c->first_frame = NULL;
        c->pending_first_frame = 0;
    } else {
+        /* Guard — subscriber может быть NULL после failed reconnect attempt */
+        if (!c->sub) {
+            try_reconnect(s);
+            return AVERROR(EAGAIN);
+        }
        int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
        if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
            return AVERROR(EAGAIN);
-        if (rc == CUFRAMES_ERR_DISCONNECTED)
-            return AVERROR_EOF;
+        if (rc == CUFRAMES_ERR_DISCONNECTED) {
+            /* Publisher container died / recreated. Try reconnect — rate-limited
+             * к одному attempt каждые 2 sec чтобы не spam'ить unix socket.
+             * Возвращаем EAGAIN (ffmpeg retries) вместо EOF (ffmpeg stops). */
+            int64_t now = av_gettime();
+            if (now - c->last_reconnect_us > 2000000) {
+                c->last_reconnect_us = now;
+                if (c->sub) {
+                    cuframes_subscriber_destroy(c->sub);
+                    c->sub = NULL;
+                }
+                cuframes_subscriber_config_t rcfg = {0};
+                rcfg.key = c->saved_key;
+                rcfg.consumer_name = NULL;
+                rcfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
+                rcfg.cuda_device = c->cuda_device;
+                rcfg.connect_timeout_ms = 1000;
+                int rrc = cuframes_subscriber_create(&rcfg, &c->sub);
+                if (rrc == CUFRAMES_OK) {
+                    av_log(s, AV_LOG_INFO, "cuframes: reconnected to '%s'\n",
+                           c->saved_key);
+                } else {
+                    av_log(s, AV_LOG_DEBUG, "cuframes: reconnect к '%s' fail: %s\n",
+                           c->saved_key, cuframes_strerror(rrc));
+                }
+            }
+            return AVERROR(EAGAIN);
+        }
        if (rc != CUFRAMES_OK || !frame) {
            av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
            return AVERROR_EXTERNAL;
Author	SHA1	Message	Date
gx	169a4b2c14	filter + demuxer: per-cell placeholder + cuframes auto-reconnect vf_cuda_grid: placeholder branch теперь ищет per-cell icon "<base>_<pad>.png" сначала, fallback к "<base>.png". Controller рендерит per-cell PNGs с camera labels из FrigateBridge config (placeholder_renderer.py). cuframesdec: + try_reconnect() — на CUFRAMES_ERR_DISCONNECTED не возвращаем EOF (которое kill'ит весь pipeline), а пытаемся re-subscribe каждые 2 sec. EAGAIN tells ffmpeg "try later". Когда publisher container recreate'нут (new IPC namespace), pipeline auto-reconnects к нему без ffmpeg restart. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-25 12:00:24 +01:00
gx	4795b7a7f1	vf_cuda_grid: + placeholder_icon / placeholder_timeout_ms — Level 2 resilience Закрывает архитектурный недостаток: камера / cuframes-pub умирает → pipeline filter показывает "застывшую картинку" этого input (framesync EXT_INFINITY mode), TV видит зависшее изображение и не понимает что не так. Now: per-input wall-clock detection of frame staleness. Если PTS не меняется > placeholder_timeout_ms (default 2000), filter: 1. Black-fill cell rect (Alpha_Fill_Y/UV kernels) 2. Blit placeholder atlas (default "offline" icon, loaded из icon_dir) centered внутри cell Pipeline продолжает работать. Остальные cells normal video. User видит explicit "НЕТ СИГНАЛА" placeholder вместо stuck frame. Options: placeholder_timeout_ms (default 2000, 0=disable) placeholder_icon (default "offline" — resolved через icon_dir) Per-input state: last_frame_pts[MAX_CELLS] — last seen PTS per pad last_change_wall_us[MAX_CELLS] — wall-clock когда PTS changed Тест: docker stop cuframes-pub-back_yard → через ~2s в back_yard cell появляется "НЕТ СИГНАЛА" placeholder, остальные 3 cells продолжают normal video с 25 fps. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-25 11:24:12 +01:00
gx	d7fd75657d	vf_cuda_grid: aspect_mode option — letterbox/pillarbox preserve New AVOption aspect_mode (0=stretch default, 1=fit). При fit вычисляется inscribed rect внутри cell сохраняющий src aspect ratio. Letterbox/pillarbox заполняется BT.709 limited-range black (Y=16, UV=128,128) через existing Alpha_Fill_Y/UV kernels с alpha=255. Fixes user complaint: main_plus_preview main cell {0,0,2/3,1.0} = 1.18:1 → camera 16:9 stretched по высоте. С aspect_mode=1 camera fits ширину 853×479 с letterbox 120px сверху+снизу. Verified: main_plus_preview snapshot — parking lot now proportional, не deformed; preview cells (16:9) — perfect fit camera (16:9) без bars. Cost: 2 extra kernel launches (Alpha_Fill Y+UV) per cell когда aspect_mode=1 и aspect mismatch. ~negligible vs resize. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-22 09:31:23 +01:00
gx	cd1839fecb	vf_cuda_grid: add main_with_strip layout (5 cells, info strip без input) Layout 1280x720 output: cell 0: main 960×540 = 16:9 (no stretch при camera 1920x1080) cells 1-3: 320×180 preview right column (16:9 native) cell 4: 1280×180 info strip (full width bottom) Use case: cell 4 без input pad → blank background, overlay через icon/text overlays (Grafana, чат, status). Pipeline нужен 5й input source (lavfi color=black:size=1280x180:rate=25) для filter framesync. Решает user complaint: main_plus_preview cell 0 = 2/3 width × full height = 853×720 = 1.18:1 → 16:9 camera стretching по высоте. main_with_strip cell 0 = exact 16:9. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-21 23:34:40 +01:00
gx	eb71b0a8b4	vf_cuda_grid: Phase 7 — internal NV12 scaling + runtime layout/cell_map (issue #2 Вариант 3) Цель: 1× GPU compute вместо 3× current (parallel cuda_grid instances). Filter сам делает scaling per cell — pipeline simplified до 4 inputs → 1 cuda_grid → encoder. Internal scaling: NV12_Resize_Y, NV12_Resize_UV kernels (vf_cuda_grid.cu) — bilinear resize cells где src size != cell size автоматически scale'ятся в compose pass. Fast path сохранён для exact size match (memcpy). Linked через cuModuleGetFunction в config_output. Variable max_cells: New option max_cells (default 0 = use initial layout's nb_cells). init() creates max_cells input pads ВСЕГДА. framesync_init с max_cells inputs. compose iterates only active layout's nb_cells. Runtime layout switch: process_command "set_layout <name>" — resolve template, recompute cell_px[]. Validates nb_cells ≤ max_cells. Под layout_lock (pthread_mutex) protect против concurrent compose. Cell-to-pad remapping: s->cell_map[16] (default identity). process_command "cell_map <cell> <pad>" — swap. compose использует cell_map[i] вместо i как source pad index. Pipeline теперь может: N cuframes → cuda_grid max_cells=N → NVENC Один cuda_grid instance, layout switching ZMQ command, cell content swap ZMQ command. No more streamselect, split filters — все внутри. Live verified (smoke test): 4× lavfi color 1920x1080 → cuda_grid layout=quad out_w=1280:out_h=720 Output: 4 colored quadrants 640×360 each, scaled internally. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-21 20:02:57 +01:00
gx	636bd78854	vf_cuda_grid: overlay с invalid cell — silent skip вместо AVERROR Когда controller broadcast'ит overlay command ко всем cuda_grid instances (quad/single/mpp), overlay с cell=2 (gate_lpr cell в quad/mpp) попадает в single layout который имеет nb_cells=1 → overlay_pixel_rect возвращал AVERROR(EINVAL) → render_overlays propagated к cuda_grid_compose → "Error while filtering: Invalid argument" каждый frame → pipeline crash loop. Fix: return 1 (positive skip) вместо AVERROR. render_overlay_rect уже имеет `if (ret != 0) return ret < 0 ? ret : 0;` — positive skip правильно обрабатывается без error. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-21 07:08:23 +01:00
gx	a326ef146c	vf_cuda_grid: Phase 6 — process_command 'reload_icon <name>' Invalidate cached icon atlas by name. Next render автоматически re-reads PNG file с disk. Used controller'ом для periodic re-render dynamic overlays (charts/chats) — write new PNG → send reload_icon → filter подхватывает. Без этого pattern dynamic overlay невозможен — icon overlay cached forever after first load by icon_name. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-20 21:48:09 +01:00
gx	c5130cb15c	vf_cuda_grid: Phase 4b-4 — icon overlay (PNG/JPG decode + sprite blit) PNG/JPG decode через libavcodec (системный decoder), conversion в RGBA8 через swscale, upload в pitched CUDA buffer, blit через Alpha_Blit_RGBA_Y/UV (те же kernels что text). Icon cache — shared by name (одна и та же иконка для нескольких overlays = один GPU atlas). Lifecycle: load on first use, free в uninit. Path resolution: <icon_dir>/<name> + try .png/.jpg extensions: /var/lib/cuda-grid/icons (default) /opt/cuda-grid/icons (fallback) + abs path supported Options: icon_dir= — overrides default search paths Wire format: add_overlay <id> icon x=.. y=.. icon_name=domofon opacity=200 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-19 22:41:11 +01:00
gx	b88f966f83	vf_cuda_grid: URL-decode для string overlay fields (text, icon_name) Controller URL-encode'ит string values (text="hello world" → text=hello%20world) чтобы пройти sscanf("%s") который stops on whitespace. Filter inline decode'ит '%xx' obratno в bytes. Только для text + icon_name. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-19 22:32:38 +01:00
gx	4010461300	vf_cuda_grid: Phase 4b-3 — text overlay (freetype + RGBA atlas) CPU rasterization через freetype (DejaVu/Liberation auto-detect или font_file= option), upload pitched RGBA buffer to GPU, blit через Alpha_Blit_RGBA_Y/UV kernels (Phase 4b-2 уже had). Cache: per-overlay-id atlas с keys (text, font_size, r/g/b) — re-rasterize только при change. Cleanup при remove_overlay/clear_overlays/uninit. Options: font_file= — TTF path (default: search DejaVu/Liberation) font_size= — default size if overlay не указал свой Wire format: add_overlay <id> text x=.. y=.. text=hello font_size=24 r=255 g=255 b=255 opacity=200 Conditional на CONFIG_LIBFREETYPE — без него text overlays no-op (остальные типы работают как обычно). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-19 22:30:36 +01:00
gx	1e54f04e24	configure: add cuda_grid_filter_deps_any для cuda_nvcc/cuda_llvm Без этого filter compileд но .ptx step failед: configure не knows что filter требует nvcc/clang для .cu compile. Same pattern как scale_cuda. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-19 22:26:12 +01:00
gx	8ca590004b	vf_cuda_grid: Phase 4b-2 — CUDA kernels (alpha-blended overlays) Custom .cu kernels компилируются через ffbuild → .ptx, embedded в binary через bin2c pattern (как vf_scale_cuda). Loading через ff_cuda_load_module в config_output, kernel handles cached на life-of-filter. Kernels: Alpha_Fill_Y/UV — solid colour α-blend (rect, dim, border strips) Alpha_Blit_RGBA_Y/UV — blit RGBA atlas → NV12 (text/icon в 4b-3/4) Render side: render_strip_alpha — заменил render_strip_solid (alpha=255 ≡ solid) render_overlay_rect — opacity передаётся в kernel render_overlay_dim — Y=16/UV=neutral × dim.amount (затемнение region) Также: убран unused cuda_grid_config_input (-Werror), добавлены fields CUmodule + 4×CUfunction в Context; unload в uninit. cuMemsetD2D* НЕ доступны в FFmpeg's CudaFunctions wrapper, поэтому solid fill реализован через kernel — лишний overhead minimal (16×16 threads). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-19 22:24:01 +01:00
gx	9deaca7697	vf_cuda_grid: Phase 4b-1 — rect overlay primitives (solid fill, no alpha) Добавляет inner overlay state с mutex + process_command handler. Rendering filled/border rects через cuMemsetD2D8Async/D2D16Async — без custom kernel'а (Phase 4b-2 = alpha blend, требует .cu). Commands: add_overlay <id> rect cell=N x=.. y=.. w=.. h=.. r=.. g=.. b=.. thickness=.. opacity=.. remove_overlay <id> clear_overlays text/icon/dim — типы определены, render заглушен до Phase 4b-2/3/4. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-19 22:17:41 +01:00
gx	178fc5bb4e	vf_cuda_grid: Phase 2b — delegated scaling to upstream scale_npp После попытки in-filter NPP scaling обнаружено что nppiResize не имеет _C2R variant для NV12 UV interleaved (только C1R, C3R, C4R). Alternatives: - 2× nppiResize_8u_C1R с split/merge через intermediate buffers - custom CUDA kernel - treat UV pair как 16u (blending artifact на boundaries) Pragmatic decision: cuda_grid делает только composition (same-size memcpy), а scaling делегируется существующему scale_npp filter в filter chain: [0]scale_npp=1280:1080[s0]; [1]scale_npp=640:360[s1]; ... \ [s0][s1]...cuda_grid=layout=main_plus_preview Unix philosophy + leverages production-tested NPP code. Controller (Phase 3) auto-generates filter graph с scale_npp per input. Revert: - #include <nppi.h> - libnpp dependency в configure (cuda_grid_filter_deps="ffnvcodec") - nppiResize* calls в compose path Add: - Error message с примером scale_npp chain pattern - Doc в file header c filter graph пример Phase 2 = full deliverable (2a + 2b). Дальше Phase 3 controller.	2026-05-19 21:45:40 +01:00
gx	11f310061a	vf_cuda_grid: Phase 2b — NPP scaling для mixed-size inputs - Add libnpp dependency в configure (cuda_grid_filter_deps) - #include <nppi.h>, nppSetStream(s->cu_stream) перед resize batch - Smart copy-or-scale в compose path: - src.size == cell.size → cuMemcpy2DAsync (fast path, zero overhead) - else → nppiResizeSqrPixel_8u_C1R для Y + _C2R для NV12 UV interleaved - NPPI_INTER_LINEAR interpolation (bilinear — стандартный для video) - Destination pointer offset через explicit pointer arithmetic (NPP не имеет dst_offset параметра, нужно сместить pSrc указатель) Это unblock'ает mixed-size cameras (parking 1920x1080 + gate_lpr 2688x1520 в одном grid с main_plus_preview layout — big cell scaled до 1280x1080, small cells scaled до 640x360). Phase 2 complete (2a + 2b). Phase 3 будет controller sidecar.	2026-05-19 21:20:04 +01:00
gx	df476472e2	vf_cuda_grid: fix include avstring.h для av_asprintf	2026-05-19 20:58:29 +01:00
gx	6ee2f474c7	vf_cuda_grid: Phase 2a — layout templates + dynamic nb_inputs Layout templates (9): - single, dual_horizontal, dual_vertical - quad (default), main_plus_preview (1 big + 3 small) - six_grid (3x2), nine_grid (3x3), sixteen_grid (4x4) - panoramic Cells определены в normalized координатах (0.0-1.0), переводятся в pixels в config_output (× out_w/out_h). Alignment до chroma boundary (NV12 ÷ 2). Filter options: - layout=<name> (default quad) - out_w=<int> (default 1920) - out_h=<int> (default 1080) Dynamic inputs: - nb_inputs derived из layout (single=1, quad=4, nine_grid=9, sixteen_grid=16) - ff_append_inpad_free_name в init() для каждой cell - AVFILTER_FLAG_DYNAMIC_INPUTS на filter Phase 2a limitation: - Каждый input должен быть точно cell_px size (no scaling). - Phase 2b добавит NPP resize для mixed-size inputs.	2026-05-19 20:57:08 +01:00
gx	4313c3f30d	vf_cuda_grid: fix #include cuda_check.h + mixed decl warnings (-Werror)	2026-05-19 20:50:17 +01:00
gx	097ca81605	vf_cuda_grid: Phase 1 MVP — fixed quad layout, 4 CUDA inputs → 1 output Phase 1 deliverable (см. gx/vf-cuda-grid#1): - libavfilter/vf_cuda_grid.c (~270 LOC): multi-input filter, fixed 2×2 quad - 4 NV12 CUDA frames same size → 2W × 2H output frame - Composition: cuMemcpy2DAsync per Y + UV plane на каждый input - framesync для lock-step pull всех 4 inputs - Output hw_frames_ctx allocated from input device_ref - Build wiring: CONFIG_CUDA_GRID_FILTER → libavfilter/{Makefile,allfilters.c}, configure deps на ffnvcodec Limitations Phase 1: - All inputs must be same size (no scaling) - Quad layout hardcoded (no DSL, no runtime switching) - NV12 only (no RGBA/YUV420P) Phase 2: dynamic layouts + scaling. Phase 3: runtime control via process_command.	2026-05-19 20:47:00 +01:00