Files
cuframes/filter/ffmpeg-7.1-cuframes-demuxer.patch
T
gx 99df68f69c feat(filter): FFmpeg 7.1 cuframes:// input demuxer
Out-of-tree patch + sources для FFmpeg-демаксера, который позволяет любому
FFmpeg-based потребителю (Frigate, кастомные рекордеры, re-streamers)
читать "cuframes://<key>" как обычный URL — без своего NVDEC.

Состав:
- filter/cuframesdec.c — реализация (libavformat-style)
- filter/ffmpeg-7.1-cuframes-demuxer.patch — patch для FFmpeg n7.1
  (Makefile / allformats.c / configure)
- filter/README.md — инструкции по сборке + CLI smoke test + Frigate plan

v1 ограничения (намеренно):
- только NV12
- GPU → CPU копия через cudaMemcpy2DAsync (zero-copy AVHWFramesContext — v2)

CLI smoke test 2026-05-17 (host build FFmpeg + libcuframes,
publisher на камере 192.168.88.98 1920x1080 HEVC 25fps):
  ffmpeg -f cuframes -i cuframes://cam-ff -c:v copy -f null -
  → frame=100 fps=25 q=-1.0 speed=1x  ✓
  → "cuframes: connected to 'cam-ff' — 1920x1080 NV12"

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 09:02:12 +01:00

333 lines
13 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
diff --git a/configure b/configure
index d77a55b..0ece49f 100755
--- a/configure
+++ b/configure
@@ -224,6 +224,7 @@ External library support:
--enable-libcelt enable CELT decoding via libcelt [no]
--enable-libcdio enable audio CD grabbing with libcdio [no]
--enable-libcodec2 enable codec2 en/decoding using libcodec2 [no]
+ --enable-libcuframes enable cuframes CUDA-IPC demuxer via libcuframes [no]
--enable-libdav1d enable AV1 decoding via libdav1d [no]
--enable-libdavs2 enable AVS2 decoding via libdavs2 [no]
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
@@ -1914,6 +1915,7 @@ EXTERNAL_LIBRARY_LIST="
libcaca
libcelt
libcodec2
+ libcuframes
libdav1d
libdc1394
libflite
@@ -3527,6 +3529,7 @@ libdavs2_decoder_select="avs2_parser"
libfdk_aac_decoder_deps="libfdk_aac"
libfdk_aac_encoder_deps="libfdk_aac"
libfdk_aac_encoder_select="audio_frame_queue"
+cuframes_demuxer_deps="libcuframes cuda"
libgme_demuxer_deps="libgme"
libgsm_decoder_deps="libgsm"
libgsm_encoder_deps="libgsm"
@@ -6902,6 +6905,8 @@ enabled libglslang && { check_lib spirv_compiler glslang/Include/glslang_c_inter
require spirv_compiler glslang/Include/glslang_c_interface.h glslang_initialize_process \
-lglslang -lMachineIndependent -lOSDependent -lHLSL -lOGLCompiler -lGenericCodeGen \
-lSPVRemapper -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lpthread -lstdc++ -lm ; }
+enabled libcuframes && require libcuframes cuframes/cuframes.h \
+ cuframes_subscriber_create -lcuframes -lcudart
enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu ||
require libgme gme/gme.h gme_new_emu -lgme -lstdc++; }
enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 7ca68a7..eb10698 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -172,6 +172,7 @@ OBJS-$(CONFIG_CODEC2RAW_DEMUXER) += codec2.o pcm.o
OBJS-$(CONFIG_CODEC2RAW_MUXER) += rawenc.o
OBJS-$(CONFIG_CONCAT_DEMUXER) += concatdec.o
OBJS-$(CONFIG_CRC_MUXER) += crcenc.o
+OBJS-$(CONFIG_CUFRAMES_DEMUXER) += cuframesdec.o
OBJS-$(CONFIG_DATA_DEMUXER) += rawdec.o
OBJS-$(CONFIG_DATA_MUXER) += rawenc.o
OBJS-$(CONFIG_DASH_MUXER) += dash.o dashenc.o hlsplaylist.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 305fa46..871dd40 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -126,6 +126,7 @@ extern const FFInputFormat ff_codec2raw_demuxer;
extern const FFOutputFormat ff_codec2raw_muxer;
extern const FFInputFormat ff_concat_demuxer;
extern const FFOutputFormat ff_crc_muxer;
+extern const FFInputFormat ff_cuframes_demuxer;
extern const FFInputFormat ff_dash_demuxer;
extern const FFOutputFormat ff_dash_muxer;
extern const FFInputFormat ff_data_demuxer;
diff --git a/libavformat/cuframesdec.c b/libavformat/cuframesdec.c
new file mode 100644
index 0000000..b5da5ea
--- /dev/null
+++ b/libavformat/cuframesdec.c
@@ -0,0 +1,265 @@
+/*
+ * cuframes input demuxer для FFmpeg 7.x.
+ *
+ * Принимает URL вида "cuframes://<key>" — подключается к запущенному
+ * cuframes-publisher'у и выдаёт NV12 raw video stream.
+ *
+ * Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
+ * выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
+ * запланировано в v2.
+ *
+ * Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
+ * на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
+ *
+ * Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
+ */
+
+#include "libavformat/avformat.h"
+#include "libavformat/demux.h"
+#include "libavformat/internal.h"
+#include "libavutil/avstring.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include <cuda_runtime.h>
+#include <cuframes/cuframes.h>
+
+typedef struct CuframesDemuxerContext {
+ const AVClass *class_;
+ /* options */
+ int cuda_device;
+ int connect_timeout_ms;
+ /* state */
+ cuframes_subscriber_t *sub;
+ cudaStream_t cuda_stream;
+ void *host_buffer;
+ size_t host_buffer_size;
+ int width;
+ int height;
+ int64_t first_pts_ns;
+ int got_first_pts;
+ /* первый кадр получаем в read_header чтобы узнать размеры,
+ * сохраняем его для первого read_packet */
+ int pending_first_frame;
+ cuframes_frame_t *first_frame;
+} CuframesDemuxerContext;
+
+#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
+#define D AV_OPT_FLAG_DECODING_PARAM
+static const AVOption cuframes_options[] = {
+ { "cuda_device", "CUDA device index (must match publisher's)",
+ OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
+ { "connect_timeout", "wait for publisher (ms); -1 = forever",
+ OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
+ { NULL }
+};
+
+static const AVClass cuframes_demuxer_class = {
+ .class_name = "cuframes demuxer",
+ .item_name = av_default_item_name,
+ .option = cuframes_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
+static const char *parse_key(const char *url)
+{
+ if (av_strstart(url, "cuframes://", &url))
+ return url;
+ if (av_strstart(url, "cuframes:", &url))
+ return url;
+ return url;
+}
+
+static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
+ const cuframes_frame_t *frame,
+ uint8_t *dst)
+{
+ int32_t w = 0, h = 0;
+ cuframes_frame_size(frame, &w, &h);
+ const int32_t pitch_y = cuframes_frame_pitch_y(frame);
+ const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
+ uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
+ uint8_t *y_dev = cu;
+ uint8_t *uv_dev = cu + (size_t)pitch_y * h;
+ uint8_t *y_host = dst;
+ uint8_t *uv_host = dst + (size_t)w * h;
+
+ cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
+ w, h, cudaMemcpyDeviceToHost,
+ c->cuda_stream);
+ if (err == cudaSuccess) {
+ err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
+ w, h / 2, cudaMemcpyDeviceToHost,
+ c->cuda_stream);
+ }
+ if (err == cudaSuccess)
+ err = cudaStreamSynchronize(c->cuda_stream);
+ return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
+}
+
+static int cuframes_read_header(AVFormatContext *s)
+{
+ CuframesDemuxerContext *c = s->priv_data;
+ const char *key = parse_key(s->url);
+ if (!key || !*key) {
+ av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
+ return AVERROR(EINVAL);
+ }
+
+ cudaError_t cerr = cudaSetDevice(c->cuda_device);
+ if (cerr != cudaSuccess) {
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
+ c->cuda_device, cudaGetErrorString(cerr));
+ return AVERROR_EXTERNAL;
+ }
+ cerr = cudaStreamCreate(&c->cuda_stream);
+ if (cerr != cudaSuccess) {
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
+ cudaGetErrorString(cerr));
+ return AVERROR_EXTERNAL;
+ }
+
+ cuframes_subscriber_config_t cfg = {0};
+ cfg.key = key;
+ cfg.consumer_name = NULL; /* auto-generated */
+ cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
+ cfg.cuda_device = c->cuda_device;
+ cfg.connect_timeout_ms = c->connect_timeout_ms;
+
+ int rc = cuframes_subscriber_create(&cfg, &c->sub);
+ if (rc != CUFRAMES_OK) {
+ av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
+ key, cuframes_strerror(rc));
+ return AVERROR_EXTERNAL;
+ }
+
+ /* Получаем первый кадр чтобы узнать width/height. */
+ cuframes_frame_t *frame = NULL;
+ rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
+ if (rc != CUFRAMES_OK || !frame) {
+ av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
+ cuframes_strerror(rc));
+ return AVERROR_EXTERNAL;
+ }
+
+ int32_t w = 0, h = 0;
+ cuframes_frame_size(frame, &w, &h);
+ if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
+ av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
+ (int)cuframes_frame_format(frame));
+ cuframes_subscriber_release(c->sub, frame);
+ return AVERROR(ENOSYS);
+ }
+
+ c->width = w;
+ c->height = h;
+ c->first_pts_ns = cuframes_frame_pts_ns(frame);
+ c->got_first_pts = 1;
+ c->first_frame = frame;
+ c->pending_first_frame = 1;
+
+ AVStream *st = avformat_new_stream(s, NULL);
+ if (!st)
+ return AVERROR(ENOMEM);
+ st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+ st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
+ st->codecpar->format = AV_PIX_FMT_NV12;
+ st->codecpar->width = w;
+ st->codecpar->height = h;
+ st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
+ avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
+
+ av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
+ key, w, h);
+ return 0;
+}
+
+static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+ CuframesDemuxerContext *c = s->priv_data;
+
+ cuframes_frame_t *frame;
+ if (c->pending_first_frame) {
+ frame = c->first_frame;
+ c->first_frame = NULL;
+ c->pending_first_frame = 0;
+ } else {
+ int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
+ if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
+ return AVERROR(EAGAIN);
+ if (rc == CUFRAMES_ERR_DISCONNECTED)
+ return AVERROR_EOF;
+ if (rc != CUFRAMES_OK || !frame) {
+ av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ const size_t need = (size_t)c->width * c->height * 3 / 2;
+ if (need > c->host_buffer_size) {
+ if (c->host_buffer) cudaFreeHost(c->host_buffer);
+ cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
+ if (cerr != cudaSuccess) {
+ cuframes_subscriber_release(c->sub, frame);
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
+ cudaGetErrorString(cerr));
+ return AVERROR(ENOMEM);
+ }
+ c->host_buffer_size = need;
+ }
+
+ int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
+ int64_t pts_ns = cuframes_frame_pts_ns(frame);
+ cuframes_subscriber_release(c->sub, frame);
+ if (err < 0) return err;
+
+ int rc = av_new_packet(pkt, (int)need);
+ if (rc < 0) return rc;
+ memcpy(pkt->data, c->host_buffer, need);
+ pkt->stream_index = 0;
+ pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
+ pkt->flags |= AV_PKT_FLAG_KEY;
+ return 0;
+}
+
+static int cuframes_read_close(AVFormatContext *s)
+{
+ CuframesDemuxerContext *c = s->priv_data;
+ if (c->first_frame) {
+ if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
+ c->first_frame = NULL;
+ }
+ if (c->sub) {
+ cuframes_subscriber_destroy(c->sub);
+ c->sub = NULL;
+ }
+ if (c->cuda_stream) {
+ cudaStreamDestroy(c->cuda_stream);
+ c->cuda_stream = NULL;
+ }
+ if (c->host_buffer) {
+ cudaFreeHost(c->host_buffer);
+ c->host_buffer = NULL;
+ c->host_buffer_size = 0;
+ }
+ return 0;
+}
+
+static int cuframes_probe(const AVProbeData *p)
+{
+ /* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
+ return 0;
+}
+
+const FFInputFormat ff_cuframes_demuxer = {
+ .p.name = "cuframes",
+ .p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
+ .p.flags = AVFMT_NOFILE,
+ .p.priv_class = &cuframes_demuxer_class,
+ .priv_data_size = sizeof(CuframesDemuxerContext),
+ .read_probe = cuframes_probe,
+ .read_header = cuframes_read_header,
+ .read_packet = cuframes_read_packet,
+ .read_close = cuframes_read_close,
+};