99df68f69c
Out-of-tree patch + sources для FFmpeg-демаксера, который позволяет любому FFmpeg-based потребителю (Frigate, кастомные рекордеры, re-streamers) читать "cuframes://<key>" как обычный URL — без своего NVDEC. Состав: - filter/cuframesdec.c — реализация (libavformat-style) - filter/ffmpeg-7.1-cuframes-demuxer.patch — patch для FFmpeg n7.1 (Makefile / allformats.c / configure) - filter/README.md — инструкции по сборке + CLI smoke test + Frigate plan v1 ограничения (намеренно): - только NV12 - GPU → CPU копия через cudaMemcpy2DAsync (zero-copy AVHWFramesContext — v2) CLI smoke test 2026-05-17 (host build FFmpeg + libcuframes, publisher на камере 192.168.88.98 1920x1080 HEVC 25fps): ffmpeg -f cuframes -i cuframes://cam-ff -c:v copy -f null - → frame=100 fps=25 q=-1.0 speed=1x ✓ → "cuframes: connected to 'cam-ff' — 1920x1080 NV12" Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
333 lines
13 KiB
Diff
333 lines
13 KiB
Diff
diff --git a/configure b/configure
|
||
index d77a55b..0ece49f 100755
|
||
--- a/configure
|
||
+++ b/configure
|
||
@@ -224,6 +224,7 @@ External library support:
|
||
--enable-libcelt enable CELT decoding via libcelt [no]
|
||
--enable-libcdio enable audio CD grabbing with libcdio [no]
|
||
--enable-libcodec2 enable codec2 en/decoding using libcodec2 [no]
|
||
+ --enable-libcuframes enable cuframes CUDA-IPC demuxer via libcuframes [no]
|
||
--enable-libdav1d enable AV1 decoding via libdav1d [no]
|
||
--enable-libdavs2 enable AVS2 decoding via libdavs2 [no]
|
||
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
|
||
@@ -1914,6 +1915,7 @@ EXTERNAL_LIBRARY_LIST="
|
||
libcaca
|
||
libcelt
|
||
libcodec2
|
||
+ libcuframes
|
||
libdav1d
|
||
libdc1394
|
||
libflite
|
||
@@ -3527,6 +3529,7 @@ libdavs2_decoder_select="avs2_parser"
|
||
libfdk_aac_decoder_deps="libfdk_aac"
|
||
libfdk_aac_encoder_deps="libfdk_aac"
|
||
libfdk_aac_encoder_select="audio_frame_queue"
|
||
+cuframes_demuxer_deps="libcuframes cuda"
|
||
libgme_demuxer_deps="libgme"
|
||
libgsm_decoder_deps="libgsm"
|
||
libgsm_encoder_deps="libgsm"
|
||
@@ -6902,6 +6905,8 @@ enabled libglslang && { check_lib spirv_compiler glslang/Include/glslang_c_inter
|
||
require spirv_compiler glslang/Include/glslang_c_interface.h glslang_initialize_process \
|
||
-lglslang -lMachineIndependent -lOSDependent -lHLSL -lOGLCompiler -lGenericCodeGen \
|
||
-lSPVRemapper -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lpthread -lstdc++ -lm ; }
|
||
+enabled libcuframes && require libcuframes cuframes/cuframes.h \
|
||
+ cuframes_subscriber_create -lcuframes -lcudart
|
||
enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu ||
|
||
require libgme gme/gme.h gme_new_emu -lgme -lstdc++; }
|
||
enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do
|
||
diff --git a/libavformat/Makefile b/libavformat/Makefile
|
||
index 7ca68a7..eb10698 100644
|
||
--- a/libavformat/Makefile
|
||
+++ b/libavformat/Makefile
|
||
@@ -172,6 +172,7 @@ OBJS-$(CONFIG_CODEC2RAW_DEMUXER) += codec2.o pcm.o
|
||
OBJS-$(CONFIG_CODEC2RAW_MUXER) += rawenc.o
|
||
OBJS-$(CONFIG_CONCAT_DEMUXER) += concatdec.o
|
||
OBJS-$(CONFIG_CRC_MUXER) += crcenc.o
|
||
+OBJS-$(CONFIG_CUFRAMES_DEMUXER) += cuframesdec.o
|
||
OBJS-$(CONFIG_DATA_DEMUXER) += rawdec.o
|
||
OBJS-$(CONFIG_DATA_MUXER) += rawenc.o
|
||
OBJS-$(CONFIG_DASH_MUXER) += dash.o dashenc.o hlsplaylist.o
|
||
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
|
||
index 305fa46..871dd40 100644
|
||
--- a/libavformat/allformats.c
|
||
+++ b/libavformat/allformats.c
|
||
@@ -126,6 +126,7 @@ extern const FFInputFormat ff_codec2raw_demuxer;
|
||
extern const FFOutputFormat ff_codec2raw_muxer;
|
||
extern const FFInputFormat ff_concat_demuxer;
|
||
extern const FFOutputFormat ff_crc_muxer;
|
||
+extern const FFInputFormat ff_cuframes_demuxer;
|
||
extern const FFInputFormat ff_dash_demuxer;
|
||
extern const FFOutputFormat ff_dash_muxer;
|
||
extern const FFInputFormat ff_data_demuxer;
|
||
diff --git a/libavformat/cuframesdec.c b/libavformat/cuframesdec.c
|
||
new file mode 100644
|
||
index 0000000..b5da5ea
|
||
--- /dev/null
|
||
+++ b/libavformat/cuframesdec.c
|
||
@@ -0,0 +1,265 @@
|
||
+/*
|
||
+ * cuframes input demuxer для FFmpeg 7.x.
|
||
+ *
|
||
+ * Принимает URL вида "cuframes://<key>" — подключается к запущенному
|
||
+ * cuframes-publisher'у и выдаёт NV12 raw video stream.
|
||
+ *
|
||
+ * Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
|
||
+ * выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
|
||
+ * запланировано в v2.
|
||
+ *
|
||
+ * Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
|
||
+ * на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
|
||
+ *
|
||
+ * Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
|
||
+ */
|
||
+
|
||
+#include "libavformat/avformat.h"
|
||
+#include "libavformat/demux.h"
|
||
+#include "libavformat/internal.h"
|
||
+#include "libavutil/avstring.h"
|
||
+#include "libavutil/imgutils.h"
|
||
+#include "libavutil/opt.h"
|
||
+
|
||
+#include <cuda_runtime.h>
|
||
+#include <cuframes/cuframes.h>
|
||
+
|
||
+typedef struct CuframesDemuxerContext {
|
||
+ const AVClass *class_;
|
||
+ /* options */
|
||
+ int cuda_device;
|
||
+ int connect_timeout_ms;
|
||
+ /* state */
|
||
+ cuframes_subscriber_t *sub;
|
||
+ cudaStream_t cuda_stream;
|
||
+ void *host_buffer;
|
||
+ size_t host_buffer_size;
|
||
+ int width;
|
||
+ int height;
|
||
+ int64_t first_pts_ns;
|
||
+ int got_first_pts;
|
||
+ /* первый кадр получаем в read_header чтобы узнать размеры,
|
||
+ * сохраняем его для первого read_packet */
|
||
+ int pending_first_frame;
|
||
+ cuframes_frame_t *first_frame;
|
||
+} CuframesDemuxerContext;
|
||
+
|
||
+#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
|
||
+#define D AV_OPT_FLAG_DECODING_PARAM
|
||
+static const AVOption cuframes_options[] = {
|
||
+ { "cuda_device", "CUDA device index (must match publisher's)",
|
||
+ OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
|
||
+ { "connect_timeout", "wait for publisher (ms); -1 = forever",
|
||
+ OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
|
||
+ { NULL }
|
||
+};
|
||
+
|
||
+static const AVClass cuframes_demuxer_class = {
|
||
+ .class_name = "cuframes demuxer",
|
||
+ .item_name = av_default_item_name,
|
||
+ .option = cuframes_options,
|
||
+ .version = LIBAVUTIL_VERSION_INT,
|
||
+};
|
||
+
|
||
+/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
|
||
+static const char *parse_key(const char *url)
|
||
+{
|
||
+ if (av_strstart(url, "cuframes://", &url))
|
||
+ return url;
|
||
+ if (av_strstart(url, "cuframes:", &url))
|
||
+ return url;
|
||
+ return url;
|
||
+}
|
||
+
|
||
+static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
|
||
+ const cuframes_frame_t *frame,
|
||
+ uint8_t *dst)
|
||
+{
|
||
+ int32_t w = 0, h = 0;
|
||
+ cuframes_frame_size(frame, &w, &h);
|
||
+ const int32_t pitch_y = cuframes_frame_pitch_y(frame);
|
||
+ const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
|
||
+ uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
|
||
+ uint8_t *y_dev = cu;
|
||
+ uint8_t *uv_dev = cu + (size_t)pitch_y * h;
|
||
+ uint8_t *y_host = dst;
|
||
+ uint8_t *uv_host = dst + (size_t)w * h;
|
||
+
|
||
+ cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
|
||
+ w, h, cudaMemcpyDeviceToHost,
|
||
+ c->cuda_stream);
|
||
+ if (err == cudaSuccess) {
|
||
+ err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
|
||
+ w, h / 2, cudaMemcpyDeviceToHost,
|
||
+ c->cuda_stream);
|
||
+ }
|
||
+ if (err == cudaSuccess)
|
||
+ err = cudaStreamSynchronize(c->cuda_stream);
|
||
+ return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
|
||
+}
|
||
+
|
||
+static int cuframes_read_header(AVFormatContext *s)
|
||
+{
|
||
+ CuframesDemuxerContext *c = s->priv_data;
|
||
+ const char *key = parse_key(s->url);
|
||
+ if (!key || !*key) {
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
|
||
+ return AVERROR(EINVAL);
|
||
+ }
|
||
+
|
||
+ cudaError_t cerr = cudaSetDevice(c->cuda_device);
|
||
+ if (cerr != cudaSuccess) {
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
|
||
+ c->cuda_device, cudaGetErrorString(cerr));
|
||
+ return AVERROR_EXTERNAL;
|
||
+ }
|
||
+ cerr = cudaStreamCreate(&c->cuda_stream);
|
||
+ if (cerr != cudaSuccess) {
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
|
||
+ cudaGetErrorString(cerr));
|
||
+ return AVERROR_EXTERNAL;
|
||
+ }
|
||
+
|
||
+ cuframes_subscriber_config_t cfg = {0};
|
||
+ cfg.key = key;
|
||
+ cfg.consumer_name = NULL; /* auto-generated */
|
||
+ cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
|
||
+ cfg.cuda_device = c->cuda_device;
|
||
+ cfg.connect_timeout_ms = c->connect_timeout_ms;
|
||
+
|
||
+ int rc = cuframes_subscriber_create(&cfg, &c->sub);
|
||
+ if (rc != CUFRAMES_OK) {
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
|
||
+ key, cuframes_strerror(rc));
|
||
+ return AVERROR_EXTERNAL;
|
||
+ }
|
||
+
|
||
+ /* Получаем первый кадр чтобы узнать width/height. */
|
||
+ cuframes_frame_t *frame = NULL;
|
||
+ rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||
+ if (rc != CUFRAMES_OK || !frame) {
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
|
||
+ cuframes_strerror(rc));
|
||
+ return AVERROR_EXTERNAL;
|
||
+ }
|
||
+
|
||
+ int32_t w = 0, h = 0;
|
||
+ cuframes_frame_size(frame, &w, &h);
|
||
+ if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
|
||
+ (int)cuframes_frame_format(frame));
|
||
+ cuframes_subscriber_release(c->sub, frame);
|
||
+ return AVERROR(ENOSYS);
|
||
+ }
|
||
+
|
||
+ c->width = w;
|
||
+ c->height = h;
|
||
+ c->first_pts_ns = cuframes_frame_pts_ns(frame);
|
||
+ c->got_first_pts = 1;
|
||
+ c->first_frame = frame;
|
||
+ c->pending_first_frame = 1;
|
||
+
|
||
+ AVStream *st = avformat_new_stream(s, NULL);
|
||
+ if (!st)
|
||
+ return AVERROR(ENOMEM);
|
||
+ st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
|
||
+ st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
|
||
+ st->codecpar->format = AV_PIX_FMT_NV12;
|
||
+ st->codecpar->width = w;
|
||
+ st->codecpar->height = h;
|
||
+ st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
|
||
+ avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
|
||
+
|
||
+ av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
|
||
+ key, w, h);
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||
+{
|
||
+ CuframesDemuxerContext *c = s->priv_data;
|
||
+
|
||
+ cuframes_frame_t *frame;
|
||
+ if (c->pending_first_frame) {
|
||
+ frame = c->first_frame;
|
||
+ c->first_frame = NULL;
|
||
+ c->pending_first_frame = 0;
|
||
+ } else {
|
||
+ int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||
+ if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
|
||
+ return AVERROR(EAGAIN);
|
||
+ if (rc == CUFRAMES_ERR_DISCONNECTED)
|
||
+ return AVERROR_EOF;
|
||
+ if (rc != CUFRAMES_OK || !frame) {
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
|
||
+ return AVERROR_EXTERNAL;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ const size_t need = (size_t)c->width * c->height * 3 / 2;
|
||
+ if (need > c->host_buffer_size) {
|
||
+ if (c->host_buffer) cudaFreeHost(c->host_buffer);
|
||
+ cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
|
||
+ if (cerr != cudaSuccess) {
|
||
+ cuframes_subscriber_release(c->sub, frame);
|
||
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
|
||
+ cudaGetErrorString(cerr));
|
||
+ return AVERROR(ENOMEM);
|
||
+ }
|
||
+ c->host_buffer_size = need;
|
||
+ }
|
||
+
|
||
+ int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
|
||
+ int64_t pts_ns = cuframes_frame_pts_ns(frame);
|
||
+ cuframes_subscriber_release(c->sub, frame);
|
||
+ if (err < 0) return err;
|
||
+
|
||
+ int rc = av_new_packet(pkt, (int)need);
|
||
+ if (rc < 0) return rc;
|
||
+ memcpy(pkt->data, c->host_buffer, need);
|
||
+ pkt->stream_index = 0;
|
||
+ pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
|
||
+ pkt->flags |= AV_PKT_FLAG_KEY;
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+static int cuframes_read_close(AVFormatContext *s)
|
||
+{
|
||
+ CuframesDemuxerContext *c = s->priv_data;
|
||
+ if (c->first_frame) {
|
||
+ if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
|
||
+ c->first_frame = NULL;
|
||
+ }
|
||
+ if (c->sub) {
|
||
+ cuframes_subscriber_destroy(c->sub);
|
||
+ c->sub = NULL;
|
||
+ }
|
||
+ if (c->cuda_stream) {
|
||
+ cudaStreamDestroy(c->cuda_stream);
|
||
+ c->cuda_stream = NULL;
|
||
+ }
|
||
+ if (c->host_buffer) {
|
||
+ cudaFreeHost(c->host_buffer);
|
||
+ c->host_buffer = NULL;
|
||
+ c->host_buffer_size = 0;
|
||
+ }
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+static int cuframes_probe(const AVProbeData *p)
|
||
+{
|
||
+ /* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
|
||
+ return 0;
|
||
+}
|
||
+
|
||
+const FFInputFormat ff_cuframes_demuxer = {
|
||
+ .p.name = "cuframes",
|
||
+ .p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
|
||
+ .p.flags = AVFMT_NOFILE,
|
||
+ .p.priv_class = &cuframes_demuxer_class,
|
||
+ .priv_data_size = sizeof(CuframesDemuxerContext),
|
||
+ .read_probe = cuframes_probe,
|
||
+ .read_header = cuframes_read_header,
|
||
+ .read_packet = cuframes_read_packet,
|
||
+ .read_close = cuframes_read_close,
|
||
+};
|