feat(filter): FFmpeg 7.1 cuframes:// input demuxer (PoC v1) #1

Merged
gx merged 1 commits from feat/ffmpeg-demuxer into main 2026-05-17 09:08:09 +01:00
3 changed files with 696 additions and 0 deletions
+99
View File
@@ -0,0 +1,99 @@
# FFmpeg cuframes demuxer
Custom input format `cuframes://<key>` для FFmpeg 7.x. Подключается к
запущенному [cuframes-rtsp-source](../tools/cuframes-rtsp-source/) (или
любому другому cuframes publisher'у) и выдаёт NV12 raw video stream.
**Цель:** позволить FFmpeg-based потребителям (Frigate, custom recorders,
re-streamers) подписываться на уже декодированные кадры — без своего NVDEC.
Один decode у publisher'а на N consumer'ов.
## Что в этой директории
- `cuframesdec.c` — реализация demuxer'а
- `ffmpeg-7.1-cuframes-demuxer.patch` — patch для FFmpeg 7.1
(правит `libavformat/Makefile`, `libavformat/allformats.c`, `configure`
и кладёт `cuframesdec.c` в `libavformat/`)
## Limitations v1
- Пиксельный формат: только **NV12** (как нативно отдаёт NVDEC через
cuframes-rtsp-source)
- Кадры копируются **GPU → CPU** через `cudaMemcpy2DAsync` — для PoC
совместимости со всеми FFmpeg-pipeline'ами. Zero-copy через
`AVHWFramesContext` — запланировано в v2.
- Один input = один stream; multi-stream publisher'ы — v2.
## Как применить и собрать FFmpeg
```bash
# 1. Клон FFmpeg 7.1
git clone --depth 1 --branch n7.1 https://github.com/FFmpeg/FFmpeg.git ffmpeg
cd ffmpeg
# 2. Apply patch
patch -p1 < /path/to/cuframes/filter/ffmpeg-7.1-cuframes-demuxer.patch
# 3. Configure (минимальный пример; для Frigate-сборки используйте полный
# набор опций из их Dockerfile + добавьте --enable-libcuframes)
./configure \
--enable-libcuframes \
--extra-cflags='-I/path/to/cuframes/include -I/usr/local/cuda/include' \
--extra-ldflags='-L/path/to/cuframes/build/libcuframes -L/usr/local/cuda/lib64' \
...прочие опции вашей сборки...
# 4. Build
make -j$(nproc)
```
## CLI smoke test
```bash
# Терминал 1 — publisher (host):
cuframes-rtsp-source --rtsp rtsp://... --key cam1 --verbose
# Терминал 2 — потребитель через нашу патченую FFmpeg:
LD_LIBRARY_PATH=/path/to/libcuframes ./ffmpeg \
-f cuframes -i cuframes://cam1 \
-c:v copy -f null -
```
Должно быть видно:
```
[cuframes @ ...] cuframes: connected to 'cam1' — 1920x1080 NV12
...
frame= 100 fps= 25 q=-1.0 size=N/A time=00:00:03.96 speed=1x
```
`speed=1x` — pipeline идёт в реальном времени с частотой камеры.
## Опции demuxer'а
```
-cuda_device <int> CUDA device index (must match publisher's). Default 0.
-connect_timeout <int> Wait for publisher (ms); -1 = forever. Default 5000.
```
## Frigate integration (план)
После того как Frigate-image пересобран с patched FFmpeg, в `config.yml`:
```yaml
cameras:
cam_parking:
ffmpeg:
inputs:
- path: cuframes://cam-parking # detect через cuframes
roles: [detect]
- path: rtsp://admin:***@cam/main # recording — прямой RTSP
roles: [record] # mux без decode, как раньше
```
Detect-стрим декодируется один раз в publisher'е cuframes-rtsp-source,
recording по-прежнему mux'ится Frigate'ом из encoded RTSP без decode.
## Upstream FFmpeg PR (план v2+)
После стабилизации API (NV12 → universal hwframe NV12 в CUDA context)
patch можно подать в upstream FFmpeg. Пока что — out-of-tree patch для
custom builds.
+265
View File
@@ -0,0 +1,265 @@
/*
* cuframes input demuxer для FFmpeg 7.x.
*
* Принимает URL вида "cuframes://<key>" — подключается к запущенному
* cuframes-publisher'у и выдаёт NV12 raw video stream.
*
* Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
* выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
* запланировано в v2.
*
* Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
* на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
*
* Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
*/
#include "libavformat/avformat.h"
#include "libavformat/demux.h"
#include "libavformat/internal.h"
#include "libavutil/avstring.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include <cuda_runtime.h>
#include <cuframes/cuframes.h>
typedef struct CuframesDemuxerContext {
const AVClass *class_;
/* options */
int cuda_device;
int connect_timeout_ms;
/* state */
cuframes_subscriber_t *sub;
cudaStream_t cuda_stream;
void *host_buffer;
size_t host_buffer_size;
int width;
int height;
int64_t first_pts_ns;
int got_first_pts;
/* первый кадр получаем в read_header чтобы узнать размеры,
* сохраняем его для первого read_packet */
int pending_first_frame;
cuframes_frame_t *first_frame;
} CuframesDemuxerContext;
#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
#define D AV_OPT_FLAG_DECODING_PARAM
static const AVOption cuframes_options[] = {
{ "cuda_device", "CUDA device index (must match publisher's)",
OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
{ "connect_timeout", "wait for publisher (ms); -1 = forever",
OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
{ NULL }
};
static const AVClass cuframes_demuxer_class = {
.class_name = "cuframes demuxer",
.item_name = av_default_item_name,
.option = cuframes_options,
.version = LIBAVUTIL_VERSION_INT,
};
/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
static const char *parse_key(const char *url)
{
if (av_strstart(url, "cuframes://", &url))
return url;
if (av_strstart(url, "cuframes:", &url))
return url;
return url;
}
static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
const cuframes_frame_t *frame,
uint8_t *dst)
{
int32_t w = 0, h = 0;
cuframes_frame_size(frame, &w, &h);
const int32_t pitch_y = cuframes_frame_pitch_y(frame);
const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
uint8_t *y_dev = cu;
uint8_t *uv_dev = cu + (size_t)pitch_y * h;
uint8_t *y_host = dst;
uint8_t *uv_host = dst + (size_t)w * h;
cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
w, h, cudaMemcpyDeviceToHost,
c->cuda_stream);
if (err == cudaSuccess) {
err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
w, h / 2, cudaMemcpyDeviceToHost,
c->cuda_stream);
}
if (err == cudaSuccess)
err = cudaStreamSynchronize(c->cuda_stream);
return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
}
static int cuframes_read_header(AVFormatContext *s)
{
CuframesDemuxerContext *c = s->priv_data;
const char *key = parse_key(s->url);
if (!key || !*key) {
av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
return AVERROR(EINVAL);
}
cudaError_t cerr = cudaSetDevice(c->cuda_device);
if (cerr != cudaSuccess) {
av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
c->cuda_device, cudaGetErrorString(cerr));
return AVERROR_EXTERNAL;
}
cerr = cudaStreamCreate(&c->cuda_stream);
if (cerr != cudaSuccess) {
av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
cudaGetErrorString(cerr));
return AVERROR_EXTERNAL;
}
cuframes_subscriber_config_t cfg = {0};
cfg.key = key;
cfg.consumer_name = NULL; /* auto-generated */
cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
cfg.cuda_device = c->cuda_device;
cfg.connect_timeout_ms = c->connect_timeout_ms;
int rc = cuframes_subscriber_create(&cfg, &c->sub);
if (rc != CUFRAMES_OK) {
av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
key, cuframes_strerror(rc));
return AVERROR_EXTERNAL;
}
/* Получаем первый кадр чтобы узнать width/height. */
cuframes_frame_t *frame = NULL;
rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
if (rc != CUFRAMES_OK || !frame) {
av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
cuframes_strerror(rc));
return AVERROR_EXTERNAL;
}
int32_t w = 0, h = 0;
cuframes_frame_size(frame, &w, &h);
if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
(int)cuframes_frame_format(frame));
cuframes_subscriber_release(c->sub, frame);
return AVERROR(ENOSYS);
}
c->width = w;
c->height = h;
c->first_pts_ns = cuframes_frame_pts_ns(frame);
c->got_first_pts = 1;
c->first_frame = frame;
c->pending_first_frame = 1;
AVStream *st = avformat_new_stream(s, NULL);
if (!st)
return AVERROR(ENOMEM);
st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
st->codecpar->format = AV_PIX_FMT_NV12;
st->codecpar->width = w;
st->codecpar->height = h;
st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
key, w, h);
return 0;
}
static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
{
CuframesDemuxerContext *c = s->priv_data;
cuframes_frame_t *frame;
if (c->pending_first_frame) {
frame = c->first_frame;
c->first_frame = NULL;
c->pending_first_frame = 0;
} else {
int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
return AVERROR(EAGAIN);
if (rc == CUFRAMES_ERR_DISCONNECTED)
return AVERROR_EOF;
if (rc != CUFRAMES_OK || !frame) {
av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
return AVERROR_EXTERNAL;
}
}
const size_t need = (size_t)c->width * c->height * 3 / 2;
if (need > c->host_buffer_size) {
if (c->host_buffer) cudaFreeHost(c->host_buffer);
cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
if (cerr != cudaSuccess) {
cuframes_subscriber_release(c->sub, frame);
av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
cudaGetErrorString(cerr));
return AVERROR(ENOMEM);
}
c->host_buffer_size = need;
}
int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
int64_t pts_ns = cuframes_frame_pts_ns(frame);
cuframes_subscriber_release(c->sub, frame);
if (err < 0) return err;
int rc = av_new_packet(pkt, (int)need);
if (rc < 0) return rc;
memcpy(pkt->data, c->host_buffer, need);
pkt->stream_index = 0;
pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
pkt->flags |= AV_PKT_FLAG_KEY;
return 0;
}
static int cuframes_read_close(AVFormatContext *s)
{
CuframesDemuxerContext *c = s->priv_data;
if (c->first_frame) {
if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
c->first_frame = NULL;
}
if (c->sub) {
cuframes_subscriber_destroy(c->sub);
c->sub = NULL;
}
if (c->cuda_stream) {
cudaStreamDestroy(c->cuda_stream);
c->cuda_stream = NULL;
}
if (c->host_buffer) {
cudaFreeHost(c->host_buffer);
c->host_buffer = NULL;
c->host_buffer_size = 0;
}
return 0;
}
static int cuframes_probe(const AVProbeData *p)
{
/* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
return 0;
}
const FFInputFormat ff_cuframes_demuxer = {
.p.name = "cuframes",
.p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
.p.flags = AVFMT_NOFILE,
.p.priv_class = &cuframes_demuxer_class,
.priv_data_size = sizeof(CuframesDemuxerContext),
.read_probe = cuframes_probe,
.read_header = cuframes_read_header,
.read_packet = cuframes_read_packet,
.read_close = cuframes_read_close,
};
+332
View File
@@ -0,0 +1,332 @@
diff --git a/configure b/configure
index d77a55b..0ece49f 100755
--- a/configure
+++ b/configure
@@ -224,6 +224,7 @@ External library support:
--enable-libcelt enable CELT decoding via libcelt [no]
--enable-libcdio enable audio CD grabbing with libcdio [no]
--enable-libcodec2 enable codec2 en/decoding using libcodec2 [no]
+ --enable-libcuframes enable cuframes CUDA-IPC demuxer via libcuframes [no]
--enable-libdav1d enable AV1 decoding via libdav1d [no]
--enable-libdavs2 enable AVS2 decoding via libdavs2 [no]
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
@@ -1914,6 +1915,7 @@ EXTERNAL_LIBRARY_LIST="
libcaca
libcelt
libcodec2
+ libcuframes
libdav1d
libdc1394
libflite
@@ -3527,6 +3529,7 @@ libdavs2_decoder_select="avs2_parser"
libfdk_aac_decoder_deps="libfdk_aac"
libfdk_aac_encoder_deps="libfdk_aac"
libfdk_aac_encoder_select="audio_frame_queue"
+cuframes_demuxer_deps="libcuframes cuda"
libgme_demuxer_deps="libgme"
libgsm_decoder_deps="libgsm"
libgsm_encoder_deps="libgsm"
@@ -6902,6 +6905,8 @@ enabled libglslang && { check_lib spirv_compiler glslang/Include/glslang_c_inter
require spirv_compiler glslang/Include/glslang_c_interface.h glslang_initialize_process \
-lglslang -lMachineIndependent -lOSDependent -lHLSL -lOGLCompiler -lGenericCodeGen \
-lSPVRemapper -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lpthread -lstdc++ -lm ; }
+enabled libcuframes && require libcuframes cuframes/cuframes.h \
+ cuframes_subscriber_create -lcuframes -lcudart
enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu ||
require libgme gme/gme.h gme_new_emu -lgme -lstdc++; }
enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do
diff --git a/libavformat/Makefile b/libavformat/Makefile
index 7ca68a7..eb10698 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -172,6 +172,7 @@ OBJS-$(CONFIG_CODEC2RAW_DEMUXER) += codec2.o pcm.o
OBJS-$(CONFIG_CODEC2RAW_MUXER) += rawenc.o
OBJS-$(CONFIG_CONCAT_DEMUXER) += concatdec.o
OBJS-$(CONFIG_CRC_MUXER) += crcenc.o
+OBJS-$(CONFIG_CUFRAMES_DEMUXER) += cuframesdec.o
OBJS-$(CONFIG_DATA_DEMUXER) += rawdec.o
OBJS-$(CONFIG_DATA_MUXER) += rawenc.o
OBJS-$(CONFIG_DASH_MUXER) += dash.o dashenc.o hlsplaylist.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 305fa46..871dd40 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -126,6 +126,7 @@ extern const FFInputFormat ff_codec2raw_demuxer;
extern const FFOutputFormat ff_codec2raw_muxer;
extern const FFInputFormat ff_concat_demuxer;
extern const FFOutputFormat ff_crc_muxer;
+extern const FFInputFormat ff_cuframes_demuxer;
extern const FFInputFormat ff_dash_demuxer;
extern const FFOutputFormat ff_dash_muxer;
extern const FFInputFormat ff_data_demuxer;
diff --git a/libavformat/cuframesdec.c b/libavformat/cuframesdec.c
new file mode 100644
index 0000000..b5da5ea
--- /dev/null
+++ b/libavformat/cuframesdec.c
@@ -0,0 +1,265 @@
+/*
+ * cuframes input demuxer для FFmpeg 7.x.
+ *
+ * Принимает URL вида "cuframes://<key>" — подключается к запущенному
+ * cuframes-publisher'у и выдаёт NV12 raw video stream.
+ *
+ * Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
+ * выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
+ * запланировано в v2.
+ *
+ * Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
+ * на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
+ *
+ * Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
+ */
+
+#include "libavformat/avformat.h"
+#include "libavformat/demux.h"
+#include "libavformat/internal.h"
+#include "libavutil/avstring.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include <cuda_runtime.h>
+#include <cuframes/cuframes.h>
+
+typedef struct CuframesDemuxerContext {
+ const AVClass *class_;
+ /* options */
+ int cuda_device;
+ int connect_timeout_ms;
+ /* state */
+ cuframes_subscriber_t *sub;
+ cudaStream_t cuda_stream;
+ void *host_buffer;
+ size_t host_buffer_size;
+ int width;
+ int height;
+ int64_t first_pts_ns;
+ int got_first_pts;
+ /* первый кадр получаем в read_header чтобы узнать размеры,
+ * сохраняем его для первого read_packet */
+ int pending_first_frame;
+ cuframes_frame_t *first_frame;
+} CuframesDemuxerContext;
+
+#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
+#define D AV_OPT_FLAG_DECODING_PARAM
+static const AVOption cuframes_options[] = {
+ { "cuda_device", "CUDA device index (must match publisher's)",
+ OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
+ { "connect_timeout", "wait for publisher (ms); -1 = forever",
+ OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
+ { NULL }
+};
+
+static const AVClass cuframes_demuxer_class = {
+ .class_name = "cuframes demuxer",
+ .item_name = av_default_item_name,
+ .option = cuframes_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
+static const char *parse_key(const char *url)
+{
+ if (av_strstart(url, "cuframes://", &url))
+ return url;
+ if (av_strstart(url, "cuframes:", &url))
+ return url;
+ return url;
+}
+
+static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
+ const cuframes_frame_t *frame,
+ uint8_t *dst)
+{
+ int32_t w = 0, h = 0;
+ cuframes_frame_size(frame, &w, &h);
+ const int32_t pitch_y = cuframes_frame_pitch_y(frame);
+ const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
+ uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
+ uint8_t *y_dev = cu;
+ uint8_t *uv_dev = cu + (size_t)pitch_y * h;
+ uint8_t *y_host = dst;
+ uint8_t *uv_host = dst + (size_t)w * h;
+
+ cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
+ w, h, cudaMemcpyDeviceToHost,
+ c->cuda_stream);
+ if (err == cudaSuccess) {
+ err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
+ w, h / 2, cudaMemcpyDeviceToHost,
+ c->cuda_stream);
+ }
+ if (err == cudaSuccess)
+ err = cudaStreamSynchronize(c->cuda_stream);
+ return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
+}
+
+static int cuframes_read_header(AVFormatContext *s)
+{
+ CuframesDemuxerContext *c = s->priv_data;
+ const char *key = parse_key(s->url);
+ if (!key || !*key) {
+ av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
+ return AVERROR(EINVAL);
+ }
+
+ cudaError_t cerr = cudaSetDevice(c->cuda_device);
+ if (cerr != cudaSuccess) {
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
+ c->cuda_device, cudaGetErrorString(cerr));
+ return AVERROR_EXTERNAL;
+ }
+ cerr = cudaStreamCreate(&c->cuda_stream);
+ if (cerr != cudaSuccess) {
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
+ cudaGetErrorString(cerr));
+ return AVERROR_EXTERNAL;
+ }
+
+ cuframes_subscriber_config_t cfg = {0};
+ cfg.key = key;
+ cfg.consumer_name = NULL; /* auto-generated */
+ cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
+ cfg.cuda_device = c->cuda_device;
+ cfg.connect_timeout_ms = c->connect_timeout_ms;
+
+ int rc = cuframes_subscriber_create(&cfg, &c->sub);
+ if (rc != CUFRAMES_OK) {
+ av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
+ key, cuframes_strerror(rc));
+ return AVERROR_EXTERNAL;
+ }
+
+ /* Получаем первый кадр чтобы узнать width/height. */
+ cuframes_frame_t *frame = NULL;
+ rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
+ if (rc != CUFRAMES_OK || !frame) {
+ av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
+ cuframes_strerror(rc));
+ return AVERROR_EXTERNAL;
+ }
+
+ int32_t w = 0, h = 0;
+ cuframes_frame_size(frame, &w, &h);
+ if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
+ av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
+ (int)cuframes_frame_format(frame));
+ cuframes_subscriber_release(c->sub, frame);
+ return AVERROR(ENOSYS);
+ }
+
+ c->width = w;
+ c->height = h;
+ c->first_pts_ns = cuframes_frame_pts_ns(frame);
+ c->got_first_pts = 1;
+ c->first_frame = frame;
+ c->pending_first_frame = 1;
+
+ AVStream *st = avformat_new_stream(s, NULL);
+ if (!st)
+ return AVERROR(ENOMEM);
+ st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+ st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
+ st->codecpar->format = AV_PIX_FMT_NV12;
+ st->codecpar->width = w;
+ st->codecpar->height = h;
+ st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
+ avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
+
+ av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
+ key, w, h);
+ return 0;
+}
+
+static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+ CuframesDemuxerContext *c = s->priv_data;
+
+ cuframes_frame_t *frame;
+ if (c->pending_first_frame) {
+ frame = c->first_frame;
+ c->first_frame = NULL;
+ c->pending_first_frame = 0;
+ } else {
+ int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
+ if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
+ return AVERROR(EAGAIN);
+ if (rc == CUFRAMES_ERR_DISCONNECTED)
+ return AVERROR_EOF;
+ if (rc != CUFRAMES_OK || !frame) {
+ av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ const size_t need = (size_t)c->width * c->height * 3 / 2;
+ if (need > c->host_buffer_size) {
+ if (c->host_buffer) cudaFreeHost(c->host_buffer);
+ cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
+ if (cerr != cudaSuccess) {
+ cuframes_subscriber_release(c->sub, frame);
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
+ cudaGetErrorString(cerr));
+ return AVERROR(ENOMEM);
+ }
+ c->host_buffer_size = need;
+ }
+
+ int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
+ int64_t pts_ns = cuframes_frame_pts_ns(frame);
+ cuframes_subscriber_release(c->sub, frame);
+ if (err < 0) return err;
+
+ int rc = av_new_packet(pkt, (int)need);
+ if (rc < 0) return rc;
+ memcpy(pkt->data, c->host_buffer, need);
+ pkt->stream_index = 0;
+ pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
+ pkt->flags |= AV_PKT_FLAG_KEY;
+ return 0;
+}
+
+static int cuframes_read_close(AVFormatContext *s)
+{
+ CuframesDemuxerContext *c = s->priv_data;
+ if (c->first_frame) {
+ if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
+ c->first_frame = NULL;
+ }
+ if (c->sub) {
+ cuframes_subscriber_destroy(c->sub);
+ c->sub = NULL;
+ }
+ if (c->cuda_stream) {
+ cudaStreamDestroy(c->cuda_stream);
+ c->cuda_stream = NULL;
+ }
+ if (c->host_buffer) {
+ cudaFreeHost(c->host_buffer);
+ c->host_buffer = NULL;
+ c->host_buffer_size = 0;
+ }
+ return 0;
+}
+
+static int cuframes_probe(const AVProbeData *p)
+{
+ /* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
+ return 0;
+}
+
+const FFInputFormat ff_cuframes_demuxer = {
+ .p.name = "cuframes",
+ .p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
+ .p.flags = AVFMT_NOFILE,
+ .p.priv_class = &cuframes_demuxer_class,
+ .priv_data_size = sizeof(CuframesDemuxerContext),
+ .read_probe = cuframes_probe,
+ .read_header = cuframes_read_header,
+ .read_packet = cuframes_read_packet,
+ .read_close = cuframes_read_close,
+};