feat(filter): FFmpeg 7.1 cuframes:// input demuxer
Out-of-tree patch + sources для FFmpeg-демаксера, который позволяет любому FFmpeg-based потребителю (Frigate, кастомные рекордеры, re-streamers) читать "cuframes://<key>" как обычный URL — без своего NVDEC. Состав: - filter/cuframesdec.c — реализация (libavformat-style) - filter/ffmpeg-7.1-cuframes-demuxer.patch — patch для FFmpeg n7.1 (Makefile / allformats.c / configure) - filter/README.md — инструкции по сборке + CLI smoke test + Frigate plan v1 ограничения (намеренно): - только NV12 - GPU → CPU копия через cudaMemcpy2DAsync (zero-copy AVHWFramesContext — v2) CLI smoke test 2026-05-17 (host build FFmpeg + libcuframes, publisher на камере 192.168.88.98 1920x1080 HEVC 25fps): ffmpeg -f cuframes -i cuframes://cam-ff -c:v copy -f null - → frame=100 fps=25 q=-1.0 speed=1x ✓ → "cuframes: connected to 'cam-ff' — 1920x1080 NV12" Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
# FFmpeg cuframes demuxer
|
||||
|
||||
Custom input format `cuframes://<key>` для FFmpeg 7.x. Подключается к
|
||||
запущенному [cuframes-rtsp-source](../tools/cuframes-rtsp-source/) (или
|
||||
любому другому cuframes publisher'у) и выдаёт NV12 raw video stream.
|
||||
|
||||
**Цель:** позволить FFmpeg-based потребителям (Frigate, custom recorders,
|
||||
re-streamers) подписываться на уже декодированные кадры — без своего NVDEC.
|
||||
Один decode у publisher'а на N consumer'ов.
|
||||
|
||||
## Что в этой директории
|
||||
|
||||
- `cuframesdec.c` — реализация demuxer'а
|
||||
- `ffmpeg-7.1-cuframes-demuxer.patch` — patch для FFmpeg 7.1
|
||||
(правит `libavformat/Makefile`, `libavformat/allformats.c`, `configure`
|
||||
и кладёт `cuframesdec.c` в `libavformat/`)
|
||||
|
||||
## Limitations v1
|
||||
|
||||
- Пиксельный формат: только **NV12** (как нативно отдаёт NVDEC через
|
||||
cuframes-rtsp-source)
|
||||
- Кадры копируются **GPU → CPU** через `cudaMemcpy2DAsync` — для PoC
|
||||
совместимости со всеми FFmpeg-pipeline'ами. Zero-copy через
|
||||
`AVHWFramesContext` — запланировано в v2.
|
||||
- Один input = один stream; multi-stream publisher'ы — v2.
|
||||
|
||||
## Как применить и собрать FFmpeg
|
||||
|
||||
```bash
|
||||
# 1. Клон FFmpeg 7.1
|
||||
git clone --depth 1 --branch n7.1 https://github.com/FFmpeg/FFmpeg.git ffmpeg
|
||||
cd ffmpeg
|
||||
|
||||
# 2. Apply patch
|
||||
patch -p1 < /path/to/cuframes/filter/ffmpeg-7.1-cuframes-demuxer.patch
|
||||
|
||||
# 3. Configure (минимальный пример; для Frigate-сборки используйте полный
|
||||
# набор опций из их Dockerfile + добавьте --enable-libcuframes)
|
||||
./configure \
|
||||
--enable-libcuframes \
|
||||
--extra-cflags='-I/path/to/cuframes/include -I/usr/local/cuda/include' \
|
||||
--extra-ldflags='-L/path/to/cuframes/build/libcuframes -L/usr/local/cuda/lib64' \
|
||||
...прочие опции вашей сборки...
|
||||
|
||||
# 4. Build
|
||||
make -j$(nproc)
|
||||
```
|
||||
|
||||
## CLI smoke test
|
||||
|
||||
```bash
|
||||
# Терминал 1 — publisher (host):
|
||||
cuframes-rtsp-source --rtsp rtsp://... --key cam1 --verbose
|
||||
|
||||
# Терминал 2 — потребитель через нашу патченую FFmpeg:
|
||||
LD_LIBRARY_PATH=/path/to/libcuframes ./ffmpeg \
|
||||
-f cuframes -i cuframes://cam1 \
|
||||
-c:v copy -f null -
|
||||
```
|
||||
|
||||
Должно быть видно:
|
||||
```
|
||||
[cuframes @ ...] cuframes: connected to 'cam1' — 1920x1080 NV12
|
||||
...
|
||||
frame= 100 fps= 25 q=-1.0 size=N/A time=00:00:03.96 speed=1x
|
||||
```
|
||||
|
||||
`speed=1x` — pipeline идёт в реальном времени с частотой камеры.
|
||||
|
||||
## Опции demuxer'а
|
||||
|
||||
```
|
||||
-cuda_device <int> CUDA device index (must match publisher's). Default 0.
|
||||
-connect_timeout <int> Wait for publisher (ms); -1 = forever. Default 5000.
|
||||
```
|
||||
|
||||
## Frigate integration (план)
|
||||
|
||||
После того как Frigate-image пересобран с patched FFmpeg, в `config.yml`:
|
||||
|
||||
```yaml
|
||||
cameras:
|
||||
cam_parking:
|
||||
ffmpeg:
|
||||
inputs:
|
||||
- path: cuframes://cam-parking # detect через cuframes
|
||||
roles: [detect]
|
||||
- path: rtsp://admin:***@cam/main # recording — прямой RTSP
|
||||
roles: [record] # mux без decode, как раньше
|
||||
```
|
||||
|
||||
Detect-стрим декодируется один раз в publisher'е cuframes-rtsp-source,
|
||||
recording по-прежнему mux'ится Frigate'ом из encoded RTSP без decode.
|
||||
|
||||
## Upstream FFmpeg PR (план v2+)
|
||||
|
||||
После стабилизации API (NV12 → universal hwframe NV12 в CUDA context)
|
||||
patch можно подать в upstream FFmpeg. Пока что — out-of-tree patch для
|
||||
custom builds.
|
||||
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
* cuframes input demuxer для FFmpeg 7.x.
|
||||
*
|
||||
* Принимает URL вида "cuframes://<key>" — подключается к запущенному
|
||||
* cuframes-publisher'у и выдаёт NV12 raw video stream.
|
||||
*
|
||||
* Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
|
||||
* выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
|
||||
* запланировано в v2.
|
||||
*
|
||||
* Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
|
||||
* на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
|
||||
*
|
||||
* Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
|
||||
*/
|
||||
|
||||
#include "libavformat/avformat.h"
|
||||
#include "libavformat/demux.h"
|
||||
#include "libavformat/internal.h"
|
||||
#include "libavutil/avstring.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/opt.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuframes/cuframes.h>
|
||||
|
||||
typedef struct CuframesDemuxerContext {
|
||||
const AVClass *class_;
|
||||
/* options */
|
||||
int cuda_device;
|
||||
int connect_timeout_ms;
|
||||
/* state */
|
||||
cuframes_subscriber_t *sub;
|
||||
cudaStream_t cuda_stream;
|
||||
void *host_buffer;
|
||||
size_t host_buffer_size;
|
||||
int width;
|
||||
int height;
|
||||
int64_t first_pts_ns;
|
||||
int got_first_pts;
|
||||
/* первый кадр получаем в read_header чтобы узнать размеры,
|
||||
* сохраняем его для первого read_packet */
|
||||
int pending_first_frame;
|
||||
cuframes_frame_t *first_frame;
|
||||
} CuframesDemuxerContext;
|
||||
|
||||
#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
|
||||
#define D AV_OPT_FLAG_DECODING_PARAM
|
||||
static const AVOption cuframes_options[] = {
|
||||
{ "cuda_device", "CUDA device index (must match publisher's)",
|
||||
OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
|
||||
{ "connect_timeout", "wait for publisher (ms); -1 = forever",
|
||||
OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static const AVClass cuframes_demuxer_class = {
|
||||
.class_name = "cuframes demuxer",
|
||||
.item_name = av_default_item_name,
|
||||
.option = cuframes_options,
|
||||
.version = LIBAVUTIL_VERSION_INT,
|
||||
};
|
||||
|
||||
/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
|
||||
static const char *parse_key(const char *url)
|
||||
{
|
||||
if (av_strstart(url, "cuframes://", &url))
|
||||
return url;
|
||||
if (av_strstart(url, "cuframes:", &url))
|
||||
return url;
|
||||
return url;
|
||||
}
|
||||
|
||||
static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
|
||||
const cuframes_frame_t *frame,
|
||||
uint8_t *dst)
|
||||
{
|
||||
int32_t w = 0, h = 0;
|
||||
cuframes_frame_size(frame, &w, &h);
|
||||
const int32_t pitch_y = cuframes_frame_pitch_y(frame);
|
||||
const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
|
||||
uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
|
||||
uint8_t *y_dev = cu;
|
||||
uint8_t *uv_dev = cu + (size_t)pitch_y * h;
|
||||
uint8_t *y_host = dst;
|
||||
uint8_t *uv_host = dst + (size_t)w * h;
|
||||
|
||||
cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
|
||||
w, h, cudaMemcpyDeviceToHost,
|
||||
c->cuda_stream);
|
||||
if (err == cudaSuccess) {
|
||||
err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
|
||||
w, h / 2, cudaMemcpyDeviceToHost,
|
||||
c->cuda_stream);
|
||||
}
|
||||
if (err == cudaSuccess)
|
||||
err = cudaStreamSynchronize(c->cuda_stream);
|
||||
return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
static int cuframes_read_header(AVFormatContext *s)
|
||||
{
|
||||
CuframesDemuxerContext *c = s->priv_data;
|
||||
const char *key = parse_key(s->url);
|
||||
if (!key || !*key) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
cudaError_t cerr = cudaSetDevice(c->cuda_device);
|
||||
if (cerr != cudaSuccess) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
|
||||
c->cuda_device, cudaGetErrorString(cerr));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
cerr = cudaStreamCreate(&c->cuda_stream);
|
||||
if (cerr != cudaSuccess) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
|
||||
cudaGetErrorString(cerr));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
cuframes_subscriber_config_t cfg = {0};
|
||||
cfg.key = key;
|
||||
cfg.consumer_name = NULL; /* auto-generated */
|
||||
cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
|
||||
cfg.cuda_device = c->cuda_device;
|
||||
cfg.connect_timeout_ms = c->connect_timeout_ms;
|
||||
|
||||
int rc = cuframes_subscriber_create(&cfg, &c->sub);
|
||||
if (rc != CUFRAMES_OK) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
|
||||
key, cuframes_strerror(rc));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
/* Получаем первый кадр чтобы узнать width/height. */
|
||||
cuframes_frame_t *frame = NULL;
|
||||
rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||||
if (rc != CUFRAMES_OK || !frame) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
|
||||
cuframes_strerror(rc));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
int32_t w = 0, h = 0;
|
||||
cuframes_frame_size(frame, &w, &h);
|
||||
if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
|
||||
(int)cuframes_frame_format(frame));
|
||||
cuframes_subscriber_release(c->sub, frame);
|
||||
return AVERROR(ENOSYS);
|
||||
}
|
||||
|
||||
c->width = w;
|
||||
c->height = h;
|
||||
c->first_pts_ns = cuframes_frame_pts_ns(frame);
|
||||
c->got_first_pts = 1;
|
||||
c->first_frame = frame;
|
||||
c->pending_first_frame = 1;
|
||||
|
||||
AVStream *st = avformat_new_stream(s, NULL);
|
||||
if (!st)
|
||||
return AVERROR(ENOMEM);
|
||||
st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
|
||||
st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
|
||||
st->codecpar->format = AV_PIX_FMT_NV12;
|
||||
st->codecpar->width = w;
|
||||
st->codecpar->height = h;
|
||||
st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
|
||||
avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
|
||||
|
||||
av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
|
||||
key, w, h);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||||
{
|
||||
CuframesDemuxerContext *c = s->priv_data;
|
||||
|
||||
cuframes_frame_t *frame;
|
||||
if (c->pending_first_frame) {
|
||||
frame = c->first_frame;
|
||||
c->first_frame = NULL;
|
||||
c->pending_first_frame = 0;
|
||||
} else {
|
||||
int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||||
if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
|
||||
return AVERROR(EAGAIN);
|
||||
if (rc == CUFRAMES_ERR_DISCONNECTED)
|
||||
return AVERROR_EOF;
|
||||
if (rc != CUFRAMES_OK || !frame) {
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
}
|
||||
|
||||
const size_t need = (size_t)c->width * c->height * 3 / 2;
|
||||
if (need > c->host_buffer_size) {
|
||||
if (c->host_buffer) cudaFreeHost(c->host_buffer);
|
||||
cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
|
||||
if (cerr != cudaSuccess) {
|
||||
cuframes_subscriber_release(c->sub, frame);
|
||||
av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
|
||||
cudaGetErrorString(cerr));
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
c->host_buffer_size = need;
|
||||
}
|
||||
|
||||
int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
|
||||
int64_t pts_ns = cuframes_frame_pts_ns(frame);
|
||||
cuframes_subscriber_release(c->sub, frame);
|
||||
if (err < 0) return err;
|
||||
|
||||
int rc = av_new_packet(pkt, (int)need);
|
||||
if (rc < 0) return rc;
|
||||
memcpy(pkt->data, c->host_buffer, need);
|
||||
pkt->stream_index = 0;
|
||||
pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
|
||||
pkt->flags |= AV_PKT_FLAG_KEY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cuframes_read_close(AVFormatContext *s)
|
||||
{
|
||||
CuframesDemuxerContext *c = s->priv_data;
|
||||
if (c->first_frame) {
|
||||
if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
|
||||
c->first_frame = NULL;
|
||||
}
|
||||
if (c->sub) {
|
||||
cuframes_subscriber_destroy(c->sub);
|
||||
c->sub = NULL;
|
||||
}
|
||||
if (c->cuda_stream) {
|
||||
cudaStreamDestroy(c->cuda_stream);
|
||||
c->cuda_stream = NULL;
|
||||
}
|
||||
if (c->host_buffer) {
|
||||
cudaFreeHost(c->host_buffer);
|
||||
c->host_buffer = NULL;
|
||||
c->host_buffer_size = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cuframes_probe(const AVProbeData *p)
|
||||
{
|
||||
/* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
|
||||
return 0;
|
||||
}
|
||||
|
||||
const FFInputFormat ff_cuframes_demuxer = {
|
||||
.p.name = "cuframes",
|
||||
.p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
|
||||
.p.flags = AVFMT_NOFILE,
|
||||
.p.priv_class = &cuframes_demuxer_class,
|
||||
.priv_data_size = sizeof(CuframesDemuxerContext),
|
||||
.read_probe = cuframes_probe,
|
||||
.read_header = cuframes_read_header,
|
||||
.read_packet = cuframes_read_packet,
|
||||
.read_close = cuframes_read_close,
|
||||
};
|
||||
@@ -0,0 +1,332 @@
|
||||
diff --git a/configure b/configure
|
||||
index d77a55b..0ece49f 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -224,6 +224,7 @@ External library support:
|
||||
--enable-libcelt enable CELT decoding via libcelt [no]
|
||||
--enable-libcdio enable audio CD grabbing with libcdio [no]
|
||||
--enable-libcodec2 enable codec2 en/decoding using libcodec2 [no]
|
||||
+ --enable-libcuframes enable cuframes CUDA-IPC demuxer via libcuframes [no]
|
||||
--enable-libdav1d enable AV1 decoding via libdav1d [no]
|
||||
--enable-libdavs2 enable AVS2 decoding via libdavs2 [no]
|
||||
--enable-libdc1394 enable IIDC-1394 grabbing using libdc1394
|
||||
@@ -1914,6 +1915,7 @@ EXTERNAL_LIBRARY_LIST="
|
||||
libcaca
|
||||
libcelt
|
||||
libcodec2
|
||||
+ libcuframes
|
||||
libdav1d
|
||||
libdc1394
|
||||
libflite
|
||||
@@ -3527,6 +3529,7 @@ libdavs2_decoder_select="avs2_parser"
|
||||
libfdk_aac_decoder_deps="libfdk_aac"
|
||||
libfdk_aac_encoder_deps="libfdk_aac"
|
||||
libfdk_aac_encoder_select="audio_frame_queue"
|
||||
+cuframes_demuxer_deps="libcuframes cuda"
|
||||
libgme_demuxer_deps="libgme"
|
||||
libgsm_decoder_deps="libgsm"
|
||||
libgsm_encoder_deps="libgsm"
|
||||
@@ -6902,6 +6905,8 @@ enabled libglslang && { check_lib spirv_compiler glslang/Include/glslang_c_inter
|
||||
require spirv_compiler glslang/Include/glslang_c_interface.h glslang_initialize_process \
|
||||
-lglslang -lMachineIndependent -lOSDependent -lHLSL -lOGLCompiler -lGenericCodeGen \
|
||||
-lSPVRemapper -lSPIRV -lSPIRV-Tools-opt -lSPIRV-Tools -lpthread -lstdc++ -lm ; }
|
||||
+enabled libcuframes && require libcuframes cuframes/cuframes.h \
|
||||
+ cuframes_subscriber_create -lcuframes -lcudart
|
||||
enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu ||
|
||||
require libgme gme/gme.h gme_new_emu -lgme -lstdc++; }
|
||||
enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do
|
||||
diff --git a/libavformat/Makefile b/libavformat/Makefile
|
||||
index 7ca68a7..eb10698 100644
|
||||
--- a/libavformat/Makefile
|
||||
+++ b/libavformat/Makefile
|
||||
@@ -172,6 +172,7 @@ OBJS-$(CONFIG_CODEC2RAW_DEMUXER) += codec2.o pcm.o
|
||||
OBJS-$(CONFIG_CODEC2RAW_MUXER) += rawenc.o
|
||||
OBJS-$(CONFIG_CONCAT_DEMUXER) += concatdec.o
|
||||
OBJS-$(CONFIG_CRC_MUXER) += crcenc.o
|
||||
+OBJS-$(CONFIG_CUFRAMES_DEMUXER) += cuframesdec.o
|
||||
OBJS-$(CONFIG_DATA_DEMUXER) += rawdec.o
|
||||
OBJS-$(CONFIG_DATA_MUXER) += rawenc.o
|
||||
OBJS-$(CONFIG_DASH_MUXER) += dash.o dashenc.o hlsplaylist.o
|
||||
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
|
||||
index 305fa46..871dd40 100644
|
||||
--- a/libavformat/allformats.c
|
||||
+++ b/libavformat/allformats.c
|
||||
@@ -126,6 +126,7 @@ extern const FFInputFormat ff_codec2raw_demuxer;
|
||||
extern const FFOutputFormat ff_codec2raw_muxer;
|
||||
extern const FFInputFormat ff_concat_demuxer;
|
||||
extern const FFOutputFormat ff_crc_muxer;
|
||||
+extern const FFInputFormat ff_cuframes_demuxer;
|
||||
extern const FFInputFormat ff_dash_demuxer;
|
||||
extern const FFOutputFormat ff_dash_muxer;
|
||||
extern const FFInputFormat ff_data_demuxer;
|
||||
diff --git a/libavformat/cuframesdec.c b/libavformat/cuframesdec.c
|
||||
new file mode 100644
|
||||
index 0000000..b5da5ea
|
||||
--- /dev/null
|
||||
+++ b/libavformat/cuframesdec.c
|
||||
@@ -0,0 +1,265 @@
|
||||
+/*
|
||||
+ * cuframes input demuxer для FFmpeg 7.x.
|
||||
+ *
|
||||
+ * Принимает URL вида "cuframes://<key>" — подключается к запущенному
|
||||
+ * cuframes-publisher'у и выдаёт NV12 raw video stream.
|
||||
+ *
|
||||
+ * Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
|
||||
+ * выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
|
||||
+ * запланировано в v2.
|
||||
+ *
|
||||
+ * Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
|
||||
+ * на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
|
||||
+ *
|
||||
+ * Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
|
||||
+ */
|
||||
+
|
||||
+#include "libavformat/avformat.h"
|
||||
+#include "libavformat/demux.h"
|
||||
+#include "libavformat/internal.h"
|
||||
+#include "libavutil/avstring.h"
|
||||
+#include "libavutil/imgutils.h"
|
||||
+#include "libavutil/opt.h"
|
||||
+
|
||||
+#include <cuda_runtime.h>
|
||||
+#include <cuframes/cuframes.h>
|
||||
+
|
||||
+typedef struct CuframesDemuxerContext {
|
||||
+ const AVClass *class_;
|
||||
+ /* options */
|
||||
+ int cuda_device;
|
||||
+ int connect_timeout_ms;
|
||||
+ /* state */
|
||||
+ cuframes_subscriber_t *sub;
|
||||
+ cudaStream_t cuda_stream;
|
||||
+ void *host_buffer;
|
||||
+ size_t host_buffer_size;
|
||||
+ int width;
|
||||
+ int height;
|
||||
+ int64_t first_pts_ns;
|
||||
+ int got_first_pts;
|
||||
+ /* первый кадр получаем в read_header чтобы узнать размеры,
|
||||
+ * сохраняем его для первого read_packet */
|
||||
+ int pending_first_frame;
|
||||
+ cuframes_frame_t *first_frame;
|
||||
+} CuframesDemuxerContext;
|
||||
+
|
||||
+#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
|
||||
+#define D AV_OPT_FLAG_DECODING_PARAM
|
||||
+static const AVOption cuframes_options[] = {
|
||||
+ { "cuda_device", "CUDA device index (must match publisher's)",
|
||||
+ OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
|
||||
+ { "connect_timeout", "wait for publisher (ms); -1 = forever",
|
||||
+ OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
|
||||
+ { NULL }
|
||||
+};
|
||||
+
|
||||
+static const AVClass cuframes_demuxer_class = {
|
||||
+ .class_name = "cuframes demuxer",
|
||||
+ .item_name = av_default_item_name,
|
||||
+ .option = cuframes_options,
|
||||
+ .version = LIBAVUTIL_VERSION_INT,
|
||||
+};
|
||||
+
|
||||
+/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
|
||||
+static const char *parse_key(const char *url)
|
||||
+{
|
||||
+ if (av_strstart(url, "cuframes://", &url))
|
||||
+ return url;
|
||||
+ if (av_strstart(url, "cuframes:", &url))
|
||||
+ return url;
|
||||
+ return url;
|
||||
+}
|
||||
+
|
||||
+static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
|
||||
+ const cuframes_frame_t *frame,
|
||||
+ uint8_t *dst)
|
||||
+{
|
||||
+ int32_t w = 0, h = 0;
|
||||
+ cuframes_frame_size(frame, &w, &h);
|
||||
+ const int32_t pitch_y = cuframes_frame_pitch_y(frame);
|
||||
+ const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
|
||||
+ uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
|
||||
+ uint8_t *y_dev = cu;
|
||||
+ uint8_t *uv_dev = cu + (size_t)pitch_y * h;
|
||||
+ uint8_t *y_host = dst;
|
||||
+ uint8_t *uv_host = dst + (size_t)w * h;
|
||||
+
|
||||
+ cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
|
||||
+ w, h, cudaMemcpyDeviceToHost,
|
||||
+ c->cuda_stream);
|
||||
+ if (err == cudaSuccess) {
|
||||
+ err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
|
||||
+ w, h / 2, cudaMemcpyDeviceToHost,
|
||||
+ c->cuda_stream);
|
||||
+ }
|
||||
+ if (err == cudaSuccess)
|
||||
+ err = cudaStreamSynchronize(c->cuda_stream);
|
||||
+ return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
|
||||
+}
|
||||
+
|
||||
+static int cuframes_read_header(AVFormatContext *s)
|
||||
+{
|
||||
+ CuframesDemuxerContext *c = s->priv_data;
|
||||
+ const char *key = parse_key(s->url);
|
||||
+ if (!key || !*key) {
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
|
||||
+ return AVERROR(EINVAL);
|
||||
+ }
|
||||
+
|
||||
+ cudaError_t cerr = cudaSetDevice(c->cuda_device);
|
||||
+ if (cerr != cudaSuccess) {
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
|
||||
+ c->cuda_device, cudaGetErrorString(cerr));
|
||||
+ return AVERROR_EXTERNAL;
|
||||
+ }
|
||||
+ cerr = cudaStreamCreate(&c->cuda_stream);
|
||||
+ if (cerr != cudaSuccess) {
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
|
||||
+ cudaGetErrorString(cerr));
|
||||
+ return AVERROR_EXTERNAL;
|
||||
+ }
|
||||
+
|
||||
+ cuframes_subscriber_config_t cfg = {0};
|
||||
+ cfg.key = key;
|
||||
+ cfg.consumer_name = NULL; /* auto-generated */
|
||||
+ cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
|
||||
+ cfg.cuda_device = c->cuda_device;
|
||||
+ cfg.connect_timeout_ms = c->connect_timeout_ms;
|
||||
+
|
||||
+ int rc = cuframes_subscriber_create(&cfg, &c->sub);
|
||||
+ if (rc != CUFRAMES_OK) {
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
|
||||
+ key, cuframes_strerror(rc));
|
||||
+ return AVERROR_EXTERNAL;
|
||||
+ }
|
||||
+
|
||||
+ /* Получаем первый кадр чтобы узнать width/height. */
|
||||
+ cuframes_frame_t *frame = NULL;
|
||||
+ rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||||
+ if (rc != CUFRAMES_OK || !frame) {
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
|
||||
+ cuframes_strerror(rc));
|
||||
+ return AVERROR_EXTERNAL;
|
||||
+ }
|
||||
+
|
||||
+ int32_t w = 0, h = 0;
|
||||
+ cuframes_frame_size(frame, &w, &h);
|
||||
+ if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
|
||||
+ (int)cuframes_frame_format(frame));
|
||||
+ cuframes_subscriber_release(c->sub, frame);
|
||||
+ return AVERROR(ENOSYS);
|
||||
+ }
|
||||
+
|
||||
+ c->width = w;
|
||||
+ c->height = h;
|
||||
+ c->first_pts_ns = cuframes_frame_pts_ns(frame);
|
||||
+ c->got_first_pts = 1;
|
||||
+ c->first_frame = frame;
|
||||
+ c->pending_first_frame = 1;
|
||||
+
|
||||
+ AVStream *st = avformat_new_stream(s, NULL);
|
||||
+ if (!st)
|
||||
+ return AVERROR(ENOMEM);
|
||||
+ st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
|
||||
+ st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
|
||||
+ st->codecpar->format = AV_PIX_FMT_NV12;
|
||||
+ st->codecpar->width = w;
|
||||
+ st->codecpar->height = h;
|
||||
+ st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
|
||||
+ avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
|
||||
+
|
||||
+ av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
|
||||
+ key, w, h);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||||
+{
|
||||
+ CuframesDemuxerContext *c = s->priv_data;
|
||||
+
|
||||
+ cuframes_frame_t *frame;
|
||||
+ if (c->pending_first_frame) {
|
||||
+ frame = c->first_frame;
|
||||
+ c->first_frame = NULL;
|
||||
+ c->pending_first_frame = 0;
|
||||
+ } else {
|
||||
+ int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||||
+ if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
|
||||
+ return AVERROR(EAGAIN);
|
||||
+ if (rc == CUFRAMES_ERR_DISCONNECTED)
|
||||
+ return AVERROR_EOF;
|
||||
+ if (rc != CUFRAMES_OK || !frame) {
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
|
||||
+ return AVERROR_EXTERNAL;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ const size_t need = (size_t)c->width * c->height * 3 / 2;
|
||||
+ if (need > c->host_buffer_size) {
|
||||
+ if (c->host_buffer) cudaFreeHost(c->host_buffer);
|
||||
+ cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
|
||||
+ if (cerr != cudaSuccess) {
|
||||
+ cuframes_subscriber_release(c->sub, frame);
|
||||
+ av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
|
||||
+ cudaGetErrorString(cerr));
|
||||
+ return AVERROR(ENOMEM);
|
||||
+ }
|
||||
+ c->host_buffer_size = need;
|
||||
+ }
|
||||
+
|
||||
+ int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
|
||||
+ int64_t pts_ns = cuframes_frame_pts_ns(frame);
|
||||
+ cuframes_subscriber_release(c->sub, frame);
|
||||
+ if (err < 0) return err;
|
||||
+
|
||||
+ int rc = av_new_packet(pkt, (int)need);
|
||||
+ if (rc < 0) return rc;
|
||||
+ memcpy(pkt->data, c->host_buffer, need);
|
||||
+ pkt->stream_index = 0;
|
||||
+ pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
|
||||
+ pkt->flags |= AV_PKT_FLAG_KEY;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int cuframes_read_close(AVFormatContext *s)
|
||||
+{
|
||||
+ CuframesDemuxerContext *c = s->priv_data;
|
||||
+ if (c->first_frame) {
|
||||
+ if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
|
||||
+ c->first_frame = NULL;
|
||||
+ }
|
||||
+ if (c->sub) {
|
||||
+ cuframes_subscriber_destroy(c->sub);
|
||||
+ c->sub = NULL;
|
||||
+ }
|
||||
+ if (c->cuda_stream) {
|
||||
+ cudaStreamDestroy(c->cuda_stream);
|
||||
+ c->cuda_stream = NULL;
|
||||
+ }
|
||||
+ if (c->host_buffer) {
|
||||
+ cudaFreeHost(c->host_buffer);
|
||||
+ c->host_buffer = NULL;
|
||||
+ c->host_buffer_size = 0;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int cuframes_probe(const AVProbeData *p)
|
||||
+{
|
||||
+ /* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+const FFInputFormat ff_cuframes_demuxer = {
|
||||
+ .p.name = "cuframes",
|
||||
+ .p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
|
||||
+ .p.flags = AVFMT_NOFILE,
|
||||
+ .p.priv_class = &cuframes_demuxer_class,
|
||||
+ .priv_data_size = sizeof(CuframesDemuxerContext),
|
||||
+ .read_probe = cuframes_probe,
|
||||
+ .read_header = cuframes_read_header,
|
||||
+ .read_packet = cuframes_read_packet,
|
||||
+ .read_close = cuframes_read_close,
|
||||
+};
|
||||
Reference in New Issue
Block a user