Files
gx 99df68f69c feat(filter): FFmpeg 7.1 cuframes:// input demuxer
Out-of-tree patch + sources для FFmpeg-демаксера, который позволяет любому
FFmpeg-based потребителю (Frigate, кастомные рекордеры, re-streamers)
читать "cuframes://<key>" как обычный URL — без своего NVDEC.

Состав:
- filter/cuframesdec.c — реализация (libavformat-style)
- filter/ffmpeg-7.1-cuframes-demuxer.patch — patch для FFmpeg n7.1
  (Makefile / allformats.c / configure)
- filter/README.md — инструкции по сборке + CLI smoke test + Frigate plan

v1 ограничения (намеренно):
- только NV12
- GPU → CPU копия через cudaMemcpy2DAsync (zero-copy AVHWFramesContext — v2)

CLI smoke test 2026-05-17 (host build FFmpeg + libcuframes,
publisher на камере 192.168.88.98 1920x1080 HEVC 25fps):
  ffmpeg -f cuframes -i cuframes://cam-ff -c:v copy -f null -
  → frame=100 fps=25 q=-1.0 speed=1x  ✓
  → "cuframes: connected to 'cam-ff' — 1920x1080 NV12"

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-17 09:02:12 +01:00

266 lines
9.1 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* cuframes input demuxer для FFmpeg 7.x.
*
* Принимает URL вида "cuframes://<key>" — подключается к запущенному
* cuframes-publisher'у и выдаёт NV12 raw video stream.
*
* Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
* выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
* запланировано в v2.
*
* Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
* на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
*
* Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
*/
#include "libavformat/avformat.h"
#include "libavformat/demux.h"
#include "libavformat/internal.h"
#include "libavutil/avstring.h"
#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include <cuda_runtime.h>
#include <cuframes/cuframes.h>
typedef struct CuframesDemuxerContext {
const AVClass *class_;
/* options */
int cuda_device;
int connect_timeout_ms;
/* state */
cuframes_subscriber_t *sub;
cudaStream_t cuda_stream;
void *host_buffer;
size_t host_buffer_size;
int width;
int height;
int64_t first_pts_ns;
int got_first_pts;
/* первый кадр получаем в read_header чтобы узнать размеры,
* сохраняем его для первого read_packet */
int pending_first_frame;
cuframes_frame_t *first_frame;
} CuframesDemuxerContext;
#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
#define D AV_OPT_FLAG_DECODING_PARAM
static const AVOption cuframes_options[] = {
{ "cuda_device", "CUDA device index (must match publisher's)",
OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
{ "connect_timeout", "wait for publisher (ms); -1 = forever",
OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
{ NULL }
};
static const AVClass cuframes_demuxer_class = {
.class_name = "cuframes demuxer",
.item_name = av_default_item_name,
.option = cuframes_options,
.version = LIBAVUTIL_VERSION_INT,
};
/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
static const char *parse_key(const char *url)
{
if (av_strstart(url, "cuframes://", &url))
return url;
if (av_strstart(url, "cuframes:", &url))
return url;
return url;
}
static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
const cuframes_frame_t *frame,
uint8_t *dst)
{
int32_t w = 0, h = 0;
cuframes_frame_size(frame, &w, &h);
const int32_t pitch_y = cuframes_frame_pitch_y(frame);
const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
uint8_t *y_dev = cu;
uint8_t *uv_dev = cu + (size_t)pitch_y * h;
uint8_t *y_host = dst;
uint8_t *uv_host = dst + (size_t)w * h;
cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
w, h, cudaMemcpyDeviceToHost,
c->cuda_stream);
if (err == cudaSuccess) {
err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
w, h / 2, cudaMemcpyDeviceToHost,
c->cuda_stream);
}
if (err == cudaSuccess)
err = cudaStreamSynchronize(c->cuda_stream);
return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
}
static int cuframes_read_header(AVFormatContext *s)
{
CuframesDemuxerContext *c = s->priv_data;
const char *key = parse_key(s->url);
if (!key || !*key) {
av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
return AVERROR(EINVAL);
}
cudaError_t cerr = cudaSetDevice(c->cuda_device);
if (cerr != cudaSuccess) {
av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
c->cuda_device, cudaGetErrorString(cerr));
return AVERROR_EXTERNAL;
}
cerr = cudaStreamCreate(&c->cuda_stream);
if (cerr != cudaSuccess) {
av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
cudaGetErrorString(cerr));
return AVERROR_EXTERNAL;
}
cuframes_subscriber_config_t cfg = {0};
cfg.key = key;
cfg.consumer_name = NULL; /* auto-generated */
cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
cfg.cuda_device = c->cuda_device;
cfg.connect_timeout_ms = c->connect_timeout_ms;
int rc = cuframes_subscriber_create(&cfg, &c->sub);
if (rc != CUFRAMES_OK) {
av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
key, cuframes_strerror(rc));
return AVERROR_EXTERNAL;
}
/* Получаем первый кадр чтобы узнать width/height. */
cuframes_frame_t *frame = NULL;
rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
if (rc != CUFRAMES_OK || !frame) {
av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
cuframes_strerror(rc));
return AVERROR_EXTERNAL;
}
int32_t w = 0, h = 0;
cuframes_frame_size(frame, &w, &h);
if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
(int)cuframes_frame_format(frame));
cuframes_subscriber_release(c->sub, frame);
return AVERROR(ENOSYS);
}
c->width = w;
c->height = h;
c->first_pts_ns = cuframes_frame_pts_ns(frame);
c->got_first_pts = 1;
c->first_frame = frame;
c->pending_first_frame = 1;
AVStream *st = avformat_new_stream(s, NULL);
if (!st)
return AVERROR(ENOMEM);
st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
st->codecpar->format = AV_PIX_FMT_NV12;
st->codecpar->width = w;
st->codecpar->height = h;
st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
key, w, h);
return 0;
}
static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
{
CuframesDemuxerContext *c = s->priv_data;
cuframes_frame_t *frame;
if (c->pending_first_frame) {
frame = c->first_frame;
c->first_frame = NULL;
c->pending_first_frame = 0;
} else {
int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
return AVERROR(EAGAIN);
if (rc == CUFRAMES_ERR_DISCONNECTED)
return AVERROR_EOF;
if (rc != CUFRAMES_OK || !frame) {
av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
return AVERROR_EXTERNAL;
}
}
const size_t need = (size_t)c->width * c->height * 3 / 2;
if (need > c->host_buffer_size) {
if (c->host_buffer) cudaFreeHost(c->host_buffer);
cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
if (cerr != cudaSuccess) {
cuframes_subscriber_release(c->sub, frame);
av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
cudaGetErrorString(cerr));
return AVERROR(ENOMEM);
}
c->host_buffer_size = need;
}
int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
int64_t pts_ns = cuframes_frame_pts_ns(frame);
cuframes_subscriber_release(c->sub, frame);
if (err < 0) return err;
int rc = av_new_packet(pkt, (int)need);
if (rc < 0) return rc;
memcpy(pkt->data, c->host_buffer, need);
pkt->stream_index = 0;
pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
pkt->flags |= AV_PKT_FLAG_KEY;
return 0;
}
static int cuframes_read_close(AVFormatContext *s)
{
CuframesDemuxerContext *c = s->priv_data;
if (c->first_frame) {
if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
c->first_frame = NULL;
}
if (c->sub) {
cuframes_subscriber_destroy(c->sub);
c->sub = NULL;
}
if (c->cuda_stream) {
cudaStreamDestroy(c->cuda_stream);
c->cuda_stream = NULL;
}
if (c->host_buffer) {
cudaFreeHost(c->host_buffer);
c->host_buffer = NULL;
c->host_buffer_size = 0;
}
return 0;
}
static int cuframes_probe(const AVProbeData *p)
{
/* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
return 0;
}
const FFInputFormat ff_cuframes_demuxer = {
.p.name = "cuframes",
.p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
.p.flags = AVFMT_NOFILE,
.p.priv_class = &cuframes_demuxer_class,
.priv_data_size = sizeof(CuframesDemuxerContext),
.read_probe = cuframes_probe,
.read_header = cuframes_read_header,
.read_packet = cuframes_read_packet,
.read_close = cuframes_read_close,
};