dcecd42de4
Snapshot FFmpeg n7.1 (release tag) с применённым patch'ем для cuframes input format. Используется как FFMPEG_REPO_OVERRIDE в NickM-27/FFmpeg-Builds fork для статической сборки patched binary под Frigate (Debian 12 / glibc 2.36). Apply changes: + libavformat/cuframesdec.c (новый — реализация демуксера) M libavformat/Makefile (CONFIG_CUFRAMES_DEMUXER target) M libavformat/allformats.c (extern declaration) M configure (--enable-libcuframes option + dep check) Patch source: https://git.goldix.org/gx/cuframes (filter/ffmpeg-7.1-cuframes-demuxer.patch) History сброшена (snapshot вместо fork) потому что upstream shallow clone не позволял push в gitea. Полная история FFmpeg — на github.com/FFmpeg/FFmpeg n7.1.
266 lines
9.1 KiB
C
266 lines
9.1 KiB
C
/*
|
||
* cuframes input demuxer для FFmpeg 7.x.
|
||
*
|
||
* Принимает URL вида "cuframes://<key>" — подключается к запущенному
|
||
* cuframes-publisher'у и выдаёт NV12 raw video stream.
|
||
*
|
||
* Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем
|
||
* выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx —
|
||
* запланировано в v2.
|
||
*
|
||
* Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а
|
||
* на N consumer'ов), пусть и с GPU↔CPU round-trip'ом.
|
||
*
|
||
* Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds)
|
||
*/
|
||
|
||
#include "libavformat/avformat.h"
|
||
#include "libavformat/demux.h"
|
||
#include "libavformat/internal.h"
|
||
#include "libavutil/avstring.h"
|
||
#include "libavutil/imgutils.h"
|
||
#include "libavutil/opt.h"
|
||
|
||
#include <cuda_runtime.h>
|
||
#include <cuframes/cuframes.h>
|
||
|
||
typedef struct CuframesDemuxerContext {
|
||
const AVClass *class_;
|
||
/* options */
|
||
int cuda_device;
|
||
int connect_timeout_ms;
|
||
/* state */
|
||
cuframes_subscriber_t *sub;
|
||
cudaStream_t cuda_stream;
|
||
void *host_buffer;
|
||
size_t host_buffer_size;
|
||
int width;
|
||
int height;
|
||
int64_t first_pts_ns;
|
||
int got_first_pts;
|
||
/* первый кадр получаем в read_header чтобы узнать размеры,
|
||
* сохраняем его для первого read_packet */
|
||
int pending_first_frame;
|
||
cuframes_frame_t *first_frame;
|
||
} CuframesDemuxerContext;
|
||
|
||
#define OFFSET(x) offsetof(CuframesDemuxerContext, x)
|
||
#define D AV_OPT_FLAG_DECODING_PARAM
|
||
static const AVOption cuframes_options[] = {
|
||
{ "cuda_device", "CUDA device index (must match publisher's)",
|
||
OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D },
|
||
{ "connect_timeout", "wait for publisher (ms); -1 = forever",
|
||
OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D },
|
||
{ NULL }
|
||
};
|
||
|
||
static const AVClass cuframes_demuxer_class = {
|
||
.class_name = "cuframes demuxer",
|
||
.item_name = av_default_item_name,
|
||
.option = cuframes_options,
|
||
.version = LIBAVUTIL_VERSION_INT,
|
||
};
|
||
|
||
/* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */
|
||
static const char *parse_key(const char *url)
|
||
{
|
||
if (av_strstart(url, "cuframes://", &url))
|
||
return url;
|
||
if (av_strstart(url, "cuframes:", &url))
|
||
return url;
|
||
return url;
|
||
}
|
||
|
||
static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c,
|
||
const cuframes_frame_t *frame,
|
||
uint8_t *dst)
|
||
{
|
||
int32_t w = 0, h = 0;
|
||
cuframes_frame_size(frame, &w, &h);
|
||
const int32_t pitch_y = cuframes_frame_pitch_y(frame);
|
||
const int32_t pitch_uv = cuframes_frame_pitch_uv(frame);
|
||
uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame);
|
||
uint8_t *y_dev = cu;
|
||
uint8_t *uv_dev = cu + (size_t)pitch_y * h;
|
||
uint8_t *y_host = dst;
|
||
uint8_t *uv_host = dst + (size_t)w * h;
|
||
|
||
cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y,
|
||
w, h, cudaMemcpyDeviceToHost,
|
||
c->cuda_stream);
|
||
if (err == cudaSuccess) {
|
||
err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv,
|
||
w, h / 2, cudaMemcpyDeviceToHost,
|
||
c->cuda_stream);
|
||
}
|
||
if (err == cudaSuccess)
|
||
err = cudaStreamSynchronize(c->cuda_stream);
|
||
return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL;
|
||
}
|
||
|
||
static int cuframes_read_header(AVFormatContext *s)
|
||
{
|
||
CuframesDemuxerContext *c = s->priv_data;
|
||
const char *key = parse_key(s->url);
|
||
if (!key || !*key) {
|
||
av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url);
|
||
return AVERROR(EINVAL);
|
||
}
|
||
|
||
cudaError_t cerr = cudaSetDevice(c->cuda_device);
|
||
if (cerr != cudaSuccess) {
|
||
av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n",
|
||
c->cuda_device, cudaGetErrorString(cerr));
|
||
return AVERROR_EXTERNAL;
|
||
}
|
||
cerr = cudaStreamCreate(&c->cuda_stream);
|
||
if (cerr != cudaSuccess) {
|
||
av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n",
|
||
cudaGetErrorString(cerr));
|
||
return AVERROR_EXTERNAL;
|
||
}
|
||
|
||
cuframes_subscriber_config_t cfg = {0};
|
||
cfg.key = key;
|
||
cfg.consumer_name = NULL; /* auto-generated */
|
||
cfg.mode = CUFRAMES_MODE_NEWEST_ONLY;
|
||
cfg.cuda_device = c->cuda_device;
|
||
cfg.connect_timeout_ms = c->connect_timeout_ms;
|
||
|
||
int rc = cuframes_subscriber_create(&cfg, &c->sub);
|
||
if (rc != CUFRAMES_OK) {
|
||
av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n",
|
||
key, cuframes_strerror(rc));
|
||
return AVERROR_EXTERNAL;
|
||
}
|
||
|
||
/* Получаем первый кадр чтобы узнать width/height. */
|
||
cuframes_frame_t *frame = NULL;
|
||
rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||
if (rc != CUFRAMES_OK || !frame) {
|
||
av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n",
|
||
cuframes_strerror(rc));
|
||
return AVERROR_EXTERNAL;
|
||
}
|
||
|
||
int32_t w = 0, h = 0;
|
||
cuframes_frame_size(frame, &w, &h);
|
||
if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) {
|
||
av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n",
|
||
(int)cuframes_frame_format(frame));
|
||
cuframes_subscriber_release(c->sub, frame);
|
||
return AVERROR(ENOSYS);
|
||
}
|
||
|
||
c->width = w;
|
||
c->height = h;
|
||
c->first_pts_ns = cuframes_frame_pts_ns(frame);
|
||
c->got_first_pts = 1;
|
||
c->first_frame = frame;
|
||
c->pending_first_frame = 1;
|
||
|
||
AVStream *st = avformat_new_stream(s, NULL);
|
||
if (!st)
|
||
return AVERROR(ENOMEM);
|
||
st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
|
||
st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
|
||
st->codecpar->format = AV_PIX_FMT_NV12;
|
||
st->codecpar->width = w;
|
||
st->codecpar->height = h;
|
||
st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2');
|
||
avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */
|
||
|
||
av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n",
|
||
key, w, h);
|
||
return 0;
|
||
}
|
||
|
||
static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt)
|
||
{
|
||
CuframesDemuxerContext *c = s->priv_data;
|
||
|
||
cuframes_frame_t *frame;
|
||
if (c->pending_first_frame) {
|
||
frame = c->first_frame;
|
||
c->first_frame = NULL;
|
||
c->pending_first_frame = 0;
|
||
} else {
|
||
int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000);
|
||
if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK)
|
||
return AVERROR(EAGAIN);
|
||
if (rc == CUFRAMES_ERR_DISCONNECTED)
|
||
return AVERROR_EOF;
|
||
if (rc != CUFRAMES_OK || !frame) {
|
||
av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc));
|
||
return AVERROR_EXTERNAL;
|
||
}
|
||
}
|
||
|
||
const size_t need = (size_t)c->width * c->height * 3 / 2;
|
||
if (need > c->host_buffer_size) {
|
||
if (c->host_buffer) cudaFreeHost(c->host_buffer);
|
||
cudaError_t cerr = cudaMallocHost(&c->host_buffer, need);
|
||
if (cerr != cudaSuccess) {
|
||
cuframes_subscriber_release(c->sub, frame);
|
||
av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n",
|
||
cudaGetErrorString(cerr));
|
||
return AVERROR(ENOMEM);
|
||
}
|
||
c->host_buffer_size = need;
|
||
}
|
||
|
||
int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer);
|
||
int64_t pts_ns = cuframes_frame_pts_ns(frame);
|
||
cuframes_subscriber_release(c->sub, frame);
|
||
if (err < 0) return err;
|
||
|
||
int rc = av_new_packet(pkt, (int)need);
|
||
if (rc < 0) return rc;
|
||
memcpy(pkt->data, c->host_buffer, need);
|
||
pkt->stream_index = 0;
|
||
pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */
|
||
pkt->flags |= AV_PKT_FLAG_KEY;
|
||
return 0;
|
||
}
|
||
|
||
static int cuframes_read_close(AVFormatContext *s)
|
||
{
|
||
CuframesDemuxerContext *c = s->priv_data;
|
||
if (c->first_frame) {
|
||
if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame);
|
||
c->first_frame = NULL;
|
||
}
|
||
if (c->sub) {
|
||
cuframes_subscriber_destroy(c->sub);
|
||
c->sub = NULL;
|
||
}
|
||
if (c->cuda_stream) {
|
||
cudaStreamDestroy(c->cuda_stream);
|
||
c->cuda_stream = NULL;
|
||
}
|
||
if (c->host_buffer) {
|
||
cudaFreeHost(c->host_buffer);
|
||
c->host_buffer = NULL;
|
||
c->host_buffer_size = 0;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
static int cuframes_probe(const AVProbeData *p)
|
||
{
|
||
/* URL-based protocol — probe не используется, см. AVFMT_NOFILE */
|
||
return 0;
|
||
}
|
||
|
||
const FFInputFormat ff_cuframes_demuxer = {
|
||
.p.name = "cuframes",
|
||
.p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"),
|
||
.p.flags = AVFMT_NOFILE,
|
||
.p.priv_class = &cuframes_demuxer_class,
|
||
.priv_data_size = sizeof(CuframesDemuxerContext),
|
||
.read_probe = cuframes_probe,
|
||
.read_header = cuframes_read_header,
|
||
.read_packet = cuframes_read_packet,
|
||
.read_close = cuframes_read_close,
|
||
};
|