/* * cuframes input demuxer для FFmpeg 7.x. * * Принимает URL вида "cuframes://" — подключается к запущенному * cuframes-publisher'у и выдаёт NV12 raw video stream. * * Для PoC v1: GPU NV12 frame копируется на CPU (cudaMemcpy2D), затем * выдаётся как rawvideo packet. Zero-copy через FFmpeg hwframe_ctx — * запланировано в v2. * * Главный выигрыш v1: устраняем дубль-decode (один NVDEC у publisher'а * на N consumer'ов), пусть и с GPU↔CPU round-trip'ом. * * Лицензия: LGPL-2.1+ (соответствует libcuframes и стандарту FFmpeg LGPL builds) */ #include "libavformat/avformat.h" #include "libavformat/demux.h" #include "libavformat/internal.h" #include "libavutil/avstring.h" #include "libavutil/imgutils.h" #include "libavutil/opt.h" #include #include typedef struct CuframesDemuxerContext { const AVClass *class_; /* options */ int cuda_device; int connect_timeout_ms; /* state */ cuframes_subscriber_t *sub; cudaStream_t cuda_stream; void *host_buffer; size_t host_buffer_size; int width; int height; int64_t first_pts_ns; int got_first_pts; /* первый кадр получаем в read_header чтобы узнать размеры, * сохраняем его для первого read_packet */ int pending_first_frame; cuframes_frame_t *first_frame; } CuframesDemuxerContext; #define OFFSET(x) offsetof(CuframesDemuxerContext, x) #define D AV_OPT_FLAG_DECODING_PARAM static const AVOption cuframes_options[] = { { "cuda_device", "CUDA device index (must match publisher's)", OFFSET(cuda_device), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 16, D }, { "connect_timeout", "wait for publisher (ms); -1 = forever", OFFSET(connect_timeout_ms), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, D }, { NULL } }; static const AVClass cuframes_demuxer_class = { .class_name = "cuframes demuxer", .item_name = av_default_item_name, .option = cuframes_options, .version = LIBAVUTIL_VERSION_INT, }; /* "cuframes://cam-parking" → "cam-parking"; также принимает "cuframes:cam-X" */ static const char *parse_key(const char *url) { if (av_strstart(url, "cuframes://", &url)) return url; if (av_strstart(url, "cuframes:", &url)) return url; return url; } static int copy_gpu_nv12_to_host(CuframesDemuxerContext *c, const cuframes_frame_t *frame, uint8_t *dst) { int32_t w = 0, h = 0; cuframes_frame_size(frame, &w, &h); const int32_t pitch_y = cuframes_frame_pitch_y(frame); const int32_t pitch_uv = cuframes_frame_pitch_uv(frame); uint8_t *cu = (uint8_t *)cuframes_frame_cuda_ptr(frame); uint8_t *y_dev = cu; uint8_t *uv_dev = cu + (size_t)pitch_y * h; uint8_t *y_host = dst; uint8_t *uv_host = dst + (size_t)w * h; cudaError_t err = cudaMemcpy2DAsync(y_host, w, y_dev, pitch_y, w, h, cudaMemcpyDeviceToHost, c->cuda_stream); if (err == cudaSuccess) { err = cudaMemcpy2DAsync(uv_host, w, uv_dev, pitch_uv, w, h / 2, cudaMemcpyDeviceToHost, c->cuda_stream); } if (err == cudaSuccess) err = cudaStreamSynchronize(c->cuda_stream); return (err == cudaSuccess) ? 0 : AVERROR_EXTERNAL; } static int cuframes_read_header(AVFormatContext *s) { CuframesDemuxerContext *c = s->priv_data; const char *key = parse_key(s->url); if (!key || !*key) { av_log(s, AV_LOG_ERROR, "cuframes: empty key in URL '%s'\n", s->url); return AVERROR(EINVAL); } cudaError_t cerr = cudaSetDevice(c->cuda_device); if (cerr != cudaSuccess) { av_log(s, AV_LOG_ERROR, "cuframes: cudaSetDevice(%d): %s\n", c->cuda_device, cudaGetErrorString(cerr)); return AVERROR_EXTERNAL; } cerr = cudaStreamCreate(&c->cuda_stream); if (cerr != cudaSuccess) { av_log(s, AV_LOG_ERROR, "cuframes: cudaStreamCreate: %s\n", cudaGetErrorString(cerr)); return AVERROR_EXTERNAL; } cuframes_subscriber_config_t cfg = {0}; cfg.key = key; cfg.consumer_name = NULL; /* auto-generated */ cfg.mode = CUFRAMES_MODE_NEWEST_ONLY; cfg.cuda_device = c->cuda_device; cfg.connect_timeout_ms = c->connect_timeout_ms; int rc = cuframes_subscriber_create(&cfg, &c->sub); if (rc != CUFRAMES_OK) { av_log(s, AV_LOG_ERROR, "cuframes: subscriber_create('%s'): %s\n", key, cuframes_strerror(rc)); return AVERROR_EXTERNAL; } /* Получаем первый кадр чтобы узнать width/height. */ cuframes_frame_t *frame = NULL; rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000); if (rc != CUFRAMES_OK || !frame) { av_log(s, AV_LOG_ERROR, "cuframes: first frame: %s\n", cuframes_strerror(rc)); return AVERROR_EXTERNAL; } int32_t w = 0, h = 0; cuframes_frame_size(frame, &w, &h); if (cuframes_frame_format(frame) != CUFRAMES_FORMAT_NV12) { av_log(s, AV_LOG_ERROR, "cuframes: only NV12 supported in v1 (got %d)\n", (int)cuframes_frame_format(frame)); cuframes_subscriber_release(c->sub, frame); return AVERROR(ENOSYS); } c->width = w; c->height = h; c->first_pts_ns = cuframes_frame_pts_ns(frame); c->got_first_pts = 1; c->first_frame = frame; c->pending_first_frame = 1; AVStream *st = avformat_new_stream(s, NULL); if (!st) return AVERROR(ENOMEM); st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO; st->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO; st->codecpar->format = AV_PIX_FMT_NV12; st->codecpar->width = w; st->codecpar->height = h; st->codecpar->codec_tag = MKTAG('N', 'V', '1', '2'); avpriv_set_pts_info(st, 64, 1, 1000000); /* µs */ av_log(s, AV_LOG_INFO, "cuframes: connected to '%s' — %dx%d NV12\n", key, w, h); return 0; } static int cuframes_read_packet(AVFormatContext *s, AVPacket *pkt) { CuframesDemuxerContext *c = s->priv_data; cuframes_frame_t *frame; if (c->pending_first_frame) { frame = c->first_frame; c->first_frame = NULL; c->pending_first_frame = 0; } else { int rc = cuframes_subscriber_next(c->sub, c->cuda_stream, &frame, 5000); if (rc == CUFRAMES_ERR_TIMEOUT || rc == CUFRAMES_ERR_WOULD_BLOCK) return AVERROR(EAGAIN); if (rc == CUFRAMES_ERR_DISCONNECTED) return AVERROR_EOF; if (rc != CUFRAMES_OK || !frame) { av_log(s, AV_LOG_ERROR, "cuframes: next: %s\n", cuframes_strerror(rc)); return AVERROR_EXTERNAL; } } const size_t need = (size_t)c->width * c->height * 3 / 2; if (need > c->host_buffer_size) { if (c->host_buffer) cudaFreeHost(c->host_buffer); cudaError_t cerr = cudaMallocHost(&c->host_buffer, need); if (cerr != cudaSuccess) { cuframes_subscriber_release(c->sub, frame); av_log(s, AV_LOG_ERROR, "cuframes: cudaMallocHost: %s\n", cudaGetErrorString(cerr)); return AVERROR(ENOMEM); } c->host_buffer_size = need; } int err = copy_gpu_nv12_to_host(c, frame, c->host_buffer); int64_t pts_ns = cuframes_frame_pts_ns(frame); cuframes_subscriber_release(c->sub, frame); if (err < 0) return err; int rc = av_new_packet(pkt, (int)need); if (rc < 0) return rc; memcpy(pkt->data, c->host_buffer, need); pkt->stream_index = 0; pkt->pts = pkt->dts = (pts_ns - c->first_pts_ns) / 1000; /* µs */ pkt->flags |= AV_PKT_FLAG_KEY; return 0; } static int cuframes_read_close(AVFormatContext *s) { CuframesDemuxerContext *c = s->priv_data; if (c->first_frame) { if (c->sub) cuframes_subscriber_release(c->sub, c->first_frame); c->first_frame = NULL; } if (c->sub) { cuframes_subscriber_destroy(c->sub); c->sub = NULL; } if (c->cuda_stream) { cudaStreamDestroy(c->cuda_stream); c->cuda_stream = NULL; } if (c->host_buffer) { cudaFreeHost(c->host_buffer); c->host_buffer = NULL; c->host_buffer_size = 0; } return 0; } static int cuframes_probe(const AVProbeData *p) { /* URL-based protocol — probe не используется, см. AVFMT_NOFILE */ return 0; } const FFInputFormat ff_cuframes_demuxer = { .p.name = "cuframes", .p.long_name = NULL_IF_CONFIG_SMALL("cuframes (CUDA IPC zero-copy frames)"), .p.flags = AVFMT_NOFILE, .p.priv_class = &cuframes_demuxer_class, .priv_data_size = sizeof(CuframesDemuxerContext), .read_probe = cuframes_probe, .read_header = cuframes_read_header, .read_packet = cuframes_read_packet, .read_close = cuframes_read_close, };