Phase 7 audio mixing — attempt + rollback + lessons

Несколько сессий попыток реализовать audio mixing в композиторе.
Достичь sub-секундной latency при стабильном video+audio не удалось.
Откат на parallel mode (cfc-grid — video-only, live — от pipeline с audio).

Полный набор выводов и pitfall'ов — docs/LESSONS-audio-mixing-attempts.md.

Главные lesson'ы для будущей попытки:
- mpegts mux в libavformat авто-инсёртит h264_mp4toannexb BSF, которому
  не нравится Annex-B + inline SPS/PPS — NVENC OUTPUT_SPSPPS per-frame его ломает
- SPSC ring drop newest при full, не oldest (consumer's domain)
- av_new_packet (не av_malloc) для av_interleaved_write_frame ownership
- Monotonic PTS на counter (frame_idx, total_samples) — не wallclock
- mediamtx env-var path names не должны иметь '-' (parser limitation)
- Default mediamtx ReadTimeout=10s короткий для burst write'ов

Изменения в repo сохранены для будущей доработки:
- src/writer.c — mpegts backend с audio stream support
- src/audio.c — RTSP AAC consumer + lock-free SPSC ring
- include/cuframes_composer/{writer,audio}.h — public API
- examples/grid_record.c — --format=mpegts + --audio-source flags
- include/cuframes_composer/composer.h — consumer_prefix field
- docker/Dockerfile — libavformat-dev добавлен в builder/runtime

cfc-grid composer стабильно работает на видео (substantially лучше
монолитного pipeline'а с audio bug'ом). Для TV рекомендуется использовать
rtsp://...:554/cfc-grid + опционально параллельно
rtsp://...:554/live-audio.
This commit is contained in:
2026-06-03 14:29:56 +01:00
parent 20b5234c41
commit fa6ab3069a
13 changed files with 1276 additions and 52 deletions
+87 -28
View File
@@ -23,6 +23,8 @@
#include "../include/cuframes_composer/overlay.h"
#include "../include/cuframes_composer/control.h"
#include "../include/cuframes_composer/health.h"
#include "../include/cuframes_composer/writer.h"
#include "../include/cuframes_composer/audio.h"
#include <cuda.h>
@@ -43,7 +45,7 @@ static volatile sig_atomic_t g_stop = 0;
static void on_sig(int s) { (void)s; g_stop = 1; }
typedef struct write_ctx {
FILE *fp;
cfc_writer_t *writer;
uint64_t bytes_written;
uint64_t frames_encoded;
uint64_t idr_count;
@@ -52,9 +54,8 @@ typedef struct write_ctx {
static void on_bitstream(const uint8_t *bs, size_t size, int64_t pts_ns,
int is_idr, void *user)
{
(void)pts_ns;
write_ctx_t *ctx = (write_ctx_t *)user;
if (fwrite(bs, 1, size, ctx->fp) == size) {
if (cfc_writer_write(ctx->writer, bs, size, pts_ns, is_idr) == 0) {
ctx->bytes_written += size;
ctx->frames_encoded++;
if (is_idr) ctx->idr_count++;
@@ -119,6 +120,8 @@ int main(int argc, char **argv)
const char *mqtt_instance = "cfc-grid"; /* --mqtt-instance NAME */
const char *mqtt_user = NULL;
const char *mqtt_pass = NULL;
const char *out_format = "h264"; /* --format h264|mpegts */
const char *audio_source = NULL; /* --audio-source rtsp://.../live-audio */
static struct option opts[] = {
{"out", required_argument, 0, 'o'},
@@ -137,10 +140,12 @@ int main(int argc, char **argv)
{"mqtt-user", required_argument, 0, 'U'},
{"mqtt-pass", required_argument, 0, 'P'},
{"intra-refresh", no_argument, 0, 'R'},
{"format", required_argument, 0, 'F'}, /* h264|mpegts */
{"audio-source", required_argument, 0, 'A'}, /* RTSP audio URL */
{0, 0, 0, 0},
};
int c;
while ((c = getopt_long(argc, argv, "o:c:f:b:W:H:s:r:i:t:C:M:I:U:P:R", opts, NULL)) != -1) {
while ((c = getopt_long(argc, argv, "o:c:f:b:W:H:s:r:i:t:C:M:I:U:P:RF:A:", opts, NULL)) != -1) {
switch (c) {
case 'o': out_path = optarg; break;
case 'c':
@@ -178,6 +183,8 @@ int main(int argc, char **argv)
case 'U': mqtt_user = optarg; break;
case 'P': mqtt_pass = optarg; break;
case 'R': intra_refresh = 1; break;
case 'F': out_format = optarg; break;
case 'A': audio_source = optarg; break;
case 't': {
if (num_texts >= MAX_CELLS) { fprintf(stderr, "max %d texts\n", MAX_CELLS); return 1; }
/* Опциональный prefix "id=NAME:" — задаёт control-plane ID. */
@@ -276,6 +283,7 @@ int main(int argc, char **argv)
.cells = cells,
.num_cells = num_cells,
.cuda_device = 0,
.consumer_prefix = mqtt_instance, /* уникальный namespace на каждый composer */
};
cfc_composer_t *comp = NULL;
if (cfc_composer_create(&ccfg, &comp) != 0) {
@@ -407,28 +415,73 @@ int main(int argc, char **argv)
return 1;
}
/* Output: "-" / "/dev/stdout" / "pipe:1" = stdout (для pipe в ffmpeg).
* stdout не закрывается через fclose чтобы не убивать дочерний процесс
* раньше времени. */
write_ctx_t wctx = { 0 };
int is_stdout = (!strcmp(out_path, "-") || !strcmp(out_path, "pipe:1") ||
!strcmp(out_path, "/dev/stdout"));
if (is_stdout) {
wctx.fp = stdout;
/* line-buffer'инг disabled — пишем full-buffered для производительности.
* Caller'у нужно flush при exit. */
setvbuf(stdout, NULL, _IOFBF, 1024 * 1024);
} else {
wctx.fp = fopen(out_path, "wb");
if (!wctx.fp) {
fprintf(stderr, "fopen(%s): %s\n", out_path, strerror(errno));
cfc_encoder_destroy(enc);
cfc_composer_destroy(comp);
return 1;
/* Audio consumer (опциональный, Phase 7). Запускаем РАНЬШЕ writer'а,
* чтобы успеть получить codec params (sample_rate, channels, extradata)
* до avformat_write_header — иначе audio stream'у не будет правильного
* setup'а. Polling до 30 секунд. */
cfc_audio_t *audio = NULL;
int audio_sample_rate = 0, audio_channels = 0;
const uint8_t *audio_extradata = NULL;
size_t audio_extradata_size = 0;
if (audio_source) {
cfc_audio_config_t acfg = { .rtsp_url = audio_source };
if (cfc_audio_create(&acfg, &audio) != 0) {
fprintf(stderr, "[grid_record] audio create failed, продолжаю без audio\n");
} else {
fprintf(stderr, "[grid_record] жду audio codec params от %s ...\n", audio_source);
/* 30 секунд polling — audio source (cuda-grid-audio) может ещё
* подниматься после recreate стека. Audio thread сам retry'ится
* с exp backoff. */
for (int i = 0; i < 300; i++) { /* 300 × 100ms = 30s */
if (cfc_audio_get_codec_params(audio, &audio_sample_rate,
&audio_channels, &audio_extradata,
&audio_extradata_size) == 0) {
fprintf(stderr,
"[grid_record] audio готов: AAC %dHz %dch extradata=%zub\n",
audio_sample_rate, audio_channels, audio_extradata_size);
break;
}
struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
nanosleep(&ts, NULL);
}
if (audio_sample_rate == 0) {
fprintf(stderr, "[grid_record] audio params не получены за 30с, без audio\n");
cfc_audio_destroy(audio); audio = NULL;
}
}
}
fprintf(stderr, "[grid_record] начало записи в %s (Ctrl+C для остановки)\n",
out_path);
/* Writer: mpegts с video + опциональным audio. */
uint8_t spspps[256]; size_t spspps_len = sizeof(spspps);
cfc_encoder_get_sequence_params(enc, spspps, &spspps_len);
cfc_writer_config_t wcfg = {
.path = out_path,
.format = out_format,
.width = out_w,
.height = out_h,
.fps_num = fps,
.fps_den = 1,
.bitrate_kbps = bitrate,
.extradata = spspps,
.extradata_size = spspps_len,
.has_audio = audio ? 1 : 0,
.audio_sample_rate = audio_sample_rate,
.audio_channels = audio_channels,
.audio_extradata = audio_extradata,
.audio_extradata_size = audio_extradata_size,
};
write_ctx_t wctx = { 0 };
if (cfc_writer_create(&wcfg, &wctx.writer) != 0) {
fprintf(stderr, "cfc_writer_create(%s, %s) failed\n", out_path, out_format);
if (audio) cfc_audio_destroy(audio);
cfc_encoder_destroy(enc);
cfc_composer_destroy(comp);
return 1;
}
fprintf(stderr, "[grid_record] начало записи в %s [format=%s%s] (Ctrl+C для остановки)\n",
out_path, out_format, audio ? "+audio" : "");
/* Main loop — frame cadence по wall clock'у. */
struct timespec ts_start;
@@ -461,12 +514,18 @@ int main(int argc, char **argv)
}
int64_t pts_ns = (now_us - start_us) * 1000;
/* Не break'аем при encode/write failure — это обычно временно
* (mediamtx reconnect, socket broken). Просто логируем и продолжаем,
* следующая encode/write попытается заново. */
if (cfc_encoder_encode_frame(enc, out_y, out_pitch, pts_ns,
on_bitstream, &wctx) != 0) {
fprintf(stderr, "[grid_record] encode failed\n");
break;
static int warned = 0;
if (!warned) { fprintf(stderr, "[grid_record] encode failed (продолжаю)\n"); warned = 1; }
}
/* Drain audio packets — пишем сразу после video frame. */
if (audio) cfc_audio_drain(audio, wctx.writer, 8);
if (wctx.frames_encoded > 0 && wctx.frames_encoded % 50 == 0) {
double elapsed = (now_us - start_us) / 1e6;
cfc_composer_health_t h;
@@ -497,8 +556,8 @@ int main(int argc, char **argv)
(unsigned long long)wctx.idr_count,
wctx.bytes_written / 1048576.0);
fflush(wctx.fp);
if (!is_stdout) fclose(wctx.fp);
cfc_writer_close(wctx.writer);
if (audio) cfc_audio_destroy(audio);
if (ctl) cfc_control_destroy(ctl);
if (hpub) cfc_health_destroy(hpub);
cfc_encoder_destroy(enc);