Phase 7 audio mixing — attempt + rollback + lessons
Несколько сессий попыток реализовать audio mixing в композитор'е.
Не достигнуто sub-секундной latency со стабильным video+audio.
Откачено на parallel mode (cfc-grid — video-only, live — от pipeline с audio).
Полный набор выводов и pitfall'ов — docs/LESSONS-audio-mixing-attempts.md.
Главные lesson'ы для будущей попытки:
- mpegts mux в libavformat авто-инсёртит BSF h264_mp4toannexb, которому
не нравится Annex-B + inline SPS/PPS — NVENC OUTPUT_SPSPPS per-frame его ломает
- SPSC ring drop newest при full, не oldest (consumer's domain)
- av_new_packet (не av_malloc) для av_interleaved_write_frame ownership
- Monotonic PTS на counter (frame_idx, total_samples) — не wallclock
- mediamtx env-var path names не должны иметь '-' (parser limitation)
- Default mediamtx ReadTimeout=10s короткий для burst write'ов
Изменения в repo сохранены для будущей доработки:
- src/writer.c — mpegts backend с audio stream support
- src/audio.c — RTSP AAC consumer + lock-free SPSC ring
- include/cuframes_composer/{writer,audio}.h — public API
- examples/grid_record.c — --format=mpegts + --audio-source flags
- include/cuframes_composer/composer.h — consumer_prefix field
- docker/Dockerfile — libavformat-dev добавлен в builder/runtime
cfc-grid composer стабильно работает на видео (существенно лучше
монолитного pipeline'а с audio bug'ом). Для TV рекомендуется использовать
rtsp://...:554/cfc-grid + опционально rtsp://...:554/live-audio
параллельно.
This commit is contained in:
+87
-28
@@ -23,6 +23,8 @@
|
||||
#include "../include/cuframes_composer/overlay.h"
|
||||
#include "../include/cuframes_composer/control.h"
|
||||
#include "../include/cuframes_composer/health.h"
|
||||
#include "../include/cuframes_composer/writer.h"
|
||||
#include "../include/cuframes_composer/audio.h"
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
@@ -43,7 +45,7 @@ static volatile sig_atomic_t g_stop = 0;
|
||||
static void on_sig(int s) { (void)s; g_stop = 1; }
|
||||
|
||||
typedef struct write_ctx {
|
||||
FILE *fp;
|
||||
cfc_writer_t *writer;
|
||||
uint64_t bytes_written;
|
||||
uint64_t frames_encoded;
|
||||
uint64_t idr_count;
|
||||
@@ -52,9 +54,8 @@ typedef struct write_ctx {
|
||||
static void on_bitstream(const uint8_t *bs, size_t size, int64_t pts_ns,
|
||||
int is_idr, void *user)
|
||||
{
|
||||
(void)pts_ns;
|
||||
write_ctx_t *ctx = (write_ctx_t *)user;
|
||||
if (fwrite(bs, 1, size, ctx->fp) == size) {
|
||||
if (cfc_writer_write(ctx->writer, bs, size, pts_ns, is_idr) == 0) {
|
||||
ctx->bytes_written += size;
|
||||
ctx->frames_encoded++;
|
||||
if (is_idr) ctx->idr_count++;
|
||||
@@ -119,6 +120,8 @@ int main(int argc, char **argv)
|
||||
const char *mqtt_instance = "cfc-grid"; /* --mqtt-instance NAME */
|
||||
const char *mqtt_user = NULL;
|
||||
const char *mqtt_pass = NULL;
|
||||
const char *out_format = "h264"; /* --format h264|mpegts */
|
||||
const char *audio_source = NULL; /* --audio-source rtsp://.../live-audio */
|
||||
|
||||
static struct option opts[] = {
|
||||
{"out", required_argument, 0, 'o'},
|
||||
@@ -137,10 +140,12 @@ int main(int argc, char **argv)
|
||||
{"mqtt-user", required_argument, 0, 'U'},
|
||||
{"mqtt-pass", required_argument, 0, 'P'},
|
||||
{"intra-refresh", no_argument, 0, 'R'},
|
||||
{"format", required_argument, 0, 'F'}, /* h264|mpegts */
|
||||
{"audio-source", required_argument, 0, 'A'}, /* RTSP audio URL */
|
||||
{0, 0, 0, 0},
|
||||
};
|
||||
int c;
|
||||
while ((c = getopt_long(argc, argv, "o:c:f:b:W:H:s:r:i:t:C:M:I:U:P:R", opts, NULL)) != -1) {
|
||||
while ((c = getopt_long(argc, argv, "o:c:f:b:W:H:s:r:i:t:C:M:I:U:P:RF:A:", opts, NULL)) != -1) {
|
||||
switch (c) {
|
||||
case 'o': out_path = optarg; break;
|
||||
case 'c':
|
||||
@@ -178,6 +183,8 @@ int main(int argc, char **argv)
|
||||
case 'U': mqtt_user = optarg; break;
|
||||
case 'P': mqtt_pass = optarg; break;
|
||||
case 'R': intra_refresh = 1; break;
|
||||
case 'F': out_format = optarg; break;
|
||||
case 'A': audio_source = optarg; break;
|
||||
case 't': {
|
||||
if (num_texts >= MAX_CELLS) { fprintf(stderr, "max %d texts\n", MAX_CELLS); return 1; }
|
||||
/* Опциональный prefix "id=NAME:" — задаёт control-plane ID. */
|
||||
@@ -276,6 +283,7 @@ int main(int argc, char **argv)
|
||||
.cells = cells,
|
||||
.num_cells = num_cells,
|
||||
.cuda_device = 0,
|
||||
.consumer_prefix = mqtt_instance, /* уникальный namespace на каждый composer */
|
||||
};
|
||||
cfc_composer_t *comp = NULL;
|
||||
if (cfc_composer_create(&ccfg, &comp) != 0) {
|
||||
@@ -407,28 +415,73 @@ int main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Output: "-" / "/dev/stdout" / "pipe:1" = stdout (для pipe в ffmpeg).
|
||||
* stdout не закрывается через fclose чтобы не убивать дочерний процесс
|
||||
* раньше времени. */
|
||||
write_ctx_t wctx = { 0 };
|
||||
int is_stdout = (!strcmp(out_path, "-") || !strcmp(out_path, "pipe:1") ||
|
||||
!strcmp(out_path, "/dev/stdout"));
|
||||
if (is_stdout) {
|
||||
wctx.fp = stdout;
|
||||
/* line-buffer'инг disabled — пишем full-buffered для производительности.
|
||||
* Caller'у нужно flush при exit. */
|
||||
setvbuf(stdout, NULL, _IOFBF, 1024 * 1024);
|
||||
} else {
|
||||
wctx.fp = fopen(out_path, "wb");
|
||||
if (!wctx.fp) {
|
||||
fprintf(stderr, "fopen(%s): %s\n", out_path, strerror(errno));
|
||||
cfc_encoder_destroy(enc);
|
||||
cfc_composer_destroy(comp);
|
||||
return 1;
|
||||
/* Audio consumer (опциональный, Phase 7). Запускаем РАНЬШЕ writer'а
|
||||
* чтобы успеть получить codec params (sample_rate, channels, extradata)
|
||||
* до avformat_write_header — иначе audio stream'у не будет правильного
|
||||
* setup'а. Polling до 30 секунд. */
|
||||
cfc_audio_t *audio = NULL;
|
||||
int audio_sample_rate = 0, audio_channels = 0;
|
||||
const uint8_t *audio_extradata = NULL;
|
||||
size_t audio_extradata_size = 0;
|
||||
|
||||
if (audio_source) {
|
||||
cfc_audio_config_t acfg = { .rtsp_url = audio_source };
|
||||
if (cfc_audio_create(&acfg, &audio) != 0) {
|
||||
fprintf(stderr, "[grid_record] audio create failed, продолжаю без audio\n");
|
||||
} else {
|
||||
fprintf(stderr, "[grid_record] жду audio codec params от %s ...\n", audio_source);
|
||||
/* 30 секунд polling — audio source (cuda-grid-audio) может ещё
|
||||
* подниматься после recreate стека. Audio thread сам retry'ится
|
||||
* с exp backoff. */
|
||||
for (int i = 0; i < 300; i++) { /* 300 × 100ms = 30s */
|
||||
if (cfc_audio_get_codec_params(audio, &audio_sample_rate,
|
||||
&audio_channels, &audio_extradata,
|
||||
&audio_extradata_size) == 0) {
|
||||
fprintf(stderr,
|
||||
"[grid_record] audio готов: AAC %dHz %dch extradata=%zub\n",
|
||||
audio_sample_rate, audio_channels, audio_extradata_size);
|
||||
break;
|
||||
}
|
||||
struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
|
||||
nanosleep(&ts, NULL);
|
||||
}
|
||||
if (audio_sample_rate == 0) {
|
||||
fprintf(stderr, "[grid_record] audio params не получены за 30с, без audio\n");
|
||||
cfc_audio_destroy(audio); audio = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "[grid_record] начало записи в %s (Ctrl+C для остановки)\n",
|
||||
out_path);
|
||||
|
||||
/* Writer: mpegts с video + опциональным audio. */
|
||||
uint8_t spspps[256]; size_t spspps_len = sizeof(spspps);
|
||||
cfc_encoder_get_sequence_params(enc, spspps, &spspps_len);
|
||||
|
||||
cfc_writer_config_t wcfg = {
|
||||
.path = out_path,
|
||||
.format = out_format,
|
||||
.width = out_w,
|
||||
.height = out_h,
|
||||
.fps_num = fps,
|
||||
.fps_den = 1,
|
||||
.bitrate_kbps = bitrate,
|
||||
.extradata = spspps,
|
||||
.extradata_size = spspps_len,
|
||||
.has_audio = audio ? 1 : 0,
|
||||
.audio_sample_rate = audio_sample_rate,
|
||||
.audio_channels = audio_channels,
|
||||
.audio_extradata = audio_extradata,
|
||||
.audio_extradata_size = audio_extradata_size,
|
||||
};
|
||||
write_ctx_t wctx = { 0 };
|
||||
if (cfc_writer_create(&wcfg, &wctx.writer) != 0) {
|
||||
fprintf(stderr, "cfc_writer_create(%s, %s) failed\n", out_path, out_format);
|
||||
if (audio) cfc_audio_destroy(audio);
|
||||
cfc_encoder_destroy(enc);
|
||||
cfc_composer_destroy(comp);
|
||||
return 1;
|
||||
}
|
||||
fprintf(stderr, "[grid_record] начало записи в %s [format=%s%s] (Ctrl+C для остановки)\n",
|
||||
out_path, out_format, audio ? "+audio" : "");
|
||||
|
||||
/* Main loop — frame cadence по wall clock'у. */
|
||||
struct timespec ts_start;
|
||||
@@ -461,12 +514,18 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
int64_t pts_ns = (now_us - start_us) * 1000;
|
||||
/* Не break'аем при encode/write failure — это обычно временно
|
||||
* (mediamtx reconnect, socket broken). Просто логируем и продолжаем,
|
||||
* следующая encode/write попытается заново. */
|
||||
if (cfc_encoder_encode_frame(enc, out_y, out_pitch, pts_ns,
|
||||
on_bitstream, &wctx) != 0) {
|
||||
fprintf(stderr, "[grid_record] encode failed\n");
|
||||
break;
|
||||
static int warned = 0;
|
||||
if (!warned) { fprintf(stderr, "[grid_record] encode failed (продолжаю)\n"); warned = 1; }
|
||||
}
|
||||
|
||||
/* Drain audio packets — пишем сразу после video frame. */
|
||||
if (audio) cfc_audio_drain(audio, wctx.writer, 8);
|
||||
|
||||
if (wctx.frames_encoded > 0 && wctx.frames_encoded % 50 == 0) {
|
||||
double elapsed = (now_us - start_us) / 1e6;
|
||||
cfc_composer_health_t h;
|
||||
@@ -497,8 +556,8 @@ int main(int argc, char **argv)
|
||||
(unsigned long long)wctx.idr_count,
|
||||
wctx.bytes_written / 1048576.0);
|
||||
|
||||
fflush(wctx.fp);
|
||||
if (!is_stdout) fclose(wctx.fp);
|
||||
cfc_writer_close(wctx.writer);
|
||||
if (audio) cfc_audio_destroy(audio);
|
||||
if (ctl) cfc_control_destroy(ctl);
|
||||
if (hpub) cfc_health_destroy(hpub);
|
||||
cfc_encoder_destroy(enc);
|
||||
|
||||
Reference in New Issue
Block a user