8c7abbc4e8
release / build runtime Docker image (push) Failing after 1s
release / build source tarball (push) Successful in 5s
build / cmake build (CUDA 12.4, Ubuntu 22.04) (push) Successful in 1m40s
build / ffmpeg filter patch (out-of-tree) (push) Successful in 1m22s
test-u4-runner / u4 runner smoke test (push) Has been cancelled
Protocol bump V2→V3:
+ shm header: cudaIpcEventHandle_t slot_event_handles[CUFRAMES_MAX_RING]
+ producer creates ring_size events (вместо одного global)
+ producer.do_publish records event[slot] (вместо pub->event)
+ consumer opens all slot events при subscribe
+ consumer waits event[slot_idx] specifically (вместо global producer_event)
Backward compat:
- Legacy pub->event сохранён + ipc_event_handle export'ится — v0.2 consumers
видят его и работают по-старому (с post-sync verify hack из 517107d).
- v0.3 consumer auto-detects proto_version >= 3, fallback к legacy если
cudaIpcOpenEventHandle на slot fail (graceful degradation).
Effect (15-sec sample на Phase 7 single-cam, motion):
v0.1 production: dup runs 34.7%, max 14 frames (560ms freeze)
v0.2.1 fix: dup runs 10%, max 6, 0 back-jumps detected
v0.3 per-slot: dup runs 1.9%, max 5, 3 back-jumps (likely encoder
static-content artifacts, not real race)
Размер shm header: 7424 → 8448 bytes (+1024 для slot_event_handles).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
320 lines
15 KiB
C
320 lines
15 KiB
C
/* Internal shared types for the libcuframes implementation.
 * Not published in include/.
 *
 * Layout matches docs/protocol.md (byte-exact).
 */
|
||
|
||
#ifndef CUFRAMES_INTERNAL_H
|
||
#define CUFRAMES_INTERNAL_H
|
||
|
||
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <cuda_runtime.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "cuframes/cuframes.h"
|
||
|
||
/* ─── Protocol constants ──────────────────────────────────────────────── */
|
||
|
||
/* Magic value of the frame shared-memory segment
 * (the packet ring uses this value + 1). */
#define CUFRAMES_MAGIC            0xCC7C1DCCu

/* Protocol revisions. */
#define CUFRAMES_PROTOCOL_V1      1u
#define CUFRAMES_PROTOCOL_V2      2u   /* v0.2: packet ring support */
#define CUFRAMES_PROTOCOL_V3      3u   /* v0.3: per-slot CUDA events (no TOCTOU race) */

/* Fixed capacities. MAX_RING and MAX_SUBSCRIBERS are the array lengths of
 * slots[] / subscribers[] in the shared header. */
#define CUFRAMES_MAX_SUBSCRIBERS  32
#define CUFRAMES_MAX_RING         16
#define CUFRAMES_MAX_KEY_LEN      63
#define CUFRAMES_MAX_NAME_LEN     31

/* Naming pieces for the runtime directory and the shm objects. */
#define CUFRAMES_RUNTIME_DIR      "/run/cuframes"
#define CUFRAMES_SHM_PREFIX       "/cuframes-"
#define CUFRAMES_PKT_SHM_SUFFIX   "-packets"   /* /cuframes-<key>-packets */
|
||
|
||
/* Packet ring constants (see docs/protocol.md §10). */
#define CUFRAMES_PKT_MAGIC              0xCC7C1DCDu   /* frames magic + 1 */
#define CUFRAMES_PKT_EXTRADATA_MAX      4096u
#define CUFRAMES_PKT_DEFAULT_SLOTS      64u
#define CUFRAMES_PKT_DEFAULT_DATA_SIZE  (8u << 20)    /* 8 MB */
#define CUFRAMES_PKT_DEFAULT_MAX_SIZE   (2u << 20)    /* 2 MB */
#define CUFRAMES_PKT_MAX_SLOTS          1024u
|
||
|
||
/* Packet flag bits (see docs/protocol.md §10.6). One bit per flag. */
#define CUFRAMES_PKT_FLAG_KEY            (1u << 0)
#define CUFRAMES_PKT_FLAG_CORRUPT        (1u << 1)
#define CUFRAMES_PKT_FLAG_DISCONTINUITY  (1u << 2)
#define CUFRAMES_PKT_FLAG_LAST_IN_AU     (1u << 3)
|
||
|
||
/* ─── Shared memory layout (see docs/protocol.md §2) ──────────────────── */
|
||
|
||
/* Frame metadata record. Packed; the fields add up to exactly 64 bytes,
 * which the static assertion below enforces. */
typedef struct __attribute__((packed)) cuframes_shm_meta {
    uint32_t format;
    int32_t  width;
    int32_t  height;
    int32_t  pitch_y;
    int32_t  pitch_uv;
    uint32_t bits_per_pixel;
    uint64_t frame_size_bytes;
    uint8_t  reserved[32];        /* fills the record up to 64 bytes */
} cuframes_shm_meta_t;

_Static_assert(sizeof(struct cuframes_shm_meta) == 64, "shm meta must be 64 bytes");
|
||
|
||
/* Slot descriptor — packed 192 байт */
|
||
typedef struct __attribute__((packed)) cuframes_shm_slot {
|
||
_Atomic uint64_t seq; /* UINT64_MAX = invalid */
|
||
_Atomic int64_t pts_ns;
|
||
_Atomic uint64_t ack_bitmap;
|
||
uint64_t written_bytes;
|
||
cudaIpcMemHandle_t mem_handle; /* 64 байта */
|
||
uint8_t cuda_ptr_external[32]; /* informative pointer бite-string */
|
||
uint8_t reserved_a[16];
|
||
uint8_t reserved_b[48];
|
||
} cuframes_shm_slot_t;
|
||
_Static_assert(sizeof(cuframes_shm_slot_t) == 192, "slot descriptor must be 192 bytes");
|
||
|
||
/* Per-subscriber record in the shared header. Packed, 128 bytes (enforced below). */
typedef struct __attribute__((packed)) cuframes_shm_subscriber {
    _Atomic uint64_t state;              /* 0=free, 1=connecting, 2=active, 3=draining */
    uint64_t         consumer_pid;
    _Atomic uint64_t last_seen_seq;
    _Atomic int64_t  last_ack_ns;
    char             consumer_name[32];  /* null-terminated */
    uint8_t          reserved[64];
} cuframes_shm_subscriber_t;

_Static_assert(sizeof(struct cuframes_shm_subscriber) == 128, "subscriber slot must be 128 bytes");
|
||
|
||
/* Shared header (header + slots[N] + subscribers[M]). Total ≤ 8KB. */
|
||
typedef struct __attribute__((packed)) cuframes_shm_header {
|
||
uint32_t magic;
|
||
uint32_t proto_version;
|
||
uint32_t lib_version_major;
|
||
uint32_t lib_version_minor;
|
||
uint32_t lib_version_patch;
|
||
uint32_t reserved_a;
|
||
uint64_t producer_pid;
|
||
uint64_t ring_size;
|
||
uint64_t ownership_mode;
|
||
uint64_t policy;
|
||
uint64_t max_subscribers;
|
||
cuframes_shm_meta_t meta; /* offset 0x40, 64 bytes */
|
||
cudaIpcEventHandle_t ipc_event_handle; /* offset 0x80, 64 bytes */
|
||
_Atomic uint64_t global_seq; /* offset 0xC0 */
|
||
_Atomic uint64_t subscriber_bitmap;
|
||
_Atomic uint64_t shutdown_flag;
|
||
uint8_t reserved_b[40];
|
||
/* offset 0x100 — variable-length tail */
|
||
cuframes_shm_slot_t slots[CUFRAMES_MAX_RING]; /* 192 × 16 = 3072 */
|
||
cuframes_shm_subscriber_t subscribers[CUFRAMES_MAX_SUBSCRIBERS]; /* 128 × 32 = 4096 */
|
||
/* v0.3 — per-slot CUDA event handles. Producer records event per publish;
|
||
* consumer waits event[slot_idx] specifically (не global ipc_event_handle
|
||
* который signals только для последнего published frame). Закрывает TOCTOU
|
||
* race в slot read. 64 × 16 = 1024 bytes. */
|
||
cudaIpcEventHandle_t slot_event_handles[CUFRAMES_MAX_RING];
|
||
} cuframes_shm_header_t;
|
||
|
||
/* Layout sanity checks (docs/protocol.md §2 table) */
|
||
_Static_assert(offsetof(cuframes_shm_header_t, magic) == 0x0000, "magic offset");
|
||
_Static_assert(offsetof(cuframes_shm_header_t, proto_version) == 0x0004, "proto_version offset");
|
||
_Static_assert(offsetof(cuframes_shm_header_t, producer_pid) == 0x0018, "producer_pid offset");
|
||
_Static_assert(offsetof(cuframes_shm_header_t, ring_size) == 0x0020, "ring_size offset");
|
||
_Static_assert(offsetof(cuframes_shm_header_t, meta) == 0x0040, "meta offset");
|
||
_Static_assert(offsetof(cuframes_shm_header_t, ipc_event_handle) == 0x0080, "event handle offset");
|
||
_Static_assert(offsetof(cuframes_shm_header_t, global_seq) == 0x00C0, "global_seq offset");
|
||
_Static_assert(offsetof(cuframes_shm_header_t, slots) == 0x0100, "slots offset");
|
||
|
||
/* ─── Packet ring shared memory layout (docs/protocol.md §10) ──────────── */
|
||
|
||
/* One entry of the packet ring's slot table. Packed, 64 bytes (enforced below). */
typedef struct __attribute__((packed)) cuframes_pkt_slot {
    _Atomic uint64_t seq;           /* UINT64_MAX = invalid */
    int64_t          pts_ns;
    int64_t          dts_ns;
    uint64_t         data_offset;   /* absolute byte cursor; % data_size = ring offset */
    uint32_t         data_size;
    uint32_t         flags;
    uint8_t          reserved[24];
} cuframes_pkt_slot_t;

_Static_assert(sizeof(struct cuframes_pkt_slot) == 64, "packet slot must be 64 bytes");
|
||
|
||
/* Packet ring header (fixed 0x1040 = 4160 bytes). Followed by slots[N] + data[]. */
|
||
typedef struct __attribute__((packed)) cuframes_pkt_header {
|
||
uint32_t magic; /* CUFRAMES_PKT_MAGIC */
|
||
uint32_t proto_version; /* 2 */
|
||
uint32_t ring_slots;
|
||
uint32_t data_size;
|
||
uint32_t codec_id; /* AV_CODEC_ID_H264 / HEVC / ... */
|
||
uint32_t codec_extradata_size; /* ≤ CUFRAMES_PKT_EXTRADATA_MAX */
|
||
uint64_t producer_pid;
|
||
_Atomic uint64_t global_seq;
|
||
_Atomic uint64_t last_keyframe_seq;
|
||
_Atomic uint64_t write_offset;
|
||
_Atomic uint64_t shutdown_flag;
|
||
uint8_t codec_extradata[CUFRAMES_PKT_EXTRADATA_MAX];
|
||
/* offset 0x1040 — slots[ring_slots], then data[data_size] */
|
||
} cuframes_pkt_header_t;
|
||
|
||
_Static_assert(offsetof(cuframes_pkt_header_t, magic) == 0x0000, "pkt magic offset");
|
||
_Static_assert(offsetof(cuframes_pkt_header_t, proto_version) == 0x0004, "pkt proto offset");
|
||
_Static_assert(offsetof(cuframes_pkt_header_t, producer_pid) == 0x0018, "pkt pid offset");
|
||
_Static_assert(offsetof(cuframes_pkt_header_t, global_seq) == 0x0020, "pkt global_seq offset");
|
||
_Static_assert(offsetof(cuframes_pkt_header_t, write_offset) == 0x0030, "pkt write_offset offset");
|
||
_Static_assert(offsetof(cuframes_pkt_header_t, codec_extradata) == 0x0040, "pkt extradata offset");
|
||
_Static_assert(sizeof(cuframes_pkt_header_t) == 0x1040, "pkt header must be 0x1040 bytes");
|
||
|
||
/* Computed SHM layout helper:
|
||
* total = sizeof(cuframes_pkt_header_t) + slots*sizeof(slot) + data_size
|
||
*/
|
||
static inline size_t cuframes_pkt_shm_size(uint32_t slots, uint32_t data_size) {
|
||
return sizeof(cuframes_pkt_header_t)
|
||
+ (size_t)slots * sizeof(cuframes_pkt_slot_t)
|
||
+ (size_t)data_size;
|
||
}
|
||
|
||
/* Pointers into mmap'ed pkt SHM (computed from header base) */
|
||
static inline cuframes_pkt_slot_t * cuframes_pkt_slots(cuframes_pkt_header_t *hdr) {
|
||
return (cuframes_pkt_slot_t *)((uint8_t *)hdr + sizeof(cuframes_pkt_header_t));
|
||
}
|
||
/* Address of the data area inside a mapped packet ring: it starts after the
 * fixed header and hdr->ring_slots slot entries. */
static inline uint8_t * cuframes_pkt_data(cuframes_pkt_header_t *hdr) {
    const size_t table_bytes = (size_t)hdr->ring_slots * sizeof(cuframes_pkt_slot_t);
    uint8_t *base = (uint8_t *)hdr;

    return base + sizeof(cuframes_pkt_header_t) + table_bytes;
}
|
||
|
||
/* Opaque ring handle — содержит state и mapping для publisher или subscriber. */
|
||
typedef struct cuframes_pkt_ring {
|
||
int shm_fd;
|
||
void *shm_base;
|
||
size_t shm_size;
|
||
cuframes_pkt_header_t *hdr;
|
||
char shm_name[128]; /* /cuframes-<key>-packets */
|
||
int is_publisher;
|
||
} cuframes_pkt_ring_t;
|
||
|
||
/* ─── Socket protocol messages (docs/protocol.md §3) ───────────────────── */
|
||
|
||
/* Message type codes of the socket protocol. */
#define CUFRAMES_MSG_HELLO_REQ       0x01
#define CUFRAMES_MSG_HELLO_RESP      0x02
#define CUFRAMES_MSG_SUBSCRIBE_REQ   0x03
#define CUFRAMES_MSG_SUBSCRIBE_RESP  0x04
#define CUFRAMES_MSG_UNSUBSCRIBE     0x10
#define CUFRAMES_MSG_EVENT_FD        0x20
#define CUFRAMES_MSG_SHUTDOWN        0x30
#define CUFRAMES_MSG_PING            0xF0
#define CUFRAMES_MSG_PONG            0xF1
#define CUFRAMES_MSG_ERROR           0xFE

/* Upper bound for a message payload, in bytes. */
#define CUFRAMES_MAX_MSG_PAYLOAD     4096
|
||
|
||
/* Fixed 8-byte prefix of a socket message: type code, then payload length. */
typedef struct __attribute__((packed)) cuframes_msg_header {
    uint32_t msg_type;
    uint32_t payload_length;
} cuframes_msg_header_t;
|
||
|
||
/* Fixed leading part of a HELLO request payload. */
typedef struct __attribute__((packed)) cuframes_msg_hello_req {
    uint32_t proto_version;
    uint32_t consumer_name_len;
    /* followed by the name bytes,
     * then int32_t cuda_device + uint32_t mode + uint8_t reserved[12] */
} cuframes_msg_hello_req_t;
|
||
|
||
typedef struct __attribute__((packed)) cuframes_msg_hello_resp {
|
||
int32_t result;
|
||
uint32_t proto_version_actual;
|
||
uint32_t ring_size;
|
||
uint32_t ownership_mode;
|
||
cuframes_shm_meta_t meta;
|
||
uint32_t shm_path_len;
|
||
/* followed by path bytes */
|
||
/* + uint8_t reserved[12] */
|
||
} cuframes_msg_hello_resp_t;
|
||
|
||
/* SUBSCRIBE response payload. Packed, so the fields occupy 28 bytes. */
typedef struct __attribute__((packed)) cuframes_msg_subscribe_resp {
    int32_t  result;
    uint32_t assigned_bit;
    uint64_t initial_seq;
    uint8_t  reserved[12];
} cuframes_msg_subscribe_resp_t;
|
||
|
||
/* ─── Logging (minimal — to stderr) ────────────────────────────────────── */
|
||
|
||
/* Error-level line on stderr: "[cuframes ERROR] <message>\n".
 * Expands to a single fprintf() call expression. */
#define CUFRAMES_LOG_ERROR(fmt, ...) \
    fprintf(stderr, "[cuframes ERROR] " fmt "\n", ##__VA_ARGS__)

/* Warning-level line on stderr: "[cuframes WARN] <message>\n".
 * Expands to a single fprintf() call expression. */
#define CUFRAMES_LOG_WARN(fmt, ...) \
    fprintf(stderr, "[cuframes WARN] " fmt "\n", ##__VA_ARGS__)

/* Info-level line on stderr: "[cuframes] <message>\n". Printed only when the
 * CUFRAMES_DEBUG environment variable is set; the variable is looked up on
 * every use. Expands to a statement (do/while), not an expression. */
#define CUFRAMES_LOG_INFO(fmt, ...)                                     \
    do {                                                                \
        if (getenv("CUFRAMES_DEBUG")) {                                 \
            fprintf(stderr, "[cuframes] " fmt "\n", ##__VA_ARGS__);     \
        }                                                               \
    } while (0)
|
||
|
||
/* ─── Internal helpers ────────────────────────────────────────────────── */
|
||
|
||
/* Build path /run/cuframes/<key>.sock — returns CUFRAMES_OK or INVALID_ARG */
|
||
int cuframes_internal_socket_path(const char *key, char *out, size_t out_size);
|
||
/* Build /cuframes-<key> (for shm_open) */
|
||
int cuframes_internal_shm_name(const char *key, char *out, size_t out_size);
|
||
/* Build /cuframes-<key>-packets (for shm_open) */
|
||
int cuframes_internal_pkt_shm_name(const char *key, char *out, size_t out_size);
|
||
/* Validate key per protocol.md (alphanum/_/-, 1..63 chars) */
|
||
int cuframes_internal_validate_key(const char *key);
|
||
/* Calculate frame size + pitch для format/W/H */
|
||
int cuframes_internal_calc_size(cuframes_format_t format, int32_t w, int32_t h,
|
||
size_t *size_out, int32_t *pitch_y_out, int32_t *pitch_uv_out);
|
||
/* Ensure /run/cuframes exists */
|
||
int cuframes_internal_ensure_runtime_dir(void);
|
||
/* Check if pid alive */
|
||
int cuframes_internal_pid_alive(pid_t pid);
|
||
|
||
/* TLV message helpers for the socket protocol.
 * Both return 0 on success or a negative cuframes_error_t. */
int cuframes_internal_send_msg(int sock_fd,
                               uint32_t msg_type,
                               const void *payload,
                               uint32_t payload_len);

int cuframes_internal_recv_msg(int sock_fd,
                               uint32_t *msg_type_out,
                               void *payload,
                               uint32_t *payload_len_inout,
                               int32_t timeout_ms);
|
||
|
||
/* ─── Packet ring helpers (libcuframes/src/packet_ring.c) ─────────────── */
|
||
|
||
/* Publisher: create SHM + initialize header + slots. Stale recovery как у frames. */
|
||
int cuframes_internal_pkt_ring_create(const char *key,
|
||
uint32_t slots,
|
||
uint32_t data_size,
|
||
uint32_t codec_id,
|
||
cuframes_pkt_ring_t *ring_out);
|
||
|
||
/* Publisher: set codec extradata (SPS/PPS). Must be called before first publish.
|
||
* Если size > CUFRAMES_PKT_EXTRADATA_MAX → ERR_INVALID_ARG. */
|
||
int cuframes_internal_pkt_ring_set_extradata(cuframes_pkt_ring_t *ring,
|
||
const void *extradata,
|
||
size_t size);
|
||
|
||
/* Publisher: publish single encoded packet. Slow consumer = overwrite oldest.
|
||
* Returns CUFRAMES_ERR_PACKET_OVERSIZED если size > data_size. */
|
||
int cuframes_internal_pkt_ring_publish(cuframes_pkt_ring_t *ring,
|
||
const void *data, size_t size,
|
||
int64_t pts_ns, int64_t dts_ns,
|
||
uint32_t flags);
|
||
|
||
/* Subscriber: open existing SHM by shm name (from HELLO_RESP packet_shm_path). */
|
||
int cuframes_internal_pkt_ring_open(const char *shm_name,
|
||
cuframes_pkt_ring_t *ring_out);
|
||
|
||
/* Subscriber: read next packet.
|
||
* *seq_inout — currently held seq (we read seq_inout+1); updated on success.
|
||
* out_buf must have ≥ max_packet_size bytes; out_size receives actual size.
|
||
* Returns:
|
||
* CUFRAMES_OK on success
|
||
* CUFRAMES_ERR_PACKET_OVERRUN если publisher уехал — caller resync on keyframe
|
||
* CUFRAMES_ERR_TIMEOUT если нет нового packet
|
||
* CUFRAMES_ERR_DISCONNECTED если publisher shutdown */
|
||
int cuframes_internal_pkt_ring_read(cuframes_pkt_ring_t *ring,
|
||
uint64_t *seq_inout,
|
||
void *out_buf, size_t out_buf_max,
|
||
size_t *out_size,
|
||
int64_t *out_pts, int64_t *out_dts,
|
||
uint32_t *out_flags);
|
||
|
||
/* Publisher OR Subscriber: cleanup mmap + close FD. Publisher additionally shm_unlink. */
|
||
void cuframes_internal_pkt_ring_destroy(cuframes_pkt_ring_t *ring);
|
||
|
||
#endif /* CUFRAMES_INTERNAL_H */
|