Files
cuframes/spike/vmm_fd_pingpong/producer.c
T
gx 4862247fe2
build / cmake build (CUDA 12.4, Ubuntu 22.04) (push) Successful in 1m46s
build / ffmpeg filter patch (out-of-tree) (push) Failing after 1m30s
v0.4: VMM + POSIX FD — namespace decoupling (no pid share required)
Заменяет cudaMalloc + cudaIpcGetMemHandle на cuMemCreate (VMM) +
cuMemExportToShareableHandle(POSIX_FILE_DESCRIPTOR). FDs передаются consumer'у
через sendmsg(SCM_RIGHTS) в handshake. Frigate (s6-overlay не даёт share PID)
и любой другой consumer работают БЕЗ pid namespace share — только volume mount
unix socket'a /run/cuframes и IPC share для /dev/shm header.

Sync: cudaEventRecord+IPC events → cuStreamSynchronize в do_publish.
Producer ждёт ~1 ms, пока stream не flush'нется, и только потом делает atomic_store(seq).
Consumer читает seq через memory_order_acquire и копирует DtoD без
event wait — HW coherence гарантирована на одном GPU.

ABI break (согласован с user'ом):
  - magic 0xCC7C1DCC → 0xCC7C1DCE (старые consumers fail cleanly)
  - protocol V3 → V4
  - libcuframes.so.0 SOVERSION остаётся, но .so.0.3.0 → .so.0.4.0
  - EXTERNAL ownership убран (VMM требует cuMemCreate-allocated memory,
    нельзя export'нуть произвольный cudaMalloc-pointer как POSIX FD)
  - cuframes-rtsp-source переведён на LIBRARY mode + один D2D memcpy
    в acquire'нутый slot (overhead малый — публишер всё равно делал такой
    D2D из FFmpeg hwframe pool в EXTERNAL pool раньше)

Размер: granularity 2 MB на 5090 → NV12 1920×1080 (~3.1 MB) округляется до
4 MB, +1 MB на slot × 16 × 4 камеры = +64 MB VRAM. Терпимо.

Packet ring (cuframes_packets://) НЕ затронут — отдельный SHM с своим
magic, работает как раньше.

PoC + smoke в spike/:
  - vmm_fd_pingpong/ — minimal cuMemCreate+FD round-trip
  - smoke_v04/ — full publisher+subscriber, 100/100 frames без pid share

Base image: Dockerfile.runtime → CUDA 12.4 (был 13.0). Matching prod
pipeline + Frigate base, иначе libcudart conflict при load.

Compose stack (localhost-infra repo) — параллельный commit:
  - убран pid: container:cuframes-pub-parking из subscribers
  - image теги: gx/cuframes:0.4, gx/cuda-grid-pipeline:phase8,
    gx/frigate:cuframes-v0.4

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-25 20:13:31 +01:00

104 lines
3.5 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "common.h"
#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
/*
 * Send one file descriptor over a Unix-domain socket using SCM_RIGHTS.
 *
 * The ordinary payload that accompanies the descriptor is `size` (8 bytes,
 * host byte order) immediately followed by `magic` (1 byte).
 *
 * sock  - connected Unix-domain socket to send on
 * fd    - descriptor to pass to the peer
 * size  - first payload field
 * magic - second payload field
 *
 * Returns 0 when the complete payload was sent, -1 otherwise (a diagnostic
 * is written to stderr).
 */
static int send_fd(int sock, int fd, uint64_t size, uint8_t magic) {
    /*
     * The union forces the control buffer to be aligned for struct cmsghdr,
     * which CMSG_FIRSTHDR/CMSG_DATA rely on; a plain char array gives no
     * such guarantee.
     */
    union {
        struct cmsghdr align;
        char buf[CMSG_SPACE(sizeof(int))];
    } ctrl;
    /* Zero the buffer so padding bytes handed to the kernel are defined. */
    memset(&ctrl, 0, sizeof(ctrl));

    struct iovec iov[2];
    iov[0].iov_base = &size;
    iov[0].iov_len = sizeof(size);
    iov[1].iov_base = &magic;
    iov[1].iov_len = sizeof(magic);

    struct msghdr msg;
    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;
    msg.msg_control = ctrl.buf;
    msg.msg_controllen = sizeof(ctrl.buf);

    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(sizeof(int));
    memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

    const ssize_t expected = (ssize_t)(sizeof(size) + sizeof(magic));
    ssize_t sent;
    do {
        sent = sendmsg(sock, &msg, 0);
    } while (sent < 0 && errno == EINTR); /* interrupted before any data moved: retry */

    if (sent < 0) {
        perror("sendmsg");
        return -1;
    }
    if (sent != expected) {
        /* The peer would see a truncated header; report it instead of claiming success. */
        fprintf(stderr, "sendmsg: short send (%ld of %ld bytes)\n",
                (long)sent, (long)expected);
        return -1;
    }
    return 0;
}
/*
 * Producer half of the VMM + POSIX file-descriptor ping-pong spike.
 *
 * Steps:
 *   1. Create a CUDA context on device 0 and allocate POOL_SIZE bytes
 *      (rounded up to the minimum allocation granularity) with cuMemCreate,
 *      requesting the POSIX-file-descriptor shareable handle type.
 *   2. Map the allocation, fill it with MAGIC_BYTE and export it as an fd.
 *   3. Listen on the Unix socket SOCK_PATH, accept one consumer and send it
 *      the fd together with the allocation size and the magic byte.
 *   4. Wait for a one-byte ACK, then tear everything down.
 *
 * Returns 0 only when the consumer acknowledges with 'O'; a socket failure
 * or any other ACK byte yields 1. CUDA call failures are handled by the
 * CHECK macro, which is defined outside this file (presumably in common.h).
 */
int main(void) {
    CHECK(cuInit(0));
    CUdevice dev;
    CHECK(cuDeviceGet(&dev, 0));
    CUcontext ctx;
    CHECK(cuCtxCreate(&ctx, 0, dev));

    /* Describe a device-resident allocation that can be exported as a POSIX fd. */
    CUmemAllocationProp prop = {0};
    prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id = dev;
    prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;

    size_t granularity = 0;
    CHECK(cuMemGetAllocationGranularity(&granularity, &prop,
                                        CU_MEM_ALLOC_GRANULARITY_MINIMUM));
    fprintf(stderr, "producer: granularity=%zu\n", granularity);

    /* Round POOL_SIZE up to a whole number of granules. */
    size_t size = ((POOL_SIZE + granularity - 1) / granularity) * granularity;
    fprintf(stderr, "producer: alloc size=%zu\n", size);

    CUmemGenericAllocationHandle mem;
    CHECK(cuMemCreate(&mem, size, &prop, 0));

    /* Reserve a virtual address range, back it with the allocation and
     * grant the allocating device read/write access to it. */
    CUdeviceptr ptr;
    CHECK(cuMemAddressReserve(&ptr, size, 0, 0, 0));
    CHECK(cuMemMap(ptr, size, 0, mem, 0));
    CUmemAccessDesc access = {0};
    access.location = prop.location;
    access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    CHECK(cuMemSetAccess(ptr, size, &access, 1));

    /* Fill with the MAGIC_BYTE pattern and wait for the fill to complete
     * before the handle is given to anybody else. */
    CHECK(cuMemsetD8(ptr, MAGIC_BYTE, size));
    CHECK(cuCtxSynchronize());

    int fd;
    CHECK(cuMemExportToShareableHandle(&fd, mem,
                                       CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0));
    fprintf(stderr, "producer: exported fd=%d for handle\n", fd);

    /* Unix socket server */
    struct sockaddr_un sa = {.sun_family = AF_UNIX};
    size_t path_len = strlen(SOCK_PATH);
    if (path_len >= sizeof(sa.sun_path)) {
        /* Refuse rather than bind a silently truncated path that would not
         * match what is logged and unlinked. */
        fprintf(stderr, "producer: socket path too long: %s\n", SOCK_PATH);
        return 1;
    }
    memcpy(sa.sun_path, SOCK_PATH, path_len + 1);

    unlink(SOCK_PATH); /* remove a stale socket file left by a previous run */
    int srv = socket(AF_UNIX, SOCK_STREAM, 0);
    if (srv < 0) { perror("socket"); return 1; }
    if (bind(srv, (struct sockaddr *)&sa, sizeof(sa)) < 0) { perror("bind"); return 1; }
    if (listen(srv, 1) < 0) { perror("listen"); return 1; }
    fprintf(stderr, "producer: listening on %s, awaiting consumer...\n", SOCK_PATH);

    int cli = accept(srv, NULL, NULL);
    if (cli < 0) { perror("accept"); return 1; }

    if (send_fd(cli, fd, (uint64_t)size, MAGIC_BYTE) < 0) return 1;
    fprintf(stderr, "producer: sent fd + size=%zu + magic=0x%02x\n",
            size, MAGIC_BYTE);

    /* Wait for consumer ACK */
    char ack = 0;
    ssize_t got;
    do {
        got = read(cli, &ack, 1);
    } while (got < 0 && errno == EINTR);
    if (got < 0) { perror("read ack"); return 1; }
    if (got == 0) {
        /* EOF is not an errno-reporting failure, so perror would mislead here. */
        fprintf(stderr, "producer: consumer closed connection before ACK\n");
        return 1;
    }
    fprintf(stderr, "producer: got ACK=0x%02x\n", (unsigned char)ack);

    /* Teardown: sockets first, then the exported fd and the CUDA objects. */
    close(cli);
    close(srv);
    unlink(SOCK_PATH);
    close(fd);
    CHECK(cuMemUnmap(ptr, size));
    CHECK(cuMemAddressFree(ptr, size));
    CHECK(cuMemRelease(mem));
    CHECK(cuCtxDestroy(ctx));
    fprintf(stderr, "producer: done\n");
    return ack == 'O' ? 0 : 1;
}