4862247fe2
Заменяет cudaMalloc + cudaIpcGetMemHandle на cuMemCreate (VMM) +
cuMemExportToShareableHandle(POSIX_FILE_DESCRIPTOR). FDs передаются consumer'у
через sendmsg(SCM_RIGHTS) в handshake. Frigate (s6-overlay не даёт share PID)
и любой другой consumer работают БЕЗ pid namespace share — только volume mount
unix socket'a /run/cuframes и IPC share для /dev/shm header.
Sync: cudaEventRecord+IPC events → cuStreamSynchronize в do_publish.
Producer ждёт ~1 ms, пока stream не flush'нется, и только потом делает atomic_store(seq).
Consumer читает seq через memory_order_acquire и копирует DtoD без
event wait — HW coherence гарантирована на одном GPU.
ABI break (согласован с user'ом):
- magic 0xCC7C1DCC → 0xCC7C1DCE (старые consumers fail cleanly)
- protocol V3 → V4
- libcuframes.so.0 SOVERSION остаётся, но .so.0.3.0 → .so.0.4.0
- EXTERNAL ownership убран (VMM требует cuMemCreate-allocated memory,
нельзя export'нуть произвольный cudaMalloc-pointer как POSIX FD)
- cuframes-rtsp-source переведён на LIBRARY mode + один D2D memcpy
в acquire'нутый slot (overhead малый — публишер всё равно делал такой
D2D из FFmpeg hwframe pool в EXTERNAL pool раньше)
Размер: granularity 2 MB на 5090 → NV12 1920×1080 (~3.1 MB) округляется до
4 MB, +1 MB на slot × 16 × 4 камеры = +64 MB VRAM. Терпимо.
Packet ring (cuframes_packets://) НЕ затронут — отдельный SHM с своим
magic, работает как раньше.
PoC + smoke в spike/:
- vmm_fd_pingpong/ — minimal cuMemCreate+FD round-trip
- smoke_v04/ — full publisher+subscriber, 100/100 frames без pid share
Base image: Dockerfile.runtime → CUDA 12.4 (был 13.0). Matching prod
pipeline + Frigate base, иначе libcudart conflict при load.
Compose stack (localhost-infra repo) — параллельный commit:
- убран pid: container:cuframes-pub-parking из subscribers
- image теги: gx/cuframes:0.4, gx/cuda-grid-pipeline:phase8,
gx/frigate:cuframes-v0.4
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
104 lines
3.5 KiB
C
104 lines
3.5 KiB
C
#include "common.h"
|
||
#include <errno.h>
|
||
#include <string.h>
|
||
#include <sys/socket.h>
|
||
#include <sys/un.h>
|
||
#include <unistd.h>
|
||
|
||
/* Send fd через SCM_RIGHTS вместе с (uint64_t size, uint8_t magic) payload. */
|
||
static int send_fd(int sock, int fd, uint64_t size, uint8_t magic) {
|
||
struct msghdr msg = {0};
|
||
char ctrl[CMSG_SPACE(sizeof(int))];
|
||
struct iovec iov[2];
|
||
iov[0].iov_base = &size; iov[0].iov_len = sizeof(size);
|
||
iov[1].iov_base = &magic; iov[1].iov_len = sizeof(magic);
|
||
msg.msg_iov = iov; msg.msg_iovlen = 2;
|
||
msg.msg_control = ctrl; msg.msg_controllen = sizeof(ctrl);
|
||
struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
|
||
cmsg->cmsg_level = SOL_SOCKET;
|
||
cmsg->cmsg_type = SCM_RIGHTS;
|
||
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
|
||
memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
|
||
ssize_t n = sendmsg(sock, &msg, 0);
|
||
if (n < 0) { perror("sendmsg"); return -1; }
|
||
return 0;
|
||
}
|
||
|
||
int main(void) {
|
||
CHECK(cuInit(0));
|
||
CUdevice dev;
|
||
CHECK(cuDeviceGet(&dev, 0));
|
||
CUcontext ctx;
|
||
CHECK(cuCtxCreate(&ctx, 0, dev));
|
||
|
||
CUmemAllocationProp prop = {0};
|
||
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
|
||
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
|
||
prop.location.id = dev;
|
||
prop.requestedHandleTypes = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
|
||
|
||
size_t granularity = 0;
|
||
CHECK(cuMemGetAllocationGranularity(&granularity, &prop,
|
||
CU_MEM_ALLOC_GRANULARITY_MINIMUM));
|
||
fprintf(stderr, "producer: granularity=%zu\n", granularity);
|
||
|
||
size_t size = ((POOL_SIZE + granularity - 1) / granularity) * granularity;
|
||
fprintf(stderr, "producer: alloc size=%zu\n", size);
|
||
|
||
CUmemGenericAllocationHandle mem;
|
||
CHECK(cuMemCreate(&mem, size, &prop, 0));
|
||
|
||
CUdeviceptr ptr;
|
||
CHECK(cuMemAddressReserve(&ptr, size, 0, 0, 0));
|
||
CHECK(cuMemMap(ptr, size, 0, mem, 0));
|
||
|
||
CUmemAccessDesc access = {0};
|
||
access.location = prop.location;
|
||
access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
|
||
CHECK(cuMemSetAccess(ptr, size, &access, 1));
|
||
|
||
/* Fill with MAGIC pattern */
|
||
CHECK(cuMemsetD8(ptr, MAGIC_BYTE, size));
|
||
CHECK(cuCtxSynchronize());
|
||
|
||
int fd;
|
||
CHECK(cuMemExportToShareableHandle(&fd, mem,
|
||
CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0));
|
||
fprintf(stderr, "producer: exported fd=%d for handle\n", fd);
|
||
|
||
/* Unix socket server */
|
||
unlink(SOCK_PATH);
|
||
int srv = socket(AF_UNIX, SOCK_STREAM, 0);
|
||
if (srv < 0) { perror("socket"); return 1; }
|
||
struct sockaddr_un sa = {.sun_family = AF_UNIX};
|
||
strncpy(sa.sun_path, SOCK_PATH, sizeof(sa.sun_path) - 1);
|
||
if (bind(srv, (struct sockaddr *)&sa, sizeof(sa)) < 0) { perror("bind"); return 1; }
|
||
if (listen(srv, 1) < 0) { perror("listen"); return 1; }
|
||
|
||
fprintf(stderr, "producer: listening on %s, awaiting consumer...\n", SOCK_PATH);
|
||
int cli = accept(srv, NULL, NULL);
|
||
if (cli < 0) { perror("accept"); return 1; }
|
||
|
||
if (send_fd(cli, fd, (uint64_t)size, MAGIC_BYTE) < 0) return 1;
|
||
fprintf(stderr, "producer: sent fd + size=%zu + magic=0x%02x\n",
|
||
size, MAGIC_BYTE);
|
||
|
||
/* Wait for consumer ACK */
|
||
char ack;
|
||
if (read(cli, &ack, 1) != 1) { perror("read ack"); return 1; }
|
||
fprintf(stderr, "producer: got ACK=0x%02x\n", (unsigned char)ack);
|
||
|
||
close(cli);
|
||
close(srv);
|
||
unlink(SOCK_PATH);
|
||
close(fd);
|
||
|
||
CHECK(cuMemUnmap(ptr, size));
|
||
CHECK(cuMemAddressFree(ptr, size));
|
||
CHECK(cuMemRelease(mem));
|
||
CHECK(cuCtxDestroy(ctx));
|
||
|
||
fprintf(stderr, "producer: done\n");
|
||
return ack == 'O' ? 0 : 1;
|
||
}
|