commit ba68550f4c55c48310f54e8a41acfa01b0391a5d Author: Evgeny Demchenko Date: Wed Jun 3 04:28:33 2026 +0100 Phase 1: NVENC через dlopen + источник через cuframes_subscriber Скелет проекта cuframes-composer (LGPL-2.1+) и MVP кодирования одного источника в файл H.264. Что включает Phase 1: - LICENSE (LGPL-2.1+), README с поэтапным планом, корневой CMake - Подмодули: cuframes v0.4 (pinned), nv-codec-headers (n12.2.72.0) - include/cuframes_composer/source.h — публичный API источника с явной машиной состояний (DISCONNECTED → CONNECTING → ACTIVE → STALE → DEAD) и snapshot-паттерном для чтения без блокировки - include/cuframes_composer/nvenc.h — публичный API кодировщика на CUdeviceptr-вход (zero-copy через VMM-mapped указатели) - src/nvenc_loader.{h,c} — dlopen libnvidia-encode.so.1 и инициализация таблицы функций NVENC через NvEncodeAPICreateInstance. Идёт через pthread_once. Сделано отдельно чтобы держать LGPL-совместимость: проприетарный SDK не статически линкуется - src/nvenc.c — обвязка над NVENC: open session, init encoder, кеш registered resources, encode/lock/unlock, flush с EOS, поддержка H.264 CBR low-latency, preset GUID p1/p4/p7 - src/source.c — обвязка над cuframes_subscriber c фоновым потоком, exponential backoff reconnect (1с → 30с), и переходами по таймаутам для stale/dead-детекта - examples/simple_record — smoke-test программа: подписка на cuframes, кодирование, запись в .h264 файл, корректное завершение по SIGINT diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9ac3c0d --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Build outputs +build/ +build-*/ +cmake-build-*/ +*.o +*.so +*.a + +# IDE +.vscode/ +.idea/ +.cache/ +compile_commands.json + +# Editor backups +*~ +.*.swp + +# Local config +.env +.env.local + +# Coverage / profiling +*.gcda +*.gcno +*.gcov +callgrind.out.* +perf.data* + +# Generated docs +docs/_build/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..4a277a3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "third_party/cuframes"] + path = third_party/cuframes + url = /home/claude/projects/cuframes +[submodule "third_party/nv-codec-headers"] + path = third_party/nv-codec-headers + url = https://github.com/FFmpeg/nv-codec-headers.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..f1ae01c --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,75 @@ +cmake_minimum_required(VERSION 3.20) +project(cuframes-composer + VERSION 0.1.0 + DESCRIPTION "Multi-source video grid composer на CUDA + NVENC + RTSP" + LANGUAGES C +) + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +# ── Опции сборки ──────────────────────────────────────────────────────── +option(BUILD_EXAMPLES "Сборка smoke-test программ из examples/" ON) +option(BUILD_TESTS "Сборка модульных и интеграционных тестов" OFF) + +# ── Зависимости ───────────────────────────────────────────────────────── + +# CUDA Toolkit — для cudart, заголовков cuda.h (Driver API), nvcc. +# NVENC SDK сам идёт через nv-codec-headers + dlopen libnvidia-encode.so, +# поэтому CUDA::nvenc намеренно НЕ линкуем (см. дизайн-документ часть 1.6). +find_package(CUDAToolkit 12.0 REQUIRED) +find_package(Threads REQUIRED) + +# dl — для dlopen libnvidia-encode.so в runtime +find_library(LIBDL_LIBRARY dl REQUIRED) + +# ── Сторонние библиотеки (subomodules в third_party/) ─────────────────── + +# cuframes — статически линкуем libcuframes. cuframes_static — это static lib +# который определён в third_party/cuframes/libcuframes/CMakeLists.txt. +# PIC обязателен — cuframes_static линкуется в наш SHARED libcuframes_composer. +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +set(BUILD_TESTING OFF CACHE BOOL "" FORCE) +set(BUILD_EXAMPLES OFF CACHE BOOL "Cuframes examples" FORCE) +set(BUILD_TOOLS OFF CACHE BOOL "Cuframes tools" FORCE) +set(BUILD_FFMPEG_FILTER OFF CACHE BOOL "" FORCE) +set(BUILD_PYTHON_BINDINGS OFF CACHE BOOL "" FORCE) +add_subdirectory(third_party/cuframes) +# Восстанавливаем BUILD_EXAMPLES для наших собственных examples/ +set(BUILD_EXAMPLES ON CACHE BOOL "" FORCE) + +# nv-codec-headers — header-only, нужны заголовки nvEncodeAPI.h +# (NVENC) и cuviddec.h (NVDEC, на будущее). Не как target, просто include path. +set(NVCODEC_HEADERS_DIR + "${CMAKE_CURRENT_SOURCE_DIR}/third_party/nv-codec-headers/include") +if(NOT EXISTS "${NVCODEC_HEADERS_DIR}/ffnvcodec/nvEncodeAPI.h") + message(FATAL_ERROR + "nv-codec-headers заголовки не найдены в ${NVCODEC_HEADERS_DIR}/ffnvcodec/. " + "Выполни: git submodule update --init --recursive") +endif() + +# ── Подпроекты ────────────────────────────────────────────────────────── + +# src/ — основная библиотека композитора +add_subdirectory(src) + +# examples/ — smoke-test программы (по фазам разработки) +if(BUILD_EXAMPLES) + add_subdirectory(examples) +endif() + +# ── Сводка конфигурации ───────────────────────────────────────────────── +message(STATUS "") +message(STATUS "cuframes-composer ${PROJECT_VERSION} конфигурация:") +message(STATUS " Build type: ${CMAKE_BUILD_TYPE}") +message(STATUS " CUDA Toolkit: ${CUDAToolkit_VERSION}") +message(STATUS " CUDA include: ${CUDAToolkit_INCLUDE_DIRS}") +message(STATUS " nv-codec-headers: ${NVCODEC_HEADERS_DIR}") +message(STATUS " BUILD_EXAMPLES: ${BUILD_EXAMPLES}") +message(STATUS " BUILD_TESTS: ${BUILD_TESTS}") +message(STATUS "") diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f6683e7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,501 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Moe Ghoul, President of Vice + +That's all there is to it! diff --git a/README.md b/README.md new file mode 100644 index 0000000..06e49e7 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# cuframes-composer + +Стандалонный композитор-демон для multi-source видео grid через CUDA + NVENC + RTSP. + +Заменяет монолитный ffmpeg-конвейер (`ffmpeg + vf_cuda_grid` фильтр) для случаев, когда нужно: + +- Поток продолжает работать при потере любого числа источников (graceful degradation) +- Композитор сам управляет частотой кадров и обработкой ошибок без зависимости от семантики ffmpeg-демухера +- Минимум перемещений данных: zero-copy CUDA от источника `cuframes` напрямую в NVENC + +## Статус + +**Phase 1 — MVP.** В разработке. Не для боевой эксплуатации. + +См. [дизайн-документ](https://git.goldix.org/gx/cuframes/raw/branch/main/docs/DESIGN-composer-daemon.md) для архитектурных решений и поэтапного плана. + +## Зависимости + +- [cuframes](https://git.goldix.org/gx/cuframes) — библиотека zero-copy передачи кадров. Подключена как git submodule. +- [nv-codec-headers](https://github.com/FFmpeg/nv-codec-headers) — MIT-licensed заголовки NVENC API. Подключена как git submodule. Сама библиотека `libnvidia-encode.so` грузится через `dlopen` при старте (это даёт LGPL-совместимость — см. дизайн-документ часть 1.6). +- CUDA Toolkit 12.x+ (для cuda runtime и компиляции) +- NVIDIA драйвер 525+ (для NVENC и `cuMemCreate` POSIX FD) +- Linux 64-bit (POSIX shm, SCM_RIGHTS) + +Дополнительно по фазам: +- Phase 3: `libfreetype` (текст), `lodepng` через submodule (PNG-декодирование) +- Phase 4: `libzmq` (управление) + +## Сборка + +```bash +git clone --recursive git@git.goldix.org:gx/cuframes-composer.git +cd cuframes-composer +cmake -B build -G Ninja +ninja -C build +``` + +## Поэтапный план + +| фаза | срок | результат | +|---|---|---| +| 1 | 1 неделя | один источник → NVENC → файл .h264 (доказательство zero-copy) | +| 2 | 2 недели | четыре источника + композиция через `libcugrid` | +| 3 | 2 недели | оверлеи + RTSP push к mediamtx + AAC passthrough из `/live-audio` | +| 4 | 1 неделя | паритет ZMQ-управления с фильтром `vf_cuda_grid` | +| 5 | 1 неделя | боевое развёртывание + MQTT health + watchdog | +| 6 | 2 недели | тесты + бенчмарки + документация | + +Итого ~9 недель для одного разработчика. + +## Лицензия + +LGPL-2.1-or-later. См. файл [LICENSE](LICENSE). + +NVENC SDK headers (`third_party/nv-codec-headers`) — MIT license, совместима с LGPL. diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000..9f40164 --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,12 @@ +# Smoke-test программы — по одной на каждую фазу разработки. +# +# Phase 1: simple_record — подключиться к одному cuframes публишеру, +# закодировать через NVENC, записать H.264 в файл. Доказывает что +# zero-copy NVENC из VMM-памяти cuframes работает. +# +# Phase 2: composer_4_to_file — 4 источника + композиция → файл. +# Phase 3: composer_rtsp — 4 источника + композиция → RTSP push. + +add_executable(simple_record simple_record.c) +target_link_libraries(simple_record PRIVATE cuframes_composer_static) +target_include_directories(simple_record PRIVATE ${CMAKE_SOURCE_DIR}/include) diff --git a/examples/simple_record.c b/examples/simple_record.c new file mode 100644 index 0000000..3e5af66 --- /dev/null +++ b/examples/simple_record.c @@ -0,0 +1,270 @@ +/* simple_record — Phase 1 smoke test. + * + * Подписывается на один cuframes-источник, кодирует каждый кадр через + * NVENC и пишет H.264 Annex-B byte stream в файл. Завершение по SIGINT. + * + * Цель — доказать end-to-end zero-copy: NV12 frame из VMM-памяти publisher'а + * напрямую попадает в NVENC без промежуточных копий через CPU. + * + * Использование: + * simple_record --key cam-parking --out test.h264 [--fps 25] [--bitrate 5000] + * + * Полученный файл проверяется через ffmpeg: + * ffprobe test.h264 → должно показать h264 stream + * ffmpeg -i test.h264 ... → должен декодироваться + * + * Лицензия: LGPL-2.1+ + */ + +#include "../include/cuframes_composer/nvenc.h" +#include "../include/cuframes_composer/source.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static volatile sig_atomic_t g_stop = 0; + +static void on_sigint(int sig) +{ + (void)sig; + g_stop = 1; +} + +/* user-data, передаваемая encoder callback'у */ +typedef struct write_ctx { + FILE *fp; + uint64_t bytes_written; + uint64_t frames_encoded; + uint64_t idr_count; +} write_ctx_t; + +/* Encoder callback — пишем H.264 Annex-B bytes как есть. */ +static void on_bitstream(const uint8_t *bs, size_t size, int64_t pts_ns, + int is_idr, void *user) +{ + (void)pts_ns; + write_ctx_t *ctx = (write_ctx_t *)user; + if (fwrite(bs, 1, size, ctx->fp) != size) { + fprintf(stderr, "[simple_record] fwrite failed: %s\n", strerror(errno)); + } else { + ctx->bytes_written += size; + ctx->frames_encoded++; + if (is_idr) ctx->idr_count++; + } +} + +static const char *cu_err(CUresult r) +{ + const char *s = NULL; + cuGetErrorString(r, &s); + return s ? s : "unknown"; +} + +#define CUCHECK(expr) do { \ + CUresult _r = (expr); \ + if (_r != CUDA_SUCCESS) { \ + fprintf(stderr, "[simple_record] %s failed: %s\n", #expr, cu_err(_r)); \ + return 1; \ + } \ +} while (0) + +int main(int argc, char **argv) +{ + const char *key = NULL; + const char *out_path = NULL; + int fps = 25; + int bitrate_kbps = 5000; + int max_seconds = 0; /* 0 = до SIGINT */ + + static struct option opts[] = { + {"key", required_argument, 0, 'k'}, + {"out", required_argument, 0, 'o'}, + {"fps", required_argument, 0, 'f'}, + {"bitrate", required_argument, 0, 'b'}, + {"seconds", required_argument, 0, 's'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0}, + }; + int c; + while ((c = getopt_long(argc, argv, "k:o:f:b:s:h", opts, NULL)) != -1) { + switch (c) { + case 'k': key = optarg; break; + case 'o': out_path = optarg; break; + case 'f': fps = atoi(optarg); break; + case 'b': bitrate_kbps = atoi(optarg); break; + case 's': max_seconds = atoi(optarg); break; + case 'h': + default: + fprintf(stderr, + "Использование: %s --key --out \n" + " [--fps 25] [--bitrate 5000] [--seconds N]\n", + argv[0]); + return c == 'h' ? 0 : 1; + } + } + if (!key || !out_path) { + fprintf(stderr, "[simple_record] требуются --key и --out\n"); + return 1; + } + + signal(SIGINT, on_sigint); + signal(SIGTERM, on_sigint); + + /* 1) CUDA primary context на устройстве 0. cuframes-subscriber и NVENC + * оба должны работать в одном контексте. */ + CUCHECK(cuInit(0)); + CUdevice dev; + CUCHECK(cuDeviceGet(&dev, 0)); + CUcontext ctx; + CUCHECK(cuDevicePrimaryCtxRetain(&ctx, dev)); + CUCHECK(cuCtxPushCurrent(ctx)); + + /* 2) Открыть source. */ + cfc_source_config_t scfg = { + .key = key, + .consumer_name = "simple_record", + .cuda_device = 0, + }; + cfc_source_t *src = NULL; + if (cfc_source_create(&scfg, &src) != 0) { + fprintf(stderr, "[simple_record] cfc_source_create failed\n"); + return 1; + } + + /* 3) Ждать первый кадр чтобы узнать размер. До 30 секунд. */ + cfc_source_snapshot_t snap = { 0 }; + int waited_ms = 0; + while (!g_stop && waited_ms < 30000) { + cfc_source_get_latest(src, &snap); + if (snap.state == CFC_SOURCE_ACTIVE && snap.width > 0) break; + struct timespec ts = {.tv_sec = 0, .tv_nsec = 50 * 1000 * 1000}; + nanosleep(&ts, NULL); + waited_ms += 50; + } + if (g_stop) goto cleanup_src; + if (snap.width <= 0 || snap.height <= 0) { + fprintf(stderr, + "[simple_record] не дождался первого кадра за 30с (state=%d)\n", + snap.state); + goto cleanup_src; + } + fprintf(stderr, + "[simple_record] первый кадр: %dx%d pitch=%d → создаю encoder\n", + snap.width, snap.height, snap.pitch_y); + + /* 4) Создать encoder под полученный размер. */ + cfc_encoder_config_t ecfg = { + .cuda_ctx = ctx, + .width = snap.width, + .height = snap.height, + .fps_num = fps, + .fps_den = 1, + .bitrate_kbps = bitrate_kbps, + .gop_size = fps, /* IDR раз в секунду — стандарт для RTSP */ + .num_b_frames = 0, /* low-latency: B-кадры мешают */ + .preset = "ll", + }; + cfc_encoder_t *enc = NULL; + if (cfc_encoder_create(&ecfg, &enc) != 0) { + fprintf(stderr, "[simple_record] cfc_encoder_create failed\n"); + goto cleanup_src; + } + + /* 5) Открыть выходной файл. */ + write_ctx_t wctx = { 0 }; + wctx.fp = fopen(out_path, "wb"); + if (!wctx.fp) { + fprintf(stderr, "[simple_record] fopen(%s) failed: %s\n", + out_path, strerror(errno)); + goto cleanup_enc; + } + + /* 6) Главный цикл — забираем кадры по seq, кодируем. */ + uint64_t last_seq = 0; + int64_t start_us; + struct timespec ts_start; + clock_gettime(CLOCK_MONOTONIC, &ts_start); + start_us = (int64_t)ts_start.tv_sec * 1000000 + ts_start.tv_nsec / 1000; + + fprintf(stderr, "[simple_record] начало записи в %s (Ctrl+C для остановки)\n", + out_path); + + while (!g_stop) { + cfc_source_get_latest(src, &snap); + if (snap.state != CFC_SOURCE_ACTIVE) { + /* Источник не active — короткий sleep и снова. */ + struct timespec ts = {.tv_sec = 0, .tv_nsec = 10 * 1000 * 1000}; + nanosleep(&ts, NULL); + continue; + } + if (snap.seq == last_seq) { + /* Тот же кадр что и был — ждём новый. Спим четверть кадрового + * интервала чтобы не крутить CPU впустую. */ + int sleep_ns = 1000000000 / fps / 4; + struct timespec ts = {.tv_sec = 0, .tv_nsec = sleep_ns}; + nanosleep(&ts, NULL); + continue; + } + last_seq = snap.seq; + + if (cfc_encoder_encode_frame(enc, snap.ptr, snap.pitch_y, + snap.pts_ns, on_bitstream, &wctx) != 0) { + fprintf(stderr, "[simple_record] encode_frame failed\n"); + break; + } + + /* Прогресс каждые 100 кадров. */ + if (wctx.frames_encoded > 0 && wctx.frames_encoded % 100 == 0) { + struct timespec now; + clock_gettime(CLOCK_MONOTONIC, &now); + int64_t now_us = (int64_t)now.tv_sec * 1000000 + now.tv_nsec / 1000; + double elapsed_s = (now_us - start_us) / 1e6; + fprintf(stderr, + "[simple_record] %llu кадров, %llu IDR, %.1f МБ за %.1fс (%.1f fps)\n", + (unsigned long long)wctx.frames_encoded, + (unsigned long long)wctx.idr_count, + wctx.bytes_written / 1048576.0, + elapsed_s, + wctx.frames_encoded / elapsed_s); + } + + if (max_seconds > 0) { + struct timespec now; + clock_gettime(CLOCK_MONOTONIC, &now); + int64_t elapsed = ((int64_t)now.tv_sec * 1000000 + now.tv_nsec / 1000) - start_us; + if (elapsed / 1000000 >= max_seconds) { + fprintf(stderr, "[simple_record] достигнут лимит %dс\n", max_seconds); + break; + } + } + } + + fprintf(stderr, "[simple_record] flush encoder\n"); + cfc_encoder_flush(enc, on_bitstream, &wctx); + + fprintf(stderr, + "[simple_record] итого: %llu кадров, %llu IDR, %.2f МБ\n", + (unsigned long long)wctx.frames_encoded, + (unsigned long long)wctx.idr_count, + wctx.bytes_written / 1048576.0); + + fclose(wctx.fp); + +cleanup_enc: + cfc_encoder_destroy(enc); +cleanup_src: + cfc_source_destroy(src); + cuCtxPopCurrent(NULL); + cuDevicePrimaryCtxRelease(dev); + return 0; +} diff --git a/include/cuframes_composer/nvenc.h b/include/cuframes_composer/nvenc.h new file mode 100644 index 0000000..d40fbf4 --- /dev/null +++ b/include/cuframes_composer/nvenc.h @@ -0,0 +1,107 @@ +/* cuframes-composer — обвязка вокруг NVIDIA NVENC API. + * + * Динамически грузит libnvidia-encode.so через dlopen, чтобы пакет + * cuframes-composer оставался под LGPL-2.1+ без статической линковки + * проприетарного SDK. См. дизайн-документ часть 1.6. + * + * Принимает на вход CUdeviceptr на NV12 frame (zero-copy через + * NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR + nvEncRegisterResource). + * Выдаёт сжатый H.264 bitstream через callback (caller записывает его + * в файл / RTP-пакетизирует / etc). + * + * Lifecycle: + * create(cfg) — open session, init encoder + * encode_frame(...) — на каждый входной кадр (один CUdeviceptr) + * flush(...) — в конце потока, чтобы вытащить остатки B-кадров + * destroy(...) — закрыть session, выгрузить SDK + * + * Поток должен быть single-threaded для одного encoder'а (NVENC API + * не реентрабельный для одной сессии). + * + * Лицензия: LGPL-2.1+ + */ + +#ifndef CUFRAMES_COMPOSER_NVENC_H +#define CUFRAMES_COMPOSER_NVENC_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Параметры кодировщика. Для Phase 1 минимальный набор; в будущих фазах + * будут расширяться для RTSP (rate control, GOP, intra-refresh, и т.п.). */ +typedef struct cfc_encoder_config { + CUcontext cuda_ctx; /* CUDA-контекст, в котором лежат входные VMM-буферы */ + int32_t width; /* ширина кадра в пикселях */ + int32_t height; /* высота кадра в пикселях */ + int32_t fps_num; /* числитель частоты кадров (25) */ + int32_t fps_den; /* знаменатель частоты кадров (1) */ + int32_t bitrate_kbps; /* битрейт в килобитах в секунду (5000 = 5 Мбит/с) */ + int32_t gop_size; /* интервал между keyframe'ами в кадрах (25 = 1 IDR в секунду) */ + int32_t num_b_frames; /* B-кадры (0 для low-latency RTSP) */ + + /* Пресет — соответствует NV_ENC_TUNING_INFO + preset GUID. + * "ll" = low-latency, "p4" = preset P4 (баланс), "p7" = highest quality. */ + const char *preset; /* "ll", "p4", "p7" — по умолчанию "ll" */ +} cfc_encoder_config_t; + +typedef struct cfc_encoder cfc_encoder_t; + +/* Callback для записанного H.264 bitstream'а. Вызывается синхронно из + * cfc_encoder_encode_frame / cfc_encoder_flush. Указатель действителен только + * на время вызова (буфер NVENC будет разблокирован после возврата). */ +typedef void (*cfc_encoder_output_cb)( + const uint8_t *bitstream, /* данные H.264 (Annex B byte stream) */ + size_t size, /* размер в байтах */ + int64_t pts_ns, /* presentation timestamp (передан в encode_frame) */ + int is_idr, /* 1 если кадр IDR (keyframe) */ + void *user /* user data, переданный в encode_frame */ +); + +/* Создать encoder. Возвращает 0 при успехе. */ +int cfc_encoder_create(const cfc_encoder_config_t *cfg, cfc_encoder_t **out); + +/* Закодировать один кадр. ptr — CUdeviceptr на начало NV12 frame в VRAM, + * pitch — ширина строки в байтах (для NV12 равна width). + * + * Callback может быть вызван 0 или 1 раз для одного encode_frame: NVENC + * может буферизовать кадры внутри (особенно при B-кадрах). Чтобы вытащить + * последние буферизованные — вызвать flush в конце потока. */ +int cfc_encoder_encode_frame( + cfc_encoder_t *enc, + CUdeviceptr ptr, + int pitch, + int64_t pts_ns, + cfc_encoder_output_cb cb, + void *user +); + +/* Завершить поток. Передаёт NVENC флаг end-of-stream, выдаёт оставшиеся + * закодированные кадры через callback. После flush encoder можно либо + * destroy, либо использовать снова с новым GOP'ом. */ +int cfc_encoder_flush( + cfc_encoder_t *enc, + cfc_encoder_output_cb cb, + void *user +); + +/* Выдать SPS/PPS — заголовки H.264 sequence/picture parameter sets. + * Нужны для записи в начало MP4-контейнера или для отправки в SDP при RTSP. */ +int cfc_encoder_get_sequence_params( + cfc_encoder_t *enc, + uint8_t *out, /* буфер caller'а */ + size_t *inout_size /* при вызове — размер буфера, при возврате — реальный размер */ +); + +/* Закрыть encoder, выгрузить SDK. */ +int cfc_encoder_destroy(cfc_encoder_t *enc); + +#ifdef __cplusplus +} +#endif + +#endif /* CUFRAMES_COMPOSER_NVENC_H */ diff --git a/include/cuframes_composer/source.h b/include/cuframes_composer/source.h new file mode 100644 index 0000000..b1c3490 --- /dev/null +++ b/include/cuframes_composer/source.h @@ -0,0 +1,84 @@ +/* cuframes-composer — обвязка над cuframes_subscriber с явной машиной + * состояний и автоматическим переподключением. Создаёт собственный поток + * для блокирующего cuframes_subscriber_next и держит снимок последнего + * успешного кадра, который читатели берут не блокируясь. + * + * Используется в композиторе для каждого входного источника (одна камера). + * + * Lifecycle: + * create() → запускается поток, который начинает с DISCONNECTED, идёт + * в CONNECTING (попытка подписаться), затем ACTIVE при успехе + * get_latest() → не блокируется, возвращает снимок последнего кадра + * + текущее состояние + * destroy() → останавливает поток, освобождает ресурсы cuframes + * + * Thread safety: create/destroy — main thread. get_latest — любой поток. + * + * Лицензия: LGPL-2.1+ + */ + +#ifndef CUFRAMES_COMPOSER_SOURCE_H +#define CUFRAMES_COMPOSER_SOURCE_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Состояние источника. ACTIVE — единственное «здоровое» состояние, + * остальные означают какую-то форму потери связи или ожидания. */ +typedef enum cfc_source_state { + CFC_SOURCE_DISCONNECTED = 0, /* стартовое или после destroy подписки */ + CFC_SOURCE_CONNECTING, /* идёт subscribe handshake */ + CFC_SOURCE_ACTIVE, /* получаем кадры, последний < N мс назад */ + CFC_SOURCE_STALE, /* подписка жива, но кадры не приходят */ + CFC_SOURCE_DEAD, /* подписка отвалилась, ждём backoff до retry */ +} cfc_source_state_t; + +/* Конфигурация одного источника. */ +typedef struct cfc_source_config { + const char *key; /* cuframes key (например "cam-parking") */ + const char *consumer_name; /* имя для cuframes, должно быть уникально на publisher */ + int32_t cuda_device; /* индекс CUDA-устройства, обычно 0 */ + int32_t reconnect_min_ms; /* минимальный backoff при DEAD → CONNECTING (по умолчанию 1000) */ + int32_t reconnect_max_ms; /* максимальный backoff (по умолчанию 30000) */ + int32_t stale_threshold_ms; /* без кадра > N → ACTIVE → STALE (по умолчанию 500) */ + int32_t dead_threshold_ms; /* без кадра > N → STALE → DEAD (по умолчанию 5000) */ +} cfc_source_config_t; + +typedef struct cfc_source cfc_source_t; + +/* Снимок последнего успешного кадра. ptr указывает на VMM-mapped CUDA-память + * cuframes publisher'а. Он действителен до следующего вызова get_latest: + * после этого источник может перейти на следующий слот ring buffer'а. */ +typedef struct cfc_source_snapshot { + CUdeviceptr ptr; /* указатель на NV12 frame (Y plane) */ + int32_t width; + int32_t height; + int32_t pitch_y; + int32_t pitch_uv; + int64_t pts_ns; /* timestamp от publisher'а (CLOCK_MONOTONIC ns) */ + uint64_t seq; /* sequence number */ + cfc_source_state_t state; /* текущее состояние источника */ + int64_t last_frame_age_us; /* сколько микросекунд назад последний успешный кадр; -1 если никогда */ +} cfc_source_snapshot_t; + +/* Создать источник. Возвращает 0 при успехе. Старт асинхронный: + * cfc_source_create возвращает сразу, фоновый поток выполняет subscribe. + * До первого успешного кадра состояние будет CONNECTING либо DEAD. */ +int cfc_source_create(const cfc_source_config_t *cfg, cfc_source_t **out); + +/* Получить снимок последнего кадра. Не блокируется. Возвращает 0 при успехе. + * Поле state в out выставляется всегда (даже если кадров ещё не было). */ +int cfc_source_get_latest(cfc_source_t *src, cfc_source_snapshot_t *out); + +/* Освободить источник. Останавливает поток (joins), отключает подписку. */ +int cfc_source_destroy(cfc_source_t *src); + +#ifdef __cplusplus +} +#endif + +#endif /* CUFRAMES_COMPOSER_SOURCE_H */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..54f0a15 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,63 @@ +# Основная библиотека композитора — `libcuframes_composer.so`. Содержит: +# - source.c подписка к cuframes публишеру + state machine +# - nvenc_loader.c dlopen libnvidia-encode.so + загрузка API таблицы +# - nvenc.c обвязка вокруг NVENC SDK (init/encode/teardown) +# +# Дальше по фазам: +# Phase 2: compose.c (CUDA composition), ringbuf.c (SPSC swap) +# Phase 3: rtsp_publisher.c, rtp_h264.c, overlay.c, png_decode.c, text_render.c +# Phase 4: control_zmq.c +# Phase 5: health_mqtt.c + +set(COMPOSER_SOURCES + source.c + nvenc_loader.c + nvenc.c +) + +add_library(cuframes_composer SHARED ${COMPOSER_SOURCES}) +add_library(cuframes_composer_static STATIC ${COMPOSER_SOURCES}) + +foreach(target cuframes_composer cuframes_composer_static) + target_include_directories(${target} + PUBLIC + $ + $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${NVCODEC_HEADERS_DIR} + ) + target_compile_features(${target} PRIVATE c_std_11) + target_compile_options(${target} PRIVATE + -Wall -Wextra -Wpedantic + $<$:-O0 -g3> + $<$:-O2 -g> + ) + target_link_libraries(${target} + PUBLIC + cuframes_static # из third_party/cuframes + CUDA::cudart + CUDA::cuda_driver + Threads::Threads + ${LIBDL_LIBRARY} # для dlopen libnvidia-encode.so + rt + ) +endforeach() + +set_target_properties(cuframes_composer PROPERTIES + VERSION ${PROJECT_VERSION} + SOVERSION 0 +) + +# Install rules +include(GNUInstallDirs) +install(TARGETS cuframes_composer cuframes_composer_static + EXPORT cuframesComposerTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +install(DIRECTORY ${CMAKE_SOURCE_DIR}/include/cuframes_composer + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING PATTERN "*.h" +) diff --git a/src/nvenc.c b/src/nvenc.c new file mode 100644 index 0000000..73cfeb1 --- /dev/null +++ b/src/nvenc.c @@ -0,0 +1,413 @@ +/* NVENC обвязка — реализация публичного API cuframes_composer/nvenc.h. + * + * Lifecycle NVENC: + * 1) nvEncOpenEncodeSessionEx(CUDA context) → handle + * 2) nvEncGetEncodeGUIDs → проверить что H264 поддерживается + * 3) nvEncGetEncodePresetConfigEx(preset) → дефолтный config от пресета + * 4) Override полей config'а (bitrate, GOP, B-frames) + * 5) nvEncInitializeEncoder(params{config, GUID, размер, fps, ...}) + * 6) Создать pool of output bitstream buffers (через CreateBitstreamBuffer) + * + * Encode loop: + * 1) nvEncRegisterResource(CUdeviceptr, pitch) → registered_resource + * (кешируем — один cuframes-slot регистрируется один раз и переиспользуется) + * 2) nvEncMapInputResource(registered) → NV_ENC_INPUT_PTR (mapped) + * 3) nvEncEncodePicture(picParams{mapped, output_buffer, pts, ...}) + * может вернуть NV_ENC_ERR_NEED_MORE_INPUT — это OK, означает буферизация + * 4) Если encode успешен (NV_ENC_SUCCESS): + * nvEncLockBitstream → выгрузить bytes → callback → nvEncUnlockBitstream + * 5) nvEncUnmapInputResource + * + * Flush (end-of-stream): + * 1) encodePicture с completionEvent=NULL и pictureStruct=NV_ENC_PIC_STRUCT_FRAME + * + endOfStream=1 → вытащит буферизованные кадры + * 2) Lock/Unlock в цикле пока есть выходные кадры + * + * Phase 1 ограничения (упрощения, будут сняты в следующих фазах): + * - Один output bitstream buffer (нет async encode, sync flow) + * - Линейный кеш registered resources (для Phase 2 → hash map) + * - Без SEI / metadata insertion (для RTSP добавится в Phase 3) + * - Без adaptive bitrate (Phase 3) + * + * Reference: NVIDIA Video Codec SDK Sample код + ffmpeg/libavcodec/nvenc.c + * (использован как образец сборки picParams и initParams). + */ + +#include "../include/cuframes_composer/nvenc.h" +#include "nvenc_loader.h" + +#include +#include +#include +#include + +/* Максимальный размер кеша зарегистрированных ресурсов. cuframes ring buffer + * для одного источника — 16 slots, для 4 источников — 64. Phase 1 = 16. */ +#define CFC_MAX_REGISTERED_RESOURCES 64 + +/* Размер выходного bitstream buffer'а. Default NVENC = 1 MB / frame — + * хватает с запасом для 1080p H.264 даже на keyframe. */ +#define CFC_BITSTREAM_BUFFER_SIZE (2 * 1024 * 1024) + +typedef struct registered_resource { + CUdeviceptr ptr; /* ключ кеша */ + int pitch; + NV_ENC_REGISTERED_PTR regptr; /* возвращается nvEncRegisterResource */ +} registered_resource_t; + +struct cfc_encoder { + void *session; /* NVENC session handle */ + NV_ENC_BUFFER_FORMAT input_format; /* NV12 */ + int width, height; + int fps_num, fps_den; + + /* Output bitstream buffer (один на Phase 1, потом pool) */ + NV_ENC_OUTPUT_PTR output_bitstream; + + /* Кеш зарегистрированных входных буферов */ + registered_resource_t registered[CFC_MAX_REGISTERED_RESOURCES]; + int registered_count; + pthread_mutex_t registered_mu; +}; + +/* ── Helpers ──────────────────────────────────────────────────────────── */ + +/* Безопасно вернуть управление с ошибкой + лог. */ +#define NVE_CHECK(expr, label) do { \ + NVENCSTATUS _s = (expr); \ + if (_s != NV_ENC_SUCCESS) { \ + fprintf(stderr, "[cfc/nvenc] %s:%d %s → %s\n", \ + __FILE__, __LINE__, #expr, cfc_nvenc_status_str(_s)); \ + rc = -1; goto label; \ + } \ +} while (0) + +/* GUID H.264 кодека. Это стандартная константа из NVENC SDK. */ +static const GUID nv_codec_h264 = NV_ENC_CODEC_H264_GUID; + +/* GUID пресетов — выбираются по строковому имени из cfc_encoder_config.preset. */ +static const GUID nv_preset_p1 = NV_ENC_PRESET_P1_GUID; /* fastest */ +static const GUID nv_preset_p4 = NV_ENC_PRESET_P4_GUID; /* balanced */ +static const GUID nv_preset_p7 = NV_ENC_PRESET_P7_GUID; /* highest quality */ + +/* Найти/выбрать preset GUID по строковому имени. */ +static const GUID *select_preset(const char *name) +{ + if (!name || !*name || !strcmp(name, "ll") || !strcmp(name, "p4")) return &nv_preset_p4; + if (!strcmp(name, "p1")) return &nv_preset_p1; + if (!strcmp(name, "p7")) return &nv_preset_p7; + fprintf(stderr, "[cfc/nvenc] unknown preset '%s', using p4\n", name); + return &nv_preset_p4; +} + +/* Регистрация CUDA-указателя в NVENC (или поиск в кеше). Возвращает + * NV_ENC_REGISTERED_PTR или NULL при ошибке. Thread-safe через mutex. */ +static NV_ENC_REGISTERED_PTR get_or_register(cfc_encoder_t *enc, + CUdeviceptr ptr, int pitch) +{ + pthread_mutex_lock(&enc->registered_mu); + + /* Поиск в кеше */ + for (int i = 0; i < enc->registered_count; i++) { + if (enc->registered[i].ptr == ptr && enc->registered[i].pitch == pitch) { + NV_ENC_REGISTERED_PTR rp = enc->registered[i].regptr; + pthread_mutex_unlock(&enc->registered_mu); + return rp; + } + } + + if (enc->registered_count >= CFC_MAX_REGISTERED_RESOURCES) { + pthread_mutex_unlock(&enc->registered_mu); + fprintf(stderr, "[cfc/nvenc] registered cache overflow (max %d)\n", + CFC_MAX_REGISTERED_RESOURCES); + return NULL; + } + + /* Регистрация. NV12 формат: Y plane на полном разрешении, UV plane + * на половинном (interleaved). Размер NV_ENC_REGISTER_RESOURCE — это + * вся NV12-плоскость (Y + UV, height * pitch * 1.5). */ + NV_ENC_REGISTER_RESOURCE reg = { 0 }; + reg.version = NV_ENC_REGISTER_RESOURCE_VER; + reg.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; + reg.width = enc->width; + reg.height = enc->height; + reg.pitch = pitch; + reg.resourceToRegister = (void *)ptr; + reg.bufferFormat = enc->input_format; + reg.bufferUsage = NV_ENC_INPUT_IMAGE; + + NVENCSTATUS s = g_nvenc_funcs.nvEncRegisterResource(enc->session, ®); + if (s != NV_ENC_SUCCESS) { + pthread_mutex_unlock(&enc->registered_mu); + fprintf(stderr, + "[cfc/nvenc] nvEncRegisterResource failed: %s (ptr=%p pitch=%d)\n", + cfc_nvenc_status_str(s), (void *)ptr, pitch); + return NULL; + } + + int idx = enc->registered_count++; + enc->registered[idx].ptr = ptr; + enc->registered[idx].pitch = pitch; + enc->registered[idx].regptr = reg.registeredResource; + + pthread_mutex_unlock(&enc->registered_mu); + return reg.registeredResource; +} + +/* Вытащить готовый закодированный кадр и отдать через callback. + * Возвращает 0 если ничего нет (NEED_MORE_INPUT), 1 если выдал, -1 ошибка. */ +static int drain_output(cfc_encoder_t *enc, + cfc_encoder_output_cb cb, void *user) +{ + NV_ENC_LOCK_BITSTREAM lock = { 0 }; + lock.version = NV_ENC_LOCK_BITSTREAM_VER; + lock.outputBitstream = enc->output_bitstream; + + NVENCSTATUS s = g_nvenc_funcs.nvEncLockBitstream(enc->session, &lock); + if (s == NV_ENC_ERR_NEED_MORE_INPUT) { + return 0; + } + if (s != NV_ENC_SUCCESS) { + fprintf(stderr, "[cfc/nvenc] nvEncLockBitstream failed: %s\n", + cfc_nvenc_status_str(s)); + return -1; + } + + int is_idr = (lock.pictureType == NV_ENC_PIC_TYPE_IDR); + int64_t pts_ns = (int64_t)lock.outputTimeStamp; + + if (cb) { + cb((const uint8_t *)lock.bitstreamBufferPtr, lock.bitstreamSizeInBytes, + pts_ns, is_idr, user); + } + + s = g_nvenc_funcs.nvEncUnlockBitstream(enc->session, enc->output_bitstream); + if (s != NV_ENC_SUCCESS) { + fprintf(stderr, "[cfc/nvenc] nvEncUnlockBitstream failed: %s\n", + cfc_nvenc_status_str(s)); + return -1; + } + return 1; +} + +/* ── Public API ───────────────────────────────────────────────────────── */ + +int cfc_encoder_create(const cfc_encoder_config_t *cfg, cfc_encoder_t **out) +{ + if (!cfg || !out) return -1; + if (cfg->width <= 0 || cfg->height <= 0) return -1; + if (cfg->fps_num <= 0 || cfg->fps_den <= 0) return -1; + if (cfg->bitrate_kbps <= 0) return -1; + + if (cfc_nvenc_loader_init() != 0) return -1; + + cfc_encoder_t *enc = calloc(1, sizeof(*enc)); + if (!enc) return -1; + enc->width = cfg->width; + enc->height = cfg->height; + enc->fps_num = cfg->fps_num; + enc->fps_den = cfg->fps_den; + enc->input_format = NV_ENC_BUFFER_FORMAT_NV12; + pthread_mutex_init(&enc->registered_mu, NULL); + + int rc = 0; + + /* 1) Open session с CUDA context. */ + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS sp = { 0 }; + sp.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + sp.device = cfg->cuda_ctx; + sp.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + sp.apiVersion = NVENCAPI_VERSION; + + NVE_CHECK(g_nvenc_funcs.nvEncOpenEncodeSessionEx(&sp, &enc->session), + fail_alloc); + + /* 2) Получить дефолтный config от preset'а. NVENC сам подберёт оптимальные + * параметры для выбранного preset'а + tuning info. */ + const GUID *preset_guid = select_preset(cfg->preset); + + NV_ENC_PRESET_CONFIG preset_cfg = { 0 }; + preset_cfg.version = NV_ENC_PRESET_CONFIG_VER; + preset_cfg.presetCfg.version = NV_ENC_CONFIG_VER; + + NVE_CHECK(g_nvenc_funcs.nvEncGetEncodePresetConfigEx( + enc->session, nv_codec_h264, *preset_guid, + NV_ENC_TUNING_INFO_LOW_LATENCY, &preset_cfg), + fail_session); + + NV_ENC_CONFIG ec = preset_cfg.presetCfg; + + /* 3) Override config: bitrate, GOP, B-frames, repeat SPS/PPS. */ + ec.gopLength = cfg->gop_size > 0 ? cfg->gop_size : 25; + ec.frameIntervalP = cfg->num_b_frames + 1; /* 1 = только P, 2 = 1 B, etc */ + ec.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR; + ec.rcParams.averageBitRate = cfg->bitrate_kbps * 1000; + ec.rcParams.maxBitRate = cfg->bitrate_kbps * 1000; + ec.rcParams.vbvBufferSize = cfg->bitrate_kbps * 1000; + ec.rcParams.vbvInitialDelay = cfg->bitrate_kbps * 1000; + /* Repeat SPS/PPS перед каждым IDR — важно для RTSP/MP4 где decoder + * может присоединиться к потоку в любой момент. */ + ec.encodeCodecConfig.h264Config.repeatSPSPPS = 1; + ec.encodeCodecConfig.h264Config.idrPeriod = ec.gopLength; + + /* 4) Init encoder. */ + NV_ENC_INITIALIZE_PARAMS ip = { 0 }; + ip.version = NV_ENC_INITIALIZE_PARAMS_VER; + ip.encodeGUID = nv_codec_h264; + ip.presetGUID = *preset_guid; + ip.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY; + ip.encodeWidth = cfg->width; + ip.encodeHeight = cfg->height; + ip.darWidth = cfg->width; + ip.darHeight = cfg->height; + ip.frameRateNum = cfg->fps_num; + ip.frameRateDen = cfg->fps_den; + ip.enablePTD = 1; /* Picture type decision — пусть NVENC сам решает */ + ip.encodeConfig = &ec; + + NVE_CHECK(g_nvenc_funcs.nvEncInitializeEncoder(enc->session, &ip), + fail_session); + + /* 5) Создать output bitstream buffer. Phase 1 — один. */ + NV_ENC_CREATE_BITSTREAM_BUFFER cb = { 0 }; + cb.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; + NVE_CHECK(g_nvenc_funcs.nvEncCreateBitstreamBuffer(enc->session, &cb), + fail_session); + enc->output_bitstream = cb.bitstreamBuffer; + + *out = enc; + return 0; + +fail_session: + g_nvenc_funcs.nvEncDestroyEncoder(enc->session); +fail_alloc: + pthread_mutex_destroy(&enc->registered_mu); + free(enc); + return rc; +} + +int cfc_encoder_encode_frame(cfc_encoder_t *enc, CUdeviceptr ptr, int pitch, + int64_t pts_ns, + cfc_encoder_output_cb cb, void *user) +{ + if (!enc) return -1; + + NV_ENC_REGISTERED_PTR regptr = get_or_register(enc, ptr, pitch); + if (!regptr) return -1; + + NV_ENC_MAP_INPUT_RESOURCE mp = { 0 }; + mp.version = NV_ENC_MAP_INPUT_RESOURCE_VER; + mp.registeredResource = regptr; + + NVENCSTATUS s = g_nvenc_funcs.nvEncMapInputResource(enc->session, &mp); + if (s != NV_ENC_SUCCESS) { + fprintf(stderr, "[cfc/nvenc] nvEncMapInputResource failed: %s\n", + cfc_nvenc_status_str(s)); + return -1; + } + + NV_ENC_PIC_PARAMS pp = { 0 }; + pp.version = NV_ENC_PIC_PARAMS_VER; + pp.inputBuffer = mp.mappedResource; + pp.outputBitstream = enc->output_bitstream; + pp.bufferFmt = mp.mappedBufferFmt; + pp.inputWidth = enc->width; + pp.inputHeight = enc->height; + pp.inputPitch = pitch; + pp.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; + pp.inputTimeStamp = (uint64_t)pts_ns; + + s = g_nvenc_funcs.nvEncEncodePicture(enc->session, &pp); + int rc = 0; + if (s == NV_ENC_ERR_NEED_MORE_INPUT) { + /* OK, NVENC буферизует — выходного кадра пока нет. */ + } else if (s == NV_ENC_SUCCESS) { + /* Есть готовый закодированный кадр — вытащить через callback. */ + if (drain_output(enc, cb, user) < 0) rc = -1; + } else { + fprintf(stderr, "[cfc/nvenc] nvEncEncodePicture failed: %s\n", + cfc_nvenc_status_str(s)); + rc = -1; + } + + NVENCSTATUS us = g_nvenc_funcs.nvEncUnmapInputResource(enc->session, + mp.mappedResource); + if (us != NV_ENC_SUCCESS) { + fprintf(stderr, "[cfc/nvenc] nvEncUnmapInputResource failed: %s\n", + cfc_nvenc_status_str(us)); + rc = -1; + } + return rc; +} + +int cfc_encoder_flush(cfc_encoder_t *enc, cfc_encoder_output_cb cb, void *user) +{ + if (!enc) return -1; + + /* Send EOS signal. */ + NV_ENC_PIC_PARAMS pp = { 0 }; + pp.version = NV_ENC_PIC_PARAMS_VER; + pp.encodePicFlags = NV_ENC_PIC_FLAG_EOS; + + NVENCSTATUS s = g_nvenc_funcs.nvEncEncodePicture(enc->session, &pp); + if (s != NV_ENC_SUCCESS && s != NV_ENC_ERR_NEED_MORE_INPUT) { + fprintf(stderr, "[cfc/nvenc] flush: nvEncEncodePicture(EOS) failed: %s\n", + cfc_nvenc_status_str(s)); + return -1; + } + + /* Вытащить все буферизованные кадры. */ + int got; + do { + got = drain_output(enc, cb, user); + } while (got > 0); + + return got < 0 ? -1 : 0; +} + +int cfc_encoder_get_sequence_params(cfc_encoder_t *enc, + uint8_t *out, size_t *inout_size) +{ + if (!enc || !out || !inout_size) return -1; + + NV_ENC_SEQUENCE_PARAM_PAYLOAD spp = { 0 }; + spp.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; + spp.inBufferSize = (uint32_t)*inout_size; + spp.spsppsBuffer = out; + uint32_t written = 0; + spp.outSPSPPSPayloadSize = &written; + + NVENCSTATUS s = g_nvenc_funcs.nvEncGetSequenceParams(enc->session, &spp); + if (s != NV_ENC_SUCCESS) { + fprintf(stderr, "[cfc/nvenc] nvEncGetSequenceParams failed: %s\n", + cfc_nvenc_status_str(s)); + return -1; + } + *inout_size = written; + return 0; +} + +int cfc_encoder_destroy(cfc_encoder_t *enc) +{ + if (!enc) return 0; + + /* Unregister всех зарегистрированных входных буферов. */ + pthread_mutex_lock(&enc->registered_mu); + for (int i = 0; i < enc->registered_count; i++) { + g_nvenc_funcs.nvEncUnregisterResource(enc->session, + enc->registered[i].regptr); + } + enc->registered_count = 0; + pthread_mutex_unlock(&enc->registered_mu); + pthread_mutex_destroy(&enc->registered_mu); + + if (enc->output_bitstream) { + g_nvenc_funcs.nvEncDestroyBitstreamBuffer(enc->session, + enc->output_bitstream); + } + if (enc->session) { + g_nvenc_funcs.nvEncDestroyEncoder(enc->session); + } + free(enc); + return 0; +} diff --git a/src/nvenc_loader.c b/src/nvenc_loader.c new file mode 100644 index 0000000..f0edf5f --- /dev/null +++ b/src/nvenc_loader.c @@ -0,0 +1,109 @@ +/* Динамическая загрузка libnvidia-encode.so и инициализация таблицы + * NVENC API. См. nvenc_loader.h для контракта. + * + * Lib path: libnvidia-encode.so.1 (стандартное имя в Linux x86_64, + * поставляется NVIDIA-драйвером в /usr/lib/x86_64-linux-gnu/). + * + * API version: используем макрос NVENCAPI_VERSION из nv-codec-headers. + * SDK сам проверит совместимость драйвера (вернёт NV_ENC_ERR_INVALID_VERSION + * если драйвер старше чем заголовки требуют). + */ + +#include "nvenc_loader.h" +#include +#include +#include +#include + +NV_ENCODE_API_FUNCTION_LIST g_nvenc_funcs; + +static pthread_once_t g_init_once = PTHREAD_ONCE_INIT; +static int g_init_result = -1; + +/* Сигнатура единственного символа который мы dlsym'им из libnvidia-encode.so. + * Все остальные функции SDK достаются через NvEncodeAPICreateInstance, который + * заполняет таблицу указателями. */ +typedef NVENCSTATUS (NVENCAPI *NvEncodeAPICreateInstance_fn)(NV_ENCODE_API_FUNCTION_LIST *); + +static void load_once(void) +{ + /* RTLD_LAZY — функции резолвятся по мере вызова, что нормально для + * подсистемы которую мы используем целиком. RTLD_NODELETE — не выгружать + * lib при dlclose (мы и так держим до выхода процесса). */ + void *handle = dlopen("libnvidia-encode.so.1", RTLD_LAZY | RTLD_NODELETE); + if (!handle) { + /* Fallback на имя без версии (некоторые системы держат symlink). */ + handle = dlopen("libnvidia-encode.so", RTLD_LAZY | RTLD_NODELETE); + } + if (!handle) { + fprintf(stderr, + "[cfc/nvenc] dlopen libnvidia-encode.so.1 failed: %s\n" + " → NVIDIA-драйвер не установлен либо libnvidia-encode не в LD_LIBRARY_PATH\n", + dlerror()); + return; + } + + NvEncodeAPICreateInstance_fn create_instance = + (NvEncodeAPICreateInstance_fn)dlsym(handle, "NvEncodeAPICreateInstance"); + if (!create_instance) { + fprintf(stderr, + "[cfc/nvenc] dlsym NvEncodeAPICreateInstance failed: %s\n" + " → libnvidia-encode.so есть, но без ожидаемого символа\n", + dlerror()); + return; + } + + memset(&g_nvenc_funcs, 0, sizeof(g_nvenc_funcs)); + g_nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER; + + NVENCSTATUS s = create_instance(&g_nvenc_funcs); + if (s != NV_ENC_SUCCESS) { + fprintf(stderr, + "[cfc/nvenc] NvEncodeAPICreateInstance failed: %s (status %d)\n" + " → версия драйвера несовместима с NVENC API %u.%u\n", + cfc_nvenc_status_str(s), s, + NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION); + return; + } + + g_init_result = 0; +} + +int cfc_nvenc_loader_init(void) +{ + pthread_once(&g_init_once, load_once); + return g_init_result; +} + +const char *cfc_nvenc_status_str(NVENCSTATUS s) +{ + switch (s) { + case NV_ENC_SUCCESS: return "NV_ENC_SUCCESS"; + case NV_ENC_ERR_NO_ENCODE_DEVICE: return "NV_ENC_ERR_NO_ENCODE_DEVICE"; + case NV_ENC_ERR_UNSUPPORTED_DEVICE: return "NV_ENC_ERR_UNSUPPORTED_DEVICE"; + case NV_ENC_ERR_INVALID_ENCODERDEVICE: return "NV_ENC_ERR_INVALID_ENCODERDEVICE"; + case NV_ENC_ERR_INVALID_DEVICE: return "NV_ENC_ERR_INVALID_DEVICE"; + case NV_ENC_ERR_DEVICE_NOT_EXIST: return "NV_ENC_ERR_DEVICE_NOT_EXIST"; + case NV_ENC_ERR_INVALID_PTR: return "NV_ENC_ERR_INVALID_PTR"; + case NV_ENC_ERR_INVALID_EVENT: return "NV_ENC_ERR_INVALID_EVENT"; + case NV_ENC_ERR_INVALID_PARAM: return "NV_ENC_ERR_INVALID_PARAM"; + case NV_ENC_ERR_INVALID_CALL: return "NV_ENC_ERR_INVALID_CALL"; + case NV_ENC_ERR_OUT_OF_MEMORY: return "NV_ENC_ERR_OUT_OF_MEMORY"; + case NV_ENC_ERR_ENCODER_NOT_INITIALIZED: return "NV_ENC_ERR_ENCODER_NOT_INITIALIZED"; + case NV_ENC_ERR_UNSUPPORTED_PARAM: return "NV_ENC_ERR_UNSUPPORTED_PARAM"; + case NV_ENC_ERR_LOCK_BUSY: return "NV_ENC_ERR_LOCK_BUSY"; + case NV_ENC_ERR_NOT_ENOUGH_BUFFER: return "NV_ENC_ERR_NOT_ENOUGH_BUFFER"; + case NV_ENC_ERR_INVALID_VERSION: return "NV_ENC_ERR_INVALID_VERSION"; + case NV_ENC_ERR_MAP_FAILED: return "NV_ENC_ERR_MAP_FAILED"; + case NV_ENC_ERR_NEED_MORE_INPUT: return "NV_ENC_ERR_NEED_MORE_INPUT"; + case NV_ENC_ERR_ENCODER_BUSY: return "NV_ENC_ERR_ENCODER_BUSY"; + case NV_ENC_ERR_EVENT_NOT_REGISTERD: return "NV_ENC_ERR_EVENT_NOT_REGISTERD"; + case NV_ENC_ERR_GENERIC: return "NV_ENC_ERR_GENERIC"; + case NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY: return "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY"; + case NV_ENC_ERR_UNIMPLEMENTED: return "NV_ENC_ERR_UNIMPLEMENTED"; + case NV_ENC_ERR_RESOURCE_REGISTER_FAILED: return "NV_ENC_ERR_RESOURCE_REGISTER_FAILED"; + case NV_ENC_ERR_RESOURCE_NOT_REGISTERED: return "NV_ENC_ERR_RESOURCE_NOT_REGISTERED"; + case NV_ENC_ERR_RESOURCE_NOT_MAPPED: return "NV_ENC_ERR_RESOURCE_NOT_MAPPED"; + default: return "NV_ENC_ERR_UNKNOWN"; + } +} diff --git a/src/nvenc_loader.h b/src/nvenc_loader.h new file mode 100644 index 0000000..ad3dfd7 --- /dev/null +++ b/src/nvenc_loader.h @@ -0,0 +1,29 @@ +/* Внутренний заголовок — динамическая загрузка libnvidia-encode.so + * через dlopen, чтобы не линковаться статически с проприетарным SDK. + * + * Загружает символ `NvEncodeAPICreateInstance`, вызывает его для + * наполнения NV_ENCODE_API_FUNCTION_LIST — таблицы указателей на все + * функции NVENC. Дальше весь код в src/nvenc.c вызывает функции через + * эту таблицу. + */ + +#ifndef CFC_NVENC_LOADER_H +#define CFC_NVENC_LOADER_H + +#include + +/* Глобально кешированная таблица функций NVENC. После первого успешного + * cfc_nvenc_loader_init() остаётся валидной до выхода процесса. */ +extern NV_ENCODE_API_FUNCTION_LIST g_nvenc_funcs; + +/* Загружает libnvidia-encode.so и наполняет g_nvenc_funcs. + * Возвращает 0 при успехе, -1 если SDK не найден или версия несовместима. + * Идемпотентно — повторные вызовы дешёвые (возвращают 0 если уже загружено). + * + * Thread-safe для первого вызова через pthread_once. */ +int cfc_nvenc_loader_init(void); + +/* Текстовое описание ошибки NVENC SDK по коду NVENCSTATUS. */ +const char *cfc_nvenc_status_str(NVENCSTATUS s); + +#endif /* CFC_NVENC_LOADER_H */ diff --git a/src/source.c b/src/source.c new file mode 100644 index 0000000..dff89f5 --- /dev/null +++ b/src/source.c @@ -0,0 +1,294 @@ +/* Обвязка над cuframes_subscriber. Реализация публичного API + * cuframes_composer/source.h. + * + * Архитектура: + * Отдельный поток на источник. cuframes_subscriber_next блокирующий, поток + * циклически зовёт его с таймаутом. На каждый успешный кадр — обновляет + * snapshot (под mutex'ом). На таймаут — переходит к проверке состояния. + * + * Snapshot pattern: + * Главный поток (тот что зовёт get_latest) не блокируется на subscribe. + * Он читает under mutex последний сохранённый CUdeviceptr + meta. + * Mutex короткий (просто копия указателя и нескольких int'ов). + * + * State machine: + * DISCONNECTED → создаём подписку → CONNECTING + * CONNECTING → подписка успешна, первый кадр получен → ACTIVE + * → подписка fail → DEAD (ждём reconnect_backoff) + * ACTIVE → последний кадр < stale_threshold_ms → остаёмся ACTIVE + * → > stale_threshold_ms → STALE + * STALE → новый кадр → ACTIVE + * → нет > dead_threshold_ms → DEAD (destroy subscriber) + * DEAD → ждём backoff (exp от reconnect_min до reconnect_max) → CONNECTING + * + * Phase 1 особенности: + * - Один источник на cfc_source_t (нет multi-source агрегации, это compose). + * - cuframes_subscriber_release вызывается при следующем get_latest либо + * при выходе. Это означает что caller не может «удерживать» snapshot + * дольше чем до следующего get_latest — должен прочитать сразу. + * В Phase 2 это уточнится при переходе на double buffering. + */ + +#include "../include/cuframes_composer/source.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Внутренний таймаут блокирующего cuframes_subscriber_next. Короткий + * чтобы поток мог периодически проверять stop_flag и состояние. */ +#define CFC_SOURCE_NEXT_TIMEOUT_MS 200 + +struct cfc_source { + cfc_source_config_t cfg; + char key_copy[64]; /* персистентная копия cfg.key */ + char name_copy[32]; /* персистентная копия cfg.consumer_name */ + + pthread_t thread; + int thread_started; + _Atomic int stop_flag; + + pthread_mutex_t state_mu; + cfc_source_state_t state; + + /* Snapshot — обновляется потоком, читается через get_latest. */ + cuframes_subscriber_t *sub; /* nullable — есть только в ACTIVE/STALE */ + cuframes_frame_t *current_frame; /* удерживаемый frame; release при следующем next() */ + cfc_source_snapshot_t snapshot; + int64_t last_frame_us; /* CLOCK_MONOTONIC момент последнего успешного кадра */ +}; + +static int64_t now_us(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; +} + +static void set_state(cfc_source_t *src, cfc_source_state_t s) +{ + pthread_mutex_lock(&src->state_mu); + if (src->state != s) { + src->state = s; + src->snapshot.state = s; + } + pthread_mutex_unlock(&src->state_mu); +} + +static void release_current_frame(cfc_source_t *src) +{ + if (src->current_frame && src->sub) { + cuframes_subscriber_release(src->sub, src->current_frame); + src->current_frame = NULL; + } +} + +static int try_subscribe(cfc_source_t *src) +{ + cuframes_subscriber_config_t scfg = { 0 }; + scfg.key = src->key_copy; + scfg.consumer_name = src->name_copy; + scfg.mode = CUFRAMES_MODE_NEWEST_ONLY; + scfg.cuda_device = src->cfg.cuda_device; + scfg.connect_timeout_ms = 2000; + + int r = cuframes_subscriber_create(&scfg, &src->sub); + if (r != CUFRAMES_OK) { + fprintf(stderr, + "[cfc/source:%s] subscriber_create failed: %s\n", + src->name_copy, cuframes_strerror(r)); + return -1; + } + return 0; +} + +static void destroy_subscriber(cfc_source_t *src) +{ + release_current_frame(src); + if (src->sub) { + cuframes_subscriber_destroy(src->sub); + src->sub = NULL; + } +} + +/* Основной поток. Цикл: subscribe → next → update snapshot → проверка + * stale/dead → reconnect при необходимости. */ +static void *source_thread(void *arg) +{ + cfc_source_t *src = (cfc_source_t *)arg; + int64_t reconnect_backoff_ms = src->cfg.reconnect_min_ms; + + while (!atomic_load(&src->stop_flag)) { + cfc_source_state_t cur; + pthread_mutex_lock(&src->state_mu); + cur = src->state; + pthread_mutex_unlock(&src->state_mu); + + switch (cur) { + + case CFC_SOURCE_DISCONNECTED: + case CFC_SOURCE_DEAD: { + /* Ждём backoff либо stop. */ + int64_t wait_ms = reconnect_backoff_ms; + while (wait_ms > 0 && !atomic_load(&src->stop_flag)) { + int chunk = wait_ms > 100 ? 100 : (int)wait_ms; + struct timespec ts = {.tv_sec = chunk / 1000, + .tv_nsec = (long)(chunk % 1000) * 1000000L}; + nanosleep(&ts, NULL); + wait_ms -= chunk; + } + if (atomic_load(&src->stop_flag)) break; + set_state(src, CFC_SOURCE_CONNECTING); + break; + } + + case CFC_SOURCE_CONNECTING: { + if (try_subscribe(src) == 0) { + set_state(src, CFC_SOURCE_ACTIVE); + reconnect_backoff_ms = src->cfg.reconnect_min_ms; /* сброс backoff */ + } else { + set_state(src, CFC_SOURCE_DEAD); + /* Удвоить backoff до max. */ + reconnect_backoff_ms *= 2; + if (reconnect_backoff_ms > src->cfg.reconnect_max_ms) { + reconnect_backoff_ms = src->cfg.reconnect_max_ms; + } + } + break; + } + + case CFC_SOURCE_ACTIVE: + case CFC_SOURCE_STALE: { + cuframes_frame_t *frame = NULL; + int r = cuframes_subscriber_next(src->sub, NULL, &frame, + CFC_SOURCE_NEXT_TIMEOUT_MS); + + if (r == CUFRAMES_OK) { + /* Освобождаем предыдущий кадр (если был удержан caller'ом + * во время предыдущего snapshot — поздно, но это Phase 1 + * упрощение, см. описание сверху). */ + release_current_frame(src); + src->current_frame = frame; + + /* Обновляем snapshot под mutex'ом. */ + int32_t w = 0, h = 0; + cuframes_frame_size(frame, &w, &h); + pthread_mutex_lock(&src->state_mu); + src->snapshot.ptr = (CUdeviceptr)cuframes_frame_cuda_ptr(frame); + src->snapshot.width = w; + src->snapshot.height = h; + src->snapshot.pitch_y = cuframes_frame_pitch_y(frame); + src->snapshot.pitch_uv = cuframes_frame_pitch_uv(frame); + src->snapshot.pts_ns = cuframes_frame_pts_ns(frame); + src->snapshot.seq = cuframes_frame_seq(frame); + src->last_frame_us = now_us(); + src->snapshot.last_frame_age_us = 0; + if (src->state == CFC_SOURCE_STALE) { + src->state = CFC_SOURCE_ACTIVE; + src->snapshot.state = CFC_SOURCE_ACTIVE; + } + pthread_mutex_unlock(&src->state_mu); + + } else if (r == CUFRAMES_ERR_TIMEOUT || r == CUFRAMES_ERR_WOULD_BLOCK) { + /* Нет нового кадра — проверим age, может быть STALE/DEAD. */ + int64_t age_ms = (now_us() - src->last_frame_us) / 1000; + if (age_ms > src->cfg.dead_threshold_ms) { + fprintf(stderr, + "[cfc/source:%s] no frame for %lldms → DEAD\n", + src->name_copy, (long long)age_ms); + destroy_subscriber(src); + set_state(src, CFC_SOURCE_DEAD); + } else if (age_ms > src->cfg.stale_threshold_ms) { + set_state(src, CFC_SOURCE_STALE); + } + } else if (r == CUFRAMES_ERR_DISCONNECTED) { + fprintf(stderr, + "[cfc/source:%s] DISCONNECTED from publisher\n", + src->name_copy); + destroy_subscriber(src); + set_state(src, CFC_SOURCE_DEAD); + } else { + fprintf(stderr, + "[cfc/source:%s] cuframes_subscriber_next failed: %s\n", + src->name_copy, cuframes_strerror(r)); + destroy_subscriber(src); + set_state(src, CFC_SOURCE_DEAD); + } + break; + } + } + } + + destroy_subscriber(src); + return NULL; +} + +/* ── Public API ───────────────────────────────────────────────────────── */ + +int cfc_source_create(const cfc_source_config_t *cfg, cfc_source_t **out) +{ + if (!cfg || !cfg->key || !cfg->consumer_name || !out) return -1; + + cfc_source_t *src = calloc(1, sizeof(*src)); + if (!src) return -1; + src->cfg = *cfg; + strncpy(src->key_copy, cfg->key, sizeof(src->key_copy) - 1); + strncpy(src->name_copy, cfg->consumer_name, sizeof(src->name_copy) - 1); + src->cfg.key = src->key_copy; + src->cfg.consumer_name = src->name_copy; + + /* Дефолты */ + if (src->cfg.reconnect_min_ms <= 0) src->cfg.reconnect_min_ms = 1000; + if (src->cfg.reconnect_max_ms <= 0) src->cfg.reconnect_max_ms = 30000; + if (src->cfg.stale_threshold_ms <= 0) src->cfg.stale_threshold_ms = 500; + if (src->cfg.dead_threshold_ms <= 0) src->cfg.dead_threshold_ms = 5000; + + pthread_mutex_init(&src->state_mu, NULL); + src->state = CFC_SOURCE_DISCONNECTED; + src->snapshot.state = CFC_SOURCE_DISCONNECTED; + src->snapshot.last_frame_age_us = -1; + atomic_init(&src->stop_flag, 0); + + if (pthread_create(&src->thread, NULL, source_thread, src) != 0) { + pthread_mutex_destroy(&src->state_mu); + free(src); + return -1; + } + src->thread_started = 1; + + *out = src; + return 0; +} + +int cfc_source_get_latest(cfc_source_t *src, cfc_source_snapshot_t *out) +{ + if (!src || !out) return -1; + pthread_mutex_lock(&src->state_mu); + *out = src->snapshot; + if (src->last_frame_us > 0) { + out->last_frame_age_us = now_us() - src->last_frame_us; + } else { + out->last_frame_age_us = -1; + } + pthread_mutex_unlock(&src->state_mu); + return 0; +} + +int cfc_source_destroy(cfc_source_t *src) +{ + if (!src) return 0; + atomic_store(&src->stop_flag, 1); + if (src->thread_started) { + pthread_join(src->thread, NULL); + } + pthread_mutex_destroy(&src->state_mu); + free(src); + return 0; +} diff --git a/third_party/cuframes b/third_party/cuframes new file mode 160000 index 0000000..655649f --- /dev/null +++ b/third_party/cuframes @@ -0,0 +1 @@ +Subproject commit 655649f4d81477ccabe11cce037101c8ad397e0f diff --git a/third_party/nv-codec-headers b/third_party/nv-codec-headers new file mode 160000 index 0000000..c692783 --- /dev/null +++ b/third_party/nv-codec-headers @@ -0,0 +1 @@ +Subproject commit c69278340ab1d5559c7d7bf0edf615dc33ddbba7