Files
vf-cuda-grid/controller/cuda_grid_controller/__main__.py
T
gx 6081e33e5a controller: PipelineMonitor — auto-restore overlay state после pipeline restart
Pipeline filter state (overlays, layout, cell_map, audio) живёт в RAM
ffmpeg process. При recreate container (compose up, OOM, NVENC crash,
config change) state lost — controller'у нужно re-push.

Раньше user'у приходилось вручную:
  curl POST /layout/.../set
  docker restart cuda-grid-controller  # для browser/dynamic re-register

Теперь автоматизировано:
  PipelineMonitor polls ZMQ каждые 3 sec (no-op set_layout).
  On timeout/error → mark instance lost.
  First success after lost → trigger restore:
    1. set_layout к state.active_layout
    2. set_audio_output_enabled к state.audio_output_enabled
    3. re-push все overlays из state.overlays
    4. browser/dynamic/frigate hooks: mark_all_unregistered() —
       их loops автоматически re-add на next iteration

Verified test: docker restart cuda-grid-pipeline → 10 sec downtime →
monitor logs lost+restored+restore_done с count=6 overlays.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-24 07:35:27 +01:00

195 lines
6.1 KiB
Python

"""Entry point: `cuda-grid-controller --config controller.yaml`."""
from __future__ import annotations
import asyncio
import logging
import sys
from pathlib import Path
import structlog
import typer
import uvicorn
from .config import Config
from .dispatch import CommandDispatcher
from .browser_overlays import BrowserRenderer, DashboardCfg
from .dynamic_overlays import ChartCfg, ChatCfg, DynamicRenderer
from .pipeline_monitor import PipelineMonitor
from .frigate_bridge import FrigateBridge, FrigateBridgeCfg
from .http_api import create_app
from .mqtt_loop import MqttLoop
from .snapshot_history import SnapshotHistory
from .state import ControllerState
from .watchdog import StreamWatchdog, WatchdogCfg
# Typer application object; commands are registered on it via @cli.command().
# Shell-completion options are switched off (add_completion=False).
cli = typer.Typer(add_completion=False)
def _configure_logging(level: str) -> None:
    """Set up stdlib logging and the structlog processor chain.

    Args:
        level: Name of a stdlib logging level, in any letter case. A name
            that is not an attribute of ``logging`` falls back to ``INFO``.
    """
    # Resolve the textual level against the logging module's attributes.
    stdlib_level = getattr(logging, level.upper(), logging.INFO)
    logging.basicConfig(level=stdlib_level, format="%(message)s")

    # Processors run in list order; the console renderer is the final step.
    processor_chain = [
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer(),
    ]
    structlog.configure(processors=processor_chain)
async def _run(cfg: Config) -> None:
    """Build every controller component, run until shutdown, then tear down.

    Optional components (Frigate bridge, dynamic/browser renderers, stream
    watchdog) are skipped when their config section is empty or disabled. If
    building one of them raises, a warning is logged and the controller
    carries on without it.

    After construction the background components are started and the
    coroutine blocks on the MQTT loop and the HTTP server. Teardown lives in
    a ``finally`` that also covers the startup phase: only components whose
    ``start()`` returned are stopped, the dispatcher and MQTT loop are always
    released, and a failing teardown step is logged instead of aborting the
    remaining steps or masking the exception that triggered the shutdown.

    Args:
        cfg: Parsed controller configuration.
    """
    # Function-scope import: needed only for the ``teardown`` annotation.
    from collections.abc import Awaitable, Callable

    state = ControllerState()
    # Seed each instance's active layout with its configured default.
    for inst in cfg.instances:
        await state.set_layout(inst.name, inst.default_layout)

    dispatcher = CommandDispatcher(cfg, state)

    # Frigate bridge (optional) — it receives the dispatcher for automatic
    # overlay generation.
    frigate_bridge: FrigateBridge | None = None
    if cfg.frigate:
        try:
            fcfg = FrigateBridgeCfg.model_validate(cfg.frigate)
            if fcfg.enabled:
                frigate_bridge = FrigateBridge(fcfg, dispatcher=dispatcher)
        except Exception as e:
            # Deliberate best-effort: a bad section must not stop the controller.
            structlog.get_logger().warning(
                "frigate_bridge.config_invalid", error=str(e)
            )

    # Dynamic overlays (charts/chats) and browser dashboards — Phase 6.
    dynamic_renderer: DynamicRenderer | None = None
    browser_renderer: BrowserRenderer | None = None
    if cfg.dynamic_overlays:
        try:
            d = cfg.dynamic_overlays
            charts = [ChartCfg.model_validate(c) for c in (d.get("charts") or [])]
            chats = [ChatCfg.model_validate(c) for c in (d.get("chats") or [])]
            if charts or chats:
                dynamic_renderer = DynamicRenderer(
                    icon_dir=Path(cfg.icon_dir),
                    dispatcher=dispatcher,
                    charts=charts,
                    chats=chats,
                )
            dashboards = [
                DashboardCfg.model_validate(b)
                for b in (d.get("dashboards") or [])
            ]
            if dashboards:
                browser_renderer = BrowserRenderer(
                    icon_dir=Path(cfg.icon_dir),
                    dispatcher=dispatcher,
                    dashboards=dashboards,
                )
        except Exception as e:
            # A renderer built before the failure stays in use.
            structlog.get_logger().warning(
                "dynamic_overlays.config_invalid", error=str(e)
            )

    mqtt = MqttLoop(
        cfg,
        state,
        dispatcher.handle,
        frigate_bridge=frigate_bridge,
        dynamic_renderer=dynamic_renderer,
    )
    # Wire dispatcher events → MQTT publishes.
    dispatcher.on_state_change = mqtt.publish_state
    dispatcher.on_event = mqtt.publish_event

    # Snapshot history (Phase 6+) — periodic capture per instance.
    snapshot_hist = SnapshotHistory(cfg)

    # Stream watchdog (Phase 1 resilience, issue #3) — monitors mediamtx paths.
    watchdog: StreamWatchdog | None = None
    if cfg.watchdog:
        try:
            wcfg = WatchdogCfg.model_validate(cfg.watchdog)
            if wcfg.enabled:
                # The event publisher is attached right before start() below.
                # NOTE(review): ``mqtt`` already exists here, so the publisher
                # could presumably be passed directly — confirm against
                # StreamWatchdog before changing.
                watchdog = StreamWatchdog(
                    wcfg, dispatcher, mqtt_publish_event=None
                )
        except Exception as e:
            structlog.get_logger().warning(
                "watchdog.config_invalid", error=str(e)
            )

    # HTTP REST
    app = create_app(
        cfg,
        state,
        dispatcher,
        snapshot_history=snapshot_hist,
        frigate_bridge=frigate_bridge,
    )
    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host=cfg.http.host,
            port=cfg.http.port,
            log_level=cfg.log.level.lower(),
        )
    )

    log = structlog.get_logger()
    log.info(
        "controller.starting",
        instances=[i.name for i in cfg.instances],
        mqtt=f"{cfg.broker.host}:{cfg.broker.port}",
        http=f"{cfg.http.host}:{cfg.http.port}",
    )

    # (label, stop) pairs for components that were actually started, kept in
    # the order in which they are to be stopped.
    teardown: list[tuple[str, Callable[[], Awaitable[object]]]] = []
    try:
        # Start the renderer tasks (if configured).
        if dynamic_renderer:
            await dynamic_renderer.start()
            teardown.append(("dynamic_renderer", dynamic_renderer.stop))
        if browser_renderer:
            await browser_renderer.start()
            teardown.append(("browser_renderer", browser_renderer.stop))
        await snapshot_hist.start()
        teardown.append(("snapshot_history", snapshot_hist.stop))
        if watchdog:
            watchdog._publish_event = mqtt.publish_event
            await watchdog.start()
            teardown.append(("watchdog", watchdog.stop))

        # Pipeline monitor — detect ffmpeg restart + auto-restore overlay state.
        pipeline_monitor = PipelineMonitor(
            cfg=cfg,
            state=state,
            dispatcher=dispatcher,
            browser_renderer=browser_renderer,
            dynamic_renderer=dynamic_renderer,
            frigate_bridge=frigate_bridge,
        )
        await pipeline_monitor.start()
        # The monitor holds references to the renderers and the bridge, so it
        # goes to the front of the list and is stopped before them.
        teardown.insert(0, ("pipeline_monitor", pipeline_monitor.stop))

        try:
            await asyncio.gather(
                mqtt.run(),
                server.serve(),
            )
        except asyncio.CancelledError:
            log.info("controller.shutdown")
    finally:
        # The dispatcher and the MQTT loop are released last, whether or not
        # startup completed.
        teardown.append(("dispatcher", dispatcher.close))
        teardown.append(("mqtt", mqtt.stop))
        for component, stop in teardown:
            try:
                await stop()
            except Exception as e:
                # Keep going: one failing step must not leave the remaining
                # components unreleased.
                log.warning(
                    "controller.stop_failed",
                    component=component,
                    error=str(e),
                )
@cli.command(help="Запустить controller.")
def run(
    config: Path = typer.Option(
        Path("controller.yaml"),
        "--config",
        "-c",
        help="YAML config path",
    ),
) -> None:
    """Load the YAML config and run the controller until it exits.

    The CLI help text is passed explicitly to the decorator, so this
    docstring is for maintainers only.

    Args:
        config: Path to the YAML config file.

    Raises:
        typer.Exit: With code 1 when ``config`` does not exist.
    """
    if config.exists():
        cfg = Config.from_yaml(config)
        # Logging is configured from the loaded config, before the loop starts.
        _configure_logging(cfg.log.level)
        asyncio.run(_run(cfg))
        return

    typer.echo(f"config not found: {config}", err=True)
    raise typer.Exit(1)
def main() -> None:
    """Invoke the Typer CLI application."""
    cli()
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()