6081e33e5a
Pipeline filter state (overlays, layout, cell_map, audio) живёт в RAM
процесса ffmpeg. При пересоздании контейнера (compose up, OOM, NVENC crash,
config change) state теряется — controller'у нужно заново его отправить (re-push).
Раньше user'у приходилось вручную:
curl POST /layout/.../set
docker restart cuda-grid-controller # для browser/dynamic re-register
Теперь автоматизировано:
PipelineMonitor polls ZMQ каждые 3 sec (no-op set_layout).
On timeout/error → mark instance lost.
First success after lost → trigger restore:
1. set_layout к state.active_layout
2. set_audio_output_enabled к state.audio_output_enabled
3. re-push все overlays из state.overlays
4. browser/dynamic/frigate hooks: mark_all_unregistered() —
их loops автоматически re-add на next iteration
Verified test: docker restart cuda-grid-pipeline → 10 sec downtime →
monitor logs lost+restored+restore_done с count=6 overlays.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
195 lines
6.1 KiB
Python
195 lines
6.1 KiB
Python
"""Entry point: `cuda-grid-controller --config controller.yaml`."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import structlog
|
|
import typer
|
|
import uvicorn
|
|
|
|
from .config import Config
|
|
from .dispatch import CommandDispatcher
|
|
from .browser_overlays import BrowserRenderer, DashboardCfg
|
|
from .dynamic_overlays import ChartCfg, ChatCfg, DynamicRenderer
|
|
from .pipeline_monitor import PipelineMonitor
|
|
from .frigate_bridge import FrigateBridge, FrigateBridgeCfg
|
|
from .http_api import create_app
|
|
from .mqtt_loop import MqttLoop
|
|
from .snapshot_history import SnapshotHistory
|
|
from .state import ControllerState
|
|
from .watchdog import StreamWatchdog, WatchdogCfg
|
|
|
|
# Typer application object; the `run` command defined further down in this
# module is registered on it. `add_completion=False` is passed so that Typer
# does not add its shell-completion options to the CLI.
cli = typer.Typer(add_completion=False)
|
|
|
|
|
|
def _configure_logging(level: str) -> None:
    """Configure stdlib logging and structlog with one shared level threshold.

    Args:
        level: Log level name taken from the config (case-insensitive), e.g.
            ``"debug"`` or ``"INFO"``. A name that does not resolve to a
            numeric level on the ``logging`` module falls back to ``INFO``.
    """
    resolved = getattr(logging, level.upper(), None)
    # Accept only real numeric levels: some upper-case attributes of `logging`
    # (e.g. BASIC_FORMAT) are strings and would make basicConfig() raise.
    numeric_level = resolved if isinstance(resolved, int) else logging.INFO

    logging.basicConfig(format="%(message)s", level=numeric_level)
    structlog.configure(
        processors=[
            structlog.processors.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.dev.ConsoleRenderer(),
        ],
        # structlog does not consult the stdlib root level here; without a
        # filtering wrapper every event is emitted regardless of `level`.
        wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
    )
|
|
|
|
|
|
def _build_frigate_bridge(
    cfg: Config, dispatcher: CommandDispatcher
) -> FrigateBridge | None:
    """Return a FrigateBridge when ``cfg.frigate`` is set, valid and enabled."""
    if not cfg.frigate:
        return None
    try:
        bridge_cfg = FrigateBridgeCfg.model_validate(cfg.frigate)
        if bridge_cfg.enabled:
            # The dispatcher is handed over for auto-overlay generation.
            return FrigateBridge(bridge_cfg, dispatcher=dispatcher)
    except Exception as exc:
        # A broken optional section is reported but does not abort startup.
        structlog.get_logger().warning(
            "frigate_bridge.config_invalid", error=str(exc)
        )
    return None


def _build_overlay_renderers(
    cfg: Config, dispatcher: CommandDispatcher
) -> tuple[DynamicRenderer | None, BrowserRenderer | None]:
    """Build the chart/chat and dashboard renderers from ``cfg.dynamic_overlays``."""
    dynamic_renderer: DynamicRenderer | None = None
    browser_renderer: BrowserRenderer | None = None
    if not cfg.dynamic_overlays:
        return dynamic_renderer, browser_renderer
    try:
        section = cfg.dynamic_overlays
        charts = [
            ChartCfg.model_validate(item) for item in (section.get("charts") or [])
        ]
        chats = [
            ChatCfg.model_validate(item) for item in (section.get("chats") or [])
        ]
        if charts or chats:
            dynamic_renderer = DynamicRenderer(
                icon_dir=Path(cfg.icon_dir),
                dispatcher=dispatcher,
                charts=charts,
                chats=chats,
            )
        dashboards = [
            DashboardCfg.model_validate(item)
            for item in (section.get("dashboards") or [])
        ]
        if dashboards:
            browser_renderer = BrowserRenderer(
                icon_dir=Path(cfg.icon_dir),
                dispatcher=dispatcher,
                dashboards=dashboards,
            )
    except Exception as exc:
        # Whatever was built before the failure is still returned, so a bad
        # dashboards entry does not discard a valid chart/chat renderer.
        structlog.get_logger().warning(
            "dynamic_overlays.config_invalid", error=str(exc)
        )
    return dynamic_renderer, browser_renderer


def _build_watchdog(
    cfg: Config, dispatcher: CommandDispatcher
) -> StreamWatchdog | None:
    """Return a StreamWatchdog when ``cfg.watchdog`` is set, valid and enabled."""
    if not cfg.watchdog:
        return None
    try:
        watchdog_cfg = WatchdogCfg.model_validate(cfg.watchdog)
        if watchdog_cfg.enabled:
            # The event publisher is attached by _run() right before start().
            return StreamWatchdog(
                watchdog_cfg, dispatcher, mqtt_publish_event=None
            )
    except Exception as exc:
        structlog.get_logger().warning("watchdog.config_invalid", error=str(exc))
    return None


async def _stop_component(name: str, stop, log) -> None:
    """Await one teardown coroutine function; log a failure instead of raising."""
    try:
        await stop()
    except Exception as exc:
        # One failing component must not prevent the remaining ones from
        # being shut down.
        log.warning("controller.stop_failed", component=name, error=str(exc))


async def _run(cfg: Config) -> None:
    """Wire up all controller components and run until cancelled or failed.

    Builds the state, dispatcher, optional bridges/renderers/watchdog, the
    MQTT loop and the HTTP server, starts the background components and then
    awaits the MQTT loop and the uvicorn server together.

    Background components are started inside the ``try`` block and only the
    ones whose ``start()`` completed are stopped afterwards, so a failure
    half-way through startup no longer leaves earlier components running.
    Teardown keeps the original order (pipeline monitor first, MQTT last) and
    continues past a component whose stop fails.

    Args:
        cfg: Parsed controller configuration.
    """
    state = ControllerState()
    # Every instance begins on its configured default layout.
    for inst in cfg.instances:
        await state.set_layout(inst.name, inst.default_layout)

    dispatcher = CommandDispatcher(cfg, state)

    # Optional Frigate bridge.
    frigate_bridge = _build_frigate_bridge(cfg, dispatcher)

    # Dynamic overlays (charts/chats) and browser dashboards — Phase 6.
    dynamic_renderer, browser_renderer = _build_overlay_renderers(cfg, dispatcher)

    mqtt = MqttLoop(
        cfg,
        state,
        dispatcher.handle,
        frigate_bridge=frigate_bridge,
        dynamic_renderer=dynamic_renderer,
    )

    # Wire dispatcher events to MQTT publishes.
    dispatcher.on_state_change = mqtt.publish_state
    dispatcher.on_event = mqtt.publish_event

    # Snapshot history (Phase 6+) — periodic capture per instance.
    snapshot_hist = SnapshotHistory(cfg)

    # Stream watchdog (Phase 1 resilience, issue #3) — monitor mediamtx paths.
    watchdog = _build_watchdog(cfg, dispatcher)

    # HTTP REST API.
    app = create_app(
        cfg,
        state,
        dispatcher,
        snapshot_history=snapshot_hist,
        frigate_bridge=frigate_bridge,
    )
    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host=cfg.http.host,
            port=cfg.http.port,
            log_level=cfg.log.level.lower(),
        )
    )

    log = structlog.get_logger()
    log.info(
        "controller.starting",
        instances=[i.name for i in cfg.instances],
        mqtt=f"{cfg.broker.host}:{cfg.broker.port}",
        http=f"{cfg.http.host}:{cfg.http.port}",
    )

    # Stop callables of the components whose start() has completed.
    stoppers = {}
    try:
        # Start the renderer tasks (if configured).
        if dynamic_renderer:
            await dynamic_renderer.start()
            stoppers["dynamic_renderer"] = dynamic_renderer.stop
        if browser_renderer:
            await browser_renderer.start()
            stoppers["browser_renderer"] = browser_renderer.stop
        await snapshot_hist.start()
        stoppers["snapshot_history"] = snapshot_hist.stop
        if watchdog:
            # The watchdog was built with mqtt_publish_event=None; attach the
            # real publisher before it starts.
            watchdog._publish_event = mqtt.publish_event
            await watchdog.start()
            stoppers["watchdog"] = watchdog.stop

        # Pipeline monitor — detect ffmpeg restart + auto-restore overlay state.
        pipeline_monitor = PipelineMonitor(
            cfg=cfg,
            state=state,
            dispatcher=dispatcher,
            browser_renderer=browser_renderer,
            dynamic_renderer=dynamic_renderer,
            frigate_bridge=frigate_bridge,
        )
        await pipeline_monitor.start()
        stoppers["pipeline_monitor"] = pipeline_monitor.stop

        await asyncio.gather(
            mqtt.run(),
            server.serve(),
        )
    except asyncio.CancelledError:
        log.info("controller.shutdown")
    finally:
        # Same teardown order as before; components that never started are
        # simply absent from `stoppers`.
        for name in (
            "pipeline_monitor",
            "dynamic_renderer",
            "browser_renderer",
            "snapshot_history",
            "watchdog",
        ):
            stop = stoppers.get(name)
            if stop is not None:
                await _stop_component(name, stop, log)
        await _stop_component("dispatcher", dispatcher.close, log)
        await _stop_component("mqtt", mqtt.stop, log)
|
|
|
|
|
|
@cli.command()
def run(
    config: Path = typer.Option(
        Path("controller.yaml"),
        "--config",
        "-c",
        help="YAML config path",
    ),
) -> None:
    """Запустить controller."""
    # NOTE: the docstring above is deliberately left unchanged. Typer uses a
    # command's docstring as its --help text, so it is user-facing output.
    #
    # Load the YAML config, configure logging from it and run the controller
    # until it exits. Exits with status 1 when the config path is unusable.
    if not config.exists():
        typer.echo(f"config not found: {config}", err=True)
        raise typer.Exit(1)
    if config.is_dir():
        # A directory passes exists(); reject it here with a clear message
        # instead of handing it to the YAML loader.
        typer.echo(f"config is a directory: {config}", err=True)
        raise typer.Exit(1)

    cfg = Config.from_yaml(config)
    _configure_logging(cfg.log.level)
    asyncio.run(_run(cfg))
|
|
|
|
|
|
def main() -> None:
    """Console entry point: invoke the Typer application ``cli``."""
    cli()
|
|
|
|
|
|
# Run the CLI when this module is executed as __main__. Because the module
# uses relative imports, it has to be launched as part of its package
# (for example via `python -m`).
if __name__ == "__main__":
    main()
|