Files
vf-cuda-grid/controller/cuda_grid_controller/__main__.py
T
gx 155038aabb controller: stream watchdog (Phase 1 resilience, issue #3)
StreamWatchdog (watchdog.py) polls the mediamtx /v3/paths/list endpoint every
poll_interval_sec seconds. If an expected path has been missing for longer than
lost_threshold_sec, it emits the MQTT event stream_lost and shows an 'OFFLINE'
text overlay. When the path comes back, it emits stream_restored and removes
the overlay.

Config:
  watchdog:
    enabled: true
    mediamtx_api_url: http://cuda-grid-mediamtx:9997
    poll_interval_sec: 5.0
    lost_threshold_sec: 15.0
    paths:
      - mediamtx_path: live-audio
        instance: tv_grid
        label: Audio
        overlay_when_lost: true

httpx has been added to the Dockerfile.

Today's incident (the audio sidecar lost its connection to mediamtx →
pipeline restart loop): the watchdog will detect the missing live-audio path
after 15 sec and show a warning on the TV side. A manual restart of the audio
sidecar is still needed (watchdog auto-restart is planned for Phase 2).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 10:07:12 +01:00

170 lines
5.1 KiB
Python

"""Entry point: `cuda-grid-controller --config controller.yaml`."""
from __future__ import annotations

import asyncio
import logging
import sys
from collections.abc import Awaitable, Callable
from pathlib import Path

import structlog
import typer
import uvicorn

from .config import Config
from .dispatch import CommandDispatcher
from .dynamic_overlays import ChartCfg, ChatCfg, DynamicRenderer
from .frigate_bridge import FrigateBridge, FrigateBridgeCfg
from .http_api import create_app
from .mqtt_loop import MqttLoop
from .snapshot_history import SnapshotHistory
from .state import ControllerState
from .watchdog import StreamWatchdog, WatchdogCfg
# Typer application object; commands are registered on it via @cli.command().
# add_completion=False leaves out Typer's shell-completion install options.
cli = typer.Typer(add_completion=False)
def _configure_logging(level: str) -> None:
    """Configure stdlib logging and structlog for console output.

    Args:
        level: Log level name, case-insensitive (e.g. ``"info"``, ``"DEBUG"``).
            Anything that does not resolve to a numeric ``logging`` level
            constant falls back to ``INFO``.
    """
    # getattr() alone could hand back a non-level attribute of the logging
    # module (e.g. the BASIC_FORMAT string), so accept integers only.
    resolved = getattr(logging, level.upper(), None)
    if not isinstance(resolved, int):
        resolved = logging.INFO

    logging.basicConfig(format="%(message)s", level=resolved)
    structlog.configure(
        processors=[
            structlog.processors.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.dev.ConsoleRenderer(),
        ],
        # structlog's default logger does not go through stdlib logging, so the
        # basicConfig() level above never filters it. Apply the same threshold
        # to structlog explicitly so the configured level takes effect.
        wrapper_class=structlog.make_filtering_bound_logger(resolved),
    )
def _build_frigate_bridge(
    cfg: Config, dispatcher: CommandDispatcher
) -> FrigateBridge | None:
    """Build the optional Frigate bridge from ``cfg.frigate``.

    Returns ``None`` when the section is missing/empty, when it is disabled,
    or when validation or construction fails (the failure is logged).
    """
    if not cfg.frigate:
        return None
    try:
        bridge_cfg = FrigateBridgeCfg.model_validate(cfg.frigate)
        if bridge_cfg.enabled:
            # The dispatcher is passed in for automatic overlay generation.
            return FrigateBridge(bridge_cfg, dispatcher=dispatcher)
    except Exception as exc:
        # Optional feature: a bad section must not prevent the controller
        # from starting, so it is reported and skipped.
        structlog.get_logger().warning(
            "frigate_bridge.config_invalid", error=str(exc)
        )
    return None


def _build_dynamic_renderer(
    cfg: Config, dispatcher: CommandDispatcher
) -> DynamicRenderer | None:
    """Build the optional dynamic overlay renderer (charts/chats, Phase 6).

    Returns ``None`` when ``cfg.dynamic_overlays`` is missing/empty, when it
    declares neither charts nor chats, or when it is invalid (logged).
    """
    if not cfg.dynamic_overlays:
        return None
    try:
        section = cfg.dynamic_overlays
        charts = [
            ChartCfg.model_validate(item) for item in (section.get("charts") or [])
        ]
        chats = [
            ChatCfg.model_validate(item) for item in (section.get("chats") or [])
        ]
        if charts or chats:
            return DynamicRenderer(
                icon_dir=Path(cfg.icon_dir),
                dispatcher=dispatcher,
                charts=charts,
                chats=chats,
            )
    except Exception as exc:
        structlog.get_logger().warning(
            "dynamic_overlays.config_invalid", error=str(exc)
        )
    return None


def _build_watchdog(
    cfg: Config, dispatcher: CommandDispatcher
) -> StreamWatchdog | None:
    """Build the optional stream watchdog (Phase 1 resilience, issue #3).

    The watchdog is created without an event publisher; ``_run`` injects it
    right before starting the watchdog. Returns ``None`` when the section is
    missing/empty, disabled, or invalid (logged).
    """
    if not cfg.watchdog:
        return None
    try:
        watchdog_cfg = WatchdogCfg.model_validate(cfg.watchdog)
        if watchdog_cfg.enabled:
            return StreamWatchdog(watchdog_cfg, dispatcher, mqtt_publish_event=None)
    except Exception as exc:
        structlog.get_logger().warning("watchdog.config_invalid", error=str(exc))
    return None


async def _run_shutdown_steps(
    steps: list[tuple[str, Callable[[], Awaitable[object]]]],
) -> None:
    """Await every shutdown step in order, even if an earlier one fails.

    Each failure is logged; after all steps have been attempted the first
    failure is re-raised so the error is still visible to the caller.
    """
    log = structlog.get_logger()
    first_error: Exception | None = None
    for name, stop in steps:
        try:
            await stop()
        except Exception as exc:
            log.warning("controller.shutdown_step_failed", step=name, error=str(exc))
            if first_error is None:
                first_error = exc
    if first_error is not None:
        raise first_error


async def _run(cfg: Config) -> None:
    """Wire up all controller components and run until cancelled.

    Builds the state, dispatcher, optional Frigate bridge / dynamic renderer /
    watchdog, the MQTT loop and the HTTP server, starts the background
    components, then runs the MQTT loop and the HTTP server concurrently.
    On exit, every component that was started is stopped, the dispatcher is
    closed and the MQTT loop is stopped.

    Args:
        cfg: Parsed controller configuration.
    """
    state = ControllerState()
    # Initialise each instance's active layout to its configured default.
    for inst in cfg.instances:
        await state.set_layout(inst.name, inst.default_layout)

    dispatcher = CommandDispatcher(cfg, state)
    frigate_bridge = _build_frigate_bridge(cfg, dispatcher)
    dynamic_renderer = _build_dynamic_renderer(cfg, dispatcher)

    mqtt = MqttLoop(
        cfg,
        state,
        dispatcher.handle,
        frigate_bridge=frigate_bridge,
        dynamic_renderer=dynamic_renderer,
    )
    # Wire dispatcher events to MQTT publishes.
    dispatcher.on_state_change = mqtt.publish_state
    dispatcher.on_event = mqtt.publish_event

    # Snapshot history (Phase 6+): periodic capture per instance.
    snapshot_hist = SnapshotHistory(cfg)
    watchdog = _build_watchdog(cfg, dispatcher)

    # HTTP REST API.
    app = create_app(
        cfg,
        state,
        dispatcher,
        snapshot_history=snapshot_hist,
        frigate_bridge=frigate_bridge,
    )
    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host=cfg.http.host,
            port=cfg.http.port,
            log_level=cfg.log.level.lower(),
        )
    )

    log = structlog.get_logger()
    log.info(
        "controller.starting",
        instances=[i.name for i in cfg.instances],
        mqtt=f"{cfg.broker.host}:{cfg.broker.port}",
        http=f"{cfg.http.host}:{cfg.http.port}",
    )

    # Components whose start() completed, in start order; only these are
    # stopped during shutdown.
    started: list[tuple[str, Callable[[], Awaitable[object]]]] = []
    serving = False
    try:
        # Startup lives inside the try so that a failing start() still stops
        # the components that were already running.
        if dynamic_renderer:
            await dynamic_renderer.start()
            started.append(("dynamic_renderer", dynamic_renderer.stop))
        await snapshot_hist.start()
        started.append(("snapshot_history", snapshot_hist.stop))
        if watchdog:
            # NOTE(review): the publisher is injected through a private
            # attribute; since `mqtt` already exists when the watchdog is
            # built, passing it to the constructor would be cleaner — confirm
            # against StreamWatchdog before changing.
            watchdog._publish_event = mqtt.publish_event
            await watchdog.start()
            started.append(("watchdog", watchdog.stop))

        serving = True
        try:
            await asyncio.gather(
                mqtt.run(),
                server.serve(),
            )
        except asyncio.CancelledError:
            log.info("controller.shutdown")
    finally:
        steps = [*started, ("dispatcher", dispatcher.close)]
        if serving:
            # Only stop the MQTT loop if the run phase was actually reached.
            steps.append(("mqtt", mqtt.stop))
        await _run_shutdown_steps(steps)
@cli.command()
def run(
    config: Path = typer.Option(
        Path("controller.yaml"),
        "--config",
        "-c",
        help="YAML config path",
    ),
) -> None:
    """Run the controller."""
    # is_file() rather than exists(): a directory path would pass exists() and
    # then fail inside the YAML loader instead of producing this clean error.
    if not config.is_file():
        typer.echo(f"config not found: {config}", err=True)
        raise typer.Exit(1)
    cfg = Config.from_yaml(config)
    # Logging is configured only after the config is loaded because the level
    # comes from the config itself.
    _configure_logging(cfg.log.level)
    asyncio.run(_run(cfg))
def main() -> None:
    """Entry point: invoke the Typer application object ``cli``."""
    cli()
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()