first stab at adding video

2026-04-12 04:11:52 -04:00
parent 680c5b04cc
commit 2818b41004
37 changed files with 2982 additions and 24 deletions
@@ -0,0 +1,57 @@
+"""Phase 5 component test: MuseTalk lip-sync + ffmpeg mux.
+
+Verifies the full library-mode per-turn path:
+- Pre-bake a library clip.
+- Generate a stand-in TTS waveform (sine tone).
+- Call ``VideoEngine.generate_speaking_clip`` and get a valid MP4 back.
+
+Writes the resulting clip to ``tests/component/_out/phase5_speaking.mp4``.
+
+Run:
+    docker compose exec voice-chat python -m tests.component.test_05_musetalk_lipsync
+"""
+from __future__ import annotations
+
+import sys
+
+from server.video import VideoConfig, VideoEngine
+from tests.component._common import (
+    ensure_sample_avatar,
+    get_logger,
+    synth_tone,
+    write_bytes,
+)
+
+log = get_logger("test_05")
+
+
+def run():
+    """Generate a library-mode speaking clip from a synthetic tone and save the MP4."""
+    sample_avatar = ensure_sample_avatar()
+    settings = {
+        "enabled": True,
+        "mode": "library",
+        "resolution": 480,
+        "fps": 16,
+        "library": {"base_clip_count": 1, "base_clip_seconds": 4},
+    }
+    video_engine = VideoEngine(VideoConfig.from_dict(settings))
+    video_engine.load_models()
+    video_engine.set_avatar(sample_avatar)
+
+    # One rate feeds both the tone synthesis and the engine call so they always agree.
+    rate_hz = 24000
+    tone = synth_tone(seconds=3.0, sample_rate=rate_hz, freq=220.0)
+    log.info("Generating library-mode speaking clip (3s audio)...")
+    clip = video_engine.generate_speaking_clip(
+        audio_f32=tone, sample_rate=rate_hz, reply_text="Hello, this is a lip-sync test."
+    )
+    # Sanity checks: non-empty bytes, with the "ftyp" tag expected at offsets 4..8 of an MP4.
+    assert isinstance(clip, bytes) and len(clip) > 0
+    assert clip[4:8] == b"ftyp"
+    saved_to = write_bytes("phase5_speaking.mp4", clip)
+    log.info("PASS: speaking clip written to %s (%d bytes)", saved_to, len(clip))
+
+
+if __name__ == "__main__":  # script entry: python -m tests.component.test_05_musetalk_lipsync
+    run()  # a failed assert propagates as AssertionError, so the process exits non-zero