"""Unit tests for pure-python logic inside VideoEngine. No models are loaded: we instantiate ``VideoEngine`` and hand-stub its ``_wan22`` / ``_musetalk`` attributes to test prompt derivation, library round-robin, and frame fitting. """ import numpy as np import pytest from server.video import VideoConfig, VideoEngine @pytest.fixture def engine(): cfg = VideoConfig.from_dict( { "enabled": True, "mode": "reflective", "fps": 16, "reflective": { "clip_prompt_template": "A: {reply_hint} B", "prompt_reply_words": 5, }, } ) return VideoEngine(cfg) def test_derive_prompt_truncates_to_word_limit(engine): out = engine._derive_prompt("one two three four five six seven eight") assert out == "A: one two three four five B" def test_derive_prompt_handles_empty_reply(engine): out = engine._derive_prompt("") assert out == "A: calm and friendly B" out2 = engine._derive_prompt(None) # type: ignore[arg-type] assert out2 == "A: calm and friendly B" def test_derive_prompt_strips_and_passes_through(engine): out = engine._derive_prompt(" hello world ") assert out == "A: hello world B" def test_is_ready_false_without_models(engine): # Models haven't been loaded — is_ready must be False so the pipeline # falls back to the PCM streaming path. assert engine.is_ready() is False def test_pick_library_frames_round_robin(engine): engine.cfg.mode = "library" engine.cfg.fps = 2 # Two base clips, 4 frames each. a = np.tile(np.array([[[[0, 0, 0]]]], dtype=np.uint8), (4, 1, 1, 1)) b = np.tile(np.array([[[[255, 255, 255]]]], dtype=np.uint8), (4, 1, 1, 1)) engine.speaking_base_frames = [a, b] # 2s of audio at 16kHz → 4 frames at fps=2 audio = np.zeros(16000 * 2, dtype=np.float32) f1 = engine._pick_library_frames(audio, 16000) f2 = engine._pick_library_frames(audio, 16000) f3 = engine._pick_library_frames(audio, 16000) assert f1.shape == (4, 1, 1, 3) assert f1[0, 0, 0, 0] == 0 # first pick = clip A assert f2[0, 0, 0, 0] == 255 # second pick = clip B assert f3[0, 0, 0, 0] == 0 # wraps back to A def test_pick_library_frames_trims_to_audio_duration(engine): engine.cfg.mode = "library" engine.cfg.fps = 4 frames = np.zeros((20, 1, 1, 3), dtype=np.uint8) engine.speaking_base_frames = [frames] # 1s audio → 4 frames audio = np.zeros(16000, dtype=np.float32) out = engine._pick_library_frames(audio, 16000) assert out.shape == (4, 1, 1, 3) def test_pick_library_frames_loops_for_long_audio(engine): engine.cfg.mode = "library" engine.cfg.fps = 4 frames = np.zeros((4, 1, 1, 3), dtype=np.uint8) engine.speaking_base_frames = [frames] # 3s audio → 12 frames, base has only 4 audio = np.zeros(16000 * 3, dtype=np.float32) out = engine._pick_library_frames(audio, 16000) assert out.shape == (12, 1, 1, 3) def test_pick_library_frames_raises_when_empty(engine): engine.cfg.mode = "library" engine.speaking_base_frames = [] with pytest.raises(RuntimeError, match="no pre-baked base clips"): engine._pick_library_frames(np.zeros(100, dtype=np.float32), 16000) def test_generate_speaking_clip_raises_when_not_ready(engine): with pytest.raises(RuntimeError, match="not ready"): engine.generate_speaking_clip( audio_f32=np.zeros(100, dtype=np.float32), sample_rate=16000, reply_text="hi", )