# File: live-voice-chat/tests/unit/test_video_engine_logic.py
# Viewer metadata: 2026-04-12 04:11:52 -04:00 - 107 lines - 3.5 KiB - Python

"""Unit tests for pure-python logic inside VideoEngine.
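No models are loaded: each test builds a ``VideoEngine`` from a minimal config
and sets by hand only the attributes it needs (``cfg.mode``, ``cfg.fps``,
``speaking_base_frames``), without touching ``_wan22`` / ``_musetalk``, to
test prompt derivation, library round-robin, and frame fitting.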
"""
import numpy as np
import pytest
from server.video import VideoConfig, VideoEngine
@pytest.fixture
def engine() -> VideoEngine:
    """Return a ``VideoEngine`` built from a minimal reflective-mode config.

    The prompt template ``"A: {reply_hint} B"`` and the five-word reply
    limit are chosen so the ``_derive_prompt`` tests can assert exact
    output strings. The engine is only constructed here; nothing in this
    fixture loads a model.
    """
    settings = {
        "enabled": True,
        "mode": "reflective",
        "fps": 16,
        "reflective": {
            "clip_prompt_template": "A: {reply_hint} B",
            "prompt_reply_words": 5,
        },
    }
    return VideoEngine(VideoConfig.from_dict(settings))
def test_derive_prompt_truncates_to_word_limit(engine):
    """A reply longer than ``prompt_reply_words`` keeps only its first five words."""
    prompt = engine._derive_prompt("one two three four five six seven eight")
    assert prompt == "A: one two three four five B"
def test_derive_prompt_handles_empty_reply(engine):
    """An empty string and ``None`` both yield the fallback hint text."""
    expected = "A: calm and friendly B"
    # ``None`` is outside the annotated parameter type, hence the ignore.
    for reply in ("", None):
        prompt = engine._derive_prompt(reply)  # type: ignore[arg-type]
        assert prompt == expected, f"unexpected prompt for reply={reply!r}"
def test_derive_prompt_strips_and_passes_through(engine):
    """Surrounding whitespace is stripped; a short reply is used unchanged."""
    prompt = engine._derive_prompt(" hello world ")
    assert prompt == "A: hello world B"
def test_is_ready_false_without_models(engine):
    """A freshly constructed engine reports that it is not ready."""
    # Models haven't been loaded — is_ready must be False so the pipeline
    # falls back to the PCM streaming path.
    ready = engine.is_ready()
    assert ready is False
def test_pick_library_frames_round_robin(engine):
    """Successive picks alternate between the base clips and wrap around."""
    engine.cfg.mode = "library"
    engine.cfg.fps = 2

    # Two 4-frame base clips of a single RGB pixel each: clip A is all 0 and
    # clip B is all 255, so the first channel value of the first frame
    # identifies which clip a pick came from.
    clip_shape = (4, 1, 1, 3)
    clip_a = np.zeros(clip_shape, dtype=np.uint8)
    clip_b = np.full(clip_shape, 255, dtype=np.uint8)
    engine.speaking_base_frames = [clip_a, clip_b]

    # 2s of audio at 16kHz -> 4 frames at fps=2
    sample_rate = 16000
    audio = np.zeros(sample_rate * 2, dtype=np.float32)

    # All three picks happen before any assertion, as the order of calls is
    # what drives the round-robin.
    first, second, third = (
        engine._pick_library_frames(audio, sample_rate) for _ in range(3)
    )

    assert first.shape == clip_shape
    assert first[0, 0, 0, 0] == 0  # first pick = clip A
    assert second[0, 0, 0, 0] == 255  # second pick = clip B
    assert third[0, 0, 0, 0] == 0  # wraps back to A
def test_pick_library_frames_trims_to_audio_duration(engine):
    """A base clip longer than the audio is cut down to the audio's frame count."""
    engine.cfg.mode = "library"
    engine.cfg.fps = 4

    # A single 20-frame base clip, far longer than the audio needs.
    base_clip = np.zeros((20, 1, 1, 3), dtype=np.uint8)
    engine.speaking_base_frames = [base_clip]

    # 1s audio -> 4 frames
    sample_rate = 16000
    audio = np.zeros(sample_rate, dtype=np.float32)

    picked = engine._pick_library_frames(audio, sample_rate)
    assert picked.shape == (4, 1, 1, 3)
def test_pick_library_frames_loops_for_long_audio(engine):
    """A base clip shorter than the audio is extended to the audio's frame count."""
    engine.cfg.mode = "library"
    engine.cfg.fps = 4

    # A single 4-frame base clip, shorter than the audio needs.
    base_clip = np.zeros((4, 1, 1, 3), dtype=np.uint8)
    engine.speaking_base_frames = [base_clip]

    # 3s audio -> 12 frames, base has only 4
    sample_rate = 16000
    audio = np.zeros(sample_rate * 3, dtype=np.float32)

    picked = engine._pick_library_frames(audio, sample_rate)
    assert picked.shape == (12, 1, 1, 3)
def test_pick_library_frames_raises_when_empty(engine):
    """With no base clips available, picking frames raises ``RuntimeError``."""
    engine.cfg.mode = "library"
    engine.speaking_base_frames = []

    # Built outside the ``raises`` block so only the call under test can
    # satisfy the expectation.
    audio = np.zeros(100, dtype=np.float32)
    with pytest.raises(RuntimeError, match="no pre-baked base clips"):
        engine._pick_library_frames(audio, 16000)
def test_generate_speaking_clip_raises_when_not_ready(engine):
    """Generating a clip on the fixture's engine raises a "not ready" ``RuntimeError``."""
    # Built outside the ``raises`` block so only the call under test can
    # satisfy the expectation.
    audio = np.zeros(100, dtype=np.float32)
    with pytest.raises(RuntimeError, match="not ready"):
        engine.generate_speaking_clip(
            audio_f32=audio,
            sample_rate=16000,
            reply_text="hi",
        )