107 lines
3.5 KiB
Python
107 lines
3.5 KiB
Python
"""Unit tests for pure-Python logic inside VideoEngine.

No models are loaded: we instantiate ``VideoEngine`` and hand-stub its
``_wan22`` / ``_musetalk`` attributes to test prompt derivation, library
round-robin, and frame fitting.
"""
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from server.video import VideoConfig, VideoEngine
|
|
|
|
|
|
@pytest.fixture
def engine():
    """Return a ``VideoEngine`` built from a small reflective-mode config.

    The config uses the template ``"A: {reply_hint} B"`` and a five-word
    reply limit, which the prompt-derivation tests rely on.
    """
    reflective_section = {
        "clip_prompt_template": "A: {reply_hint} B",
        "prompt_reply_words": 5,
    }
    settings = {
        "enabled": True,
        "mode": "reflective",
        "fps": 16,
        "reflective": reflective_section,
    }
    return VideoEngine(VideoConfig.from_dict(settings))
|
|
|
|
|
|
def test_derive_prompt_truncates_to_word_limit(engine):
    """An eight-word reply contributes only its first five words."""
    long_reply = "one two three four five six seven eight"
    prompt = engine._derive_prompt(long_reply)
    assert prompt == "A: one two three four five B"
|
|
|
|
|
|
def test_derive_prompt_handles_empty_reply(engine):
    """Both ``""`` and ``None`` yield the default "calm and friendly" hint."""
    fallback_prompt = "A: calm and friendly B"
    assert engine._derive_prompt("") == fallback_prompt
    # The ignore marker indicates None is outside the declared argument type;
    # the method is still expected to tolerate it.
    assert engine._derive_prompt(None) == fallback_prompt  # type: ignore[arg-type]
|
|
|
|
|
|
def test_derive_prompt_strips_and_passes_through(engine):
    """A short reply loses its surrounding whitespace and is used as-is."""
    padded_reply = " hello world "
    assert engine._derive_prompt(padded_reply) == "A: hello world B"
|
|
|
|
|
|
def test_is_ready_false_without_models(engine):
    """A freshly constructed engine reports that it is not ready."""
    # With no models loaded the engine must say so, which lets the pipeline
    # fall back to the PCM streaming path.
    ready = engine.is_ready()
    assert ready is False
|
|
|
|
|
|
def test_pick_library_frames_round_robin(engine):
    """Successive picks alternate between the base clips and wrap around."""
    engine.cfg.mode = "library"
    engine.cfg.fps = 2

    # Two 4-frame base clips of 1x1 RGB pixels: one all black, one all white.
    clip_shape = (4, 1, 1, 3)
    black_clip = np.full(clip_shape, 0, dtype=np.uint8)
    white_clip = np.full(clip_shape, 255, dtype=np.uint8)
    engine.speaking_base_frames = [black_clip, white_clip]

    # Two seconds of 16 kHz audio should map to 4 frames at fps=2.
    sample_rate = 16000
    silence = np.zeros(sample_rate * 2, dtype=np.float32)

    first, second, third = [
        engine._pick_library_frames(silence, sample_rate) for _ in range(3)
    ]

    assert first.shape == (4, 1, 1, 3)
    assert first[0, 0, 0, 0] == 0  # first pick = black clip
    assert second[0, 0, 0, 0] == 255  # second pick = white clip
    assert third[0, 0, 0, 0] == 0  # wraps back to the black clip
|
|
|
|
|
|
def test_pick_library_frames_trims_to_audio_duration(engine):
    """A 20-frame base clip is cut down to the frames the audio needs."""
    engine.cfg.mode = "library"
    engine.cfg.fps = 4

    base_clip = np.zeros((20, 1, 1, 3), dtype=np.uint8)
    engine.speaking_base_frames = [base_clip]

    # One second of 16 kHz audio at fps=4 should yield 4 frames.
    sample_rate = 16000
    one_second = np.zeros(sample_rate, dtype=np.float32)

    picked = engine._pick_library_frames(one_second, sample_rate)
    assert picked.shape == (4, 1, 1, 3)
|
|
|
|
|
|
def test_pick_library_frames_loops_for_long_audio(engine):
    """A 4-frame base clip is extended to cover audio needing 12 frames."""
    engine.cfg.mode = "library"
    engine.cfg.fps = 4

    base_clip = np.zeros((4, 1, 1, 3), dtype=np.uint8)
    engine.speaking_base_frames = [base_clip]

    # Three seconds of 16 kHz audio at fps=4 needs 12 frames; the clip has 4.
    sample_rate = 16000
    three_seconds = np.zeros(sample_rate * 3, dtype=np.float32)

    picked = engine._pick_library_frames(three_seconds, sample_rate)
    assert picked.shape == (12, 1, 1, 3)
|
|
|
|
|
|
def test_pick_library_frames_raises_when_empty(engine):
    """Picking with no base clips raises a ``RuntimeError``."""
    engine.cfg.mode = "library"
    engine.speaking_base_frames = []

    short_audio = np.zeros(100, dtype=np.float32)
    with pytest.raises(RuntimeError, match="no pre-baked base clips"):
        engine._pick_library_frames(short_audio, 16000)
|
|
|
|
|
|
def test_generate_speaking_clip_raises_when_not_ready(engine):
    """Generating a clip on the fixture engine raises a "not ready" error."""
    call_kwargs = {
        "audio_f32": np.zeros(100, dtype=np.float32),
        "sample_rate": 16000,
        "reply_text": "hi",
    }
    with pytest.raises(RuntimeError, match="not ready"):
        engine.generate_speaking_clip(**call_kwargs)
|