Files
live-voice-chat/tests/component/test_06_reflective.py
2026-04-12 04:11:52 -04:00

70 lines
2.1 KiB
Python

"""Phase 6 component test: reflective mode (fresh Wan2.2 clip per turn).
Verifies that with ``mode=reflective``, ``generate_speaking_clip`` runs
the Wan2.2 image-to-video pipeline once per call (so the base frames
differ from turn to turn) and the prompt is derived from the reply text.
Run:
    docker compose exec voice-chat python -m tests.component.test_06_reflective
"""
from __future__ import annotations
import numpy as np
from server.video import VideoConfig, VideoEngine
from tests.component._common import (
ensure_sample_avatar,
get_logger,
synth_tone,
write_bytes,
)
# Module-level logger for this test, obtained from the shared test helper
# under the name "test_06".
log = get_logger("test_06")
def run():
    """Drive the reflective-mode video path and save two generated clips.

    Steps, in order:

    1. Build a ``VideoEngine`` from a config with ``mode="reflective"``,
       load its models and set the sample avatar.
    2. Check that the prompt derived from a reply still contains the word
       ``"beach"`` from that reply.
    3. Generate one speaking clip per reply text (same audio both times)
       and write each result out with ``write_bytes``.
    4. Log whether the two results differ. This is informational only:
       identical output produces a warning, not a failure.

    Raises:
        AssertionError: if the derived prompt does not contain ``"beach"``.
    """
    beach_reply = "The assistant walks along a sunny beach watching seagulls."
    snow_reply = "Now the character stands in a snow-covered forest at dusk."
    sample_rate = 24000

    # Resolve the avatar before building the config, same as the call order
    # this test has always used.
    avatar = ensure_sample_avatar()
    settings = {
        "enabled": True,
        "mode": "reflective",
        "resolution": 480,
        "fps": 16,
        "reflective": {"clip_seconds": 3},
    }
    video = VideoEngine(VideoConfig.from_dict(settings))
    video.load_models()
    video.set_avatar(avatar)

    # The reply text must survive into the derived prompt.
    derived = video._derive_prompt(beach_reply)
    log.info("derived prompt: %s", derived)
    assert "beach" in derived, "reply_hint did not survive template interpolation"

    # One tone is reused for both clips; only the reply text changes.
    tone = synth_tone(seconds=3.0)

    log.info("Generating reflective speaking clip #1...")
    beach_clip = video.generate_speaking_clip(tone, sample_rate, beach_reply)
    write_bytes("phase6_reflective_beach.mp4", beach_clip)

    log.info("Generating reflective speaking clip #2...")
    snow_clip = video.generate_speaking_clip(tone, sample_rate, snow_reply)
    write_bytes("phase6_reflective_snow.mp4", snow_clip)

    # Deliberately not an assertion: identical bytes are possible when the
    # generation is seeded, so a match is only reported as a warning.
    if beach_clip != snow_clip:
        log.info("PASS: reflective clips differ as expected")
        return
    log.warning("clips are byte-identical — check that seeds are random")
# Script entry point: lets the test be launched with ``python -m``.
if __name__ == "__main__":
    run()