"""Phase 6 component test: reflective mode (fresh Wan2.2 clip per turn).

Exercises ``generate_speaking_clip`` with ``mode=reflective`` twice, using a
different reply text each time, and checks that the prompt derived from a
reply still contains a keyword from that reply. The test is intended to show
that the Wan2.2 image-to-video pipeline runs once per call (so the base frames
differ from turn to turn); the comparison of the two resulting clips is a
soft, log-only check.

Run:
    docker compose exec voice-chat python -m tests.component.test_06_reflective
"""

from __future__ import annotations

import numpy as np

from server.video import VideoConfig, VideoEngine
from tests.component._common import (
    ensure_sample_avatar,
    get_logger,
    synth_tone,
    write_bytes,
)

log = get_logger("test_06")


def run():
    """Generate two reflective clips and report whether they differ.

    Steps, in order:

    1. Build a reflective-mode ``VideoConfig``, create the engine, load its
       models and set the sample avatar.
    2. Assert that ``_derive_prompt`` keeps the word ``"beach"`` from the
       reply text.
    3. Generate one clip per reply text from the same synthesized tone and
       write each result via ``write_bytes``.
    4. Log PASS when the two results differ, or a warning when they are equal.
       Equality is deliberately not a failure.

    Raises:
        AssertionError: if the derived prompt does not contain ``"beach"``.
    """
    beach_reply = "The assistant walks along a sunny beach watching seagulls."
    snow_reply = "Now the character stands in a snow-covered forest at dusk."

    avatar = ensure_sample_avatar()

    settings = {
        "enabled": True,
        "mode": "reflective",
        "resolution": 480,
        "fps": 16,
        "reflective": {"clip_seconds": 3},
    }
    engine = VideoEngine(VideoConfig.from_dict(settings))
    engine.load_models()
    engine.set_avatar(avatar)

    # Verify prompt derivation includes the reply hint.
    derived = engine._derive_prompt(beach_reply)
    log.info("derived prompt: %s", derived)
    assert "beach" in derived, "reply_hint did not survive template interpolation"

    tone = synth_tone(seconds=3.0)
    # NOTE(review): 24000 is passed as the second positional argument,
    # presumably the audio sample rate -- confirm against VideoEngine.
    rate = 24000

    # (progress message, reply text, output file name), processed in order.
    jobs = (
        (
            "Generating reflective speaking clip #1...",
            beach_reply,
            "phase6_reflective_beach.mp4",
        ),
        (
            "Generating reflective speaking clip #2...",
            snow_reply,
            "phase6_reflective_snow.mp4",
        ),
    )

    clips = []
    for message, reply, filename in jobs:
        log.info(message)
        clip = engine.generate_speaking_clip(tone, rate, reply)
        write_bytes(filename, clip)
        clips.append(clip)

    # Not a strict assertion (same prompt could yield identical bytes if
    # seeded), but with different prompts and random seeds the blobs should
    # differ.
    beach_clip, snow_clip = clips
    clips_differ = beach_clip != snow_clip
    if clips_differ:
        log.info("PASS: reflective clips differ as expected")
        return
    log.warning("clips are byte-identical — check that seeds are random")


if __name__ == "__main__":
    run()