live-voice-chat/requirements.txt

# torch and auto-gptq are intentionally not listed here: the Dockerfile installs
# them from GPU-specific index URLs.
# For local development outside Docker, install torch manually, e.g.:
#   pip install torch --index-url https://download.pytorch.org/whl/cu128
transformers==4.57.6
optimum>=1.19
compressed-tensors>=0.5.0
onnxruntime>=1.17.0
huggingface-hub>=0.20.0
qwen-asr==0.0.6
kokoro==0.9.4
fastapi>=0.115.0
uvicorn[standard]>=0.30.0
numpy
soundfile
scipy
python-multipart
pyyaml

# --- Avatar video (optional, only used when config.video.enabled=true) ---
# Video frame I/O: imageio and av (used by video_models/wan22.py and the muxer).
imageio[ffmpeg]>=2.34
av>=12.0
pyzmq>=25.0
gguf>=0.6.0
# sgl-kernel is intentionally not listed here: the Dockerfile installs it from
# SGLang's cu128 wheel index, because the PyPI build lacks SM120/Blackwell CUDA kernels.
# LightX2V (Wan2.2-Lightning) and MuseTalk are also not listed here: the
# Dockerfile installs them from source because neither ships a stable PyPI
# release yet. See the "LightX2V from source" and "MuseTalk from source"
# sections of the Dockerfile.