# 9debc56137
# Benchmarks the dense 5B Turbo model (Q8_0 GGUF + fp8 T5) as a lower-VRAM
# alternative to the 14B MoE pipeline. Includes dtype patches for dense
# WanModel, Wan 2.2 VAE config (48 channels, 16x spatial), and Blackwell fp8
# workaround.
# Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
# 27 lines, 549 B, YAML
# Compose definition for the lightx2v-5b service.
services:
  lightx2v-5b:
    # NOTE(review): the tag looks like a locally built Compose image
    # (project "voice-chat", service "voice-chat") — confirm it is built
    # before this service is started.
    image: voice-chat-voice-chat:latest
    volumes:
      # Named volume (declared at the bottom of this file) mounted where
      # HF_HOME points, so the Hugging Face cache outlives the container.
      - huggingface-cache:/cache/huggingface
      # Bind-mount the directory two levels above this file as /app.
      - ../../:/app
    working_dir: /app
    environment:
      - DTYPE=FP16
      - HF_HOME=/cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # NOTE(review): with `ipc: host` the container uses the host IPC
    # namespace, so `shm_size` presumably has no effect — confirm before
    # dropping either setting.
    shm_size: "8g"
    ipc: host
    # Service is only started when the "experimental" profile is enabled.
    profiles:
      - experimental

volumes:
  huggingface-cache:
    # Refers to an existing volume with this exact name; Compose does not
    # create it because it is marked external.
    name: voice-chat_huggingface-cache
    external: true