Add LightX2V + Wan2.2-TI2V-5B-Turbo GGUF experiment

Benchmarks the dense 5B Turbo model (Q8_0 GGUF + fp8 T5) as a lower-VRAM alternative to the 14B MoE pipeline. Includes dtype patches for the dense WanModel, the Wan 2.2 VAE config (48 channels, 16x spatial), and a Blackwell fp8 workaround.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-16 01:27:45 -04:00
parent 56923ff424
commit 9debc56137
8 changed files with 407 additions and 0 deletions
@@ -0,0 +1,26 @@
+services:  # Compose services for the LightX2V + Wan2.2-TI2V-5B-Turbo experiment
+  lightx2v-5b:  # the only service defined in this file
+    image: voice-chat-voice-chat:latest  # prebuilt image; presumably built by the main voice-chat project (confirm)
+    volumes:
+      - huggingface-cache:/cache/huggingface  # named volume declared under top-level `volumes`
+      - ../../:/app  # bind-mounts the directory two levels up onto /app
+    working_dir: /app  # same path as the bind mount target above
+    environment:
+      - DTYPE=FP16  # NOTE(review): the consumer of DTYPE is not visible here; confirm which script reads it
+      - HF_HOME=/cache/huggingface  # same path as the huggingface-cache mount above
+    deploy:
+      resources:
+        reservations:
+          devices:  # GPU device reservation for this service
+            - driver: nvidia
+              count: 1  # a single GPU
+              capabilities: [gpu]
+    shm_size: "8g"  # NOTE(review): may have no effect together with `ipc: host` below; confirm
+    ipc: host  # use the host IPC namespace
+    profiles:
+      - experimental  # service is only enabled when the "experimental" profile is active
+
+volumes:
+  huggingface-cache:
+    name: voice-chat_huggingface-cache  # actual Docker volume name behind the alias
+    external: true  # volume must already exist; Compose will not create it