Files
live-voice-chat/experimental/lightx2v_5b/setup_model.py
T
bhetherman 9debc56137 Add LightX2V + Wan2.2-TI2V-5B-Turbo GGUF experiment
Benchmarks the dense 5B Turbo model (Q8_0 GGUF + fp8 T5) as a
lower-VRAM alternative to the 14B MoE pipeline. Includes dtype
patches for dense WanModel, Wan 2.2 VAE config (48 channels, 16x
spatial), and Blackwell fp8 workaround.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-16 01:27:45 -04:00

55 lines
1.7 KiB
Python

"""Stage Wan2.2-TI2V-5B-Turbo GGUF pipeline for LightX2V.
Downloads:
  1. Base `Wan-AI/Wan2.2-TI2V-5B` snapshot (configs, T5, VAE — skip bf16 DIT shards).
  2. Turbo Q8 GGUF DIT from `hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF`.
     Quant file can be overridden via the GGUF_FILE env var (default Q8_0).
  3. fp8 T5 weights (`models_t5_umt5-xxl-enc-fp8.safetensors`) from `lightx2v/Encoders`.

Idempotent: huggingface_hub handles caching.
"""
from __future__ import annotations
import os
from huggingface_hub import hf_hub_download, snapshot_download
# Hugging Face repo that provides the base pipeline snapshot.
BASE_REPO = "Wan-AI/Wan2.2-TI2V-5B"

# Repo holding the Turbo GGUF DIT weights, and the file to fetch from it.
# The file name is read from the GGUF_FILE environment variable when set,
# otherwise the Q8_0 quantization is used.
GGUF_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"
GGUF_FILE = os.getenv("GGUF_FILE", "Wan2_2-TI2V-5B-Turbo-Q8_0.gguf")

# Repo and file name of the fp8 T5 weights.
T5_FP8_REPO = "lightx2v/Encoders"
T5_FP8_FILE = "models_t5_umt5-xxl-enc-fp8.safetensors"
def main() -> None:
    """Download the three model artifacts and print their local paths.

    Steps, in order:
      1. Snapshot ``BASE_REPO``, skipping ``*.pt`` files and
         ``diffusion_pytorch_model*.safetensors`` shards.
      2. Download ``GGUF_FILE`` from ``GGUF_REPO``.
      3. Download ``T5_FP8_FILE`` from ``T5_FP8_REPO``.

    After the downloads, prints ``BASE_DIR``, ``DIT_GGUF`` and ``T5_FP8``
    assignments built from the returned paths.
    """
    # Step banners are flushed explicitly; the step counter is "n/3" for all
    # three downloads (the first banner previously read "1/2").
    print(f"\n=== 1/3 Snapshot base pipeline {BASE_REPO} ===", flush=True)
    # The base repo ships bf16 DIT shards we don't need (we use the Turbo GGUF instead).
    base_dir = snapshot_download(
        repo_id=BASE_REPO,
        ignore_patterns=[
            "*.pt",
            "diffusion_pytorch_model*.safetensors",
        ],
    )
    print(f"Base pipeline at: {base_dir}")

    print(f"\n=== 2/3 Download {GGUF_FILE} from {GGUF_REPO} ===", flush=True)
    gguf_path = hf_hub_download(repo_id=GGUF_REPO, filename=GGUF_FILE)
    print(f"GGUF DIT at: {gguf_path}")

    print(f"\n=== 3/3 Download fp8 T5 from {T5_FP8_REPO} ===", flush=True)
    t5_path = hf_hub_download(repo_id=T5_FP8_REPO, filename=T5_FP8_FILE)
    print(f"fp8 T5 at: {t5_path}")

    # Summary of the resolved paths, formatted as NAME=value lines.
    print(f"\n{'=' * 50}")
    print("Ready. Export to test_i2v.py via env:")
    print(f" BASE_DIR={base_dir}")
    print(f" DIT_GGUF={gguf_path}")
    print(f" T5_FP8={t5_path}")


if __name__ == "__main__":
    main()