# live-voice-chat/experimental/lightx2v_5b/setup_model.py

"""Stage Wan2.2-TI2V-5B-Turbo GGUF pipeline for LightX2V.

Downloads:
  1. Base `Wan-AI/Wan2.2-TI2V-5B` snapshot (configs, T5, VAE — skip bf16 DIT shards).
  2. Turbo Q8 GGUF DIT from `hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF`.
  3. fp8 T5 encoder (`models_t5_umt5-xxl-enc-fp8.safetensors`) from `lightx2v/Encoders`.

Quant file can be overridden via GGUF_FILE env (default Q8_0).

Idempotent: huggingface_hub handles caching.
"""
from __future__ import annotations

import os

from huggingface_hub import hf_hub_download, snapshot_download

# Hub repo providing the base pipeline snapshot.
BASE_REPO = "Wan-AI/Wan2.2-TI2V-5B"

# Hub repo and file name of the quantized Turbo DIT; the file name can be
# overridden through the GGUF_FILE environment variable.
GGUF_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"
GGUF_FILE = os.getenv("GGUF_FILE", "Wan2_2-TI2V-5B-Turbo-Q8_0.gguf")

# Hub repo and file name of the fp8 T5 encoder weights.
T5_FP8_REPO = "lightx2v/Encoders"
T5_FP8_FILE = "models_t5_umt5-xxl-enc-fp8.safetensors"


def main() -> None:
    """Download the three pipeline pieces and print where each one landed.

    Steps, in order:
      1. Snapshot ``BASE_REPO``, skipping ``*.pt`` files and the
         ``diffusion_pytorch_model*.safetensors`` DIT shards.
      2. Fetch the single ``GGUF_FILE`` from ``GGUF_REPO``.
      3. Fetch the single ``T5_FP8_FILE`` from ``T5_FP8_REPO``.

    Finally prints the three returned paths as ``BASE_DIR`` / ``DIT_GGUF`` /
    ``T5_FP8`` assignments intended to be exported for ``test_i2v.py``.
    """
    print(f"\n=== 1/3 Snapshot base pipeline {BASE_REPO} ===", flush=True)
    # The base repo ships bf16 DIT shards we don't need (we use the Turbo GGUF instead).
    base_dir = snapshot_download(
        repo_id=BASE_REPO,
        ignore_patterns=[
            "*.pt",
            "diffusion_pytorch_model*.safetensors",
        ],
    )
    print(f"Base pipeline at: {base_dir}")

    print(f"\n=== 2/3 Download {GGUF_FILE} from {GGUF_REPO} ===", flush=True)
    gguf_path = hf_hub_download(repo_id=GGUF_REPO, filename=GGUF_FILE)
    print(f"GGUF DIT at: {gguf_path}")

    print(f"\n=== 3/3 Download fp8 T5 from {T5_FP8_REPO} ===", flush=True)
    t5_path = hf_hub_download(repo_id=T5_FP8_REPO, filename=T5_FP8_FILE)
    print(f"fp8 T5 at: {t5_path}")

    # Summary block: the values a caller needs to hand to the test script.
    print(f"\n{'=' * 50}")
    print("Ready. Export to test_i2v.py via env:")
    print(f"  BASE_DIR={base_dir}")
    print(f"  DIT_GGUF={gguf_path}")
    print(f"  T5_FP8={t5_path}")


# Run the downloads only when executed as a script, not when imported.
if __name__ == "__main__":
    main()