t5 encoder fp8 seems to be working

This commit is contained in:
2026-04-12 13:50:34 -04:00
parent 2818b41004
commit fcf0be38bc
13 changed files with 505 additions and 67 deletions
+28 -10
View File
@@ -1,15 +1,22 @@
"""Phase 2 component test: Wan2.2-Lightning fp8 pipeline + LoRA stacking.
"""Phase 2 component test: Wan2.2 pipeline + LoRA stacking.
Verifies:
- ``Wan22Pipeline`` loads successfully against the fp8 distill path
(exercises the real LightX2V set_config → init_runner flow).
- ``Wan22Pipeline`` loads successfully (exercises the real LightX2V
set_config -> init_runner flow).
- ``load_loras`` / ``unload_loras`` survive with the two user LoRAs at
``/cache/loras/wan22-[HL]-e8.safetensors``.
Requires GPU and a first-run download of both HF repos (base support files
~12 GB, fp8 DIT ~30 GB). If LightX2V isn't installed the test is skipped.
Supports both fp8 and GGUF DIT quantisation. Set the ``DIT_QUANT``
environment variable to switch (default: ``fp8-sgl``).
Run:
DIT_QUANT=gguf-Q4_K_M docker compose exec voice-chat \
python -m tests.component.test_02_wan22_loras
Requires GPU and a first-run download of both HF repos (base support files
~12 GB, DIT size depends on quant — fp8 ~30 GB, GGUF Q4_K_M ~19 GB).
If LightX2V isn't installed the test is skipped.
Run (default fp8):
docker compose exec voice-chat python -m tests.component.test_02_wan22_loras
"""
from __future__ import annotations
@@ -21,7 +28,17 @@ from tests.component._common import get_logger
log = get_logger("test_02")
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
# --- Quant-dependent defaults ------------------------------------------------
DIT_QUANT = os.environ.get("DIT_QUANT", "fp8-sgl")
if DIT_QUANT.startswith("gguf-"):
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
else:
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
LORA_HIGH = "/cache/loras/wan22-H-e8.safetensors"
LORA_LOW = "/cache/loras/wan22-L-e8.safetensors"
@@ -37,15 +54,16 @@ def run():
from server.video import LoRASpec
log.info("[case 1] Instantiate Wan22Pipeline "
"(first run downloads ~42 GB total)...")
"(quant=%s, dit_repo=%s)...", DIT_QUANT, DIT_REPO)
try:
pipe = Wan22Pipeline(
base_repo="Wan-AI/Wan2.2-I2V-A14B",
fp8_repo="lightx2v/Wan2.2-Distill-Models",
dit_repo=DIT_REPO,
config_json=CONFIG_JSON,
model_cls="wan2.2_moe_distill",
resolution=480,
fps=16,
dit_quant_scheme=DIT_QUANT,
)
except Exception as e:
log.error("FAIL: Wan22Pipeline construction raised: %s", e)
@@ -56,7 +74,7 @@ def run():
log.info(" PASS: pipeline constructed")
# --- LoRAs ---
log.info("[case 2] load_loras with empty list no-op")
log.info("[case 2] load_loras with empty list -> no-op")
pipe.load_loras([])
log.info(" PASS")