t5 encoder fp8 seems to be working
This commit is contained in:
@@ -1,15 +1,22 @@
|
||||
"""Phase 2 component test: Wan2.2-Lightning fp8 pipeline + LoRA stacking.
|
||||
"""Phase 2 component test: Wan2.2 pipeline + LoRA stacking.
|
||||
|
||||
Verifies:
|
||||
- ``Wan22Pipeline`` loads successfully against the fp8 distill path
|
||||
(exercises the real LightX2V set_config → init_runner flow).
|
||||
- ``Wan22Pipeline`` loads successfully (exercises the real LightX2V
|
||||
set_config -> init_runner flow).
|
||||
- ``load_loras`` / ``unload_loras`` survive with the two user LoRAs at
|
||||
``/cache/loras/wan22-[HL]-e8.safetensors``.
|
||||
|
||||
Requires GPU and a first-run download of both HF repos (base support files
|
||||
~12 GB, fp8 DIT ~30 GB). If LightX2V isn't installed the test is skipped.
|
||||
Supports both fp8 and GGUF DIT quantisation. Set the ``DIT_QUANT``
|
||||
environment variable to switch (default: ``fp8-sgl``).
|
||||
|
||||
Run:
|
||||
DIT_QUANT=gguf-Q4_K_M docker compose exec voice-chat \
|
||||
python -m tests.component.test_02_wan22_loras
|
||||
|
||||
Requires GPU and a first-run download of both HF repos (base support files
|
||||
~12 GB, DIT size depends on quant — fp8 ~30 GB, GGUF Q4_K_M ~19 GB).
|
||||
If LightX2V isn't installed the test is skipped.
|
||||
|
||||
Run (default fp8):
|
||||
docker compose exec voice-chat python -m tests.component.test_02_wan22_loras
|
||||
"""
|
||||
from __future__ import annotations
|
||||
@@ -21,7 +28,17 @@ from tests.component._common import get_logger
|
||||
|
||||
log = get_logger("test_02")
|
||||
|
||||
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
|
||||
# --- Quant-dependent defaults ------------------------------------------------
|
||||
|
||||
DIT_QUANT = os.environ.get("DIT_QUANT", "fp8-sgl")
|
||||
|
||||
if DIT_QUANT.startswith("gguf-"):
|
||||
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
|
||||
DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
|
||||
else:
|
||||
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
|
||||
DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
|
||||
|
||||
LORA_HIGH = "/cache/loras/wan22-H-e8.safetensors"
|
||||
LORA_LOW = "/cache/loras/wan22-L-e8.safetensors"
|
||||
|
||||
@@ -37,15 +54,16 @@ def run():
|
||||
from server.video import LoRASpec
|
||||
|
||||
log.info("[case 1] Instantiate Wan22Pipeline "
|
||||
"(first run downloads ~42 GB total)...")
|
||||
"(quant=%s, dit_repo=%s)...", DIT_QUANT, DIT_REPO)
|
||||
try:
|
||||
pipe = Wan22Pipeline(
|
||||
base_repo="Wan-AI/Wan2.2-I2V-A14B",
|
||||
fp8_repo="lightx2v/Wan2.2-Distill-Models",
|
||||
dit_repo=DIT_REPO,
|
||||
config_json=CONFIG_JSON,
|
||||
model_cls="wan2.2_moe_distill",
|
||||
resolution=480,
|
||||
fps=16,
|
||||
dit_quant_scheme=DIT_QUANT,
|
||||
)
|
||||
except Exception as e:
|
||||
log.error("FAIL: Wan22Pipeline construction raised: %s", e)
|
||||
@@ -56,7 +74,7 @@ def run():
|
||||
log.info(" PASS: pipeline constructed")
|
||||
|
||||
# --- LoRAs ---
|
||||
log.info("[case 2] load_loras with empty list → no-op")
|
||||
log.info("[case 2] load_loras with empty list -> no-op")
|
||||
pipe.load_loras([])
|
||||
log.info(" PASS")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user