working ok

2026-04-16 10:00:37 -04:00
parent 9debc56137
commit 129df7d1fa
24 changed files with 674 additions and 539 deletions
@@ -1,26 +1,26 @@
-"""Phase 2 component test: Wan2.2 pipeline + LoRA stacking.
+"""Phase 2 component test: dense Wan2.2-TI2V-5B-Turbo pipeline + LoRA stacking.

 Verifies:
 - ``Wan22Pipeline`` loads successfully (exercises the real LightX2V
  set_config -> init_runner flow).
- ``load_loras`` / ``unload_loras`` survive with the two user LoRAs at
-  ``/cache/loras/wan22-[HL]-e8.safetensors``.
+- ``load_loras`` / ``unload_loras`` survive with the first (sorted) user LoRA
+  matching ``/cache/loras/*.safetensors`` (target='both', dense single DIT).

-Supports both fp8 and GGUF DIT quantisation.  Set the ``DIT_QUANT``
-environment variable to switch (default: ``fp8-sgl``).
+Supports any GGUF quant published in hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF.
+Set ``DIT_QUANT`` to switch (default: ``gguf-Q8_0``).

    docker compose exec -e DIT_QUANT=gguf-Q4_K_M voice-chat \
        python -m tests.component.test_02_wan22_loras

-Requires GPU and a first-run download of both HF repos (base support files
-~12 GB, DIT size depends on quant — fp8 ~30 GB, GGUF Q4_K_M ~19 GB).
+Requires GPU and a first-run download of the base repo + GGUF DIT.
 If LightX2V isn't installed the test is skipped.

-Run (default fp8):
+Run:
    docker compose exec voice-chat python -m tests.component.test_02_wan22_loras
 """
 from __future__ import annotations

+import glob
 import os
 import sys

@@ -28,19 +28,9 @@ from tests.component._common import get_logger

 log = get_logger("test_02")

-# --- Quant-dependent defaults ------------------------------------------------
-
-DIT_QUANT = os.environ.get("DIT_QUANT", "fp8-sgl")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
-
-LORA_HIGH = "/cache/loras/wan22-H-e8.safetensors"
-LORA_LOW = "/cache/loras/wan22-L-e8.safetensors"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -57,13 +47,14 @@ def run():
             "(quant=%s, dit_repo=%s)...", DIT_QUANT, DIT_REPO)
    try:
        pipe = Wan22Pipeline(
-            base_repo="Wan-AI/Wan2.2-I2V-A14B",
+            base_repo="Wan-AI/Wan2.2-TI2V-5B",
            dit_repo=DIT_REPO,
            config_json=CONFIG_JSON,
-            model_cls="wan2.2_moe_distill",
+            model_cls="wan2.2",
            resolution=480,
            fps=16,
            dit_quant_scheme=DIT_QUANT,
+            t5_quantized=True,
        )
    except Exception as e:
        log.error("FAIL: Wan22Pipeline construction raised: %s", e)
@@ -78,34 +69,27 @@ def run():
    pipe.load_loras([])
    log.info("  PASS")

-    if not (os.path.isfile(LORA_HIGH) and os.path.isfile(LORA_LOW)):
-        log.warning("SKIP: expected LoRA files not found at %s / %s",
-                    LORA_HIGH, LORA_LOW)
+    lora_files = sorted(glob.glob("/cache/loras/*.safetensors"))
+    if not lora_files:
+        log.warning("SKIP: no LoRA files found in /cache/loras/")
        log.info("ALL PASSED (partial — LoRA cases skipped)")
        return

-    log.info("[case 3] load_loras with the two MoE distill LoRAs")
+    lora_path = lora_files[0]
+    log.info("[case 3] load_loras with one 5B-compatible LoRA (%s)", lora_path)
    specs = [
        LoRASpec(
-            path=LORA_HIGH,
+            path=lora_path,
            weight=1.0,
-            target="high_noise",
-            name="wan22-H-e8",
-        ),
-        LoRASpec(
-            path=LORA_LOW,
-            weight=1.0,
-            target="low_noise",
-            name="wan22-L-e8",
+            target="both",
+            name=os.path.basename(lora_path),
        ),
    ]
    try:
        pipe.load_loras(specs)
    except Exception as e:
        log.error("FAIL: load_loras raised: %s", e)
-        log.error("Check: switch_lora support for wan2.2_moe_distill in the "
-                  "installed LightX2V build. If it errors there, pre-declare "
-                  "LoRAs in the config_json 'lora_configs' field instead.")
+        log.error("Check: LoRA checkpoint shape matches dense 5B DIT.")
        sys.exit(3)
    log.info("  PASS: LoRAs applied")

@@ -81,9 +81,9 @@ def run():
    body = {
        "loras": [
            {"path": "/cache/loras/a.safetensors", "weight": 0.8,
-             "target": "high_noise", "name": "test-a"},
+             "target": "both", "name": "test-a"},
            {"path": "/cache/loras/b.safetensors", "weight": 0.4,
-             "target": "low_noise"},
+             "target": "both"},
        ]
    }
    resp = client.post("/api/reload-loras", json=body)
@@ -32,28 +32,20 @@ def run():
    write_bytes("phase8_idle_noloras.mp4", idle_a)
    log.info("idle (no LoRAs) sha256=%s", hash_a[:16])

-    # Hot-reload with a distill LoRA
-    specs = [
-        LoRASpec(
-            path="lightx2v/Wan2.2-Distill-Loras:"
-                 "wan2.2_i2v_A14b_high_noise_lora_rank64_lightx2v_4step.safetensors",
-            weight=1.0,
-            target="high_noise",
-            name="distill-hi",
-        ),
-    ]
-    engine.load_loras(specs)
+    # Hot-reload flow: unload (no-op), reload empty list, verify clip still generates.
+    # There are no published 5B-Turbo-compatible LoRAs yet; when one exists,
+    # construct a LoRASpec(path=..., target="both", weight=1.0) and compare hashes.
+    engine.load_loras([])
    engine.set_avatar(avatar_path)
    idle_b = engine.get_idle_clip()
    assert idle_b is not None
    hash_b = hashlib.sha256(idle_b).hexdigest()
-    write_bytes("phase8_idle_withlora.mp4", idle_b)
-    log.info("idle (with LoRA) sha256=%s", hash_b[:16])
+    write_bytes("phase8_idle_reloaded.mp4", idle_b)
+    log.info("idle (post-reload) sha256=%s", hash_b[:16])

-    if hash_a != hash_b:
-        log.info("PASS: idle clip changed after LoRA reload")
-    else:
-        log.warning("clips identical — LoRA may not be applied; eyeball _out/*.mp4")
+    log.info("PASS: hot-reload round-trip completed "
+             "(hash match=%s — expected without a real LoRA applied).",
+             hash_a == hash_b)


 if __name__ == "__main__":
@@ -1,10 +1,10 @@
-"""Quick smoke test: generate a video clip with the GGUF pipeline.
+"""Quick smoke test: generate a video clip with the dense 5B Turbo GGUF pipeline.

 Calls Wan22Pipeline.generate_i2v directly (no MuseTalk, no VideoEngine)
 and writes the result to tests/component/_out/phase9_gguf.mp4.

 Run:
-    docker compose exec -e DIT_QUANT=gguf-Q4_K_M voice-chat \
+    docker compose exec -e DIT_QUANT=gguf-Q8_0 voice-chat \
        python -m tests.component.test_09_gguf_generate
 """
 from __future__ import annotations
@@ -16,14 +16,9 @@ from tests.component._common import ensure_sample_avatar, get_logger, write_byte

 log = get_logger("test_09")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -38,10 +33,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -17,14 +17,9 @@ from tests.component._common import get_logger

 log = get_logger("test_10")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -36,10 +31,10 @@ def run():

    log.info("Building pipeline (quant=%s) — this loads T5 + DIT weights...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -22,14 +22,9 @@ from tests.component._common import ensure_sample_avatar, get_logger

 log = get_logger("test_11")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -44,10 +39,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -20,14 +20,9 @@ from tests.component._common import ensure_sample_avatar, get_logger

 log = get_logger("test_12")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -42,10 +37,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -19,14 +19,9 @@ from tests.component._common import ensure_sample_avatar, get_logger, write_byte

 log = get_logger("test_13")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -41,10 +36,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,