working ok

2026-04-16 10:00:37 -04:00
parent 9debc56137
commit 129df7d1fa
24 changed files with 674 additions and 539 deletions
@@ -9,25 +9,45 @@ python -m pytest tests/unit -v
 ```

 These exercise pure logic: config parsing, prompt derivation, LoRA spec
-parsing, frame-length fitting, library round-robin selection. They do not
-touch CUDA, Wan2.2, MuseTalk, or ffmpeg. Safe to run on Windows, outside
-Docker, without any models installed.
+parsing, frame-length fitting, library round-robin selection, the
+pipeline's video branch, and ffmpeg mux argument shaping. They do not
+touch CUDA, Wan2.2, MuseTalk, or a real ffmpeg binary. Safe to run on
+Windows, outside Docker, without any models installed.
+
+Current unit files:
+
+- `test_video_config.py` — `VideoConfig.from_dict` round-trip, LoRA target validation
+- `test_video_engine_logic.py` — prompt derivation, library cursor, frame fitting
+- `test_pipeline_video_branch.py` — pipeline takes the video path iff engine is ready
+- `test_musetalk_fit_frames.py` — frame-length adjustment to match audio duration
+- `test_muxer_ffmpeg.py` — ffmpeg command construction

 ## Component tests — slow, GPU-required, run inside Docker

-Each script in `tests/component/` exercises one subsystem end-to-end against
-the real models. They are ordered to match the implementation phases:
+Each script in `tests/component/` exercises one subsystem end-to-end
+against the real models. The numbered prefix reflects the implementation
+phase each script gates, and also serves as a reasonable run order when
+debugging a fresh environment:

 | Script | Phase | Tests |
 |---|---|---|
 | `test_01_video_skeleton.py` | 1 | VideoEngine loads, config gate respected |
 | `test_02_wan22_loras.py` | 2 | Wan2.2 pipeline loads, LoRA stack applies |
-| `test_03_idle_clip.py` | 3 | set_avatar → idle MP4, written to disk for eyeballing |
+| `test_03_idle_clip.py` | 3 | `set_avatar` → idle MP4, written to disk for eyeballing |
 | `test_04_library_prebake.py` | 4 | library mode pre-bakes N base clips |
 | `test_05_musetalk_lipsync.py` | 5 | MuseTalk lip-sync on library frames + ffmpeg mux |
 | `test_06_reflective.py` | 6 | reflective mode: fresh Wan2.2 per reply |
 | `test_07_endpoints.py` | 7 | HTTP endpoints return sane responses |
-| `test_08_lora_reload.py` | 8 | /api/reload-loras swaps LoRAs live |
+| `test_08_lora_reload.py` | 8 | `/api/reload-loras` swaps LoRAs live |
+| `test_09_gguf_generate.py` | 9 | GGUF-quantised DIT end-to-end I2V generation |
+| `test_10_t5_encode.py` | 10 | T5 encoder (optionally fp8-quantised) on CUDA |
+| `test_11_image_encode.py` | 11 | Avatar image → VAE latent path |
+| `test_12_dit_single_step.py` | 12 | Single DIT step on the loaded expert(s) |
+| `test_13_vae_decode.py` | 13 | VAE decode back to RGB frames |
+
+Tests 09-13 are focused on the GGUF + Blackwell (SM120) path and are how
+new quant schemes / attention backends get validated before wiring them
+into the full pipeline.

 Run one:

@@ -36,7 +56,7 @@ Run one:
 docker compose exec voice-chat python -m tests.component.test_03_idle_clip
 ```

-Run all (slow, ~20+ minutes on 5090):
+Run all (slow, ~20+ minutes on a 5090):

 ```
 docker compose exec voice-chat python -m tests.component.run_all
@@ -1,26 +1,26 @@
-"""Phase 2 component test: Wan2.2 pipeline + LoRA stacking.
+"""Phase 2 component test: dense Wan2.2-TI2V-5B-Turbo pipeline + LoRA stacking.

 Verifies:
 - ``Wan22Pipeline`` loads successfully (exercises the real LightX2V
  set_config -> init_runner flow).
- ``load_loras`` / ``unload_loras`` survive with the two user LoRAs at
-  ``/cache/loras/wan22-[HL]-e8.safetensors``.
+- ``load_loras`` / ``unload_loras`` survive with the first (sorted) user LoRA
+  matching ``/cache/loras/*.safetensors`` (target='both', dense single DIT).

-Supports both fp8 and GGUF DIT quantisation.  Set the ``DIT_QUANT``
-environment variable to switch (default: ``fp8-sgl``).
+Supports any GGUF quant published in hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF.
+Set ``DIT_QUANT`` to switch (default: ``gguf-Q8_0``).

    DIT_QUANT=gguf-Q4_K_M docker compose exec voice-chat \
        python -m tests.component.test_02_wan22_loras

-Requires GPU and a first-run download of both HF repos (base support files
-~12 GB, DIT size depends on quant — fp8 ~30 GB, GGUF Q4_K_M ~19 GB).
+Requires GPU and a first-run download of the base repo + GGUF DIT.
 If LightX2V isn't installed the test is skipped.

-Run (default fp8):
+Run:
    docker compose exec voice-chat python -m tests.component.test_02_wan22_loras
 """
 from __future__ import annotations

+import glob
 import os
 import sys

@@ -28,19 +28,9 @@ from tests.component._common import get_logger

 log = get_logger("test_02")

-# --- Quant-dependent defaults ------------------------------------------------
-
-DIT_QUANT = os.environ.get("DIT_QUANT", "fp8-sgl")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
-
-LORA_HIGH = "/cache/loras/wan22-H-e8.safetensors"
-LORA_LOW = "/cache/loras/wan22-L-e8.safetensors"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -57,13 +47,14 @@ def run():
             "(quant=%s, dit_repo=%s)...", DIT_QUANT, DIT_REPO)
    try:
        pipe = Wan22Pipeline(
-            base_repo="Wan-AI/Wan2.2-I2V-A14B",
+            base_repo="Wan-AI/Wan2.2-TI2V-5B",
            dit_repo=DIT_REPO,
            config_json=CONFIG_JSON,
-            model_cls="wan2.2_moe_distill",
+            model_cls="wan2.2",
            resolution=480,
            fps=16,
            dit_quant_scheme=DIT_QUANT,
+            t5_quantized=True,
        )
    except Exception as e:
        log.error("FAIL: Wan22Pipeline construction raised: %s", e)
@@ -78,34 +69,27 @@ def run():
    pipe.load_loras([])
    log.info("  PASS")

-    if not (os.path.isfile(LORA_HIGH) and os.path.isfile(LORA_LOW)):
-        log.warning("SKIP: expected LoRA files not found at %s / %s",
-                    LORA_HIGH, LORA_LOW)
+    lora_files = sorted(glob.glob("/cache/loras/*.safetensors"))
+    if not lora_files:
+        log.warning("SKIP: no LoRA files found in /cache/loras/")
        log.info("ALL PASSED (partial — LoRA cases skipped)")
        return

-    log.info("[case 3] load_loras with the two MoE distill LoRAs")
+    lora_path = lora_files[0]
+    log.info("[case 3] load_loras with one 5B-compatible LoRA (%s)", lora_path)
    specs = [
        LoRASpec(
-            path=LORA_HIGH,
+            path=lora_path,
            weight=1.0,
-            target="high_noise",
-            name="wan22-H-e8",
-        ),
-        LoRASpec(
-            path=LORA_LOW,
-            weight=1.0,
-            target="low_noise",
-            name="wan22-L-e8",
+            target="both",
+            name=os.path.basename(lora_path),
        ),
    ]
    try:
        pipe.load_loras(specs)
    except Exception as e:
        log.error("FAIL: load_loras raised: %s", e)
-        log.error("Check: switch_lora support for wan2.2_moe_distill in the "
-                  "installed LightX2V build. If it errors there, pre-declare "
-                  "LoRAs in the config_json 'lora_configs' field instead.")
+        log.error("Check: LoRA checkpoint shape matches dense 5B DIT.")
        sys.exit(3)
    log.info("  PASS: LoRAs applied")

@@ -81,9 +81,9 @@ def run():
    body = {
        "loras": [
            {"path": "/cache/loras/a.safetensors", "weight": 0.8,
-             "target": "high_noise", "name": "test-a"},
+             "target": "both", "name": "test-a"},
            {"path": "/cache/loras/b.safetensors", "weight": 0.4,
-             "target": "low_noise"},
+             "target": "both"},
        ]
    }
    resp = client.post("/api/reload-loras", json=body)
@@ -32,28 +32,20 @@ def run():
    write_bytes("phase8_idle_noloras.mp4", idle_a)
    log.info("idle (no LoRAs) sha256=%s", hash_a[:16])

-    # Hot-reload with a distill LoRA
-    specs = [
-        LoRASpec(
-            path="lightx2v/Wan2.2-Distill-Loras:"
-                 "wan2.2_i2v_A14b_high_noise_lora_rank64_lightx2v_4step.safetensors",
-            weight=1.0,
-            target="high_noise",
-            name="distill-hi",
-        ),
-    ]
-    engine.load_loras(specs)
+    # Hot-reload flow: reload with an empty LoRA list, then verify a clip still generates.
+    # No 5B-Turbo-compatible LoRAs are published yet; once one exists, build a
+    # LoRASpec(path=..., target="both", weight=1.0) here and compare the hashes.
+    engine.load_loras([])
    engine.set_avatar(avatar_path)
    idle_b = engine.get_idle_clip()
    assert idle_b is not None
    hash_b = hashlib.sha256(idle_b).hexdigest()
-    write_bytes("phase8_idle_withlora.mp4", idle_b)
-    log.info("idle (with LoRA) sha256=%s", hash_b[:16])
+    write_bytes("phase8_idle_reloaded.mp4", idle_b)
+    log.info("idle (post-reload) sha256=%s", hash_b[:16])

-    if hash_a != hash_b:
-        log.info("PASS: idle clip changed after LoRA reload")
-    else:
-        log.warning("clips identical — LoRA may not be applied; eyeball _out/*.mp4")
+    log.info("PASS: hot-reload round-trip completed "
+             "(hash match=%s — expected without a real LoRA applied).",
+             hash_a == hash_b)


 if __name__ == "__main__":
@@ -1,10 +1,10 @@
-"""Quick smoke test: generate a video clip with the GGUF pipeline.
+"""Quick smoke test: generate a video clip with the dense 5B Turbo GGUF pipeline.

 Calls Wan22Pipeline.generate_i2v directly (no MuseTalk, no VideoEngine)
 and writes the result to tests/component/_out/phase9_gguf.mp4.

 Run:
-    docker compose exec -e DIT_QUANT=gguf-Q4_K_M voice-chat \
+    docker compose exec -e DIT_QUANT=gguf-Q8_0 voice-chat \
        python -m tests.component.test_09_gguf_generate
 """
 from __future__ import annotations
@@ -16,14 +16,9 @@ from tests.component._common import ensure_sample_avatar, get_logger, write_byte

 log = get_logger("test_09")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -38,10 +33,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -17,14 +17,9 @@ from tests.component._common import get_logger

 log = get_logger("test_10")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -36,10 +31,10 @@ def run():

    log.info("Building pipeline (quant=%s) — this loads T5 + DIT weights...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -22,14 +22,9 @@ from tests.component._common import ensure_sample_avatar, get_logger

 log = get_logger("test_11")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -44,10 +39,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -20,14 +20,9 @@ from tests.component._common import ensure_sample_avatar, get_logger

 log = get_logger("test_12")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -42,10 +37,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -19,14 +19,9 @@ from tests.component._common import ensure_sample_avatar, get_logger, write_byte

 log = get_logger("test_13")

-DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
-
-if DIT_QUANT.startswith("gguf-"):
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
-    DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
-else:
-    CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
-    DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
+DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q8_0")
+CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"
+DIT_REPO = "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"


 def run():
@@ -41,10 +36,10 @@ def run():

    log.info("Building pipeline (quant=%s)...", DIT_QUANT)
    pipe = Wan22Pipeline(
-        base_repo="Wan-AI/Wan2.2-I2V-A14B",
+        base_repo="Wan-AI/Wan2.2-TI2V-5B",
        dit_repo=DIT_REPO,
        config_json=CONFIG_JSON,
-        model_cls="wan2.2_moe_distill",
+        model_cls="wan2.2",
        resolution=480,
        fps=16,
        dit_quant_scheme=DIT_QUANT,
@@ -64,32 +64,39 @@ def test_lora_parse_full():
        {
            "loras": [
                {
-                    "path": "/tmp/hi.safetensors",
+                    "path": "/tmp/a.safetensors",
                    "weight": 0.7,
-                    "target": "high_noise",
-                    "name": "hi-noise-style",
+                    "target": "both",
+                    "name": "style-a",
                },
                {
-                    "path": "/tmp/lo.safetensors",
+                    "path": "/tmp/b.safetensors",
                    "weight": 0.4,
-                    "target": "low_noise",
-                    "name": "lo-noise-style",
+                    "target": "both",
+                    "name": "style-b",
                },
            ]
        }
    )
    assert len(cfg.loras) == 2
-    assert cfg.loras[0].target == "high_noise"
-    assert cfg.loras[0].name == "hi-noise-style"
-    assert cfg.loras[1].target == "low_noise"
+    assert cfg.loras[0].target == "both"
+    assert cfg.loras[0].name == "style-a"
+    assert cfg.loras[1].target == "both"
    assert cfg.loras[1].weight == 0.4


-def test_lora_invalid_target_falls_back_to_both():
+def test_lora_legacy_moe_target_coerced_to_both():
+    """Legacy 'high_noise'/'low_noise' and unknown targets coerce to 'both'."""
    cfg = VideoConfig.from_dict(
-        {"loras": [{"path": "/tmp/x.safetensors", "target": "bogus"}]}
+        {
+            "loras": [
+                {"path": "/tmp/hi.safetensors", "target": "high_noise"},
+                {"path": "/tmp/lo.safetensors", "target": "low_noise"},
+                {"path": "/tmp/x.safetensors", "target": "bogus"},
+            ]
+        }
    )
-    assert cfg.loras[0].target == "both"
+    assert all(l.target == "both" for l in cfg.loras)


 def test_lora_entries_without_path_are_dropped():
@@ -107,8 +114,8 @@ def test_models_section_override():
                "wan22_base_repo": "/local/weights/wan22",
                "wan22_dit_repo": "/local/weights/wan22-dit",
                "wan22_dit_quant_scheme": "gguf-Q4_K_M",
-                "wan22_config_json": "/local/cfg/fp8.json",
-                "wan22_model_cls": "wan2.2_moe",
+                "wan22_config_json": "/local/cfg/turbo.json",
+                "wan22_model_cls": "wan2.2",
                "musetalk_path": "/local/weights/musetalk",
            }
        }
@@ -116,18 +123,16 @@ def test_models_section_override():
    assert cfg.wan22_base_repo == "/local/weights/wan22"
    assert cfg.wan22_dit_repo == "/local/weights/wan22-dit"
    assert cfg.wan22_dit_quant_scheme == "gguf-Q4_K_M"
-    assert cfg.wan22_config_json == "/local/cfg/fp8.json"
-    assert cfg.wan22_model_cls == "wan2.2_moe"
+    assert cfg.wan22_config_json == "/local/cfg/turbo.json"
+    assert cfg.wan22_model_cls == "wan2.2"
    assert cfg.musetalk_model_path == "/local/weights/musetalk"


-def test_models_section_backwards_compat_fp8_repo():
-    """Old config key wan22_fp8_repo still works via fallback."""
-    cfg = VideoConfig.from_dict(
-        {
-            "models": {
-                "wan22_fp8_repo": "/local/weights/wan22-fp8",
-            }
-        }
-    )
-    assert cfg.wan22_dit_repo == "/local/weights/wan22-fp8"
+def test_models_section_defaults_to_5b_turbo():
+    cfg = VideoConfig.from_dict({})
+    assert cfg.wan22_base_repo == "Wan-AI/Wan2.2-TI2V-5B"
+    assert cfg.wan22_dit_repo == "hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF"
+    assert cfg.wan22_dit_quant_scheme == "gguf-Q8_0"
+    assert cfg.wan22_t5_quantized is True
+    assert cfg.wan22_model_cls == "wan2.2"
+    assert cfg.wan22_config_json == "/app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json"