t5 encoder fp8 seems to be working

This commit is contained in:
2026-04-12 13:50:34 -04:00
parent 2818b41004
commit fcf0be38bc
13 changed files with 505 additions and 67 deletions
Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

+28 -10
View File
@@ -1,15 +1,22 @@
"""Phase 2 component test: Wan2.2-Lightning fp8 pipeline + LoRA stacking.
"""Phase 2 component test: Wan2.2 pipeline + LoRA stacking.
Verifies:
- ``Wan22Pipeline`` loads successfully against the fp8 distill path
(exercises the real LightX2V set_config init_runner flow).
- ``Wan22Pipeline`` loads successfully (exercises the real LightX2V
set_config -> init_runner flow).
- ``load_loras`` / ``unload_loras`` survive with the two user LoRAs at
``/cache/loras/wan22-[HL]-e8.safetensors``.
Requires GPU and a first-run download of both HF repos (base support files
~12 GB, fp8 DIT ~30 GB). If LightX2V isn't installed the test is skipped.
Supports both fp8 and GGUF DIT quantisation. Set the ``DIT_QUANT``
environment variable to switch (default: ``fp8-sgl``).
Run:
DIT_QUANT=gguf-Q4_K_M docker compose exec voice-chat \
python -m tests.component.test_02_wan22_loras
Requires GPU and a first-run download of both HF repos (base support files
~12 GB, DIT size depends on quant — fp8 ~30 GB, GGUF Q4_K_M ~19 GB).
If LightX2V isn't installed the test is skipped.
Run (default fp8):
docker compose exec voice-chat python -m tests.component.test_02_wan22_loras
"""
from __future__ import annotations
@@ -21,7 +28,17 @@ from tests.component._common import get_logger
log = get_logger("test_02")
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
# --- Quant-dependent defaults ------------------------------------------------
DIT_QUANT = os.environ.get("DIT_QUANT", "fp8-sgl")
if DIT_QUANT.startswith("gguf-"):
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
else:
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
LORA_HIGH = "/cache/loras/wan22-H-e8.safetensors"
LORA_LOW = "/cache/loras/wan22-L-e8.safetensors"
@@ -37,15 +54,16 @@ def run():
from server.video import LoRASpec
log.info("[case 1] Instantiate Wan22Pipeline "
"(first run downloads ~42 GB total)...")
"(quant=%s, dit_repo=%s)...", DIT_QUANT, DIT_REPO)
try:
pipe = Wan22Pipeline(
base_repo="Wan-AI/Wan2.2-I2V-A14B",
fp8_repo="lightx2v/Wan2.2-Distill-Models",
dit_repo=DIT_REPO,
config_json=CONFIG_JSON,
model_cls="wan2.2_moe_distill",
resolution=480,
fps=16,
dit_quant_scheme=DIT_QUANT,
)
except Exception as e:
log.error("FAIL: Wan22Pipeline construction raised: %s", e)
@@ -56,7 +74,7 @@ def run():
log.info(" PASS: pipeline constructed")
# --- LoRAs ---
log.info("[case 2] load_loras with empty list no-op")
log.info("[case 2] load_loras with empty list -> no-op")
pipe.load_loras([])
log.info(" PASS")
+83
View File
@@ -0,0 +1,83 @@
"""Quick smoke test: generate a video clip with the GGUF pipeline.
Calls Wan22Pipeline.generate_i2v directly (no MuseTalk, no VideoEngine)
and writes the result to tests/component/_out/phase9_gguf.mp4.
Run:
docker compose exec -e DIT_QUANT=gguf-Q4_K_M voice-chat \
python -m tests.component.test_09_gguf_generate
"""
from __future__ import annotations
import os
import sys
from tests.component._common import ensure_sample_avatar, get_logger, write_bytes
log = get_logger("test_09")
DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
if DIT_QUANT.startswith("gguf-"):
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
else:
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
def run():
try:
from server.video_models.wan22 import Wan22Pipeline
except ImportError as e:
log.error("Import failed: %s", e)
sys.exit(0)
avatar = ensure_sample_avatar()
log.info("Avatar: %s", avatar)
log.info("Building pipeline (quant=%s)...", DIT_QUANT)
pipe = Wan22Pipeline(
base_repo="Wan-AI/Wan2.2-I2V-A14B",
dit_repo=DIT_REPO,
config_json=CONFIG_JSON,
model_cls="wan2.2_moe_distill",
resolution=480,
fps=16,
dit_quant_scheme=DIT_QUANT,
t5_quantized=True,
)
log.info("Pipeline ready.")
# Debug: verify DTYPE is set correctly for GGUF
from lightx2v.utils.envs import GET_DTYPE
log.info("GET_DTYPE() = %s (DTYPE env = %s)", GET_DTYPE(), os.environ.get("DTYPE"))
log.info("Generating 3-second i2v clip...")
frames = pipe.generate_i2v(
image_path=avatar,
prompt="a person looking at the camera, natural lighting, soft focus background",
seconds=3,
seed=42,
)
log.info("Got frames: shape=%s dtype=%s", frames.shape, frames.dtype)
# Encode to MP4
import imageio.v3 as iio
import tempfile
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tf:
tmp = tf.name
try:
iio.imwrite(tmp, frames, fps=16, codec="libx264")
with open(tmp, "rb") as f:
mp4_bytes = f.read()
finally:
os.remove(tmp)
out = write_bytes("phase9_gguf.mp4", mp4_bytes)
log.info("PASS: video written to %s (%d bytes, %d frames)", out, len(mp4_bytes), frames.shape[0])
if __name__ == "__main__":
run()
+88
View File
@@ -0,0 +1,88 @@
"""Smoke test: T5 text encoding under GGUF pipeline.
Builds the Wan22Pipeline (loads all weights including DIT) but only
exercises the T5 encoder — no image-to-video generation. Validates that
the DTYPE=FP16 ↔ BF16 patching lets T5 encode a prompt successfully.
Run:
docker compose exec -e DIT_QUANT=gguf-Q4_K_M voice-chat \
python -m tests.component.test_10_t5_encode
"""
from __future__ import annotations
import os
import sys
from tests.component._common import get_logger
log = get_logger("test_10")
DIT_QUANT = os.environ.get("DIT_QUANT", "gguf-Q4_K_M")
if DIT_QUANT.startswith("gguf-"):
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_gguf_distill.json"
DIT_REPO = "QuantStack/Wan2.2-I2V-A14B-GGUF"
else:
CONFIG_JSON = "/app/configs/lightx2v/wan22_i2v_fp8_distill.json"
DIT_REPO = "lightx2v/Wan2.2-Distill-Models"
def run():
try:
from server.video_models.wan22 import Wan22Pipeline
except ImportError as e:
log.error("Import failed: %s", e)
sys.exit(0)
log.info("Building pipeline (quant=%s) — this loads T5 + DIT weights...", DIT_QUANT)
pipe = Wan22Pipeline(
base_repo="Wan-AI/Wan2.2-I2V-A14B",
dit_repo=DIT_REPO,
config_json=CONFIG_JSON,
model_cls="wan2.2_moe_distill",
resolution=480,
fps=16,
dit_quant_scheme=DIT_QUANT,
t5_quantized=True,
)
log.info("Pipeline ready.")
# Check DTYPE state after init
from lightx2v.utils.envs import GET_DTYPE
log.info("GET_DTYPE() = %s (DTYPE env = %s)", GET_DTYPE(), os.environ.get("DTYPE"))
# Run only the T5 text encoder
runner = pipe._runner
prompt = "a person looking at the camera, natural lighting"
log.info("Running T5 text encoder on prompt: %r", prompt)
import copy
input_info = copy.deepcopy(pipe._input_info_template)
input_info.prompt = prompt
runner.run_text_encoder(input_info)
log.info("T5 encode complete.")
# Inspect output — check all dataclass fields for tensor results
import torch
for attr in vars(input_info):
val = getattr(input_info, attr)
if isinstance(val, torch.Tensor):
log.info(" %s: shape=%s dtype=%s device=%s", attr, val.shape, val.dtype, val.device)
# Verify DTYPE is back to FP16 after T5 runs (if GGUF)
if DIT_QUANT.startswith("gguf-"):
current = GET_DTYPE()
import torch
expected = torch.float16
if current == expected:
log.info("PASS: DTYPE correctly restored to FP16 after T5 encode.")
else:
log.error("FAIL: DTYPE is %s after T5, expected %s", current, expected)
sys.exit(1)
log.info("PASS: T5 encoding succeeded under %s pipeline.", DIT_QUANT)
if __name__ == "__main__":
run()