working ok

2026-04-16 10:00:37 -04:00
parent 9debc56137
commit 129df7d1fa
24 changed files with 674 additions and 539 deletions
@@ -13,9 +13,9 @@ llm:
    url: http://host.docker.internal:1234   # host.docker.internal resolves to your PC from inside Docker
    model: ""           # leave empty to use whatever model LM Studio has loaded

-# Avatar video generation (Wan2.2-Lightning fp8 via LightX2V + MuseTalk lip-sync)
+# Avatar video generation (Wan2.2-TI2V-5B-Turbo GGUF via LightX2V + optional MuseTalk lip-sync)
 video:
-  enabled: false                  # master toggle — when false, video models are not loaded
+  enabled: true                   # master toggle — when false, video models are not loaded
  backend: lightx2v               # only option for now
  mode: reflective                # "library" (pre-baked clips) | "reflective" (fresh per turn)
  resolution: 480                 # 480 or 720
@@ -25,6 +25,12 @@ video:
    base_clip_count: 4            # how many speaking base clips to pre-generate per avatar
    base_clip_seconds: 6          # duration of each pre-baked clip

+  # MuseTalk audio-driven lip-sync. When disabled, Wan2.2 base frames are
+  # used as-is without a lip-sync pass — useful when MuseTalk isn't installed
+  # or while iterating on the base pipeline.
+  musetalk:
+    enabled: false                # toggle lip-sync on/off
+
  reflective:
    clip_seconds: 5               # target length of each fresh Wan2.2 clip per turn
    clip_prompt_template: >-
@@ -33,35 +39,33 @@ video:
    prompt_reply_words: 18        # max words lifted from reply to inject as {reply_hint}

  # Model sources for the video stack. T5/VAE/tokenizer come from the
-  # Wan-AI base repo. DIT weights come from wan22_dit_repo in the format
-  # specified by wan22_dit_quant_scheme. Both repos download on first run
-  # into HF_HOME=/cache/huggingface.
+  # Wan-AI base repo. The single dense DIT comes from wan22_dit_repo as
+  # GGUF (Turbo 4-step distill). Both repos download on first run into
+  # HF_HOME=/cache/huggingface.
  #
-  # Supported dit_quant_scheme values:
-  #   fp8-sgl    — fp8 e4m3 safetensors (~15 GB/expert, from lightx2v/Wan2.2-Distill-Models)
-  #   gguf-Q4_K_M — GGUF 4-bit (~9.65 GB/expert, from QuantStack/Wan2.2-I2V-A14B-GGUF)
-  #   gguf-Q8_0  — GGUF 8-bit (~15.4 GB/expert)
-  #   (any gguf-<level> supported by LightX2V — see base_model.py MM_WEIGHT_REGISTER)
+  # Supported wan22_dit_quant_scheme values (dense 5B Turbo — GGUF only):
+  #   gguf-Q8_0   — 8-bit, ~6 GB DIT, ~6.5 GB VRAM at load (default)
+  #   gguf-Q4_K_M — 4-bit, ~3.5 GB DIT, lower VRAM for tight budgets
+  #   (any gguf-<level> published in hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF)
  models:
-    wan22_base_repo: Wan-AI/Wan2.2-I2V-A14B
-    wan22_dit_repo: QuantStack/Wan2.2-I2V-A14B-GGUF
-    wan22_dit_quant_scheme: gguf-Q4_K_M
+    wan22_base_repo: Wan-AI/Wan2.2-TI2V-5B
+    wan22_dit_repo: hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF
+    wan22_dit_quant_scheme: gguf-Q8_0
    wan22_t5_quantized: true
-    wan22_model_cls: wan2.2_moe_distill
-    wan22_config_json: /app/configs/lightx2v/wan22_i2v_gguf_distill.json
+    wan22_model_cls: wan2.2
+    wan22_config_json: /app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json
    musetalk_path: TMElyralab/MuseTalk

-  # LoRAs applied to the fp8 base at load time via runtime switch_lora.
-  # Wan2.2 is a MoE with separate high-noise and low-noise sub-models —
-  # `target` picks which sub-model each LoRA attaches to. The two files
-  # below are the user-supplied ./loras/wan22-[HL]-e8.safetensors mounted
-  # into the container at /cache/loras/.
-  loras:
-    - path: /cache/loras/wan22-H-e8.safetensors
-      weight: 1.0
-      target: high_noise
-      name: wan22-H-e8
-    - path: /cache/loras/wan22-L-e8.safetensors
-      weight: 1.0
-      target: low_noise
-      name: wan22-L-e8
+  # LoRAs applied to the dense 5B DIT at load time via LightX2V's
+  # lora_dynamic_apply path (merged during GGUF dequant). The dense model
+  # has a single set of weights, so `target` is always `both`.
+  #
+  # The old MoE-trained wan22-H-e8 / wan22-L-e8 LoRAs are NOT compatible
+  # with the 5B DIT and are disabled here. Future 5B-compatible LoRAs
+  # should follow the shape shown below.
+  loras: []
+  # loras:
+  #   - path: /cache/loras/your-5b-lora.safetensors
+  #     weight: 1.0
+  #     target: both
+  #     name: your-5b-lora