t5 encoder fp8 seems to be working
This commit is contained in:
+14
-7
@@ -32,16 +32,23 @@ video:
|
||||
casual gestures, natural lighting, soft focus background
|
||||
prompt_reply_words: 18 # max words lifted from reply to inject as {reply_hint}
|
||||
|
||||
# Model sources for the video stack. The fp8 e4m3 4-step distilled DIT
|
||||
# weights from lightx2v/Wan2.2-Distill-Models are ~15 GB each (vs ~28 GB
|
||||
# bf16) — that's the "save VRAM" path. T5/VAE/tokenizer still come from
|
||||
# the Wan-AI base repo. Both repos download on first run into
|
||||
# HF_HOME=/cache/huggingface.
|
||||
# Model sources for the video stack. T5/VAE/tokenizer come from the
|
||||
# Wan-AI base repo. DIT weights come from wan22_dit_repo in the format
|
||||
# specified by wan22_dit_quant_scheme. Both repos download on first run
|
||||
# into HF_HOME=/cache/huggingface.
|
||||
#
|
||||
# Supported wan22_dit_quant_scheme values:
|
||||
# fp8-sgl — fp8 e4m3 safetensors (~15 GB/expert, from lightx2v/Wan2.2-Distill-Models)
|
||||
# gguf-Q4_K_M — GGUF 4-bit (~9.65 GB/expert, from QuantStack/Wan2.2-I2V-A14B-GGUF)
|
||||
# gguf-Q8_0 — GGUF 8-bit (~15.4 GB/expert)
|
||||
# (any gguf-<level> supported by LightX2V — see base_model.py MM_WEIGHT_REGISTER)
|
||||
models:
|
||||
wan22_base_repo: Wan-AI/Wan2.2-I2V-A14B
|
||||
wan22_fp8_repo: lightx2v/Wan2.2-Distill-Models
|
||||
wan22_dit_repo: QuantStack/Wan2.2-I2V-A14B-GGUF
|
||||
wan22_dit_quant_scheme: gguf-Q4_K_M
|
||||
wan22_t5_quantized: true
|
||||
wan22_model_cls: wan2.2_moe_distill
|
||||
wan22_config_json: /app/configs/lightx2v/wan22_i2v_fp8_distill.json
|
||||
wan22_config_json: /app/configs/lightx2v/wan22_i2v_gguf_distill.json
|
||||
musetalk_path: TMElyralab/MuseTalk
|
||||
|
||||
# LoRAs applied to the fp8 base at load time via runtime switch_lora.
|
||||
|
||||
Reference in New Issue
Block a user