working ok
This commit is contained in:
@@ -1,36 +0,0 @@
|
||||
{
|
||||
"_comment": "Wan2.2 i2v MoE 4-step distill, fp8 e4m3 quantized. Built for 24 GB-class GPUs — cpu_offload swaps DIT layers in block by block. Derived from LightX2V's configs/distill/wan22/wan_moe_i2v_distill_4090.json plus the quant scheme and ckpt overrides from wan_moe_i2v_distill_quant.json. high_noise_quantized_ckpt / low_noise_quantized_ckpt are filled in at runtime by server/video_models/wan22.py with absolute paths to the files downloaded into HF_HOME.",
|
||||
|
||||
"infer_steps": 4,
|
||||
"target_video_length": 81,
|
||||
"text_len": 512,
|
||||
|
||||
"resize_mode": "adaptive",
|
||||
"resolution": "480p",
|
||||
"target_height": 480,
|
||||
"target_width": 480,
|
||||
"fps": 16,
|
||||
|
||||
"self_attn_1_type": "flash_attn3",
|
||||
"cross_attn_1_type": "flash_attn3",
|
||||
"cross_attn_2_type": "flash_attn3",
|
||||
|
||||
"sample_guide_scale": [3.5, 3.5],
|
||||
"sample_shift": 5.0,
|
||||
"enable_cfg": false,
|
||||
|
||||
"cpu_offload": true,
|
||||
"offload_granularity": "block",
|
||||
"lazy_load": true,
|
||||
"t5_cpu_offload": true,
|
||||
"vae_cpu_offload": false,
|
||||
|
||||
"use_image_encoder": false,
|
||||
|
||||
"boundary_step_index": 2,
|
||||
"denoising_step_list": [1000, 750, 500, 250],
|
||||
|
||||
"dit_quantized": true,
|
||||
"dit_quant_scheme": "fp8-sgl",
|
||||
"t5_quantized": false
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"_comment": "LightX2V config for Wan2.2-TI2V-5B-Turbo (dense, GGUF). Single DIT checkpoint (not MoE). dit_quantized_ckpt is filled in at runtime by Wan22Pipeline.",
|
||||
|
||||
"infer_steps": 4,
|
||||
"target_video_length": 81,
|
||||
"text_len": 512,
|
||||
|
||||
"resize_mode": "adaptive",
|
||||
"resolution": "480p",
|
||||
"target_height": 480,
|
||||
"target_width": 480,
|
||||
"fps": 16,
|
||||
|
||||
"vae_stride": [4, 16, 16],
|
||||
"num_channels_latents": 48,
|
||||
|
||||
"self_attn_1_type": "torch_sdpa",
|
||||
"cross_attn_1_type": "torch_sdpa",
|
||||
"cross_attn_2_type": "torch_sdpa",
|
||||
"modulate_type": "torch",
|
||||
"rope_type": "torch",
|
||||
|
||||
"sample_guide_scale": 1.0,
|
||||
"sample_shift": 5.0,
|
||||
"enable_cfg": false,
|
||||
|
||||
"cpu_offload": false,
|
||||
"offload_granularity": "model",
|
||||
"t5_cpu_offload": true,
|
||||
"vae_cpu_offload": false,
|
||||
|
||||
"use_image_encoder": false,
|
||||
|
||||
"denoising_step_list": [1000, 750, 500, 250],
|
||||
|
||||
"dit_quantized": true,
|
||||
"dit_quant_scheme": "gguf-Q8_0",
|
||||
"t5_quantized": true,
|
||||
"t5_quant_scheme": "fp8-sgl"
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
{
|
||||
"_comment": "Wan2.2 i2v MoE 4-step distill, GGUF quantized. Uses QuantStack/Wan2.2-I2V-A14B-GGUF checkpoints instead of fp8 safetensors. GGUF does not support block-level offload, so offload_granularity is set to 'model' — the entire DIT is moved to GPU when active. With Q4_K_M (~9.65 GB per expert) this fits comfortably in 24+ GB of VRAM. high_noise_quantized_ckpt / low_noise_quantized_ckpt are filled in at runtime by server/video_models/wan22.py. IMPORTANT: GGUF dequantizes to fp16, so you must set DTYPE=FP16 in the container environment.",
|
||||
|
||||
"infer_steps": 4,
|
||||
"target_video_length": 81,
|
||||
"text_len": 512,
|
||||
|
||||
"resize_mode": "adaptive",
|
||||
"resolution": "480p",
|
||||
"target_height": 480,
|
||||
"target_width": 480,
|
||||
"fps": 16,
|
||||
|
||||
"_comment_attn": "flash_attn3/sageattn3 aren't installed (no Blackwell-ready pre-built wheels). Use PyTorch SDPA, which works on SM120.",
|
||||
"self_attn_1_type": "torch_sdpa",
|
||||
"cross_attn_1_type": "torch_sdpa",
|
||||
"cross_attn_2_type": "torch_sdpa",
|
||||
|
||||
"_comment_modulate": "Triton fuse_scale_shift_kernel segfaults during JIT compile on Blackwell SM120 (triton 3.4 + cu128). Use the PyTorch modulate fallback until the Triton issue is resolved.",
|
||||
"modulate_type": "torch",
|
||||
"_comment_rope": "flashinfer not installed; fall back to PyTorch rope.",
|
||||
"rope_type": "torch",
|
||||
|
||||
"sample_guide_scale": [3.5, 3.5],
|
||||
"sample_shift": 5.0,
|
||||
"enable_cfg": false,
|
||||
|
||||
"cpu_offload": true,
|
||||
"offload_granularity": "model",
|
||||
"t5_cpu_offload": true,
|
||||
"vae_cpu_offload": false,
|
||||
|
||||
"use_image_encoder": false,
|
||||
|
||||
"boundary_step_index": 2,
|
||||
"denoising_step_list": [1000, 750, 500, 250],
|
||||
|
||||
"dit_quantized": true,
|
||||
"dit_quant_scheme": "gguf-Q4_K_M",
|
||||
"t5_quantized": false
|
||||
}
|
||||
Reference in New Issue
Block a user