{ "_comment": "Wan2.2 i2v MoE 4-step distill, GGUF-quantized. Uses QuantStack/Wan2.2-I2V-A14B-GGUF checkpoints instead of fp8 safetensors. GGUF does not support block-level offload, so offload_granularity is set to 'model' — the entire DiT is moved to GPU when active. With Q4_K_M (~9.65 GB per expert) this fits comfortably in 24+ GB VRAM. The high_noise_quantized_ckpt and low_noise_quantized_ckpt keys are filled in at runtime by server/video_models/wan22.py. IMPORTANT: GGUF dequantizes to fp16, so you must set DTYPE=FP16 in the container environment.", "infer_steps": 4, "target_video_length": 81, "text_len": 512, "resize_mode": "adaptive", "resolution": "480p", "target_height": 480, "target_width": 480, "fps": 16, "self_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3", "cross_attn_2_type": "flash_attn3", "sample_guide_scale": [3.5, 3.5], "sample_shift": 5.0, "enable_cfg": false, "cpu_offload": true, "offload_granularity": "model", "t5_cpu_offload": true, "vae_cpu_offload": false, "use_image_encoder": false, "boundary_step_index": 2, "denoising_step_list": [1000, 750, 500, 250], "dit_quantized": true, "dit_quant_scheme": "gguf-Q4_K_M", "t5_quantized": false }