# live-voice-chat/config.yml
# Viewer metadata at capture: 2026-04-16 10:00:37 -04:00, 72 lines, 3.2 KiB, YAML

# LLM backend: "local" or "lmstudio"
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — in particular, confirm that `lmstudio`
# belongs under `llm` (and is not a top-level key) against the config loader.
llm:
  backend: local  # change to "lmstudio" to use LM Studio instead
  # Max KV-cache size per session (tokens); 0 to disable caching.
  max_cache_tokens: 4096
  # Folded scalar (>-): the lines below are joined with single spaces and the
  # trailing newline is stripped, so the prompt is a single-line string.
  system_prompt: >-
    You are a helpful voice assistant.
    Keep your responses extremely concise but natural for spoken conversation.
    Do not use markdown, bullet points, code blocks, emojis, or any formatting that doesn't work in speech.

  # Settings used only when backend = "lmstudio"
  lmstudio:
    # host.docker.internal resolves to your PC from inside Docker
    url: http://host.docker.internal:1234
    model: ""  # leave empty to use whatever model LM Studio has loaded
# Avatar video generation (Wan2.2-TI2V-5B-Turbo GGUF via LightX2V + MuseTalk lip-sync)
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped. `library`, `musetalk`, `reflective`, `models`
# and `loras` are assumed to be children of `video` — confirm against the
# config loader before relying on this layout.
video:
  enabled: true  # master toggle — when false, video models are not loaded
  backend: lightx2v  # only option for now
  mode: reflective  # "library" (pre-baked clips) | "reflective" (fresh per turn)
  resolution: 480  # 480 or 720
  fps: 16  # Wan2.2 native rate; MuseTalk resamples as needed

  library:
    base_clip_count: 4  # how many speaking base clips to pre-generate per avatar
    base_clip_seconds: 6  # duration of each pre-baked clip

  # MuseTalk audio-driven lip-sync. When disabled, Wan2.2 base frames are
  # used as-is without a lip-sync pass — useful when MuseTalk isn't installed
  # or while iterating on the base pipeline.
  musetalk:
    enabled: false  # toggle lip-sync on/off

  reflective:
    clip_seconds: 5  # target length of each fresh Wan2.2 clip per turn
    # Folded scalar (>-): the two lines are joined with a single space.
    # {reply_hint} is the placeholder filled from the reply text
    # (see prompt_reply_words).
    clip_prompt_template: >-
      webcam view of a person speaking, {reply_hint},
      casual gestures, natural lighting, soft focus background
    prompt_reply_words: 18  # max words lifted from reply to inject as {reply_hint}

  # Model sources for the video stack. T5/VAE/tokenizer come from the
  # Wan-AI base repo. The single dense DIT comes from wan22_dit_repo as
  # GGUF (Turbo 4-step distill). Both repos download on first run into
  # HF_HOME=/cache/huggingface.
  #
  # Supported dit_quant_scheme values (dense 5B Turbo — GGUF only):
  #   gguf-Q8_0    — 8-bit, ~6 GB DIT, ~6.5 GB VRAM at load (default)
  #   gguf-Q4_K_M  — 4-bit, ~3.5 GB DIT, lower VRAM for tight budgets
  #   (any gguf-<level> published in hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF)
  models:
    wan22_base_repo: Wan-AI/Wan2.2-TI2V-5B
    wan22_dit_repo: hum-ma/Wan2.2-TI2V-5B-Turbo-GGUF
    wan22_dit_quant_scheme: gguf-Q8_0
    wan22_t5_quantized: true
    wan22_model_cls: wan2.2
    wan22_config_json: /app/configs/lightx2v/wan22_i2v_gguf_5b_turbo.json
    musetalk_path: TMElyralab/MuseTalk

  # LoRAs applied to the dense 5B DIT at load time via LightX2V's
  # lora_dynamic_apply path (merged during GGUF dequant). Dense has a
  # single set of weights so `target` is always `both`.
  #
  # The old MoE-trained wan22-H-e8 / wan22-L-e8 LoRAs are NOT compatible
  # with the 5B DIT and are disabled here. Future 5B-compatible LoRAs
  # should follow the shape shown below.
  loras: []
  # loras:
  #   - path: /cache/loras/your-5b-lora.safetensors
  #     weight: 1.0
  #     target: both
  #     name: your-5b-lora