first stab at adding video

This commit is contained in:
2026-04-12 04:11:52 -04:00
parent 680c5b04cc
commit 2818b41004
37 changed files with 2982 additions and 24 deletions
+26 -2
View File
@@ -5,6 +5,7 @@ from server.vad import StreamingVAD
from server.asr import ASREngine
from server.llm import LLMEngine
from server.tts import TTSEngine
from server.video import VideoConfig, VideoEngine
log = logging.getLogger(__name__)
@@ -31,6 +32,7 @@ class ModelManager:
self.asr_engine: ASREngine | None = None
self.llm_engine: LLMEngine | None = None
self.tts_engine: TTSEngine | None = None
self.video_engine: VideoEngine | None = None
def load_all(self):
"""Load all models sequentially. Call from the main process."""
@@ -38,6 +40,7 @@ class ModelManager:
self._load_asr()
self._load_llm()
self._load_tts()
self._load_video()
log.info("All models loaded successfully.")
def _load_vad(self):
@@ -84,8 +87,8 @@ class ModelManager:
log.info("Loading Qwen3-4B (GPTQ 4-bit)...")
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "Qwen/Qwen3.5-0.8B"
# model_name = "Qwen/Qwen3.5-0.8B"
model_name = "dphn/Dolphin-X1-8B-FP8"
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = get_device()
model = AutoModelForCausalLM.from_pretrained(
@@ -101,6 +104,27 @@ class ModelManager:
self.tts_engine = TTSEngine()
log.info("Kokoro TTS loaded.")
def _load_video(self):
    """Bring up the avatar video engine when the ``video`` config enables it.

    Reads the ``video`` section of ``server.config.config``. Unless that
    section carries a truthy ``enabled`` entry, nothing is loaded and
    ``self.video_engine`` is left untouched, so the voice-only flow is
    unaffected. When enabled, a ``VideoEngine`` is built from the parsed
    ``VideoConfig`` and any LoRAs listed in that config are loaded into it.

    This is still a stub: later phases are expected to move the actual
    Wan2.2 + MuseTalk loading inside ``VideoEngine``.
    """
    from server.config import config

    # A missing or null ``video`` section is treated as an empty mapping.
    section = config.get("video", {}) or {}
    enabled = section.get("enabled", False)

    if enabled:
        log.info("Loading avatar video engine...")
        settings = VideoConfig.from_dict(section)
        # The engine is stored on the manager before any LoRAs are loaded.
        self.video_engine = VideoEngine(settings)
        if settings.loras:
            self.video_engine.load_loras(settings.loras)
        log.info("Avatar video engine loaded (mode=%s).", settings.mode)
    else:
        log.info("Video engine disabled (config.video.enabled=false). Skipping load.")
def create_vad(self) -> StreamingVAD:
    """Return a fresh ``StreamingVAD`` for one client session.

    Each call constructs a new ``StreamingVAD`` around ``self.vad_model``.
    """
    vad_model = self.vad_model
    return StreamingVAD(vad_model)