first stab at adding video
This commit is contained in:
+26
-2
@@ -5,6 +5,7 @@ from server.vad import StreamingVAD
|
||||
from server.asr import ASREngine
|
||||
from server.llm import LLMEngine
|
||||
from server.tts import TTSEngine
|
||||
from server.video import VideoConfig, VideoEngine
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -31,6 +32,7 @@ class ModelManager:
|
||||
self.asr_engine: ASREngine | None = None
|
||||
self.llm_engine: LLMEngine | None = None
|
||||
self.tts_engine: TTSEngine | None = None
|
||||
self.video_engine: VideoEngine | None = None
|
||||
|
||||
def load_all(self):
|
||||
"""Load all models sequentially. Call from the main process."""
|
||||
@@ -38,6 +40,7 @@ class ModelManager:
|
||||
self._load_asr()
|
||||
self._load_llm()
|
||||
self._load_tts()
|
||||
self._load_video()
|
||||
log.info("All models loaded successfully.")
|
||||
|
||||
def _load_vad(self):
|
||||
@@ -84,8 +87,8 @@ class ModelManager:
|
||||
log.info("Loading Qwen3-4B (GPTQ 4-bit)...")
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
model_name = "Qwen/Qwen3.5-0.8B"
|
||||
|
||||
# model_name = "Qwen/Qwen3.5-0.8B"
|
||||
model_name = "dphn/Dolphin-X1-8B-FP8"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
device = get_device()
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
@@ -101,6 +104,27 @@ class ModelManager:
|
||||
self.tts_engine = TTSEngine()
|
||||
log.info("Kokoro TTS loaded.")
|
||||
|
||||
def _load_video(self):
    """Bring up the avatar video engine when ``config.video.enabled`` is set.

    When the flag is missing or falsy, nothing is constructed and
    ``video_engine`` is not assigned here, so the voice-only flow is
    unaffected. When it is truthy, a ``VideoConfig`` is built from the raw
    ``video`` config section, a ``VideoEngine`` is created from it, and any
    configured LoRAs are handed to the engine.

    This is a stub: later phases are expected to move the real
    Wan2.2 + MuseTalk loading inside ``VideoEngine``.
    """
    from server.config import config

    # A missing or null "video" section is treated as an empty mapping.
    raw_settings = config.get("video", {}) or {}

    if raw_settings.get("enabled", False):
        log.info("Loading avatar video engine...")
        video_config = VideoConfig.from_dict(raw_settings)
        # Assign the engine before loading LoRAs, matching the original order.
        self.video_engine = VideoEngine(video_config)
        if video_config.loras:
            self.video_engine.load_loras(video_config.loras)
        log.info("Avatar video engine loaded (mode=%s).", video_config.mode)
    else:
        log.info("Video engine disabled (config.video.enabled=false). Skipping load.")
|
||||
|
||||
def create_vad(self) -> StreamingVAD:
    """Return a fresh ``StreamingVAD`` for one client session.

    Each call constructs a new instance from this manager's ``vad_model``.
    """
    session_vad = StreamingVAD(self.vad_model)
    return session_vad
|
||||
|
||||
Reference in New Issue
Block a user