Add LM Studio option

This commit is contained in:
2026-04-08 10:17:20 -04:00
parent 08c5757b31
commit c7c4019ecc
6 changed files with 112 additions and 13 deletions
+26 -13
View File
@@ -46,7 +46,7 @@ class ModelManager:
from server.vad import SileroVADOnnx
model_path = hf_hub_download(
repo_id="onnx-community/silero-vad", filename="silero_vad.onnx"
repo_id="onnx-community/silero-vad", filename="onnx/model.onnx"
)
self.vad_model = SileroVADOnnx(model_path)
log.info("Silero VAD loaded (ONNX, CPU).")
@@ -66,19 +66,32 @@ class ModelManager:
log.info("Qwen3-ASR-0.6B loaded.")
def _load_llm(self):
    """Create ``self.llm_engine`` according to the ``llm`` config section.

    ``llm.backend`` selects the engine:

    * ``"lmstudio"`` -- build an ``LMStudioEngine`` from ``llm.lmstudio.url``
      (default ``http://host.docker.internal:1234``) and
      ``llm.lmstudio.model`` (empty string when unset, logged as
      "server default").
    * ``"local"`` (the default) -- load ``Qwen/Qwen3.5-0.8B`` through
      ``transformers`` onto the device returned by ``get_device()`` and wrap
      it in ``LLMEngine``.

    Any other backend value logs a warning and falls back to the local model.
    """
    from server.config import config

    # ``or {}`` also covers a section that exists but is empty/null, which
    # ``config.get("llm", {})`` would hand back as None.
    llm_cfg = config.get("llm") or {}
    backend = llm_cfg.get("backend") or "local"

    if backend == "lmstudio":
        from server.llm import LMStudioEngine

        lms = llm_cfg.get("lmstudio") or {}
        # An empty/blank URL is treated the same as a missing one.
        url = lms.get("url") or "http://host.docker.internal:1234"
        model = lms.get("model") or ""
        log.info(f"Using LM Studio backend at {url} (model={model or 'server default'})")
        self.llm_engine = LMStudioEngine(url, model)
        return

    if backend != "local":
        # Previously a typo in the backend name silently selected the local
        # model; keep the fallback but make it visible.
        log.warning(f"Unknown LLM backend {backend!r}; falling back to the local model.")

    model_name = "Qwen/Qwen3.5-0.8B"
    # Log the model that is actually loaded rather than a hard-coded label.
    log.info(f"Loading {model_name}...")
    # Imported lazily so the LM Studio path never pulls in transformers.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = get_device()
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device,
    )
    self.llm_engine = LLMEngine(model, tokenizer)
    log.info(f"{model_name} loaded.")
def _load_tts(self):
log.info("Loading Kokoro TTS...")