add lm studio option
This commit is contained in:
+26
-13
@@ -46,7 +46,7 @@ class ModelManager:
|
||||
from server.vad import SileroVADOnnx
|
||||
|
||||
model_path = hf_hub_download(
|
||||
repo_id="onnx-community/silero-vad", filename="silero_vad.onnx"
|
||||
repo_id="onnx-community/silero-vad", filename="onnx/model.onnx"
|
||||
)
|
||||
self.vad_model = SileroVADOnnx(model_path)
|
||||
log.info("Silero VAD loaded (ONNX, CPU).")
|
||||
@@ -66,19 +66,32 @@ class ModelManager:
|
||||
log.info("Qwen3-ASR-0.6B loaded.")
|
||||
|
||||
def _load_llm(self):
    """Create ``self.llm_engine`` using the backend selected in the config.

    Reads ``config["llm"]["backend"]`` (defaults to ``"local"``):

    * ``"lmstudio"`` -- build an ``LMStudioEngine`` from
      ``config["llm"]["lmstudio"]["url"]`` and ``["model"]``. An empty or
      missing model is passed through as ``""`` and logged as
      "server default".
    * any other value -- load ``Qwen/Qwen3.5-0.8B`` in-process with
      ``transformers`` and wrap it in ``LLMEngine``. Values other than
      ``"local"`` are logged as unknown before falling back.
    """
    from server.config import config

    # ``or {}`` also covers a key that is present but set to None, where a
    # chained ``.get(key, {}).get(...)`` would raise AttributeError.
    llm_cfg = config.get("llm") or {}
    backend = llm_cfg.get("backend") or "local"

    if backend == "lmstudio":
        from server.llm import LMStudioEngine

        lms = llm_cfg.get("lmstudio") or {}
        url = lms.get("url", "http://host.docker.internal:1234")
        model = lms.get("model", "") or ""
        log.info(f"Using LM Studio backend at {url} (model={model or 'server default'})")
        self.llm_engine = LMStudioEngine(url, model)
        return

    if backend != "local":
        # Keep the original fall-through behaviour, but make a misspelled
        # backend name visible instead of silently loading a local model.
        log.warning(f"Unknown LLM backend {backend!r}; falling back to local model.")

    # Imported here so the LM Studio path never has to import transformers.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "Qwen/Qwen3.5-0.8B"
    # Log the model that is actually loaded; the previous messages named a
    # different checkpoint (Qwen3-4B GPTQ) than ``model_name``.
    log.info(f"Loading {model_name}...")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    device = get_device()
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device,
    )
    self.llm_engine = LLMEngine(model, tokenizer)
    log.info(f"{model_name} loaded.")
|
||||
|
||||
def _load_tts(self):
|
||||
log.info("Loading Kokoro TTS...")
|
||||
|
||||
Reference in New Issue
Block a user