26 lines
637 B
Python
26 lines
637 B
Python
import numpy as np
|
|
|
|
|
|
class ASREngine:
    """Wraps Qwen3-ASR for speech-to-text transcription.

    The wrapped ``model`` only needs to expose a
    ``transcribe(audio=..., language=...)`` method that returns a sequence
    of result objects carrying a ``text`` attribute.
    """

    # Sample rate (Hz) declared to the model alongside the waveform.
    SAMPLE_RATE_HZ = 16000

    def __init__(self, model):
        """Store the ASR model that performs the actual transcription.

        Args:
            model: Object exposing ``transcribe(audio=..., language=...)``.
        """
        self.model = model

    def transcribe(self, audio_16k: np.ndarray) -> str:
        """Transcribe a complete utterance.

        Args:
            audio_16k: Float32 numpy array at 16kHz sample rate.

        Returns:
            The text of the first result with surrounding whitespace
            removed, or an empty string when the audio is empty, the model
            returns no results, or the first result has no text.
        """
        # An utterance with zero samples cannot contain speech; answer with
        # the same "nothing recognised" value instead of invoking the model.
        if np.size(audio_16k) == 0:
            return ""

        results = self.model.transcribe(
            audio=(audio_16k, self.SAMPLE_RATE_HZ),
            language=None,  # auto-detect
        )

        # Guard clause: no results at all, or a first result without text.
        if not results:
            return ""
        text = results[0].text
        if not text:
            return ""
        return text.strip()
|