initial commit

This commit is contained in:
2026-04-07 03:58:35 -04:00
commit ce41bca422
17 changed files with 1184 additions and 0 deletions
+52
View File
@@ -0,0 +1,52 @@
import numpy as np
import torch
class StreamingVAD:
"""Wraps Silero VAD for streaming chunk-by-chunk speech detection."""
def __init__(self, model, threshold: float = 0.5, min_silence_ms: int = 400):
from silero_vad import VADIterator
self.iterator = VADIterator(
model,
sampling_rate=16000,
threshold=threshold,
min_silence_duration_ms=min_silence_ms,
)
self.audio_buffer: list[np.ndarray] = []
self.is_speaking = False
def process_chunk(self, chunk_16k: np.ndarray) -> np.ndarray | None:
"""Feed a 512-sample chunk at 16kHz.
Returns the complete utterance as a numpy array when speech ends,
or None if still accumulating.
"""
tensor = torch.from_numpy(chunk_16k).float()
speech_dict = self.iterator(tensor, return_seconds=False)
if speech_dict:
if "start" in speech_dict:
self.is_speaking = True
self.audio_buffer = []
if "end" in speech_dict:
self.is_speaking = False
if self.audio_buffer:
result = np.concatenate(self.audio_buffer)
self.audio_buffer = []
self.iterator.reset_states()
return result
self.iterator.reset_states()
return None
if self.is_speaking:
self.audio_buffer.append(chunk_16k.copy())
return None
def reset(self):
"""Reset VAD state for a new conversation turn."""
self.audio_buffer = []
self.is_speaking = False
self.iterator.reset_states()