from math import gcd

import numpy as np
from scipy.signal import resample_poly


def pcm_bytes_to_float32(pcm_bytes: bytes, dtype=np.int16) -> np.ndarray:
    """Convert raw signed-integer PCM bytes to float32 samples in [-1, 1).

    Args:
        pcm_bytes: Raw PCM payload. ``np.frombuffer`` raises ``ValueError``
            if its length is not a multiple of the sample size.
        dtype: NumPy dtype of one sample. Defaults to 16-bit signed PCM.

    Returns:
        A float32 array with one element per input sample.
    """
    sample_dtype = np.dtype(dtype)
    samples = np.frombuffer(pcm_bytes, dtype=sample_dtype)

    if np.issubdtype(sample_dtype, np.signedinteger):
        # Full scale is 2**(bits - 1): 32768 for int16, 2**31 for int32, ...
        # so every signed width maps onto the same [-1, 1) range.
        full_scale = float(1 << (8 * sample_dtype.itemsize - 1))
    else:
        # Non-signed dtypes keep the historical 16-bit divisor unchanged.
        full_scale = 32768.0

    return samples.astype(np.float32) / full_scale


def float32_to_pcm_bytes(audio) -> bytes:
    """Convert float audio in [-1, 1] to 16-bit signed PCM bytes.

    Args:
        audio: A NumPy array, an object exposing ``.detach().cpu().numpy()``
            (e.g. a PyTorch tensor), or any array-like such as a list.

    Returns:
        The samples clipped to [-1, 1], scaled by 32767, cast to int16
        (fractional parts are truncated by the cast) and serialized in
        native byte order.
    """
    if not isinstance(audio, np.ndarray):
        if hasattr(audio, "detach"):
            # Tensor-like: drop autograd tracking and move to host memory.
            audio = audio.detach().cpu().numpy()
        else:
            audio = np.asarray(audio)

    clipped = np.clip(audio, -1.0, 1.0)
    return (clipped * 32767).astype(np.int16).tobytes()


def resample(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """Resample audio from ``orig_sr`` to ``target_sr`` via polyphase filtering.

    Args:
        audio: Input samples.
        orig_sr: Sample rate of ``audio`` in Hz.
        target_sr: Desired sample rate in Hz.

    Returns:
        ``audio`` itself (not a copy) when the rates are equal; otherwise a
        new array cast back to the dtype of ``audio``.
    """
    if orig_sr == target_sr:
        return audio

    # Reduce the rate ratio to lowest terms so the filter uses the smallest
    # possible up/down factors.
    common = gcd(orig_sr, target_sr)
    up = target_sr // common
    down = orig_sr // common
    return resample_poly(audio, up, down).astype(audio.dtype)


def split_sentences(text: str) -> tuple[list[str], str]:
    """Split text into completed sentences and a remaining buffer.

    A sentence ends at a run of one or more terminators (``.``, ``!``, ``?``)
    that is followed by any whitespace character or by the end of the text.
    A terminator followed directly by a non-whitespace character (as in
    ``3.14``) does not end a sentence.

    Args:
        text: The text accumulated so far.

    Returns:
        ``(sentences, remaining)``: the stripped, non-empty completed
        sentences in order, and the stripped trailing text that has not yet
        been terminated.
    """
    terminators = ".!?"
    sentences: list[str] = []
    length = len(text)
    start = 0  # index where the sentence currently being built begins
    pos = 0

    while pos < length:
        if text[pos] not in terminators:
            pos += 1
            continue

        # Consume the whole punctuation run ("...", "?!") as one terminator.
        run_end = pos + 1
        while run_end < length and text[run_end] in terminators:
            run_end += 1

        if run_end >= length or text[run_end].isspace():
            sentence = text[start:run_end].strip()
            if sentence:
                sentences.append(sentence)
            start = run_end

        # Every terminator inside the run shares the same follower, so the
        # scan can resume after the run whether or not a split happened.
        pos = run_end

    return sentences, text[start:].strip()