64 lines
2.0 KiB
Python
64 lines
2.0 KiB
Python
import numpy as np
|
|
from scipy.signal import resample_poly
|
|
from math import gcd
|
|
|
|
|
|
def pcm_bytes_to_float32(pcm_bytes: bytes, dtype=np.int16) -> np.ndarray:
    """Convert raw PCM bytes to float32 samples in [-1, 1).

    Args:
        pcm_bytes: Raw sample data. Its length must be a whole number of
            ``dtype`` items; ``np.frombuffer`` raises ``ValueError``
            otherwise.
        dtype: Sample type stored in the buffer. Defaults to 16-bit
            signed PCM.

    Returns:
        A float32 array with one element per sample. Signed integer
        samples are divided by the magnitude of the type's most negative
        value (32768 for int16, 2**31 for int32, ...), so full scale maps
        onto [-1, 1) for every sample width. Any other dtype keeps the
        historical fixed divisor of 32768.
    """
    sample_type = np.dtype(dtype)
    samples = np.frombuffer(pcm_bytes, dtype=sample_type)
    if sample_type.kind == "i":
        # Scale by the width actually requested instead of assuming int16.
        full_scale = -float(np.iinfo(sample_type).min)
    else:
        full_scale = 32768.0
    return samples.astype(np.float32) / np.float32(full_scale)
|
|
|
|
|
|
def float32_to_pcm_bytes(audio) -> bytes:
    """Convert float audio in [-1, 1] to 16-bit signed PCM bytes.

    Args:
        audio: A numpy array, a PyTorch tensor, or any other array-like
            of floats (for example a plain list).

    Returns:
        The samples as int16 in native byte order. Values outside
        [-1, 1] are clamped first; the scaled value is truncated toward
        zero when cast to int16.
    """
    if not isinstance(audio, np.ndarray):
        if hasattr(audio, "detach"):
            # Tensor-like: drop autograd tracking and move to host memory.
            audio = audio.detach().cpu().numpy()
        else:
            # Lists, tuples and scalars have no .detach(); convert directly.
            audio = np.asarray(audio, dtype=np.float32)
    clamped = np.clip(audio, -1.0, 1.0)
    return (clamped * 32767).astype(np.int16).tobytes()
|
|
|
|
|
|
def resample(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """Resample audio from orig_sr to target_sr using polyphase filtering.

    Args:
        audio: Input samples.
        orig_sr: Sample rate of ``audio`` in Hz.
        target_sr: Desired sample rate in Hz.

    Returns:
        The resampled signal with the same dtype as ``audio``. When the
        two rates are equal the input array itself is returned (no copy).
        For integer dtypes the filtered result is rounded and saturated
        to the dtype's range before the cast.
    """
    if orig_sr == target_sr:
        return audio
    # Reduce the rate ratio to lowest terms to keep the filter short.
    divisor = gcd(orig_sr, target_sr)
    up = target_sr // divisor
    down = orig_sr // divisor
    resampled = resample_poly(audio, up, down)
    if np.issubdtype(audio.dtype, np.integer):
        # The anti-aliasing filter can overshoot full scale; a bare cast
        # back to an integer type would wrap those samples around.
        limits = np.iinfo(audio.dtype)
        resampled = np.clip(np.rint(resampled), limits.min, limits.max)
    return resampled.astype(audio.dtype)
|
|
|
|
|
|
def split_sentences(text: str) -> tuple[list[str], str]:
    """Split text into completed sentences and a remaining buffer.

    A sentence ends at a run of one or more terminators (``.``, ``!``,
    ``?``) that is followed by whitespace or by the end of the text. A
    terminator followed directly by another character (as in ``3.14``)
    does not end a sentence.

    Args:
        text: The text to split.

    Returns:
        ``(sentences, remaining_buffer)``: the completed sentences with
        surrounding whitespace stripped, and the stripped text after the
        last completed sentence (empty if nothing is left over).
    """
    terminators = ".!?"
    # ASCII whitespace only, so a non-breaking space does not end a sentence.
    boundaries = " \t\n\r\f\v"

    sentences = []
    length = len(text)
    start = 0
    i = 0
    while i < length:
        if text[i] not in terminators:
            i += 1
            continue
        # Consume the whole terminator run ("...", "?!") in one step.
        end = i + 1
        while end < length and text[end] in terminators:
            end += 1
        if end >= length or text[end] in boundaries:
            # The slice always contains a terminator, so it is never empty.
            sentences.append(text[start:end].strip())
            start = end
        # Resume after the run either way; rescanning it cannot change
        # the outcome and would be quadratic on long runs.
        i = end

    return sentences, text[start:].strip()
|