initial commit
This commit is contained in:
@@ -0,0 +1,63 @@
|
||||
import numpy as np
|
||||
from scipy.signal import resample_poly
|
||||
from math import gcd
|
||||
|
||||
|
||||
def pcm_bytes_to_float32(pcm_bytes: bytes, dtype=np.int16) -> np.ndarray:
    """Convert raw PCM bytes to float32 samples normalised to [-1, 1].

    The scale factor is derived from ``dtype`` so that formats other than
    16-bit signed PCM are normalised to the same range.

    Args:
        pcm_bytes: Raw sample data. Its length must be a multiple of the
            item size of ``dtype``; ``np.frombuffer`` raises ``ValueError``
            otherwise.
        dtype: Sample format of ``pcm_bytes`` (anything ``np.dtype``
            accepts). Defaults to 16-bit signed integers.

    Returns:
        A new float32 array with one element per input sample.
    """
    sample_dtype = np.dtype(dtype)
    # astype() copies, so the result is writable even though frombuffer()
    # returns a read-only view of the input bytes.
    samples = np.frombuffer(pcm_bytes, dtype=sample_dtype).astype(np.float32)

    if sample_dtype.kind == "i":
        # Signed PCM: divide by the magnitude of the most negative value
        # (32768 for int16) so the minimum maps exactly to -1.0.
        full_scale = -float(np.iinfo(sample_dtype).min)
        return samples / np.float32(full_scale)

    if sample_dtype.kind == "u":
        # Unsigned PCM is offset-binary: the midpoint of the range is silence.
        midpoint = (float(np.iinfo(sample_dtype).max) + 1.0) / 2.0
        return (samples - np.float32(midpoint)) / np.float32(midpoint)

    if sample_dtype.kind == "f":
        # Floating-point PCM is already normalised; only the cast is needed.
        return samples

    # Any other dtype keeps the historical fixed 16-bit divisor.
    return samples / np.float32(32768.0)
|
||||
|
||||
|
||||
def float32_to_pcm_bytes(audio) -> bytes:
    """Convert float audio in [-1, 1] to 16-bit signed PCM bytes.

    Args:
        audio: A numpy array, a PyTorch tensor (detached and moved to the
            CPU before conversion), or any array-like such as a list of
            floats.

    Returns:
        The samples as native-endian int16 bytes. Values outside [-1, 1]
        are clamped, and NaN samples are written as silence (0).
    """
    if isinstance(audio, np.ndarray):
        samples = audio
    elif hasattr(audio, "detach"):
        # Tensor-like input: drop autograd tracking and copy to host memory.
        samples = audio.detach().cpu().numpy()
    else:
        # Plain sequences (lists, tuples) have no .detach(); convert directly.
        samples = np.asarray(audio)

    # NaN survives np.clip and has no defined int16 value, so replace it
    # with silence before clamping. Infinities are handled by the clip.
    finite = np.nan_to_num(samples, nan=0.0)
    clamped = np.clip(finite, -1.0, 1.0)
    return (clamped * 32767).astype(np.int16).tobytes()
|
||||
|
||||
|
||||
def resample(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
|
||||
"""Resample audio from orig_sr to target_sr using polyphase filtering."""
|
||||
if orig_sr == target_sr:
|
||||
return audio
|
||||
divisor = gcd(orig_sr, target_sr)
|
||||
up = target_sr // divisor
|
||||
down = orig_sr // divisor
|
||||
return resample_poly(audio, up, down).astype(audio.dtype)
|
||||
|
||||
|
||||
def split_sentences(text: str) -> tuple[list[str], str]:
    """Split text into completed sentences and a remaining buffer.

    A sentence ends at a run of one or more terminators (``.``, ``!``,
    ``?``) that is followed by any whitespace character or by the end of
    the string. A terminator followed directly by other text (for example
    the dot in ``3.14``) does not end a sentence.

    Args:
        text: The text accumulated so far.

    Returns:
        ``(sentences, remaining_buffer)``: the completed sentences with
        surrounding whitespace stripped, and the stripped text after the
        last sentence boundary (empty if the text ends on a boundary).
    """
    terminators = ".!?"
    sentences: list[str] = []
    length = len(text)

    start = 0  # index where the sentence currently being built begins
    i = 0
    while i < length:
        if text[i] not in terminators:
            i += 1
            continue

        # Consume the whole run of terminators ("...", "?!") as one unit.
        end = i + 1
        while end < length and text[end] in terminators:
            end += 1

        # str.isspace() also covers tabs and "\r", so CRLF and
        # tab-separated text split the same way as space-separated text.
        if end >= length or text[end].isspace():
            sentence = text[start:end].strip()
            if sentence:
                sentences.append(sentence)
            start = end

        # Boundary or not, every terminator in the run shares the same
        # verdict, so continue scanning after the run.
        i = end

    return sentences, text[start:].strip()
|
||||
Reference in New Issue
Block a user