306 lines
7.3 KiB
JavaScript
306 lines
7.3 KiB
JavaScript
// --- State ---

// Fixed tunables.
const MIC_SR = 16000; // Sample rate requested for microphone capture
const PLAYBACK_SR = 24000; // TTS output sample rate
const BARGE_IN_THRESHOLD = 0.03; // RMS energy threshold for barge-in
const BARGE_IN_FRAMES = 2; // Consecutive frames above threshold to trigger

// Connection and audio handles; each stays null until it is first created.
let ws = null; // Chat WebSocket
let audioCtx = null; // AudioContext used for playback
let micStream = null; // MediaStream returned by getUserMedia
let workletNode = null; // AudioWorkletNode receiving microphone audio

// Runtime flags and counters.
let micActive = false; // True while microphone capture is running
let isPlaying = false; // True while at least one audio chunk is scheduled
let nextPlayTime = 0; // Context time at which the next chunk should start
let bargeInCount = 0; // Consecutive loud mic frames seen during playback
|
|
|
|
// Page elements, looked up once when the script loads.
const [chatArea, statusBadge, micBtn] = [
  "chat-area",
  "status-badge",
  "mic-btn",
].map((id) => document.getElementById(id));
|
|
|
|
// --- WebSocket ---
|
|
|
|
/**
 * Open the chat WebSocket (`/ws/chat` on the current host, `wss:` when the
 * page is served over `https:`) and install its event handlers.
 *
 * - Binary frames are passed to `playAudioChunk`, text frames are parsed as
 *   JSON and passed to `handleJSON`.
 * - When the current socket closes, the status becomes "disconnected" and a
 *   reconnect is scheduled after 2 seconds.
 * - Calling this while a socket is already connecting or open is a no-op, so
 *   overlapping callers (reconnect timer, mic start) never create duplicates.
 */
function connectWS() {
  const alreadyLive =
    ws &&
    (ws.readyState === WebSocket.CONNECTING ||
      ws.readyState === WebSocket.OPEN);
  if (alreadyLive) {
    return;
  }

  const proto = location.protocol === "https:" ? "wss:" : "ws:";
  // Handlers close over `socket`, not the shared `ws`, so an event from an
  // old socket can never act on a newer connection.
  const socket = new WebSocket(`${proto}//${location.host}/ws/chat`);
  socket.binaryType = "arraybuffer";
  ws = socket;

  socket.onopen = () => {
    setStatus("listening");
  };

  socket.onclose = () => {
    // A newer socket has replaced this one; leave status and reconnect to it.
    if (ws !== socket) {
      return;
    }
    setStatus("disconnected");
    setTimeout(connectWS, 2000);
  };

  socket.onerror = () => {
    socket.close();
  };

  socket.onmessage = (event) => {
    if (event.data instanceof ArrayBuffer) {
      playAudioChunk(event.data);
      return;
    }

    let msg;
    try {
      msg = JSON.parse(event.data);
    } catch (err) {
      // One malformed frame must not break the message handler.
      console.error("Ignoring malformed server message:", err);
      return;
    }
    handleJSON(msg);
  };
}
|
|
|
|
/**
 * Route one parsed server message to the matching UI or audio action.
 * Messages whose `type` is not one of the four handled values are ignored.
 *
 * @param {object} msg - Parsed JSON message; `msg.type` selects the action.
 */
function handleJSON(msg) {
  const kind = msg.type;

  if (kind === "status") {
    setStatus(msg.state);
  } else if (kind === "interrupt") {
    // Silence playback, then close out the in-progress assistant message.
    stopPlayback();
    finalizeAssistantMessage();
  } else if (kind === "transcript") {
    addMessage("user", msg.text);
  } else if (kind === "response_text") {
    // A final marker ends the current message; anything else extends it.
    if (msg.final) {
      finalizeAssistantMessage();
    } else {
      appendAssistantText(msg.text);
    }
  }
}
|
|
|
|
// --- Status ---
|
|
|
|
/**
 * Show a connection/conversation state in the status badge.
 * Known states get a display label; any other value is shown verbatim.
 * The raw state is also written to the badge's class name.
 *
 * @param {string} state - State identifier, e.g. "listening" or "speaking".
 */
function setStatus(state) {
  let label;
  switch (state) {
    case "listening":
      label = "Listening";
      break;
    case "thinking":
      label = "Thinking...";
      break;
    case "speaking":
      label = "Speaking";
      break;
    case "disconnected":
      label = "Disconnected";
      break;
    default:
      label = state;
  }

  statusBadge.textContent = label;
  statusBadge.className = state;
}
|
|
|
|
// --- Chat Messages ---
|
|
|
|
// Text accumulated so far for the assistant message being streamed.
let currentAssistantText = "";
// Element showing that message, or null when no message is in progress.
let currentAssistantEl = null;
|
|
|
|
/**
 * Append a complete chat message to the chat area and scroll it into view.
 *
 * @param {string} role - Added to the element's class list after "message".
 * @param {string} text - Message body, inserted as plain text.
 */
function addMessage(role, text) {
  const bubble = Object.assign(document.createElement("div"), {
    className: `message ${role}`,
    textContent: text,
  });
  chatArea.appendChild(bubble);
  // Keep the newest message visible.
  chatArea.scrollTop = chatArea.scrollHeight;
}
|
|
|
|
/**
 * Add a piece of streamed assistant text to the in-progress message,
 * creating the message element on the first piece. Pieces are joined with a
 * single space.
 *
 * @param {string} text - Next piece of the assistant's response.
 */
function appendAssistantText(text) {
  let bubble = currentAssistantEl;
  if (!bubble) {
    // First piece of a new message: create its element and start fresh.
    bubble = document.createElement("div");
    currentAssistantEl = bubble;
    bubble.className = "message assistant";
    chatArea.appendChild(bubble);
    currentAssistantText = "";
  }

  const separator = currentAssistantText ? " " : "";
  currentAssistantText = currentAssistantText + (separator + text);
  bubble.textContent = currentAssistantText;
  chatArea.scrollTop = chatArea.scrollHeight;
}
|
|
|
|
/**
 * Close out the in-progress assistant message so the next streamed piece
 * starts a new one. The element already on the page is left as it is.
 */
function finalizeAssistantMessage() {
  [currentAssistantEl, currentAssistantText] = [null, ""];
}
|
|
|
|
// --- Audio Playback ---
|
|
|
|
// Buffer sources scheduled for playback that have not yet ended or been stopped.
let activeSources = [];
|
|
|
|
/**
 * Return the shared playback AudioContext, creating one at PLAYBACK_SR when
 * none exists yet or the existing one has been closed.
 *
 * @returns {AudioContext} The context used for audio playback.
 */
function getPlaybackCtx() {
  const usable = audioCtx && audioCtx.state !== "closed";
  if (!usable) {
    audioCtx = new AudioContext({ sampleRate: PLAYBACK_SR });
  }
  return audioCtx;
}
|
|
|
|
/**
 * Schedule one chunk of 16-bit PCM mono audio for gapless playback.
 *
 * Samples are scaled by 1/32768 into a buffer at PLAYBACK_SR. Each chunk is
 * started at `nextPlayTime`, which is then advanced by the chunk's duration
 * so consecutive chunks play back to back.
 *
 * Payloads with no complete sample are ignored, and a dangling trailing byte
 * is dropped, instead of throwing inside the WebSocket message handler.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw 16-bit PCM samples.
 */
function playAudioChunk(arrayBuffer) {
  // Int16Array needs whole 2-byte samples, and createBuffer rejects length 0.
  const sampleCount = Math.floor(arrayBuffer.byteLength / 2);
  if (sampleCount === 0) {
    return;
  }

  const ctx = getPlaybackCtx();
  const pcm = new Int16Array(arrayBuffer, 0, sampleCount);

  // Convert straight into the buffer's channel data (no intermediate copy).
  const buffer = ctx.createBuffer(1, sampleCount, PLAYBACK_SR);
  const channel = buffer.getChannelData(0);
  for (let i = 0; i < sampleCount; i++) {
    channel[i] = pcm[i] / 32768;
  }

  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);

  activeSources.push(source);
  isPlaying = true;
  source.onended = () => {
    activeSources = activeSources.filter((s) => s !== source);
    // Last outstanding chunk finished: playback is over.
    if (activeSources.length === 0) {
      isPlaying = false;
      bargeInCount = 0;
    }
  };

  // If the schedule has fallen behind the clock, restart it slightly ahead
  // of "now" rather than starting in the past.
  const now = ctx.currentTime;
  if (nextPlayTime < now) {
    nextPlayTime = now + 0.01;
  }
  source.start(nextPlayTime);
  nextPlayTime += buffer.duration;
}
|
|
|
|
/**
 * Stop every scheduled audio chunk and reset the playback state
 * (source list, schedule position, playing flag, barge-in counter).
 */
function stopPlayback() {
  activeSources.forEach((node) => {
    try {
      node.stop();
    } catch (_) {}
  });

  bargeInCount = 0;
  isPlaying = false;
  nextPlayTime = 0;
  activeSources = [];
}
|
|
|
|
// --- Microphone ---
|
|
|
|
/**
 * Flip microphone capture: stop it when active, otherwise start it.
 *
 * @returns {Promise<void>} Settles once the stop or start has finished.
 */
async function toggleMic() {
  if (!micActive) {
    await startMic();
    return;
  }
  stopMic();
}
|
|
|
|
/**
 * Root-mean-square level of a frame of 16-bit PCM samples, with each sample
 * scaled by 1/32768.
 *
 * @param {Int16Array} samples - PCM samples.
 * @returns {number} RMS level; NaN for an empty frame.
 */
function pcmFrameRms(samples) {
  let sumSquares = 0;
  for (let i = 0; i < samples.length; i++) {
    const s = samples[i] / 32768;
    sumSquares += s * s;
  }
  return Math.sqrt(sumSquares / samples.length);
}

/**
 * Forward one microphone frame to the server and, while audio is playing,
 * run client-side barge-in detection on it. Frames are dropped when the
 * WebSocket is not open.
 *
 * @param {ArrayBuffer} frame - 16-bit PCM samples posted by the worklet.
 */
function handleMicFrame(frame) {
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    return;
  }
  ws.send(frame);

  // Client-side barge-in: detect mic energy while playing
  if (!isPlaying) {
    return;
  }

  const rms = pcmFrameRms(new Int16Array(frame));
  if (rms > BARGE_IN_THRESHOLD) {
    bargeInCount++;
    if (bargeInCount >= BARGE_IN_FRAMES) {
      // User is speaking over the assistant - interrupt
      stopPlayback();
      finalizeAssistantMessage();
      ws.send(JSON.stringify({ type: "interrupt" }));
      isPlaying = false;
      bargeInCount = 0;
    }
  } else {
    // A quiet (or empty) frame breaks the run of loud frames.
    bargeInCount = 0;
  }
}

/**
 * Start microphone capture and stream frames to the server.
 *
 * Resumes the playback context if suspended, requests the microphone, builds
 * a capture AudioContext at MIC_SR with the "pcm-processor" worklet, and
 * connects the WebSocket if it is not open. On any failure the error is
 * logged, everything acquired by this call is released (so the microphone
 * does not stay live), and the user is alerted.
 *
 * @returns {Promise<void>} Settles when capture is running or has failed.
 */
async function startMic() {
  // Held locally until the whole pipeline is built, so a failure part-way
  // through can release exactly what this call acquired.
  let stream = null;
  let micCtx = null;

  try {
    // Ensure playback context exists (needed for user gesture)
    const playbackCtx = getPlaybackCtx();
    if (playbackCtx.state === "suspended") {
      await playbackCtx.resume();
    }

    stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        sampleRate: MIC_SR,
        channelCount: 1,
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      },
    });

    // Create a separate context at MIC_SR for mic capture
    micCtx = new AudioContext({ sampleRate: MIC_SR });
    const source = micCtx.createMediaStreamSource(stream);

    await micCtx.audioWorklet.addModule("/static/processor.js");
    const node = new AudioWorkletNode(micCtx, "pcm-processor");
    source.connect(node);
    node.port.onmessage = (e) => handleMicFrame(e.data);

    // stopMic() closes the capture context through this property.
    node._micCtx = micCtx;

    // Connect WebSocket if not already
    if (!ws || ws.readyState !== WebSocket.OPEN) {
      connectWS();
    }

    micBtn.classList.add("active");

    // Publish shared state only once nothing else can fail.
    micStream = stream;
    workletNode = node;
    micActive = true;
  } catch (err) {
    console.error("Mic access failed:", err);

    // Release whatever was acquired before the failure.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    if (micCtx) {
      micCtx.close().catch((closeErr) => {
        console.error("Mic context cleanup failed:", closeErr);
      });
    }

    alert("Could not access microphone. Please allow mic permissions.");
  }
}
|
|
|
|
/**
 * Stop microphone capture: disconnect the worklet node, close the capture
 * context stored on it, stop the stream's tracks, and clear the active state
 * on the mic button.
 */
function stopMic() {
  const node = workletNode;
  if (node) {
    node.disconnect();
    const captureCtx = node._micCtx;
    if (captureCtx) {
      captureCtx.close();
    }
    workletNode = null;
  }

  const stream = micStream;
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
    micStream = null;
  }

  micActive = false;
  micBtn.classList.remove("active");
}
|
|
|
|
// --- Voice Selection ---
|
|
|
|
/**
 * Send the voice chosen in `#voice-select` to the server and report the
 * outcome in `#voice-status`.
 *
 * Posts form fields `voice` and `lang` ("a") to `/api/set-voice`. A non-2xx
 * response is reported with its HTTP status without parsing the body, so a
 * non-JSON error page does not surface as a JSON parse error. A 2xx response
 * counts as success only when its JSON body has `status === "ok"`.
 *
 * @returns {Promise<void>} Settles once the status text has been updated.
 */
async function applyVoice() {
  const voice = document.getElementById("voice-select").value;
  const statusEl = document.getElementById("voice-status");

  const formData = new FormData();
  formData.append("voice", voice);
  formData.append("lang", "a");

  statusEl.textContent = "Applying...";
  try {
    const resp = await fetch("/api/set-voice", {
      method: "POST",
      body: formData,
    });

    if (!resp.ok) {
      statusEl.textContent = `Failed (HTTP ${resp.status}).`;
      return;
    }

    // `?.` tolerates a JSON body of `null`.
    const data = await resp.json();
    statusEl.textContent =
      data?.status === "ok" ? `Voice: ${voice}` : "Failed.";
  } catch (err) {
    statusEl.textContent = `Error: ${err.message}`;
  }
}
|
|
|
|
// Make the handlers reachable from inline HTML onclick attributes.
Object.assign(window, { toggleMic, applyVoice });
|