initial commit
This commit is contained in:
+305
@@ -0,0 +1,305 @@
|
||||
// --- State ---

// Chat WebSocket; assigned by connectWS(), null until the first connection.
let ws = null;
// Playback AudioContext; created lazily by getPlaybackCtx().
let audioCtx = null;
// MediaStream returned by getUserMedia while the microphone is on.
let micStream = null;
// AudioWorkletNode that receives microphone audio while the microphone is on.
let workletNode = null;
// True between a successful startMic() and the next stopMic().
let micActive = false;
// Playback-context time at which the next audio chunk is scheduled to start.
let nextPlayTime = 0;
// True while at least one scheduled audio chunk has not finished playing.
let isPlaying = false;

const PLAYBACK_SR = 24000; // TTS output sample rate
const MIC_SR = 16000; // Sample rate requested for mic capture and its AudioContext
const BARGE_IN_THRESHOLD = 0.03; // RMS energy threshold for barge-in
const BARGE_IN_FRAMES = 2; // Consecutive frames above threshold to trigger
// Number of consecutive mic frames seen above BARGE_IN_THRESHOLD during playback.
let bargeInCount = 0;
|
||||
|
||||
// DOM elements looked up once at load time and reused by the functions below.
// Container that chat message elements are appended to.
const chatArea = document.getElementById("chat-area");
// Badge whose text and class reflect the current connection/assistant state.
const statusBadge = document.getElementById("status-badge");
// Microphone button; gets the "active" class while the mic is on.
const micBtn = document.getElementById("mic-btn");
|
||||
|
||||
// --- WebSocket ---
|
||||
|
||||
/**
 * Open the chat WebSocket at `/ws/chat` on the current host and wire up its
 * event handlers.
 *
 * - Does nothing when the current socket is already open or still connecting,
 *   so repeated calls (reconnect timer, startMic) never create duplicates.
 * - Binary frames are passed to playAudioChunk(); text frames are parsed as
 *   JSON and passed to handleJSON().
 * - When the current socket closes, the status becomes "disconnected" and a
 *   reconnect is scheduled after 2 seconds.
 */
function connectWS() {
  if (
    ws &&
    (ws.readyState === WebSocket.CONNECTING ||
      ws.readyState === WebSocket.OPEN)
  ) {
    return;
  }

  const proto = location.protocol === "https:" ? "wss:" : "ws:";
  // Handlers close over `socket` rather than the global `ws`, so an event from
  // an old socket can never act on a newer connection.
  const socket = new WebSocket(`${proto}//${location.host}/ws/chat`);
  socket.binaryType = "arraybuffer";
  ws = socket;

  socket.onopen = () => {
    setStatus("listening");
  };

  socket.onclose = () => {
    // A superseded socket must not flip the badge or schedule another reconnect.
    if (ws !== socket) {
      return;
    }
    setStatus("disconnected");
    setTimeout(connectWS, 2000);
  };

  socket.onerror = () => {
    // Closing triggers onclose, which owns the reconnect logic.
    socket.close();
  };

  socket.onmessage = (event) => {
    if (event.data instanceof ArrayBuffer) {
      playAudioChunk(event.data);
      return;
    }

    let msg;
    try {
      msg = JSON.parse(event.data);
    } catch (err) {
      // One malformed frame should not break the message handler.
      console.error("Ignoring malformed WebSocket message:", err);
      return;
    }
    handleJSON(msg);
  };
}
|
||||
|
||||
/**
 * Dispatch a parsed JSON message from the server by its `type` field.
 *
 * Handled types:
 * - "status":        update the status badge with `msg.state`.
 * - "interrupt":     stop audio playback and close the current assistant bubble.
 * - "transcript":    add `msg.text` as a user message.
 * - "response_text": append `msg.text` to the assistant bubble, or close the
 *                    bubble when `msg.final` is truthy.
 * Messages with any other type, and values that are not objects, are ignored.
 *
 * @param {object} msg - Parsed message.
 */
function handleJSON(msg) {
  // JSON.parse can legally produce null or a primitive; reading `.type` on
  // null would throw.
  if (msg === null || typeof msg !== "object") {
    return;
  }

  switch (msg.type) {
    case "status":
      setStatus(msg.state);
      break;

    case "interrupt":
      stopPlayback();
      // Close the current assistant bubble so later text starts a new one.
      finalizeAssistantMessage();
      break;

    case "transcript":
      addMessage("user", msg.text);
      break;

    case "response_text":
      if (msg.final) {
        finalizeAssistantMessage();
      } else {
        appendAssistantText(msg.text);
      }
      break;
  }
}
|
||||
|
||||
// --- Status ---
|
||||
|
||||
// Display text for each known status; unknown states are shown verbatim.
const STATUS_LABELS = new Map([
  ["listening", "Listening"],
  ["thinking", "Thinking..."],
  ["speaking", "Speaking"],
  ["disconnected", "Disconnected"],
]);

/**
 * Show `state` on the status badge.
 *
 * The badge text is the friendly label for known states and the raw state
 * value otherwise; the badge's class name is always set to the raw state.
 *
 * @param {string} state - State identifier, e.g. "listening" or "thinking".
 */
function setStatus(state) {
  statusBadge.textContent = STATUS_LABELS.get(state) ?? state;
  statusBadge.className = state;
}
|
||||
|
||||
// --- Chat Messages ---
|
||||
|
||||
// Element of the assistant message currently being streamed, or null when the
// next text chunk should start a new message.
let currentAssistantEl = null;
// Text accumulated so far for the message in currentAssistantEl.
let currentAssistantText = "";
|
||||
|
||||
/**
 * Append a complete chat message to the chat area and scroll to the bottom.
 *
 * @param {string} role - Added to the element's class list as `message <role>`.
 * @param {string} text - Message text; assigned via textContent, so it is
 *   never interpreted as HTML.
 */
function addMessage(role, text) {
  const el = document.createElement("div");
  el.className = `message ${role}`;
  el.textContent = text;
  chatArea.appendChild(el);
  chatArea.scrollTop = chatArea.scrollHeight;
}
|
||||
|
||||
/**
 * Append a streamed text chunk to the in-progress assistant message.
 *
 * The first chunk after finalizeAssistantMessage() creates a new
 * `message assistant` element; later chunks are joined to the existing text
 * with a single space. Empty or missing chunks are ignored so they neither
 * create an empty bubble nor add stray whitespace or the text "undefined".
 *
 * @param {string} text - Text chunk to append.
 */
function appendAssistantText(text) {
  if (text == null || text === "") {
    return;
  }

  if (!currentAssistantEl) {
    currentAssistantEl = document.createElement("div");
    currentAssistantEl.className = "message assistant";
    chatArea.appendChild(currentAssistantEl);
    currentAssistantText = "";
  }

  currentAssistantText = currentAssistantText
    ? `${currentAssistantText} ${text}`
    : `${text}`;
  currentAssistantEl.textContent = currentAssistantText;
  chatArea.scrollTop = chatArea.scrollHeight;
}
|
||||
|
||||
/**
 * Mark the in-progress assistant message as complete.
 *
 * Only the tracking state is reset; the element already in the chat area is
 * left untouched. The next appendAssistantText() call starts a new message.
 */
function finalizeAssistantMessage() {
  currentAssistantEl = null;
  currentAssistantText = "";
}
|
||||
|
||||
// --- Audio Playback ---
|
||||
|
||||
// Buffer source nodes that have been scheduled and have not yet ended.
let activeSources = [];
|
||||
|
||||
/**
 * Return the shared playback AudioContext, creating it on first use or after
 * the previous one has been closed.
 *
 * The context is created with PLAYBACK_SR as its sample rate.
 *
 * @returns {AudioContext} The playback context.
 */
function getPlaybackCtx() {
  if (!audioCtx || audioCtx.state === "closed") {
    audioCtx = new AudioContext({ sampleRate: PLAYBACK_SR });
  }
  return audioCtx;
}
|
||||
|
||||
/**
 * Schedule one chunk of 16-bit PCM audio for gapless playback.
 *
 * Samples are scaled to [-1, 1) floats, placed in a mono buffer at
 * PLAYBACK_SR, and started at `nextPlayTime` so consecutive chunks play back
 * to back. If the schedule has fallen behind the context clock, playback
 * restarts 10 ms from now.
 *
 * Chunks with no complete sample are ignored, and a trailing odd byte is
 * dropped: the Int16Array constructor throws on odd lengths, and
 * createBuffer throws on a zero length.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw signed 16-bit mono PCM samples.
 */
function playAudioChunk(arrayBuffer) {
  const sampleCount = Math.floor(arrayBuffer.byteLength / 2);
  if (sampleCount === 0) {
    return;
  }

  const ctx = getPlaybackCtx();
  const int16 = new Int16Array(arrayBuffer, 0, sampleCount);

  const buffer = ctx.createBuffer(1, sampleCount, PLAYBACK_SR);
  const channel = buffer.getChannelData(0);
  for (let i = 0; i < sampleCount; i++) {
    channel[i] = int16[i] / 32768;
  }

  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);

  activeSources.push(source);
  isPlaying = true;
  source.onended = () => {
    activeSources = activeSources.filter((s) => s !== source);
    // Playback is over only once every scheduled chunk has ended.
    if (activeSources.length === 0) {
      isPlaying = false;
      bargeInCount = 0;
    }
  };

  const now = ctx.currentTime;
  if (nextPlayTime < now) {
    // Behind the clock (first chunk or an underrun): restart just ahead of now.
    nextPlayTime = now + 0.01;
  }
  source.start(nextPlayTime);
  nextPlayTime += buffer.duration;
}
|
||||
|
||||
/**
 * Stop all scheduled audio immediately and reset playback state.
 *
 * Clears the list of active sources, resets the schedule cursor so the next
 * chunk starts fresh, and clears the playing flag and barge-in counter.
 */
function stopPlayback() {
  for (const source of activeSources) {
    try {
      source.stop();
    } catch {
      // Best effort: one source failing to stop must not keep the others
      // playing or leave the state below un-reset.
    }
  }
  activeSources = [];
  nextPlayTime = 0;
  isPlaying = false;
  bargeInCount = 0;
}
|
||||
|
||||
// --- Microphone ---
|
||||
|
||||
/**
 * Toggle microphone capture: stop it when active, otherwise start it.
 *
 * @returns {Promise<void>} Resolves once the mic has been stopped or the
 *   start attempt has finished.
 */
async function toggleMic() {
  if (micActive) {
    stopMic();
  } else {
    await startMic();
  }
}
|
||||
|
||||
// True while a startMic() call is still awaiting permission or worklet setup.
let micStartPending = false;

/**
 * Root-mean-square level of 16-bit samples, normalised to the 0..1 range.
 *
 * @param {Int16Array} samples - PCM samples.
 * @returns {number} RMS level; 0 for an empty frame.
 */
function computeFrameRms(samples) {
  if (samples.length === 0) {
    return 0;
  }
  let sumOfSquares = 0;
  for (const raw of samples) {
    const s = raw / 32768;
    sumOfSquares += s * s;
  }
  return Math.sqrt(sumOfSquares / samples.length);
}

/**
 * Forward one microphone frame to the server and run client-side barge-in
 * detection while assistant audio is playing.
 *
 * @param {ArrayBuffer} data - Frame posted by the worklet; read here as
 *   Int16 PCM (assumed from usage; processor.js is not visible here, confirm).
 */
function handleMicFrame(data) {
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    return;
  }
  ws.send(data);

  // Client-side barge-in: detect mic energy while playing
  if (!isPlaying) {
    return;
  }

  if (computeFrameRms(new Int16Array(data)) <= BARGE_IN_THRESHOLD) {
    bargeInCount = 0;
    return;
  }

  bargeInCount++;
  if (bargeInCount >= BARGE_IN_FRAMES) {
    // User is speaking over the assistant - interrupt
    stopPlayback();
    finalizeAssistantMessage();
    ws.send(JSON.stringify({ type: "interrupt" }));
    isPlaying = false;
    bargeInCount = 0;
  }
}

/**
 * Start microphone capture and stream frames to the server.
 *
 * Steps: make sure the playback context exists and is running (this runs in
 * a user gesture), request the microphone, build a MIC_SR AudioContext with
 * the "pcm-processor" worklet, route frames through handleMicFrame(), mark
 * the mic active, and connect the WebSocket if it is not open.
 *
 * Does nothing when the mic is already active or another start is still in
 * progress, so a double click cannot open (and leak) a second stream.
 * On failure the error is logged, the user is alerted, and any stream or
 * context acquired during this attempt is released.
 *
 * @returns {Promise<void>}
 */
async function startMic() {
  if (micActive || micStartPending) {
    return;
  }
  micStartPending = true;

  let stream = null;
  let micCtx = null;
  try {
    // Ensure playback context exists (needed for user gesture)
    getPlaybackCtx();
    if (audioCtx.state === "suspended") {
      await audioCtx.resume();
    }

    stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        sampleRate: MIC_SR,
        channelCount: 1,
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
      },
    });

    // Create a separate context at 16kHz for mic capture
    micCtx = new AudioContext({ sampleRate: MIC_SR });
    const source = micCtx.createMediaStreamSource(stream);

    await micCtx.audioWorklet.addModule("/static/processor.js");
    const node = new AudioWorkletNode(micCtx, "pcm-processor");
    source.connect(node);
    node.port.onmessage = (e) => handleMicFrame(e.data);

    // Store for cleanup (stopMic() closes the context through this property)
    node._micCtx = micCtx;

    // Publish to shared state only once the whole pipeline is ready.
    micStream = stream;
    workletNode = node;
    micActive = true;
    micBtn.classList.add("active");

    // Connect WebSocket if not already
    if (!ws || ws.readyState !== WebSocket.OPEN) {
      connectWS();
    }
  } catch (err) {
    console.error("Mic access failed:", err);

    // Release whatever this attempt acquired so the browser's recording
    // indicator does not stay on after a failed start.
    if (!micActive) {
      stream?.getTracks().forEach((track) => track.stop());
      if (micCtx && micCtx.state !== "closed") {
        micCtx.close().catch((closeErr) => {
          console.error("Failed to close mic AudioContext:", closeErr);
        });
      }
    }

    alert("Could not access microphone. Please allow mic permissions.");
  } finally {
    micStartPending = false;
  }
}
|
||||
|
||||
/**
 * Stop microphone capture and release its resources.
 *
 * Detaches the worklet's message handler, disconnects the worklet node,
 * closes the mic AudioContext stored on it by startMic(), stops every track
 * of the mic stream, and clears the active state on the mic button.
 * Safe to call when the mic is not running.
 */
function stopMic() {
  if (workletNode) {
    // Frames already queued on the port must not be sent after the mic stops.
    workletNode.port.onmessage = null;
    workletNode.disconnect();

    const micCtx = workletNode._micCtx;
    if (micCtx && micCtx.state !== "closed") {
      // close() returns a promise; handle rejection instead of leaving it
      // floating as an unhandled rejection.
      micCtx.close().catch((err) => {
        console.error("Failed to close mic AudioContext:", err);
      });
    }
    workletNode = null;
  }

  if (micStream) {
    micStream.getTracks().forEach((t) => t.stop());
    micStream = null;
  }

  micActive = false;
  micBtn.classList.remove("active");
}
|
||||
|
||||
// --- Voice Selection ---
|
||||
|
||||
/**
 * Send the voice chosen in the `voice-select` element to the server and show
 * the outcome in the `voice-status` element.
 *
 * POSTs form fields `voice` (the selected value) and `lang` ("a") to
 * `/api/set-voice`. The status text becomes "Voice: <voice>" when the
 * response is successful and its JSON body has `status === "ok"`, "Failed."
 * for any other response, and "Error: <message>" when the request or the
 * JSON parsing throws.
 *
 * @returns {Promise<void>}
 */
async function applyVoice() {
  const voice = document.getElementById("voice-select").value;
  const statusEl = document.getElementById("voice-status");

  const formData = new FormData();
  formData.append("voice", voice);
  formData.append("lang", "a");

  statusEl.textContent = "Applying...";
  try {
    const resp = await fetch("/api/set-voice", {
      method: "POST",
      body: formData,
    });

    // An HTTP error may carry a non-JSON body; report it as a failure rather
    // than surfacing a JSON parse error to the user.
    if (!resp.ok) {
      statusEl.textContent = "Failed.";
      return;
    }

    const data = await resp.json();
    statusEl.textContent =
      data?.status === "ok" ? `Voice: ${voice}` : "Failed.";
  } catch (err) {
    statusEl.textContent = `Error: ${err.message}`;
  }
}
|
||||
|
||||
// Expose to HTML onclick: inline handlers resolve names on `window`, so the
// two entry points are attached there explicitly.
window.toggleMic = toggleMic;
window.applyVoice = applyVoice;
|
||||
Reference in New Issue
Block a user