barge-in changes
This commit is contained in:
+64
-14
@@ -13,6 +13,11 @@ const BARGE_IN_THRESHOLD = 0.03; // RMS energy threshold for barge-in
|
||||
const BARGE_IN_FRAMES = 2; // Consecutive frames above threshold to trigger
// Running counter compared against BARGE_IN_FRAMES by the mic handler;
// it is reset to 0 whenever playback stops or the last audio source ends.
let bargeInCount = 0;

// --- Text-audio sync state ---
// Queue of {chunkId, text} entries waiting for their audio to arrive:
// "response_text" messages push onto it, playAudioChunk shifts one entry
// off per audio chunk.
let pendingTextChunks = [];
// setTimeout IDs for text scheduled to appear when its audio starts playing.
// An ID is removed when its timer fires; all are cleared on stop/finalize.
let scheduledTextTimers = [];
// chunkId of the last chunk whose text was actually shown to the user.
// -1 means nothing has been shown; the barge-in path only sends it when >= 0.
let lastDisplayedChunkId = -1;

// Cached DOM references looked up once at load time.
const chatArea = document.getElementById("chat-area");
const statusBadge = document.getElementById("status-badge");
const micBtn = document.getElementById("mic-btn");
|
||||
@@ -54,8 +59,8 @@ function handleJSON(msg) {
|
||||
|
||||
case "interrupt":
|
||||
stopPlayback();
|
||||
// Trim the assistant message to what was spoken, then finalize
|
||||
finalizeAssistantMessage();
|
||||
// Finalize with interrupted marker — text already reflects only what was heard
|
||||
finalizeAssistantMessage(true);
|
||||
break;
|
||||
|
||||
case "transcript":
|
||||
@@ -64,9 +69,15 @@ function handleJSON(msg) {
|
||||
|
||||
case "response_text":
|
||||
if (msg.final) {
|
||||
finalizeAssistantMessage();
|
||||
// All chunks sent; finalize will happen when last audio chunk plays
|
||||
// (or immediately if nothing was queued)
|
||||
if (pendingTextChunks.length === 0 && scheduledTextTimers.length === 0) {
|
||||
finalizeAssistantMessage(false);
|
||||
}
|
||||
// Otherwise, playAudioChunk will finalize after the last scheduled text
|
||||
} else {
|
||||
appendAssistantText(msg.text);
|
||||
// Queue text — it will be displayed when corresponding audio starts playing
|
||||
pendingTextChunks.push({ chunkId: msg.chunk_id, text: msg.text });
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -113,9 +124,20 @@ function appendAssistantText(text) {
|
||||
chatArea.scrollTop = chatArea.scrollHeight;
|
||||
}
|
||||
|
||||
/**
 * Close out the assistant message currently being rendered and reset the
 * text-audio sync state so the next response starts from a clean slate.
 *
 * Calling it when no message is in progress is harmless: no marker is
 * added and the state is simply reset again.
 *
 * NOTE(review): playAudioChunk's `onended` handler calls this whenever the
 * last active source ends with no text pending. Confirm that a gap between
 * audio chunks mid-response cannot finalize the message early.
 *
 * @param {boolean} [interrupted=false] - When true, and the current message
 *   element exists with non-empty text, an " [interrupted]" marker span is
 *   appended to it before the state is cleared.
 * @returns {void}
 */
function finalizeAssistantMessage(interrupted = false) {
  // Only mark a message that actually displayed some text; an interrupt that
  // lands before any text was shown leaves nothing to annotate.
  if (interrupted && currentAssistantEl && currentAssistantText) {
    const marker = document.createElement("span");
    marker.className = "interrupted-marker";
    marker.textContent = " [interrupted]";
    currentAssistantEl.appendChild(marker);
  }

  currentAssistantEl = null;
  currentAssistantText = "";

  // Reset sync state. Timers are cancelled before the list is dropped so no
  // stale callback can append text to a later message.
  for (const tid of scheduledTextTimers) {
    clearTimeout(tid);
  }
  scheduledTextTimers = [];
  pendingTextChunks = [];
  lastDisplayedChunkId = -1;
}
|
||||
|
||||
// --- Audio Playback ---
|
||||
@@ -146,18 +168,38 @@ function playAudioChunk(arrayBuffer) {
|
||||
|
||||
activeSources.push(source);
|
||||
isPlaying = true;
|
||||
source.onended = () => {
|
||||
activeSources = activeSources.filter((s) => s !== source);
|
||||
if (activeSources.length === 0) {
|
||||
isPlaying = false;
|
||||
bargeInCount = 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Pair this audio chunk with the next queued text chunk
|
||||
const textEntry = pendingTextChunks.shift();
|
||||
|
||||
const now = ctx.currentTime;
|
||||
if (nextPlayTime < now) {
|
||||
nextPlayTime = now + 0.01;
|
||||
}
|
||||
|
||||
// Schedule text display to coincide with audio playback start
|
||||
if (textEntry) {
|
||||
const delayMs = Math.max(0, (nextPlayTime - now) * 1000);
|
||||
const tid = setTimeout(() => {
|
||||
appendAssistantText(textEntry.text);
|
||||
lastDisplayedChunkId = textEntry.chunkId;
|
||||
scheduledTextTimers = scheduledTextTimers.filter((t) => t !== tid);
|
||||
}, delayMs);
|
||||
scheduledTextTimers.push(tid);
|
||||
}
|
||||
|
||||
source.onended = () => {
|
||||
activeSources = activeSources.filter((s) => s !== source);
|
||||
if (activeSources.length === 0) {
|
||||
isPlaying = false;
|
||||
bargeInCount = 0;
|
||||
// If all audio has finished and no more text pending, finalize
|
||||
if (pendingTextChunks.length === 0 && scheduledTextTimers.length === 0) {
|
||||
finalizeAssistantMessage(false);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
source.start(nextPlayTime);
|
||||
nextPlayTime += buffer.duration;
|
||||
}
|
||||
@@ -172,6 +214,10 @@ function stopPlayback() {
|
||||
nextPlayTime = 0;
|
||||
isPlaying = false;
|
||||
bargeInCount = 0;
|
||||
// Cancel any pending text displays
|
||||
for (const tid of scheduledTextTimers) clearTimeout(tid);
|
||||
scheduledTextTimers = [];
|
||||
pendingTextChunks = [];
|
||||
}
|
||||
|
||||
// --- Microphone ---
|
||||
@@ -229,8 +275,12 @@ async function startMic() {
|
||||
if (bargeInCount >= BARGE_IN_FRAMES) {
|
||||
// User is speaking over the assistant - interrupt
|
||||
stopPlayback();
|
||||
finalizeAssistantMessage();
|
||||
ws.send(JSON.stringify({ type: "interrupt" }));
|
||||
const msg = { type: "interrupt" };
|
||||
if (lastDisplayedChunkId >= 0) {
|
||||
msg.last_chunk_id = lastDisplayedChunkId;
|
||||
}
|
||||
ws.send(JSON.stringify(msg));
|
||||
finalizeAssistantMessage(true);
|
||||
isPlaying = false;
|
||||
bargeInCount = 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user