-
Notifications
You must be signed in to change notification settings - Fork 94
Description
I am sending chunks of audio bytes (each chunk is 10 seconds of audio) through the WebSocket connection to Deepgram. However, Deepgram is only processing and returning the first chunk, and not doing so for subsequent chunks, while keeping the WebSocket connection alive.
It returns a Metadata object for the first chunk and then returns nothing else; it also does not trigger the error, finalize, unhandled, or close events. I've confirmed that I am indeed calling the send method for the subsequent chunks.
Below are the relevant pieces of code:
def transcribe_live_from_deepgram():
# Implement from here: https://developers.deepgram.com/docs/getting-started-with-live-streaming-audio
try:
config = DeepgramClientOptions(
options={
"keepalive": "true",
"encoding": "linear16",
}
)
deepgram = DeepgramClient(
<my_key>, config)
dg_connection = deepgram.listen.live.v("1")
def on_message(self, result, **kwargs):
sentence = result.channel.alternatives[0].transcript
if len(sentence) == 0:
return
print(f"speaker: {sentence}")
def on_metadata(self, metadata, **kwargs):
print(f"\n\n{metadata}\n\n")
def on_error(self, error, **kwargs):
print(f"\n\n{error}\n\n")
def on_finalize(self, finalize, **kwargs):
print(f"\n\n{finalize}\n\n")
def on_close(self, close, **kwargs):
print(f"\n\n{close}\n\n")
def on_unhandled(self, unhandled, **kwargs):
print(f"\n\n{unhandled}\n\n")
dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
dg_connection.on(LiveTranscriptionEvents.Error, on_error)
dg_connection.on(LiveTranscriptionEvents.Close, on_close)
dg_connection.on(LiveTranscriptionEvents.Finalize, on_finalize)
dg_connection.on(LiveTranscriptionEvents.Unhandled, on_unhandled)
options = LiveOptions(
model="nova-2",
language="en-US",
smart_format=True,
)
dg_connection.start(options)
return dg_connection
except Exception as e:
print(f"Could not open socket: {e}")
return
@app.websocket("/ws/listen")
async def transcribe_from_websocket(websocket: WebSocket, client_id: int):
    """Bridge a browser WebSocket to a live Deepgram transcription socket.

    Receives base64-encoded audio messages from the client, decodes them,
    and forwards the raw bytes to an open Deepgram connection.
    """
    # Start websocket connection to client
    await encounter_connection_manager.connect(websocket)
    # Start websocket connection to Deepgram
    dg_connection = transcribe_live_from_deepgram()
    connected = True  # NOTE(review): never set False in the visible code — loop exit path is elided
    try:
        while connected:
            message = await websocket.receive()
            if message:
                try:
                    # Client sends audio as base64 text; decode back to raw bytes
                    # before forwarding to Deepgram.
                    message_bytes = bytes(base64.b64decode(message['text']))
                    print(f'Message byte length: {len(message_bytes)}')
                    dg_connection.send(message_bytes)
                    ...
                    # NOTE(review): snippet truncated here — the inner try's
                    # except handler was elided from the paste.
    except Exception as e:
        encounter_connection_manager.disconnect(websocket)
        ...
When I run the code above, it correctly returns the transcribed items for the first chunk, and then it sends the following metadata message:
{
"type": "Metadata",
"transaction_key": "deprecated",
"request_id": "e508a3cc-8c04-4a76-a3ae-801337bdbc80",
"sha256": "181598a8b95bf32daaa3796a7af976098dd1408f426883d390ad010cee7ad727",
"created": "2024-07-02T06:39:02.759Z",
"duration": 9.898687,
"channels": 1,
"models": [
"c0d1a568-ce81-4fea-97e7-bd45cb1fdf3c"
],
"model_info": {
"c0d1a568-ce81-4fea-97e7-bd45cb1fdf3c": {
"name": "2-general-nova",
"version": "2024-01-18.26916",
"arch": "nova-2"
}
}
}
I receive no additional messages for the subsequent chunks. Is there something wrong with my setup, or is there a bug? I've also tried smaller audio chunks (250 ms), but the same issue still occurs.
My client code is as follows:
// Client-side (Browser)
// import RecordRTC from 'recordrtc';
console.log('Recorder client loaded');

// FIX: removed the unused `let mediaRecorder;` declaration.

let ws = new WebSocket('ws://localhost:8000/ws/listen');

// Append each server message (transcript text) to the page.
ws.onmessage = event => {
    let responseContainer = document.getElementById('responseContainer');
    responseContainer.innerHTML += `<p>${event.data}</p>`;
};

// Forward each recorded audio blob to the server as base64 text.
// NOTE(review): with StereoAudioRecorder + timeSlice, each emitted blob is
// a complete WAV file with its own RIFF header — confirm whether the server
// (and Deepgram) expects containerized WAV or raw headerless PCM.
let handleDataAvailable = (event) => {
    if (event.size > 0) {
        console.log('blob', event)
        blobToBase64(event).then(b64 => {
            ws.send(b64)
        })
    }
};

// Convert a Blob to its base64 payload (data-URL prefix stripped).
function blobToBase64(blob) {
    return new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.readAsDataURL(blob);
        reader.onload = () => {
            const base64String = reader.result.split(',')[1];
            resolve(base64String);
        };
        reader.onerror = (error) => reject(error);
    });
}

// Ask for the microphone, then wire the recorder and the control buttons.
navigator.mediaDevices.getUserMedia({ audio: true })
    .then(stream => {
        let recorder = RecordRTC(stream, {
            type: 'audio',
            recorderType: StereoAudioRecorder,
            mimeType: 'audio/wav',
            timeSlice: 10000,        // emit a blob every 10 s
            desiredSampRate: 16000,  // must match the server-side sample rate
            numberOfAudioChannels: 1,
            ondataavailable: handleDataAvailable
        });
        document.getElementById('startButton').addEventListener('click', () => {
            recorder.startRecording();
            let status = document.getElementById('status');
            status.innerHTML += `<p>Recording</p>`;
        });
        document.getElementById('pauseButton').addEventListener('click', () => {
            recorder.pauseRecording();
            let status = document.getElementById('status');
            status.innerHTML += `<p>Paused</p>`;
        });
        document.getElementById('resumeButton').addEventListener('click', () => {
            recorder.resumeRecording();
            let status = document.getElementById('status');
            status.innerHTML += `<p>Playing</p>`;
        });
        document.getElementById('stopButton').addEventListener('click', () => {
            recorder.stopRecording();
            let status = document.getElementById('status');
            status.innerHTML += `<p>Stopped</p>`;
        });
    });

ws.onopen = () => {
    console.log('WebSocket connection opened');
};
ws.onclose = () => {
    console.log('WebSocket connection closed');
};
MacOS 14.4.1
Python Version 3.11.9