Skip to content

Commit

Permalink
added on_recorded_chunk cb
Browse files Browse the repository at this point in the history
  • Loading branch information
KoljaB committed Mar 16, 2024
1 parent e5613ca commit ba6e549
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 17 deletions.
39 changes: 29 additions & 10 deletions RealtimeSTT/audio_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ def __init__(self,
on_wakeword_timeout=None,
on_wakeword_detection_start=None,
on_wakeword_detection_end=None,
on_recorded_chunk=None,
debug_mode=False
):
"""
Initializes an audio recorder and transcription
Expand Down Expand Up @@ -246,6 +248,11 @@ def __init__(self,
- on_wakeword_detection_end (callable, default=None): Callback
function to be called when the system stops listening for
wake words (e.g. because of timeout or wake word detected)
- on_recorded_chunk (callable, default=None): Callback function to be
called when a chunk of audio is recorded. The function is called
with the recorded audio chunk as its argument.
- debug_mode (bool, default=False): If set to True, the system will
print additional debug information to the console.
Raises:
Exception: Errors related to initializing transcription
Expand Down Expand Up @@ -278,6 +285,7 @@ def __init__(self,
self.on_vad_detect_stop = on_vad_detect_stop
self.on_wakeword_detection_start = on_wakeword_detection_start
self.on_wakeword_detection_end = on_wakeword_detection_end
self.on_recorded_chunk = on_recorded_chunk
self.on_transcription_start = on_transcription_start
self.enable_realtime_transcription = enable_realtime_transcription
self.realtime_model_type = realtime_model_type
Expand All @@ -288,6 +296,7 @@ def __init__(self,
self.on_realtime_transcription_stabilized = (
on_realtime_transcription_stabilized
)
self.debug_mode = debug_mode
self.allowed_latency_limit = ALLOWED_LATENCY_LIMIT

self.level = level
Expand Down Expand Up @@ -578,9 +587,6 @@ def _transcription_worker(conn,
transcription = " ".join(seg.text for seg in segments)
transcription = transcription.strip()
conn.send(('success', transcription))
except faster_whisper.WhisperError as e:
logging.error(f"Whisper transcription error: {e}")
conn.send(('error', str(e)))
except Exception as e:
logging.error(f"General transcription error: {e}")
conn.send(('error', str(e)))
Expand Down Expand Up @@ -633,13 +639,14 @@ def _audio_data_worker(audio_queue,

try:
audio_interface = pyaudio.PyAudio()
stream = audio_interface.open(rate=sample_rate,
format=pyaudio.paInt16,
channels=1,
input=True,
frames_per_buffer=buffer_size,
input_device_index=input_device_index,
)
stream = audio_interface.open(
rate=sample_rate,
format=pyaudio.paInt16,
channels=1,
input=True,
frames_per_buffer=buffer_size,
input_device_index=input_device_index,
)

except Exception as e:
logging.exception("Error initializing pyaudio "
Expand Down Expand Up @@ -978,6 +985,8 @@ def _recording_worker(self):
try:

data = self.audio_queue.get()
if self.on_recorded_chunk:
self.on_recorded_chunk(data)

# Handle queue overflow
queue_overflow_logged = False
Expand Down Expand Up @@ -1326,10 +1335,20 @@ def _is_webrtc_speech(self, data, all_frames_must_be_true=False):
if self.webrtc_vad_model.is_speech(frame, self.sample_rate):
speech_frames += 1
if not all_frames_must_be_true:
if self.debug_mode:
print(f"Speech detected in frame {i + 1}"
f" of {num_frames}")
return True
if all_frames_must_be_true:
if self.debug_mode and speech_frames == num_frames:
print(f"Speech detected in {speech_frames} of "
f"{num_frames} frames")
elif self.debug_mode:
print(f"Speech not detected in all {num_frames} frames")
return speech_frames == num_frames
else:
if self.debug_mode:
print(f"Speech not detected in any of {num_frames} frames")
return False

def _check_voice_activity(self, data):
Expand Down
2 changes: 1 addition & 1 deletion requirements-gpu.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
PyAudio==0.2.14
-faster-whisper==0.10.0
+faster-whisper==1.0.1
pvporcupine==1.9.5
webrtcvad==2.0.10
halo==0.0.31
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
PyAudio==0.2.14
-faster-whisper==0.10.0
+faster-whisper==1.0.1
pvporcupine==1.9.5
webrtcvad==2.0.10
halo==0.0.31
Expand Down
5 changes: 0 additions & 5 deletions requirements_raw.txt

This file was deleted.

0 comments on commit ba6e549

Please sign in to comment.