-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathlisten.py
42 lines (36 loc) · 1.49 KB
/
listen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
""" Listening class for K9
This program listens for speech and converts it into text.
"""
#import paho.mqtt.client as mqtt
from typing import Optional

import deepspeech # Mozilla STT
import numpy as np

from audio_tools import VADAudio # Voice activity detection
class Listen():
    """Speech-to-text listener built on DeepSpeech and VADAudio.

    The DeepSpeech model is loaded once when the object is constructed;
    every call to listen_for_command() opens its own VADAudio capture and
    its own DeepSpeech decoding stream.
    """

    def __init__(self,
                 model_path: str = "/home/pi/k9/deepspeech-0.9.3-models.tflite",
                 scorer_path: str = "/home/pi/k9/deepspeech-0.9.3-models.scorer"):
        """Load the DeepSpeech model and enable its external scorer.

        Args:
            model_path: Path of the DeepSpeech model file. Defaults to the
                file under /home/pi/k9.
            scorer_path: Path of the external scorer file. Defaults to the
                file under /home/pi/k9.
        """
        # load deepspeech models for STT
        self.model = deepspeech.Model(model_path)
        self.model.enableExternalScorer(scorer_path)

    def listen_for_command(self) -> Optional[str]:
        """Listen until a non-empty phrase has been transcribed.

        Frames produced by VADAudio.vad_collector() are fed into a
        DeepSpeech stream. A ``None`` frame is treated as the end of an
        utterance: the stream is finished and its transcript inspected.
        Empty transcripts are discarded and listening carries on with a
        fresh stream.

        Returns:
            The transcribed text, or ``None`` when listening is interrupted
            by a KeyboardInterrupt.
        """
        # load voice activity detection capability
        self.vad_audio = VADAudio(aggressiveness=1,
                                  device=None,
                                  input_rate=16000,
                                  file=None)
        # None means "no open stream", so the cleanup below knows whether
        # there is still something to finish.
        self.stream_context = None
        try:
            self.stream_context = self.model.createStream()
            while True:
                for frame in self.vad_audio.vad_collector():
                    if frame is not None:
                        # Reinterpret the frame buffer as 16-bit samples.
                        self.stream_context.feedAudioContent(
                            np.frombuffer(frame, np.int16))
                        continue
                    # Detach the stream before finishing it so the cleanup
                    # can never finish the same stream a second time.
                    stream, self.stream_context = self.stream_context, None
                    command = stream.finishStream()
                    if command != "":
                        return command
                    # Nothing was recognised: start over with a new stream.
                    self.stream_context = self.model.createStream()
        except KeyboardInterrupt:
            # Interrupted by the user: no command to report.
            return None
        finally:
            # Runs on return, on interrupt and on any other error, so the
            # audio capture is always shut down.
            try:
                if self.stream_context is not None:
                    self.stream_context.finishStream()
                    self.stream_context = None
            finally:
                self.vad_audio.destroy()