# speech2text1.py
# Records short audio clips, transcribes them with Whisper, and sends the
# recognized text over a serial port.
import serial
import sounddevice as sd
import sys
import wavio as wv
import whisper
from scipy.io.wavfile import write
# Serial link the recognized text is written to. pyserial opens the port
# on construction when a port name is supplied.
ser = serial.Serial(port="/dev/ttyACM0", baudrate=115200)

# Capture settings: sampling frequency (Hz) and length of each recording (s).
freq, duration = 44100, 5

# Whisper model shared by every pass of the loop in main(); loaded once here.
model = whisper.load_model("medium")
def main():
    """Record speech in a loop, transcribe it with Whisper, and send it over serial.

    Each pass records ``duration`` seconds of 2-channel audio at ``freq`` Hz,
    saves it as ``recording0.wav`` (via scipy) and ``recording1.wav`` (via
    wavio), transcribes ``recording1.wav`` with the module-level ``model``,
    prints the detected language and the text, and writes the text followed
    by a newline to the module-level serial port ``ser``.

    The loop stops when the transcript contains "おしまい" ("the end"); the
    process then exits with status 1, and that final transcript is not sent.
    The serial port is closed on every exit path.

    Raises:
        SystemExit: when the stop phrase is recognized.
    """
    # Cycle the port so the session starts from a freshly opened connection.
    ser.close()
    ser.open()
    try:
        while True:
            print("\n\nSay something:")
            # Start a non-blocking recording of duration * freq frames.
            recording = sd.rec(
                int(duration * freq),
                samplerate=freq, channels=2
            )
            # Block until the recording buffer has been filled.
            sd.wait()
            # Save the raw NumPy array as a WAV file at the sampling frequency.
            write("recording0.wav", freq, recording)
            # Save a second copy with 2-byte samples; this is the file Whisper reads.
            wv.write("recording1.wav", recording, freq, sampwidth=2)
            # Load the audio and pad/trim it to fit 30 seconds.
            audio = whisper.load_audio("recording1.wav")
            audio = whisper.pad_or_trim(audio)
            # Make the log-Mel spectrogram and move it to the model's device.
            mel = whisper.log_mel_spectrogram(audio).to(model.device)
            # Detect the spoken language and report the most probable one.
            _, probs = model.detect_language(mel)
            print(f"Detected language: {max(probs, key=probs.get)}")
            # Decode the audio with default options.
            options = whisper.DecodingOptions()
            result = whisper.decode(model, mel, options)
            # Recognized text.
            text = result.text
            print(text)
            if "おしまい" in text:
                # Stop phrase heard: exit without forwarding this transcript.
                # Status 1 is kept so existing callers see the same exit code.
                sys.exit(1)
            ser.write((text + "\n").encode())
            print("Sent with success!")
    finally:
        # Release the serial port however the loop ends (stop phrase,
        # Ctrl-C, or an error while recording or decoding).
        ser.close()
# Start the record/transcribe/send loop only when run as a script.
if __name__ == "__main__":
    main()