-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstt_interface.py
88 lines (67 loc) · 2.67 KB
/
stt_interface.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import pyaudio
import whisper
import numpy as np
import warnings
import streamlit as st
st.set_page_config(layout='wide')
st.header("Speech to Text", divider=True)
def write_text(txt):
st.write(txt)
#start listning
def start():
write_text("Listning..")
#stop listning
def stop():
write_text("Stopped")
# Suppress specific warning
warnings.filterwarnings("ignore", message="FP16 is not supported on CPU; using FP32 instead")
# Load the base Whisper model
whisper_model = whisper.load_model("base")
# Audio recording parameters
FORMAT = pyaudio.paInt16 # Audio format
CHANNELS = 1 # Mono audio
RATE = 16000 # Sample rate
CHUNK = 1024 # Buffer size
# Initialize PyAudio
audio = pyaudio.PyAudio()
# Start recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK)
# print("Recording...")
# Buffer to hold audio data
buffer = []
#bottom 3 columns
left, middle, right = st.columns([10,1,1], vertical_alignment="bottom")
start_button = middle.button("Start", on_click=start)
stop_button = right.button("Stop", on_click=stop)
txtbox = st.empty()
if start_button:
try:
with open("transcriptions.txt", "a", encoding='utf-8') as file: # Open file in append mode
while not stop_button:
# Read audio data from the microphone
data = stream.read(CHUNK)
audio_data = np.frombuffer(data, dtype=np.int16)
# Append the chunk to the buffer
buffer.append(audio_data)
# Accumulate approximately 5 seconds of audio before transcribing
if len(buffer) >= int(RATE / CHUNK * 5):
# Concatenate buffer into a single array
audio_buffer = np.concatenate(buffer, axis=0)
# Convert audio data to Whisper format
audio_buffer = audio_buffer.astype(np.float32) / 32768.0
# Transcribe audio data
result = whisper_model.transcribe(audio_buffer)
transcription = result['text']
txtbox.markdown(f"**Transcription:** {transcription}")
# Save transcription to file
file.write(transcription + "\n")
file.flush() # Flush the buffer to ensure immediate write
# Clear the buffer
buffer = []
except KeyboardInterrupt:
print("Recording stopped")
stream.stop_stream()
stream.close()
audio.terminate()