-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
247 lines (213 loc) · 9.19 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import streamlit as st
import streamlit.components.v1 as components
import os
import tempfile
from moviepy.editor import VideoFileClip
from openai import OpenAI
from dotenv import load_dotenv
import time
import re
import mimetypes
import base64
# Load environment variables from .env file
load_dotenv()

# Initialize the OpenAI client only when a key is actually configured.
# Constructing OpenAI() with no key raises immediately, which would crash the
# script at import time and prevent main() from ever showing its friendlier
# "OpenAI API key is not set" message. main() returns before any API call is
# made when the key is missing, so a None client is never used.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) if os.getenv("OPENAI_API_KEY") else None

# Set up session state for rate limiting and storing results.
# Each entry is seeded only once so values survive Streamlit reruns.
if 'last_request_time' not in st.session_state:
    st.session_state.last_request_time = 0  # time.time() of the last accepted request
if 'transcript' not in st.session_state:
    st.session_state.transcript = None  # text of the latest transcription, if any
if 'translations' not in st.session_state:
    st.session_state.translations = {}  # language name -> translated text
if 'show_app' not in st.session_state:
    st.session_state.show_app = False  # flipped by the landing page's "Get Started" button
if 'duration' not in st.session_state:
    st.session_state.duration = 0  # duration of the last processed video
def get_base64_of_bin_file(bin_file):
    """Return the contents of the file at *bin_file* as a base64 text string."""
    with open(bin_file, 'rb') as handle:
        encoded = base64.b64encode(handle.read())
    return encoded.decode()
def set_background(png_file):
    """Inject CSS that uses the image at *png_file* as the app background.

    The image is embedded as a base64 ``data:`` URL in a ``<style>`` tag
    targeting the ``.stApp`` selector.
    """
    css_template = '''
<style>
.stApp {
background-image: url("data:image/png;base64,%s");
background-size: cover;
}
</style>
'''
    encoded_image = get_base64_of_bin_file(png_file)
    st.markdown(css_template % encoded_image, unsafe_allow_html=True)
def local_css(file_name):
    """Read the stylesheet at *file_name* and inject it into the page in a ``<style>`` tag."""
    with open(file_name, "r") as css_file:
        css_rules = css_file.read()
        st.markdown(f'<style>{css_rules}</style>', unsafe_allow_html=True)
def is_valid_video_file(file):
    """Return True if the file's name maps to an accepted video MIME type.

    The check is based only on the file name (via ``mimetypes.guess_type``);
    the file contents are not inspected.

    Args:
        file: Any object with a ``name`` attribute holding the file name.

    Returns:
        bool: True for MP4, AVI and QuickTime (MOV) names, False otherwise.
    """
    allowed_mime_types = {
        'video/mp4',
        'video/avi',
        # The standard mimetypes table reports ".avi" as video/x-msvideo;
        # without this entry every AVI upload was rejected.
        'video/x-msvideo',
        'video/quicktime',
    }
    file_type, _ = mimetypes.guess_type(file.name)
    return file_type in allowed_mime_types
def sanitize_input(input_string):
    """Return *input_string* with everything except ASCII letters and whitespace removed."""
    disallowed = re.compile(r'[^a-zA-Z\s]')
    return disallowed.sub('', input_string)
def rate_limit():
    """Allow at most one request per 60 seconds for the current session.

    Returns:
        bool: True when the request may proceed (the session's last-request
        timestamp is updated); False when the cooldown is still active (an
        error message is shown and the timestamp is left unchanged).
    """
    now = time.time()
    elapsed = now - st.session_state.last_request_time
    if elapsed >= 60:  # 1 minute cooldown has passed
        st.session_state.last_request_time = now
        return True
    st.error("Please wait before making another request.")
    return False
def convert_video_to_audio(uploaded_file):
    """Extract the audio track of an uploaded video into a temporary MP3 file.

    The upload is first copied to a temporary video file, which is always
    removed before returning. On success the video's duration is stored in
    ``st.session_state.duration``.

    Args:
        uploaded_file: File-like object whose ``read()`` returns the video bytes.

    Returns:
        str | None: Path of the temporary MP3 file (the caller is responsible
        for deleting it), or None if processing failed; in that case an error
        is shown and no temporary file is left behind.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
        temp_video.write(uploaded_file.read())
        temp_video_path = temp_video.name

    # Only reserve a path here and close our handle right away, so the
    # exporter is not writing to a file we still hold open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        with VideoFileClip(temp_video_path) as video:
            if video.audio is None:
                # Give a clear message instead of an AttributeError on None.
                raise ValueError("the video has no audio track")
            video.audio.write_audiofile(temp_audio_path)
            st.session_state.duration = video.duration  # Store duration in session state
        return temp_audio_path
    except Exception as e:
        st.error(f"Error processing video: {str(e)}")
        # The caller never learns this path on failure, so remove it here
        # rather than leaking a temp file per failed upload.
        try:
            os.unlink(temp_audio_path)
        except OSError:
            pass
        return None
    finally:
        os.unlink(temp_video_path)
def transcribe_audio(audio_file):
    """Send the audio file at *audio_file* to the ``whisper-1`` model.

    Returns:
        The ``text`` of the transcription response, or None if any error
        occurred (the error is shown in the UI).
    """
    try:
        with open(audio_file, "rb") as audio_stream:
            result = client.audio.transcriptions.create(model="whisper-1", file=audio_stream)
            return result.text
    except Exception as err:
        st.error(f"Error transcribing audio: {str(err)}")
        return None
def translate_text(text, target_language):
    """Translate *text* into *target_language* using the ``gpt-3.5-turbo`` chat model.

    The language name is passed through ``sanitize_input`` before being placed
    in the system prompt.

    Returns:
        The content of the first response choice, or None if any error
        occurred (the error is shown in the UI).
    """
    try:
        language_name = sanitize_input(target_language)
        system_prompt = f"You are a translator. Translate the following text to {language_name}."
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ]
        completion = client.chat.completions.create(model="gpt-3.5-turbo", messages=conversation)
        return completion.choices[0].message.content
    except Exception as err:
        st.error(f"Error translating text: {str(err)}")
        return None
def text_to_srt(text, duration):
    """Convert plain text into SRT subtitle content with estimated timings.

    The text is split into sentences on ``'. '``. Timings are an estimate:
    each sentence gets up to 5 seconds, shortened evenly when necessary so
    that all cues fit inside the video instead of piling up as zero-length
    cues at the end.

    Args:
        text: Text to convert.
        duration: Video length in seconds. A missing or non-positive value is
            treated as unknown, and every cue gets 5 seconds.

    Returns:
        str: SRT-formatted cues, or an empty string when *text* contains no
        sentences.
    """
    # Sentence-ending marks after which no extra '.' should be appended.
    terminators = ('.', '!', '?', '。', '！', '？')

    # Drop empty fragments (empty input, trailing separators, stray dots).
    sentences = [part.strip() for part in text.split('. ')]
    sentences = [sentence for sentence in sentences if sentence.strip(' .')]
    if not sentences:
        return ""

    if duration and duration > 0:
        cue_length = min(5, duration / len(sentences))
    else:
        cue_length = 5

    cues = []
    for index, sentence in enumerate(sentences, 1):
        # split() removed the period of every sentence but the last, so only
        # restore it where it is actually missing.
        if not sentence.endswith(terminators):
            sentence += '.'
        start_time = (index - 1) * cue_length
        end_time = index * cue_length
        cues.append(
            f"{index}\n"
            f"{format_time(start_time)} --> {format_time(end_time)}\n"
            f"{sentence}\n\n"
        )
    return "".join(cues)
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        str: The timestamp, rounded to the nearest millisecond.
    """
    # Work in whole milliseconds. The previous version truncated the seconds
    # to an int before extracting the fraction, so milliseconds were always 000.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
def show_landing_page():
    """Render the landing page: title, embedded demo video, feature cards and a start button.

    Clicking "Get Started" sets ``st.session_state.show_app`` to True.
    """
    hero_html = """
<div class="landing-container">
<h1 class="main-title">TranscribeAI</h1>
<p class="subtitle">Transcribe and translate your videos with AI technology.</p>
</div>
"""
    video_html = """
<div class="video-container">
<iframe width="560" height="315" src="https://www.youtube.com/embed/-tM4ouirmCU" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
</div>
"""
    features_html = """
<div class="landing-container">
<h2 class="section-title">Features</h2>
<div class="features-container">
<div class="feature-card">
<h3>AI-Powered Transcription</h3>
<p>Accurate transcription using OpenAI's Whisper model</p>
</div>
<div class="feature-card">
<h3>Multi-Language Translation</h3>
<p>Translate your content into multiple languages</p>
</div>
<div class="feature-card">
<h3>Secure File Handling</h3>
<p>Secure file upload and processing</p>
</div>
</div>
</div>
"""
    st.markdown(hero_html, unsafe_allow_html=True)
    components.html(video_html, height=350)
    st.markdown(features_html, unsafe_allow_html=True)

    started = st.button("Get Started", key="get_started")
    if started:
        st.session_state.show_app = True
def main() -> None:
    """Render the whole page: landing section, then the processing workflow.

    The workflow (language selection, upload, processing, results) is only
    rendered once ``st.session_state.show_app`` is set, which the landing
    page does when "Get Started" is clicked. Results are kept in session
    state so they remain visible across reruns.

    NOTE(review): the nesting below was reconstructed from a listing whose
    indentation had been flattened — confirm it against the original file.
    """
    # Check if API key is set
    if not os.getenv("OPENAI_API_KEY"):
        st.error("OpenAI API key is not set. Please check your .env file.")
        return
    # Set background image
    set_background('background.png')
    # Load custom CSS
    local_css("style.css")
    show_landing_page()
    if st.session_state.show_app:
        st.title("AI-Supported Video Processor")
        # Language selection
        languages = ["Turkish", "English", "Russian", "French", "Spanish", "German", "Italian", "Japanese", "Chinese"]
        selected_languages = st.multiselect("Select target languages for translation:", languages)
        # Video file upload
        uploaded_file = st.file_uploader("Choose a video file (max 200MB)", type=["mp4", "avi", "mov"])
        if uploaded_file is not None:
            # Check file size and type
            if uploaded_file.size > 200 * 1024 * 1024:
                st.error("File size exceeds 200MB limit. Please upload a smaller file.")
            elif not is_valid_video_file(uploaded_file):
                st.error("Invalid file type. Please upload a valid video file.")
            else:
                st.video(uploaded_file)
                # rate_limit() is only evaluated when the button was clicked
                # (short-circuit), so merely rendering the page does not
                # consume the cooldown.
                if st.button("Process Video") and rate_limit():
                    with st.spinner("Processing video..."):
                        # Convert video to audio
                        audio_file = convert_video_to_audio(uploaded_file)
                        if audio_file:
                            # Transcribe audio
                            st.session_state.transcript = transcribe_audio(audio_file)
                            if st.session_state.transcript:
                                # Start from an empty dict so translations
                                # from a previous run are not shown.
                                st.session_state.translations = {}
                                for lang in selected_languages:
                                    translation = translate_text(st.session_state.transcript, lang)
                                    # Failed translations (None) are skipped.
                                    if translation:
                                        st.session_state.translations[lang] = translation
                            # Clean up temporary audio file
                            os.unlink(audio_file)
        # Display results (outside of the file upload block to persist)
        if st.session_state.transcript:
            st.subheader("Original Transcript")
            st.write(st.session_state.transcript)
            # Provide download option for transcript in SRT format
            srt_transcript = text_to_srt(st.session_state.transcript, st.session_state.duration)
            st.download_button(
                label="Download Transcript (SRT)",
                data=srt_transcript,
                file_name="transcript.srt",
                mime="text/plain"
            )
            # Display translations
            for lang, translation in st.session_state.translations.items():
                st.subheader(f"{lang} Translation")
                st.write(translation)
                # Provide download option in SRT format
                srt_translation = text_to_srt(translation, st.session_state.duration)
                st.download_button(
                    label=f"Download {lang} Translation (SRT)",
                    data=srt_translation,
                    file_name=f"{sanitize_input(lang.lower())}_translation.srt",
                    mime="text/plain"
                )
# Script entry point: build the page only when this file is executed as the
# main module; importing it from elsewhere leaves main() uncalled.
if not __name__ != "__main__":
    main()