"""app.py: Flask service for YouTube transcripts.

Serves an index page, returns timestamped transcripts for a video ID, and
summarises a transcript with a Groq-hosted LLM.
"""
import os
from flask import Flask, request, jsonify, render_template
from youtube_transcript_api import YouTubeTranscriptApi
from dotenv import load_dotenv
from googleapiclient.discovery import build
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.schema import Document
import math
import markdown
import logging
from flask_cors import CORS
# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load environment variables from a .env file. This runs before the
# os.getenv() calls below so keys defined in that file are visible to them.
load_dotenv()
app = Flask(__name__)
# Enable CORS on the app. No options are passed, so flask-cors defaults apply.
# NOTE(review): confirm the default origin policy is what this service wants.
CORS(app)
# Read the API keys from the environment. os.getenv returns None when a
# variable is unset, and nothing here validates the values.
groq_api_key = os.getenv('GROQ_API_KEY')
youtube_api_key = os.getenv('YOUTUBE_API_KEY')
# Build the YouTube Data API v3 client used by get_video_title().
youtube = build('youtube', 'v3', developerKey=youtube_api_key)
# Create the Groq chat model used by the summarisation chain.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="llama-3.3-70b-versatile")
# Prompt template for the summarisation chain. The single {context}
# placeholder is filled with the document(s) passed to the chain; the caller
# in summarize_transcript() puts both the video title and the transcript in it.
# NOTE(review): the second "<context>" below looks like it was meant to be a
# closing "</context>" tag - confirm before changing, since this text is sent
# to the model verbatim.
prompt = ChatPromptTemplate.from_template(
"""
Summarize the provided youtube transcript. Write a short summary and then Mention all key points in a clear and concise manner in bullet points.
Mention the youtube title and then summarise it.
<context>
{context}
<context>
"""
)
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
# Fetch video metadata including the title
def get_video_title(video_id):
    """Look up a video's title through the module-level YouTube API client.

    This is a best-effort lookup: it never raises, so callers can fall back
    to a placeholder title.

    Args:
        video_id: ID of the video whose snippet is requested.

    Returns:
        The title string, or ``None`` when the API call fails or the
        response contains no usable item.
    """
    try:
        video_response = youtube.videos().list(
            part="snippet",
            id=video_id,
        ).execute()
    except Exception:
        # Deliberately broad: any client/transport failure degrades to
        # "no title", but it is logged with a traceback instead of vanishing.
        logger.exception("Failed to fetch title for video ID: %s", video_id)
        return None

    try:
        return video_response['items'][0]['snippet']['title']
    except (KeyError, IndexError, TypeError):
        # An empty 'items' list or a missing field lands here, e.g. when the
        # requested ID matches no video.
        logger.warning("No title in API response for video ID: %s", video_id)
        return None
@app.route('/api/transcript/<video_id>', methods=['GET'])
def get_transcript(video_id):
    """Return the timestamped transcript and title for a video as JSON.

    Args:
        video_id: Video ID taken from the URL path.

    Returns:
        A JSON response with ``title`` and ``transcript`` keys on success,
        or ``({"error": <message>}, 500)`` when anything fails.
    """
    try:
        logger.info("Fetching transcript for video ID: %s", video_id)
        # NOTE(review): newer youtube-transcript-api releases replace this
        # static call with an instance-based fetch() API - confirm against
        # the version this project pins.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        formatted_transcript = format_transcript_with_timestamps(transcript)

        logger.info("Fetching video title")
        video_title = get_video_title(video_id)
        if not video_title:
            # The title is optional; a failed lookup must not fail the request.
            video_title = "Unknown Title"
            logger.warning("Could not fetch video title")

        return jsonify({
            "title": video_title,
            "transcript": formatted_transcript,
        })
    except Exception as e:
        # Top-level boundary for this route: log with the traceback so the
        # failure can be diagnosed, then report it to the client.
        logger.exception("Error in get_transcript: %s", e)
        return jsonify({"error": str(e)}), 500
@app.route('/api/summarize', methods=['POST'])
def summarize_transcript():
    """Summarise a transcript with the LLM and return Markdown plus HTML.

    Expects a JSON object body with non-empty ``text`` (the transcript) and
    ``title`` (the video title).

    Returns:
        A JSON response with ``summary_markdown`` and ``summary_html`` on
        success; ``({"error": ...}, 400)`` when the body is missing,
        malformed, or lacks either field; ``({"error": ...}, 500)`` when the
        model returns nothing or any other step fails.
    """
    try:
        logger.info("Starting summarisation request")

        # silent=True returns None for a missing, non-JSON, or malformed body
        # instead of raising, so bad requests reach the 400 below rather than
        # the catch-all 500. A non-object JSON body (list, string, null) is
        # treated the same way.
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}

        transcript_text = payload.get('text')
        video_title = payload.get('title')
        if not transcript_text or not video_title:
            return jsonify({'error': 'No transcript or title provided'}), 400

        # The prompt has a single {context} slot, so the title and the
        # transcript are merged into one document.
        full_context = f"Youtube Video Title: {video_title}\n\nTranscript: {transcript_text}"
        document = Document(page_content=full_context)

        # Build the summarisation chain and run it on the document (the
        # chain takes a list of documents).
        document_chain = create_stuff_documents_chain(llm, prompt)
        response = document_chain.invoke({'context': [document]})

        # Treat an empty or whitespace-only answer as a failure.
        if not response or not response.strip():
            return jsonify({'error': 'No summary available'}), 500

        # Convert the model's Markdown answer to HTML.
        html_summary = markdown.markdown(response, extensions=['extra', 'sane_lists'])

        return jsonify({
            'summary_markdown': response,  # original Markdown
            'summary_html': html_summary,  # HTML conversion
        })
    except Exception as e:
        # Top-level boundary for this route: keep the traceback in the log.
        logger.exception("Error in summarize_transcript: %s", e)
        return jsonify({'error': str(e)}), 500
def format_transcript_with_timestamps(transcript):
    """Join transcript entries into text with periodic timestamp markers.

    Each entry contributes its ``text`` followed by a newline. Before an
    entry that starts at least 30 seconds after the last marker (initially
    0), a ``[HH:MM:SS]`` line is inserted, snapped down to a multiple of 30
    seconds.

    Args:
        transcript: Iterable of mappings, each with a numeric ``start``
            (seconds) and a ``text`` value. Other keys are ignored.

    Returns:
        The formatted transcript as one string; ``""`` for an empty input.
    """
    pieces = []
    current_time = 0
    for entry in transcript:
        start_time = entry['start']
        # Add a timestamp every 30 seconds, aligned to the 30-second grid
        # rather than to the entry's exact start.
        if start_time >= current_time + 30:
            current_time = math.floor(start_time / 30) * 30
            pieces.append(f"[{format_time(current_time)}]\n")
        pieces.append(f"{entry['text']}\n")
    return ''.join(pieces)
def format_time(seconds):
    """Render a number of seconds as a zero-padded ``HH:MM:SS`` string.

    The value is split with ``divmod`` first and each part is then truncated
    with ``int()``, so fractional seconds are dropped from the output.
    """
    total_minutes, secs = divmod(seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return ":".join(f"{int(part):02d}" for part in (hrs, mins, secs))
if __name__ == '__main__':
    # Listen on every network interface. No port is passed, so Flask's
    # default port is used.
    bind_host = '0.0.0.0'
    app.run(host=bind_host)