-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
172 lines (153 loc) · 7.44 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
from classifier import KNearestNeighbours
import streamlit as st
from streamlit_lottie import st_lottie
import json
from bs4 import BeautifulSoup
import requests, io
import PIL.Image
from urllib.request import urlopen
import os
from googleapiclient.discovery import build
import pandas as pd
# Configure the page title, the page icon and the wide layout of the app.
st.set_page_config(page_title="Tv Series Recommender", page_icon="📺", layout="wide")
# Define a function that we can use to load lottie files from a link.
def load_lottieurl(url: str, timeout: float = 10):
    """Download a Lottie animation and return its decoded JSON.

    Args:
        url: Address of the Lottie JSON file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with a status other than 200, or the body is not
        valid JSON.
    """
    try:
        # Without a timeout a stalled server would block the whole app.
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network failures are reported the same way as a bad status code.
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page served with 200).
        return None
# Page header: animation in the narrow left column, title in the wide right one.
col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets10.lottiefiles.com/packages/lf20_khzniaya.json")
    # load_lottieurl returns None when the download fails; in that case the
    # animation is skipped instead of handing None to the renderer.
    if lottie is not None:
        st_lottie(lottie, width=300, height=300)
with col2:
    st.write("""
## TV Series Recommender
##### This app will help you to find the best TV series for you.""")
# Load the tv data and tv titles
df = pd.read_csv("imdb_tv_series.csv")
# The JSON files are only read, so they are opened read-only; "r+" would also
# demand write permission and fail on read-only deployments.
# data[i] is the feature vector of series i (its last element is shown as the
# IMDB rating); tv_titles[i] carries the title at index 0 and the link at index 2.
with open("./Data/tv_data.json", "r", encoding="utf-8") as f:
    data = json.load(f)
with open("./Data/tv_titles.json", "r", encoding="utf-8") as f:
    tv_titles = json.load(f)
# The YouTube API key is taken from the environment (None when unset).
youtube_api_key = os.environ.get("tv_rec_youtube_api")
youtube = build("youtube", "v3", developerKey=youtube_api_key)
def poster_fetcher(imdb_link):
    """Download the poster referenced by an IMDb page and display it.

    The poster URL is read from the page's ``og:image`` meta tag, the image
    is resized to 250x400 pixels and rendered with ``st.image``. When the
    page exposes no poster, a short notice is written instead.

    Args:
        imdb_link: URL of the IMDb page of the series.
    """
    # Display the poster
    url_data = requests.get(imdb_link, timeout=10).text
    s_data = BeautifulSoup(url_data, "html.parser")
    imdb_dp = s_data.find("meta", property="og:image")
    # find() returns None when the tag is absent; do not dereference it blindly.
    poster_link = imdb_dp.attrs.get("content") if imdb_dp is not None else None
    if not poster_link:
        st.write("Poster not available.")
        return
    # The context manager closes the HTTP response once the bytes are read.
    with urlopen(poster_link, timeout=10) as u:
        raw_data = u.read()
    image = PIL.Image.open(io.BytesIO(raw_data))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = image.resize((250, 400), PIL.Image.LANCZOS)
    st.image(image)
def _meta_content(soup, prop):
    """Return the ``content`` of the ``<meta property=prop>`` tag, or None."""
    tag = soup.find("meta", property=prop)
    if tag is None:
        return None
    return tag.attrs.get("content")


def get_tv_info(imdb_link):
    """Collect the details shown for one recommended series.

    The IMDb page is scraped for the description, title, plot and vote
    count; genres and runtime come from the module-level ``df``; the trailer
    comes from a YouTube search. Any piece that cannot be found is replaced
    by ``"Not Found"`` instead of raising.

    Args:
        imdb_link: URL of the IMDb page of the series.

    Returns:
        A tuple ``(tv_director, tv_cast, tv_story, tv_rating, tv_year,
        trailer_link, tv_genres, tv_runtime)``. ``trailer_link`` is a list
        of YouTube watch URLs and may be empty.
    """
    not_found = "Not Found"

    # Display the information the tv series
    url_data = requests.get(imdb_link, timeout=10).text
    s_data = BeautifulSoup(url_data, "html.parser")

    # The first sentence of og:description is used as the director line and
    # the second one ("With ...") as the cast line.
    description = _meta_content(s_data, "og:description")
    if description is None:
        tv_director = not_found
        tv_cast = not_found
    else:
        sentences = str(description).split(".")
        tv_director = sentences[0]
        if len(sentences) > 1:
            tv_cast = sentences[1].replace("With", "Cast: ").strip()
        else:
            tv_cast = not_found

    # The year is the text inside the first pair of parentheses of the title.
    tv_title = _meta_content(s_data, "og:title")
    if tv_title and "(" in tv_title:
        tv_year = tv_title.split("(")[1].split(")")[0]
    else:
        tv_year = not_found

    plot = s_data.find("span", {"data-testid": "plot-xl"})
    plot_text = plot.text if plot is not None else not_found
    tv_story = "Plot Summary: " + plot_text + "."

    # get total votes from div class = "sc-7ab21ed2-3 dPVcnq"
    # NOTE(review): this class name looks auto-generated and may change with
    # the page markup, hence the fallback when it is not found.
    votes = s_data.find("div", {"class": "sc-7ab21ed2-3 dPVcnq"})
    rating = votes.text if votes is not None else not_found

    # get genres and runtime from df if imdb_link are matching
    # (one lookup serves both columns).
    matches = df.loc[df["IMDB_Link"] == imdb_link]
    if matches.empty:
        tv_genres = not_found
        tv_runtime = not_found
    else:
        tv_genres = matches["Genres"].values[0]
        tv_runtime = matches["Runtime_minutes"].values[0]

    trailer_link = []
    if tv_title:
        # type="video" keeps channels and playlists out of the results; those
        # carry no "videoId" and cannot be turned into a watch URL.
        request = youtube.search().list(
            part="snippet",
            channelType="any",
            type="video",
            maxResults=1,
            q=f"{tv_title} Official Trailer",
        )
        response = request.execute()
        trailer_link = [
            f"https://www.youtube.com/watch?v={video['id']['videoId']}"
            for video in response.get("items", [])
            if "videoId" in video.get("id", {})
        ]

    tv_rating = "Total Rating count: " + rating
    return tv_director, tv_cast, tv_story, tv_rating, tv_year, trailer_link, tv_genres, tv_runtime
def knn_tv_recommender(test_point, k):
    """Return the ``k`` series closest to ``test_point``.

    Args:
        test_point: Feature vector to search neighbours for.
        k: Number of neighbours requested from the classifier.

    Returns:
        A list with one ``[title, imdb_link, rating]`` entry per index
        reported by the classifier, in the classifier's order.
    """
    # The classifier expects a target per sample; the values are irrelevant
    # here, so every sample gets the same dummy label.
    dummy_labels = [0] * len(tv_titles)

    knn = KNearestNeighbours(data, dummy_labels, test_point, k=k)
    knn.fit()

    # Title and IMDb link come from tv_titles, the rating is the last
    # feature of the matching data row.
    table = [
        [tv_titles[idx][0], tv_titles[idx][2], data[idx][-1]]
        for idx in knn.indices
    ]
    print(table)
    return table
def _format_runtime(tv_runtime):
    """Return the markdown runtime line, tolerating non-numeric runtimes."""
    try:
        minutes = float(tv_runtime)
    except (TypeError, ValueError):
        # get_tv_info returns the string "Not Found" for unknown series; a
        # ":.0f" format would raise ValueError on it.
        return f"**Runtime: {tv_runtime}.**"
    if pd.isna(minutes):
        return "**Runtime: Not Found.**"
    return f"**Runtime: {minutes:.0f} minutes.**"


def _show_recommendations(table):
    """Render one two-column entry (details | trailer) per recommendation."""
    for position, (tv_serie, link, ratings) in enumerate(table, start=1):
        director, cast, story, total_rating, tv_year, trailer_link, tv_genres, tv_runtime = get_tv_info(link)
        info_col, trailer_col = st.columns(2)
        with info_col:
            st.markdown(f"(**{position}**) [**{tv_serie}**]({link}) **({tv_year})**")
            poster_fetcher(link)
            st.markdown(f"**{director}**")
            st.markdown(f"**{cast}**")
            st.markdown(f"**{story}**")
            st.markdown(_format_runtime(tv_runtime))
            st.markdown(f"**Genres: {tv_genres} .**")
            st.markdown(f"**{total_rating}**")
            st.markdown(f"**IMDB Rating: {str(ratings)} ⭐**")
        with trailer_col:
            # The trailer search may come back empty.
            if trailer_link:
                st.video(trailer_link[0])
            else:
                st.write("Trailer not available.")


def run_recommender():
    """Build the recommender UI and show recommendations on request.

    The user picks either a series or a set of genres plus an IMDB score;
    once "Show recommendations" is pressed, the nearest series are looked up
    with ``knn_tv_recommender`` and rendered one by one.
    """
    genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Family',
              'Fantasy', 'Film-Noir', 'Game-Show', 'History', 'Horror', 'Music', 'Musical', 'Mystery', 'News',
              'Reality-TV', 'Romance', 'Sci-Fi', 'Short', 'Sport', 'Thriller', 'War', 'Western']
    tv_series = [tv_title[0] for tv_title in tv_titles]
    category = ["Select a recommendation category", "Tv Series based", "Genre based"]
    category_option = st.selectbox("Select a recommendation type", category)
    if category_option == category[0]:
        st.error("Please select a recommendation type")
    elif category_option == category[1]:
        select_tv = st.selectbox("Please select a Tv Series:", tv_series)
        number_of_rec = st.slider("How many recommendations do you want?", min_value=5, max_value=20, step=1, value=5)
        # The neighbour search only runs once the button is pressed.
        if st.button("Show recommendations"):
            test_points = data[tv_series.index(select_tv)]
            # One extra neighbour is requested and the first one dropped:
            # presumably the closest match is the selected series itself.
            table = knn_tv_recommender(test_points, number_of_rec + 1)
            table.pop(0)
            _show_recommendations(table)
    elif category_option == category[2]:
        select_genre = st.multiselect("Please select a genre:", genres)
        if select_genre:
            imdb_score = st.slider("Choose an IMDB score:", min_value=1, max_value=10, step=1, value=7)
            number_of_rec = st.slider("How many recommendations do you want?", min_value=5, max_value=20, step=1, value=5)
            if st.button("Show recommendations"):
                # One flag per known genre, followed by the desired score.
                test_point = [1 if genre in select_genre else 0 for genre in genres]
                test_point.append(imdb_score)
                table = knn_tv_recommender(test_point, number_of_rec)
                _show_recommendations(table)
# Entry point: build the recommender UI.
run_recommender()