Data analysis study of my favorite sitcom, The Office (US).
First, we will download the libraries we will use.
!pip install pytrends
import holoviews as hv
from wordcloud import WordCloud
from pytrends.request import TrendReq
import plotly.express as px
import pandasql as ps
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os
table = hv.Table(dataset)
table.opts(height=250,width=1200)
table = hv.Table(dataset.describe().T.reset_index())
table.opts(height=150,width=700)
fig = px.choropleth(pytrends.interest_by_region(resolution='COUNTRY',inc_geo_code=True).reset_index(),
color="The Office",
color_continuous_scale='Blues',
locations = "geoName",
locationmode="country names",
projection="natural earth")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},dragmode=False, coloraxis_showscale=False)
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
print("Figure: Google search trend for The Office")
fig = px.line(pytrends.interest_over_time().iloc[:,:1].reset_index(),
x='date',
y='The Office')
fig.update_layout(legend_title_text='',paper_bgcolor="white",plot_bgcolor='rgba(0,0,0,0)')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='silver')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='silver')
fig.show()
print("Figure: Google search trend for The Office")
fig = px.pie(pytrends.related_queries()["The Office"]['top'], values='value', names='query',color_discrete_sequence=px.colors.qualitative.G10)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(legend_title_text='Related Queries')
fig.show()
text = ""
for words in dataset["About"].str.split(" "):
for word in words:
text = text + ' ' + word
wordcloud = WordCloud(width=900, height=400, background_color="#0f4c5c").generate(text)
plt.figure(figsize=(20,10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.margins(x=0, y=0)
plt.show()
df = dataset[["Season","EpisodeTitle","Ratings"]].sort_values("Ratings",ascending=False).head(20).reset_index(drop=True)
table = hv.Table(df)
table.opts(height=530,width=400)