-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathyoutubeScrapAnalysis.py
44 lines (37 loc) · 1.16 KB
/
youtubeScrapAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import requests
from bs4 import BeautifulSoup
# Fetch the Socialblade YouTube landing page and collect its table rows.
url = "https://socialblade.com/youtube/"
# A timeout keeps the script from hanging forever on a stalled connection.
html = requests.get(url, timeout=30)
# Fail here, with the HTTP status, rather than later on an empty result.
html.raise_for_status()
print(html.text[:500])  # quick peek at the start of the raw markup
# Name the parser explicitly: without it bs4 emits a warning and uses
# whichever parser happens to be installed, so results can vary by machine.
soup = BeautifulSoup(html.text, "html.parser")
# Every <div class="table-body"> is treated as one table row further down.
body = soup.find_all("div", {"class": "table-body"})
def prepare_table_row(row):
    """Convert one scraped table row into a dict of typed fields.

    ``row`` is iterated directly. Every item other than a whitespace-only
    string must expose a ``.text`` attribute. The first six remaining cells
    are read positionally as rank, grade, channel, videos, subscribers and
    views.

    Returns a dict with ``rank`` as int, ``grade`` and ``channel`` as str,
    and ``videos``, ``subscribers`` and ``views`` as float with the comma
    separators removed.

    Raises IndexError if fewer than six cells remain, and ValueError if a
    numeric cell cannot be parsed.
    """
    # Indented markup leaves whitespace-only strings between the cells (a
    # bare newline, a newline followed by spaces, ...). Drop all of them, not
    # only an exact newline, so the positional indexes below stay aligned
    # with the real columns.
    cells = [
        child.text
        for child in row
        if not (isinstance(child, str) and not child.strip())
    ]

    def as_number(text):
        # Counts are written with comma separators, e.g. "1,234".
        return float(text.replace(",", ""))

    return dict(
        rank=int(cells[0]),
        grade=str(cells[1]),
        channel=str(cells[2]),
        videos=as_number(cells[3]),
        subscribers=as_number(cells[4]),
        views=as_number(cells[5]),
    )
# Stop with a clear message when the page yielded no rows (markup changed or
# the request was blocked) instead of an opaque IndexError on body[0].
if not body:
    raise RuntimeError("no 'table-body' rows found at " + url)
print(prepare_table_row(body[0]))  # sanity-check the parser on the first row

# One dict per row: the parsed fields plus the row's link, when it has one.
data = []
for tr in body:
    datum = prepare_table_row(tr)
    links = tr.find_all("a", href=True)
    if links:
        # When a row holds several links, the last one is the one kept.
        datum["url"] = links[-1]["href"]
    data.append(datum)
df = pd.DataFrame(data)
print(df)
# Plot the scraped results, one bar per channel.
channel_names = df["channel"]

# First figure: subscriber count of each channel.
ax = sns.barplot(x=df["subscribers"], y=channel_names)
plt.show()

# Second figure: view count of each channel.
ax1 = sns.barplot(x=df["views"], y=channel_names)
plt.show()