-
Notifications
You must be signed in to change notification settings - Fork 1
/
Wallet scrapper BSC.py
54 lines (43 loc) · 1.5 KB
/
Wallet scrapper BSC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 5 15:33:42 2021
@author: Noah Verner
"""
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Show every column when a DataFrame is printed.
pd.set_option('display.max_columns', None)

# Fetch the HTML of the BscScan accounts listing.
# NOTE(review): the URL looks like "page 1, page size 100" -- confirm.
url = 'https://bscscan.com/accounts/1?ps=100'
# The timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() turns an HTTP error response into an explicit failure
# instead of a confusing "table not found" further down.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, 'lxml')

# Locate the table that holds the account rows.
table = soup.find('table', {'class': 'table table-hover'})
if table is None:
    raise RuntimeError(
        'No table with class "table table-hover" found at {}'.format(url)
    )

# Column titles come from the <th> cells, stripped of surrounding whitespace.
headers = [th.text.strip() for th in table.find_all('th')]

# Collect the data rows, skipping the first <tr> (the header row) and any
# row that has no <td> cells, then build the DataFrame in a single step.
# Appending with df.loc[len(df)] would rebuild the frame on every row.
rows = []
for row in table.find_all('tr')[1:]:
    cells = [td.text.strip() for td in row.find_all('td')]
    if cells:
        rows.append(cells)
df = pd.DataFrame(rows, columns=headers)

# Convert "Txn Count" from text with thousands separators to float.
# Assigning the converted Series back gives the column a float64 dtype;
# writing into the array returned by .values leaves the column as object
# dtype and fails on a read-only array under pandas copy-on-write.
df['Txn Count'] = (
    df['Txn Count'].str.replace(',', '', regex=False).astype(float)
)

# Second DataFrame object built from df; this is what is passed to tester().
a = pd.DataFrame(df)
def tester(mejora):
mejora = mejora[(mejora['Txn Count']>200.0) & (mejora['Txn Count']<1000.0)]
return mejora.to_csv('test_Txn_Count.csv')
# Export the filtered rows to CSV.
tester(a)
# Print the dtype of every column, then print the rows whose Txn Count
# lies strictly between 200 and 1000.
print(df.dtypes)
txn_count = df['Txn Count']
in_range = (txn_count > 200.0) & (txn_count < 1000.0)
print(df[in_range])