-
Notifications
You must be signed in to change notification settings - Fork 1
/
scraper.py
52 lines (43 loc) · 1.51 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import requests, csv, json, argparse
from bs4 import BeautifulSoup
# Base name of the export file; to_csv and to_json append ".csv" / ".json" to it.
file_name = "utd_pmi"
def get_content():
    """Fetch the table page and return the rows of its table body.

    Returns:
        The elements matched by the CSS selector ``table tbody tr`` in the
        page served at ``http://ayodonor.pmi.or.id/table.php``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "http://ayodonor.pmi.or.id/table.php"
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    page = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of exporting rows parsed from an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")
    return soup.select("table tbody tr")
def to_csv(tableRow, path=None):
    """Write the cell text of every table row to a CSV file.

    One CSV record is written per element of ``tableRow``; a row without any
    ``td`` cell produces an empty record.

    Args:
        tableRow: Iterable of row elements. Each must support ``select("td")``
            returning cells that expose a ``text`` attribute.
        path: Destination file. Defaults to ``file_name + ".csv"``.
    """
    if path is None:
        path = file_name + ".csv"
    # The csv module requires newline="" so the writer's own "\r\n" terminator
    # is not translated a second time (which yields blank lines on Windows).
    # The explicit encoding keeps the output independent of the locale.
    with open(path, mode="w", newline="", encoding="utf-8") as utd_pmi_file:
        utd_pmi_writer = csv.writer(
            utd_pmi_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        for item in tableRow:
            utd_pmi_writer.writerow([col.text for col in item.select("td")])
def to_json(tableRow, path=None):
    """Write the table rows to a JSON file as a list of objects.

    Each row becomes an object whose keys ``no``, ``nama_utd``, ``provinsi``,
    ``alamat``, ``telp`` and ``fax`` hold the text of the row's first six
    ``td`` cells, in that order. The list is serialized with ``indent=4``.

    Args:
        tableRow: Iterable of row elements. Each must support ``select("td")``
            returning cells that expose a ``text`` attribute.
        path: Destination file. Defaults to ``file_name + ".json"``.
    """
    keys = ("no", "nama_utd", "provinsi", "alamat", "telp", "fax")
    jsons = []
    for item in tableRow:
        column = item.select("td")
        # Rows with fewer cells than fields cannot be mapped onto the keys;
        # indexing them used to abort the whole export with IndexError.
        if len(column) < len(keys):
            continue
        # zip stops after the last key, so any extra cells are ignored.
        jsons.append({key: cell.text for key, cell in zip(keys, column)})
    json_result = json.dumps(jsons, indent=4)
    if path is None:
        path = file_name + ".json"
    with open(path, "w", encoding="utf-8") as utd_pmi_file:
        utd_pmi_file.write(json_result)
def main(argv=None):
    """Parse the command line and export the scraped table.

    The single positional argument ``type`` selects the exporter: ``csv``
    calls ``to_csv`` and ``json`` calls ``to_json``, each on the rows returned
    by ``get_content``.

    Args:
        argv: Argument list to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 when ``type`` is neither ``csv`` nor
            ``json`` (or the arguments cannot be parsed).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("type", help="export data to type csv or json", type=str)
    args = parser.parse_args(argv)
    if args.type == "csv":
        to_csv(get_content())
    elif args.type == "json":
        to_json(get_content())
    else:
        # parser.error writes the usage line and this message to stderr and
        # exits with status 2, so callers can detect the failure.
        parser.error("invalid export type, choose between csv or json")


# Run the CLI only when executed as a script, so importing this module does
# not parse sys.argv or trigger a download.
if __name__ == "__main__":
    main()