-
Notifications
You must be signed in to change notification settings - Fork 3
/
rawdata.py
88 lines (79 loc) · 2.78 KB
/
rawdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import datetime
import io
import pandas as pd
import requests
import database
class Rawdata:
    """Crawl daily TXO option raw data from TAIFEX and store it in MySQL.

    The date of the last crawled day is persisted in the
    ``crawler_config`` table (row ``table_name='rawdata'``) so the
    crawler can resume where it left off across runs.
    """

    def __init__(self):
        # TAIFEX historical options data endpoint (returns a big5-encoded CSV).
        self._url = "http://www.taifex.com.tw/chinese/3/3_2_3_b.asp"
        # POST form expected by the endpoint.  Only four fields are filled
        # in per request, all in YYYY/MM/DD format:
        #   1. DATA_DATE:  2016/07/20
        #   2. DATA_DATE1: 2016/07/25
        #   3. datestart:  2016/07/20
        #   4. dateend:    2016/07/25
        self._data = {
            'goday': '',
            'DATA_DATE': '',
            'DATA_DATE1': '',
            'DATA_DATE_Y': '',
            'DATA_DATE_M': '',
            'DATA_DATE_D': '',
            'DATA_DATE_Y1': '',
            'DATA_DATE_M1': '',
            'DATA_DATE_D1': '',
            'syear': '',
            'smonth': '',
            'sday': '',
            'syear1': '',
            'smonth1': '',
            'sday1': '',
            'datestart': '',
            'dateend': '',
            'COMMODITY_ID': 'TXO',
            'commodity_id2t': '',
            'his_year': ''
        }
        self._db = database.Database()

    @property
    def date_recorded(self):
        """Last crawled date, read from the ``crawler_config`` table.

        Assumes the MySQL driver returns the column as a ``datetime``
        (it is compared against ``datetime.today()`` in ``crawler``) —
        TODO confirm against the ``database`` module.
        """
        sql = ("SELECT `record_date` FROM `crawler_config` "
               "WHERE `table_name`='rawdata'")
        self._date_recorded = self._db.query_from_mysql(sql)[0][0]
        return self._date_recorded

    @date_recorded.setter
    def date_recorded(self, date_recorded):
        """Persist the last crawled date back to ``crawler_config``.

        NOTE(review): the value is interpolated into the SQL string
        because ``update_mysql`` takes a single statement; both inputs
        are internal (a datetime and a fixed table name), so injection
        risk is low, but a parameterized API would be preferable.
        """
        sql = ("UPDATE `crawler_config` SET `record_date`='{}' "
               "WHERE `table_name`='{}'")
        self._db.update_mysql(sql.format(date_recorded, 'rawdata'))
        self._date_recorded = date_recorded

    def crawler(self):
        """Fetch one CSV per day from the recorded date up to today and
        insert every row into the ``rawdata`` table.

        Non-trading days are detected by the response failing to decode
        as big5 (presumably an error page in another encoding — TODO
        confirm); such days are skipped and the recorded date advanced.
        """
        today = datetime.datetime.today()
        mdate = self.date_recorded
        sql = ("INSERT INTO `rawdata` VALUES (null, %s, %s, TRIM(%s), %s, %s,"
               "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
        while mdate <= today:
            date = mdate.strftime("%Y/%m/%d")
            self._data['DATA_DATE'] = date
            self._data['DATA_DATE1'] = date
            self._data['datestart'] = date
            self._data['dateend'] = date
            # Timeout prevents the crawler from hanging forever on a
            # stalled connection (a timeout raises and aborts the run).
            res = requests.post(self._url, data=self._data, timeout=30)
            try:
                content = res.content.decode('big5')
            except UnicodeDecodeError:
                # Not a trade date: advance the persisted date and retry
                # with the next day.  (Was a bare `except:`, which also
                # swallowed KeyboardInterrupt and real bugs.)
                self.date_recorded += datetime.timedelta(1)
                mdate = self.date_recorded
                continue
            csv = pd.read_csv(io.StringIO(content))
            for index, row in csv.iterrows():
                # Last column is `suspend_trading`; blank it out when NaN
                # so the DB insert receives an empty string, not NaN.
                # (pd.isna replaces the obscure `row[-1] != row[-1]` NaN
                # test; .iloc avoids deprecated positional label access.)
                if pd.isna(row.iloc[-1]):
                    row.iloc[-1] = ''
                self._db.insert_into_mysql(sql, tuple(row))
                print(tuple(row))
            self.date_recorded += datetime.timedelta(1)
            mdate = self.date_recorded
if __name__ == "__main__":
    # Run the crawler only when executed as a script; without this guard,
    # merely importing the module would kick off the full crawl.
    rawdata = Rawdata()
    rawdata.crawler()