# NOTE: this file was scraped from a GitHub web view of googlefinance.py
# (126 lines, 3.48 KB); the page chrome and line-number gutter that the
# scrape captured have been removed.
"""
This file demonstrates how to read minute level data from the
Google finance api
"""
import datetime as dt
import sys
from io import StringIO  # fixed typo: was "StriongIO", a guaranteed ImportError

import numpy as np
import pandas as pd
import pandas_datareader as pdr
import requests as r
def retrieve_single_timeseries(ticker, secs=60, ndays=5):
    """
    Fetch intraday data for `ticker` from the Google finance API.

    Samples are taken every `secs` seconds over the most recent `ndays`
    days; each sample carries (time, open price, close price, volume of
    trade).

    Parameters
    ----------
    ticker : String
        Single ticker name
    secs : scalar(Int)
        Number of seconds to sample at
    ndays : scalar(Int)
        Number of days of data to retrieve (max is 5)

    Returns
    -------
    data : DataFrame
        Pandas DataFrame with the stock open, close, volume, and date
        information.
    """
    # Query-string parameters understood by the getprices endpoint:
    # q = ticker, i = sampling interval in seconds, p = look-back period,
    # f = requested fields (date, close, volume, open)
    query = {
        "q": ticker,
        "i": secs,
        "p": str(ndays) + "d",
        "f": "d,c,v,o",
    }
    response = r.get("http://www.google.com/finance/getprices", params=query)
    # Hand the raw response body off to the parser
    return clean_data(response.text, ticker)
def clean_data(raw_text, ticker):
    """
    Takes the raw text output of the html request and cleans it into a
    pandas dataframe.

    Parameters
    ----------
    raw_text : String
        The text generated by the html request
    ticker : String
        Ticker symbol stored alongside every row of the output

    Returns
    -------
    data : Pandas.DataFrame
        DataFrame with columns (DATE, RETURNS, TICKER, CLOSE, VOLUME, OPEN);
        VOLUME is expressed in millions of shares

    Raises
    ------
    ValueError
        If the metadata header lacks COLUMNS/INTERVAL/TIMEZONE_OFFSET, or
        the first data row does not carry an absolute ("a"-prefixed)
        timestamp.
    """
    # Split by line separators: line 0 names the exchange, lines 1-6 are
    # metadata, and the CSV payload starts at line 7
    all_lines = raw_text.split("\n")
    metadata = all_lines[1:7]
    data_csv = StringIO("\n".join(all_lines[7:]))

    # Deal with metadata that we care about; start unset so a malformed
    # header raises instead of a NameError further down
    columns = None
    timeincrement = None
    tzoffset = None
    for line in metadata:
        if "COLUMNS=" in line:
            # Names for the CSV payload's columns
            columns = line.split("COLUMNS=")[1].split(",")
        elif "INTERVAL" in line:
            # Sampling interval in seconds
            timeincrement = int(line.split("INTERVAL=")[1])
        elif "TIMEZONE_OFFSET" in line:
            # Offset is reported in minutes; convert to seconds
            tzoffset = int(line.split("TIMEZONE_OFFSET=")[1]) * 60
    if columns is None or timeincrement is None or tzoffset is None:
        raise ValueError("Response is missing required metadata fields")

    # Load data into pandas
    data = pd.read_csv(data_csv, names=columns)

    # The feed reports the first timestamp of each day as "a<unix-seconds>";
    # subsequent rows are integer multiples of the sampling interval relative
    # to that base.  Resolve every row to a concrete datetime in one pass
    # (building a list avoids DataFrame.set_value, which pandas removed).
    datetimes = []
    basedate = None
    for raw_date in data["DATE"]:
        raw_date = str(raw_date)
        if raw_date[0] == "a":  # '==', not 'is': identity on strings is unreliable
            secsfromepoch = int(raw_date[1:])
            # fromtimestamp(..., tz=utc) replaces the deprecated
            # utcfromtimestamp; drop tzinfo to keep the same naive datetime
            basedate = dt.datetime.fromtimestamp(
                secsfromepoch + tzoffset, dt.timezone.utc
            ).replace(tzinfo=None)
            datetimes.append(basedate)
        else:
            if basedate is None:
                raise ValueError("First data row lacks an absolute timestamp")
            secsfrombase = int(raw_date) * timeincrement
            datetimes.append(basedate + dt.timedelta(seconds=secsfrombase))

    data.insert(0, "DateTime", datetimes)
    data.insert(1, "TICKER", ticker)

    # Drop irrelevant info, compute incremental returns, rename, and rescale
    data = data.drop(labels="DATE", axis=1)
    data = data.rename(columns={"DateTime": "DATE"})
    data.insert(1, "RETURNS", 100 * (data["CLOSE"] / data["OPEN"] - 1))
    # Report volume in millions of shares (plain assignment, not set_value)
    data["VOLUME"] = data["VOLUME"] / 1e6
    return data
# Universe of tickers to download
stock_tickers = ["AAPL", "F", "GM", "GOOG", "MSFT",
                 "MDLZ", "FOXA", "VRSK", "PCLN",
                 "SBUX", "ROST", "WFM", "CHKP",
                 "MAT", "LVNTA", "AMAT", "ADI"]

# Fetch one DataFrame per ticker and stack them into a single long frame
dfs = [retrieve_single_timeseries(tick) for tick in stock_tickers]
fulldata = pd.concat(dfs)