-
Notifications
You must be signed in to change notification settings - Fork 0
/
generic.py
394 lines (353 loc) · 13.4 KB
/
generic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
'''
Created on Jun 7, 2012
Library for data mining and piling
Geo Tools:
get_lat_long(location) from Google maps api
Location(location) from Yahoo placefinder api
Data tools:
JsonDocument(file) / JsonUrl(Url) Json encoded file/urls, accessible as Python datatypes
XmlDocument(file) / XmlUrl(Url) XML encoded file/urls, accessible as Python objects
XlsDocument(file) XLS (pre-2003 Excel) spreadsheet interface, accessible as list of Python dicts
Piling tools:
SqliteTable - pile data into SQlite tables
@author: Williqm
'''
import sqlite3
import math
import urllib
import xlrd
import datetime
import json
import re
from xml.dom import minidom
#import simplejson as json
db = sqlite3.connect('tables.db')
def get_lat_long(location):
'''
returns tuple of latitude and longitude from a given location description string
for example, an address
results from the google geolocation api
You have to get a Google api key here:
https://developers.google.com/maps/signup
and put the key into the "key" variable below
see also: the reverse geocoder provided by Yahoo Placefinder in the Location class
(possible integration opportunity of this method into Location class)
'''
key = ""
output = "csv"
location = urllib.quote_plus(location)
request = "http://maps.google.com/maps/geo?q=%s&output=%s&key=%s" % (location, output, key)
data = urllib.urlopen(request).read()
dlist = data.split(',')
if dlist[0] == '200':
return dlist[2], dlist[3]
else:
return None, None
def calculate_distance(LL1, LL2):
'''
returns distance in mi
accepts two tuples: (Lat1, Lon1)=LL1, (Lat2, Lon2)=LL2
'''
lat1, lon1 = LL1
lat2, lon2 = LL2
lat1 = float(lat1) * math.pi/180
lon1 = float(lon1) * math.pi/180
lat2 = float(lat2) * math.pi/180
lon2 = float(lon2) * math.pi/180
return 3959.0 * math.acos(math.sin(lat1) * math.sin(lat2) + math.cos(lat1) * math.cos(lat2) * math.cos(lon2-lon1))
''' Intermediate Data Types Below '''
class Document:
''' Basic document type. Defaults to a filename which is read into the file attribute
'''
filename=''
file=None
#value = {}
#def __getattr__(self, key):
# return self.value[key]
def __init__(self, filename):
self.filename=filename
self.file=open(filename, 'r')
@property
def value(self):
return self.file.read()
class Url(Document):
''' a Document type that reads from a URL instead of a file'''
url=''
file=None
def __init__(self,url):
self.url=url
self.file = urllib.urlopen(url)
def __repr__(self):
return self.value
class XlsTable(Document):
'''
XlsTable is an iterable that returns a dict for each row in the xls spreadsheet,
with keys generated from the header of each column in the spreadsheet,
basically treats a spreadsheet generically of the format:
Col1 Title Col2 Title Col3 Title ... (name_row)
R1C1 R1C2 R1C3 ... (start_row)
R2C1 R2C2 R2C3 ...
... ... ...
Types are automatically inferred from the type described by the Excel format.
@usage:
'''
colnames=[]
_coltypes=[]
name_row=0
start_row=1
row=start_row
_sheet=0
def __len__(self):
return self.sheet.nrows-self.start_row
def next(self):
if(self.row<len(self)):
self.row+=1
return self[self.row]
else:
raise StopIteration
def __getitem__(self, n):
return [c(f) for c,f in zip(self._coltypes,self.sheet.row_values(n))]
def __iter__(self):
return self
def __repr__(self):
return '%s Table' % self.filename
def xlsfloat(self, value):
try:
i=int(value)
except ValueError:
return None
if(i==value):
return i
else:
return float(value)
def xlsdate(self, value):
return datetime.datetime(*xlrd.xldate_as_tuple(value, self.book.datemode))
@property
def sheet(self):
return self.book.sheets()[self._sheet]
def __init__(self, file, name_row=0, start_row=1, sheet=0):
self.name_row = name_row
self.start_row = start_row
self._sheet=sheet
self.book = xlrd.open_workbook(file)
self.colnames = [re.sub('[\W]', '', name) for name in self.sheet.row_values(self.name_row)]
self._coltypes = [[str,str,self.xlsfloat,self.xlsdate][v] for v in self.sheet.row_types(self.start_row)]
class JsonDocument(Document):
''' a Json encoded document '''
@property
def value(self):
return json.loads(self.file.read())
def __dict__(self):
return self.value
def __getattr__(self, k):
return self.value[k]
class XmlElement():
''' XmlElement is a simple auto nesting data mask for XML data.
Well adapted to list formatted arbitrary data - i.e. multiple successive elements with the same name,
that become lists inside the XmlElement object. Not particularly well suited to text markup.
Element Attributes are stored in the "attributes" attribute
of the XmlElement object.
Element Content is stored in the "value" attribute of the XmlElement object.
@usage:
document = """\
<tag1>
<listtag attribute="true">
Value
</listtag>
<listtag>
Value2
</listtag>
<listtag>
Value3
</listtag>
<tag2>
Value4
</tag2>
</tag1>
"""
>>> element = xml.dom.minidom.parseString(document)
>>> v = XmlElement(element)
>>> v.tag1.listtag[0].value
Value
>>> v.tag1.tag2.value
Value4
>>> v.tag1.listtag[0].attributes
{u'attribute': u'true'}
'''
value=''
attributes={}
def __init__(self,element):
''' Node type reference:
ELEMENT_NODE = 1
ATTRIBUTE_NODE = 2
TEXT_NODE = 3
CDATA_SECTION_NODE = 4
ENTITY_REFERENCE_NODE = 5
ENTITY_NODE = 6
PROCESSING_INSTRUCTION_NODE = 7
COMMENT_NODE = 8
DOCUMENT_NODE = 9
DOCUMENT_TYPE_NODE = 10
DOCUMENT_FRAGMENT_NODE = 11
NOTATION_NODE = 12
'''
self.dom=element
for child in element.childNodes:
if child.nodeType==element.ELEMENT_NODE:
try:
a=getattr(self, child.nodeName)
if(type(a) not in [type([])]):
a=[a]
a.append(XmlElement(child))
setattr(self,child.nodeName, a)
except AttributeError:
setattr(self, child.nodeName, XmlElement(child))
elif child.nodeType==element.ATTRIBUTE_NODE:
self.attributes[child.nodeName]=child.data
elif child.nodeType==element.TEXT_NODE:
self.value+=(child.data.strip())
try:
for k,v in element.attributes.items():
self.attributes[k]=v
except AttributeError:
pass
def __repr__(self):
return self.value
class XmlDocument(Document):
''' an XML encoded document'''
dom=None
@property
def value(self):
if(self.dom==None):
self.dom = minidom.parse(self.file)
return XmlElement(self.dom)
def __repr__(self):
return self.value
class XmlUrl(Url, XmlDocument):
''' The Python value output of a XML encoded URL
@usage
>>> v = XmlUrl(url)
>>> v.value
{}
GetAttr can be used to access values using [] notation directly
XmlUrl demonstrates nesting functionality of this toolbox.
Since it subclasses Url and XmlDocument, those classes provide
all the methods needed to access a Url as a Xml document.
'''
pass
class JsonUrl(Url, JsonDocument):
''' The Python value output of a JSON encoded URL
@usage
>>> v = JsonUrl(url)
>>> v.value
{}
GetAttr can be used to access values using [] notation directly
@note:
JsonUrl demonstrates nesting functionality of this toolbox.
Since it subclasses Url and JsonDocument, those classes provide
all the methods needed to access a Url as a Json document.
'''
pass
class EnergyPrice(JsonUrl):
''' Returns a dict of the output of the NREL utility price app
@usage
>>> EnergyPrice('11206').residential
0.209999999999
@note
typical url: http://developer.nrel.gov/api/georeserv/service/utility_rates.json?address=11206&api_key=xxxx
typical response: {"errors": [{}], "infos": [], "inputs": {"address": "11206"},
"outputs": {"commercial": 0.17999999999999999, "company_id": "04226",
"industrial": 0.17000000000000001, "name": "Consolidated Edison Co. Of New York Inc.",
"residential": 0.20999999999999999},
"version": "2.1.7", "warnings": []}
'''
_api_key='42d719e7c42834f7a7f6ca07e8642368d2185e15'
_url='http://developer.nrel.gov/api/georeserv/service/utility_rates.json'
address = None
def __init__(self, address):
self.address = address
self.url = '%s?address=%s&api_key=%s'%(self._url, self.address, self._api_key)
self.file=JsonUrl(self.url).file
#self.value = json.loads(.read())['outputs']
class Location(XmlUrl):
''' Generates a dict of the xml result of the Yahoo placefinder API
@usage:
>>> l = Location('11206')
>>> l.latitude
40.702690
>>> l.longitude
-73.942430
Get a yahoo placefinder api key here:
http://developer.yahoo.com/geo/placefinder/
place it into the _appid variable below
'''
_url='http://where.yahooapis.com/geocode'
_appid=''
location = ''
def __init__(self, location):
self.location=location
self.url = '%s?q=%s&appid=%s' % (self._url, self.location, self._appid)
self.file = XmlUrl(self.url).file
def __getattr__(self,k):
try:
return getattr(self.value.ResultSet.Result,k)
except AttributeError:
try:
return getattr(self.value.ResultSet,k)
except AttributeError:
return getattr(self.value,k)
class CensusTable(XlsTable):
''' Contains tables from Census
(nothing here yet)
'''
def __init__(self):
pass
class SQLiteTable(object):
''' This is a wrapper for SQlite specializing in data piling.
Step 1: create a table by initializing
>>> table = SQLiteTable('tablename',['id','name','value'],['int','str','text'],db)
'''
name=None
python_to_sql = {'str':'VARCHAR(255)',
'text':'TEXT',
'int':'INT',
'float':'DOUBLE',
'datetime':'DATETIME'
}
@classmethod
def sql_typeof(cls,*args):
return [cls.python_to_sql[type(v).__name__] for v in args]
def __init__(self, name, cols, types, db):
''' Constructor initializes table or initializes self with existing table '''
self.name=name
self.db=db
self.dbc=db.cursor()
columns = ','.join(['%s %s'%(n,t) for n,t in zip(cols,types)])
q = 'CREATE TABLE IF NOT EXISTS %s (%s);' % (name, columns)
print q
self.dbc.execute(q)
self.db.commit()
def addcolumn(self, name, type):
self.dbc.execute('ALTER TABLE %s ADD COLUMN %s %s' % (self.name, name, type))
def addrow(self, *args):
args2=[]
for arg in args:
if(type(arg).__name__=='str'):
args2.append("'%s'"%arg)
elif(type(arg).__name__=='datetime'):
args2.append("'%s'"%arg.strftime('%Y-%m-%d'))
else:
args2.append(str(arg))
q = 'INSERT INTO %s VALUES (%s)' %(self.name, ', '.join(args2))
print q
self.dbc.execute(q)
self.db.commit()
def update(self, **kwargs):
pass
def get(self, cols=['*'], n=None):
columns = ', '.join(cols)
limit = ''
if n!=None:
limit = 'LIMIT %d'%n
where = ''
query = 'SELECT %s FROM %s %s %s'%(columns, self.name, where, limit)