-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathriksdagen.py
35 lines (30 loc) · 1.05 KB
/
riksdagen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# -*- coding: utf-8 -*-
"""
@author: Fredrik Wahlberg <fredrik.wahlberg@it.uu.se>
"""
import requests
import json
import os.path
import zipfile
import urllib.request
#import re
def load_data_file(year):
    """Load one "anforande" dataset archive from data.riksdagen.se as JSON.

    The archive ``anforande-20<YY><YY+1>.json.zip`` is looked up in the
    current working directory. If it is missing, it is downloaded there
    first. Every file inside the archive is then decoded and parsed.

    Args:
        year: Two-digit start year within 20xx, e.g. ``15`` selects
            ``anforande-201516.json.zip``. Any value accepted by ``int()``
            is allowed.

    Returns:
        A list with one parsed JSON document per file in the archive, in
        archive order.

    Raises:
        urllib.error.URLError: If the archive has to be downloaded and the
            request fails.
        zipfile.BadZipFile: If the local file is not a valid zip archive.
        ValueError: If a member is not valid UTF-8 encoded JSON.
    """
    url = """https://data.riksdagen.se/dataset/anforande/"""
    start = int(year)
    # Zero-pad both halves so that 9 yields "200910" rather than "20910".
    fn = """anforande-20%02d%02d.json.zip""" % (start, start + 1)
    if not os.path.exists(fn):
        print("Downloading %s" % fn)
        # Fetch everything before touching the disk, so a failed request
        # does not create an empty archive file.
        with urllib.request.urlopen(url + fn) as response:
            data = response.read()
        # Write to a scratch file and rename it into place, so an interrupted
        # write never leaves a truncated archive that later calls would try
        # to open instead of downloading again.
        partial = fn + ".part"
        with open(partial, 'wb') as outfile:
            outfile.write(data)
        os.replace(partial, fn)
    parsed_data = []
    with zipfile.ZipFile(fn, 'r') as archive:
        for info in archive.infolist():
            if info.is_dir():
                # Directory entries have no payload and are not JSON.
                continue
            data = archive.read(info)
            # 'utf-8-sig' drops a leading byte-order mark if one is present.
            parsed_data.append(json.loads(data.decode('utf-8-sig', 'strict')))
    # TODO Clean and reformat the data
    return parsed_data
# Person dataset archive (currently unused): https://data.riksdagen.se/dataset/person/person.json.zip
if __name__ == "__main__":
    # When run as a script, load the archive for year 15
    # (anforande-201516.json.zip) and keep the parsed result in `data`.
    data = load_data_file(15)