-
Notifications
You must be signed in to change notification settings - Fork 0
/
clarityparser.py
130 lines (114 loc) · 6.11 KB
/
clarityparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from slugify import slugify # External dependency. See requirements.txt
import configuration # Local configuration file configuration.py
import io
import os
import zipfile
import csv
import json
from decimal import *
from collections import OrderedDict
import datetime
def bring_clarity(rawtime: datetime.datetime, countyname: str) -> None:
    """Parse one county's zipped ``summary.csv`` snapshot into an Elex-CSV-style file.

    The snapshot is read from
    ``<snapshotsdir><county-slug>/<YYYYMMDD-HHMMSS>/<filename>`` and the result
    is written to ``<targetdir>70-<county-slug>.csv``. The directory prefixes
    from ``configuration`` are concatenated as-is, so they must carry their own
    trailing separators.

    Args:
        rawtime: Snapshot time. Names the snapshot folder and is written to
            every row's ``lastupdated`` column.
        countyname: County name. Slugified for paths and IDs, and written
            verbatim as ``reportingunitid`` / ``reportingunitname``.

    Raises:
        KeyError: If the CSV lacks one of the expected source columns.
        ValueError: If a ``total votes`` value is not an integer string.
        decimal.InvalidOperation: If a ``percent of votes`` value is not numeric.

    Side effects:
        Creates ``targetdir`` if needed, sets the current decimal context's
        precision to 10, and prints progress messages.
    """
    # If your server's not in Florida's time zone, these timestamps are going to be wrong.
    # You need to do some surgery. Except Florida has more than one time zone.
    snapshotsdir = configuration.snapshotsdir
    targetdir = configuration.targetdir
    filename = configuration.filename
    electiondate = configuration.electiondate

    timestamp = rawtime.strftime("%Y%m%d-%H%M%S")
    lastupdated = rawtime.strftime("%Y-%m-%dT%H:%M:%S")
    countyslug = slugify(countyname)
    zipfilename = snapshotsdir + countyslug + "/" + timestamp + "/" + filename
    targetfilename = targetdir + "70-" + countyslug + ".csv"
    os.makedirs(targetdir, exist_ok=True)
    getcontext().prec = 10  # Precision

    lineheaders = [
        "id", "raceid", "racetype", "racetypeid", "ballotorder", "candidateid", "description",
        "delegatecount", "electiondate", "electtotal", "electwon", "fipscode", "first", "incumbent",
        "initialization_data", "is_ballot_measure", "last", "lastupdated", "level", "national",
        "officeid", "officename", "party", "polid", "polnum", "precinctsreporting", "precinctsreportingpct",
        "precinctstotal", "reportingunitid", "reportingunitname", "runoff", "seatname",
        "seatnum", "statename", "statepostal", "test", "uncontested", "votecount", "votepct", "winner",
    ]
    # Source column -> output column, copied verbatim.
    crosswalk = {
        "line number": "ballotorder",
        "party name": "party",
        "total votes": "votecount",
        "percent of votes": "votepct",
        "ballots cast": "electtotal",
    }
    # The precinct columns come under one of several header spellings;
    # the first (total, reporting) pair found in a row wins.
    precinctcolumns = (
        ("num County total", "num County rptg"),
        ("num Area total", "num Area rptg"),
        ("num Precinct total", "num Precinct rptg"),
    )

    # Read every row up front so the archive and its member are closed
    # before any processing starts.
    with zipfile.ZipFile(zipfilename) as archive:
        with io.TextIOWrapper(archive.open("summary.csv")) as summary:
            rows = list(csv.DictReader(summary))

    masterlist = []
    racevotes = {}  # raceid -> total votes across all of that race's choices
    for row in rows:
        line = OrderedDict((header, "") for header in lineheaders)
        for source, target in crosswalk.items():
            line[target] = row[source]

        for totalcolumn, reportingcolumn in precinctcolumns:
            if totalcolumn in row:
                line["precinctstotal"] = row[totalcolumn]
                line["precinctsreporting"] = row[reportingcolumn]
                break
        else:
            # Best effort: keep going with blank precinct fields.
            print(f"Problem with {countyname} headers. Cannot parse! Don't know what format this is.")

        # Specific cleanups:
        peep = row["choice name"].replace("''", "'").strip()  # Replace double single quotes
        if " " not in peep:  # Handle single-word candidates like "YES"
            line["first"] = peep
            line["last"] = ""
        else:
            # First name is everything until the last space; last name is what follows it.
            firstpart, _, lastpart = peep.rpartition(" ")
            line["first"] = firstpart.strip()
            line["last"] = lastpart.strip()

        # For Elex-CSV, the "pct" is kept as a decimal, not a percentage.
        # That is, the number ranges from 0 to 1.
        precinctstotal = line["precinctstotal"]
        if precinctstotal in ("0", ""):
            line["precinctsreportingpct"] = 0
        else:
            try:
                line["precinctsreportingpct"] = Decimal(line["precinctsreporting"]) / Decimal(precinctstotal)
            except (ArithmeticError, TypeError, ValueError):
                # Non-numeric, missing, or zero-valued counts: report 0 rather than abort.
                line["precinctsreportingpct"] = 0

        contestname = row["contest name"]
        officepart, *seatparts = contestname.split(",")
        line["officename"] = officepart.strip()
        # Joining pieces that still carry their leading space yields ",  ";
        # collapse the double space back to a single one.
        line["seatname"] = ", ".join(seatparts).strip().replace("  ", " ")

        # Do we try to handle partisan races here separately, or let whatever the ID is carry through to the seat?
        # Palm Beach, naturally, messes with us.
        if line["officename"].startswith(("REP - ", "DEM - ")):
            print(f"Slicing around {line['officename']}")
            partyprefix = line["officename"][:3]
            if line["party"] == "":
                line["party"] = partyprefix
            line["seatname"] += " - " + partyprefix
            line["officename"] = line["officename"][6:]
            print(line["officename"] + " ... " + line["seatname"])

        raceid = slugify(countyname + " " + contestname)  # Keep district number, etc.
        line["raceid"] = raceid
        line["candidateid"] = slugify("-".join([raceid, line["first"], line["last"]]))
        if line["party"] in ("DEM", "REP"):
            line["racetypeid"] = line["party"][0]  # D or R to indicate primary.
        if line["votepct"] != "0":
            line["votepct"] = Decimal(line["votepct"]) / 100  # Number isn't a percentage; ranges from 0 to 1.
        racevotes[raceid] = racevotes.get(raceid, 0) + int(line["votecount"])

        line["reportingunitid"] = countyname
        line["id"] = slugify(raceid + " " + line["reportingunitid"])
        line["electiondate"] = electiondate
        line["lastupdated"] = lastupdated
        line["level"] = "subunit"
        line["reportingunitname"] = countyname
        masterlist.append(line)

    # The per-race vote total replaces the "ballots cast" value copied earlier.
    for line in masterlist:
        line["electtotal"] = racevotes[line["raceid"]]

    with open(targetfilename, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(lineheaders)
        writer.writerows(list(line.values()) for line in masterlist)
    print(f"Done parsing out {countyname} to {targetfilename}")