-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudit1.py
100 lines (82 loc) · 3.02 KB
/
audit1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 05 10:16:17 2017
@author: Victor
"""
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
import pprint
# Path of the OSM XML extract that the audit below reads.
OSMFILE_sample = "freetown.osm"

# Matches the LAST whitespace-delimited token of a street name (optionally
# ending in a period), e.g. "Road" in "Wilkinson Road" or "St." in "Main St.".
# The trailing `$` anchor is what makes `search` return the final token, which
# is where the street type listed in `expected` appears, rather than the first
# word of the name.
regex = re.compile(r'\b\S+\.?$', re.IGNORECASE)
# Street types that are already in canonical form; names whose matched type is
# in this list are not reported by the audit.
# NOTE: every entry needs its own trailing comma -- adjacent string literals
# without one are silently concatenated by Python ("Walk" "Circle" would
# become the single entry "WalkCircle").
expected = [
    "Street", "Avenue", "Boulevard", "Drive", "Court", "Place", "Square",
    "Lane", "Road", "Trail", "Parkway", "Commons", "Cove", "Alley", "Park",
    "Way", "Walk", "Circle", "Highway", "Plaza", "Path", "Center", "Mission",
]
# Lookup table from an abbreviated, misspelled or lower-cased word to the
# spelling it should be replaced with. Keys are matched exactly
# (case-sensitive, punctuation included).
mapping = {
    # Avenue
    "Ave": "Avenue",
    "Ave.": "Avenue",
    "avenue": "Avenue",
    "ave": "Avenue",
    # Boulevard (abbreviations and common misspellings)
    "Blvd": "Boulevard",
    "Blvd.": "Boulevard",
    "Blvd,": "Boulevard",
    "Boulavard": "Boulevard",
    "Boulvard": "Boulevard",
    # Court / Drive
    "Ct": "Court",
    "Dr": "Drive",
    "Dr.": "Drive",
    # Compass direction
    "E": "East",
    # Highway / Lane
    "Hwy": "Highway",
    "Ln": "Lane",
    "Ln.": "Lane",
    # Place / Plaza
    "Pl": "Place",
    "Plz": "Plaza",
    # Road
    "Rd": "Road",
    "Rd.": "Road",
    # Street
    "St": "Street",
    "St.": "Street",
    "st": "Street",
    "street": "Street",
    # Lower-cased full words
    "square": "Square",
    "parkway": "Parkway",
}
def audit_street(street_types, street_name):
    """Record `street_name` under its matched token if that token is unexpected.

    The module-level `regex` is searched against `street_name`. When it
    matches and the matched text is not in the module-level `expected` list,
    `street_name` is added to the set stored at `street_types[<matched text>]`.

    street_types -- mapping from matched token to a set of street names; it is
                    indexed without a prior membership check, so it must
                    create missing entries itself (`audit` passes a
                    `defaultdict(set)`).
    street_name  -- the street name string to inspect.

    Returns None; `street_types` is updated in place.
    """
    match = regex.search(street_name)
    if not match:
        return
    token = match.group()
    if token in expected:
        return
    street_types[token].add(street_name)
def is_street_name(elem):
    """Return True when the element's 'k' attribute equals "addr:street".

    `elem` must expose an `attrib` mapping containing a 'k' entry; a missing
    'k' raises KeyError.
    """
    key = elem.attrib['k']
    return key == "addr:street"
def audit(osmfile):
    """Collect the street names in an OSM XML file that `audit_street` flags.

    Every <tag> child of each <node> and <way> element is inspected; those
    accepted by `is_street_name` have their 'v' attribute passed to
    `audit_street`, which groups the flagged names.

    osmfile -- path of the OSM XML file to parse.

    Returns the `defaultdict(set)` populated by `audit_street`.
    """
    street_types = defaultdict(set)
    # Binary mode hands raw bytes to the XML parser so it can apply the
    # encoding declared in the document; the with-block guarantees the file
    # handle is closed even if parsing raises.
    with open(osmfile, "rb") as osm_file:
        # Listen for "end" events: iterparse only guarantees that an element's
        # children are present once its closing tag has been seen. On "start"
        # events the <tag> children may not have been parsed yet.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag in ("node", "way"):
                for tag in elem.iter("tag"):
                    if is_street_name(tag):
                        audit_street(street_types, tag.attrib['v'])
                # The element has been fully processed; drop its attributes
                # and children so large files do not accumulate in memory.
                elem.clear()
    return street_types
# Audit the sample file and pretty-print the flagged names as a plain dict.
_audit_result = audit(OSMFILE_sample)
pprint.pprint(dict(_audit_result))
def string_case(s):
    """Return `s` title-cased, or unchanged when `s.isupper()` is true.

    Strings that are entirely upper-case (e.g. "NW") are kept as they are;
    everything else goes through `str.title()`.
    """
    return s if s.isupper() else s.title()
def update_name(name, mapping):
    """Return `name` with each word replaced via `mapping` and re-cased.

    `name` is split on single spaces. Each piece that is a key of `mapping`
    is swapped for the mapped value; every piece (swapped or not) is then
    passed through `string_case`. The pieces are joined back with single
    spaces, so runs of spaces in the input are preserved.

    name    -- the street name string to clean.
    mapping -- dict from a word to its replacement.
    """
    cleaned = [string_case(mapping.get(word, word)) for word in name.split(' ')]
    return ' '.join(cleaned)
# Audit the sample file again and, for every flagged street name, print the
# original next to the cleaned-up version produced by `update_name`.
update_street = audit(OSMFILE_sample)
# `.items()` and a single-argument `print(...)` behave the same on Python 2
# and Python 3; the output format ("<old> => <new>") is unchanged.
for street_type, ways in update_street.items():
    for name in ways:
        better_name = update_name(name, mapping)
        print("%s => %s" % (name, better_name))