-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStreetMap_Milledgeville_SQL.sql
201 lines (160 loc) · 4.58 KB
/
StreetMap_Milledgeville_SQL.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
## Creating a database to store tables
CREATE SCHEMA Milledgeville_dbs;
USE Milledgeville_dbs;
## Creating empty tables
CREATE TABLE nodes (
changeset INT NOT NULL,
nod_id DOUBLE NOT NULL,
user_nm VARCHAR(50) NOT NULL,
uid INT NOT NULL,
lon DECIMAL(30, 26) NOT NULL,
lat DECIMAL(30, 26) NOT NULL,
PRIMARY KEY (nod_id)
);
CREATE TABLE node_tags (
k VARCHAR(50) NOT NULL,
v VARCHAR(200) NOT NULL,
nod_id INT4 NOT NULL
);
#primary key (nod_id)
CREATE TABLE ways (
changeset INT NOT NULL,
user_nm VARCHAR(50) NOT NULL,
uid INT NOT NULL,
way_id INT4 NOT NULL
);
CREATE TABLE way_tags (
k VARCHAR(50) NOT NULL,
v VARCHAR(200) NOT NULL,
way_id INT4 NOT NULL
);
CREATE TABLE way_nodes (
ref INT4 NOT NULL,
way_id INT4 NOT NULL
);
## Load the data to the table from .csv files
LOAD DATA LOCAL INFILE 'F:/One_drive_Gmail/OneDrive/Dropbox_transfers/Udacity/Nano_Degree_Data_analysis/Project_4_Data_wrangling/Final_project/nodes.csv'
INTO TABLE nodes
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
LOAD DATA LOCAL INFILE 'F:/One_drive_Gmail/OneDrive/Dropbox_transfers/Udacity/Nano_Degree_Data_analysis/Project_4_Data_wrangling/Final_project/node_tags.csv'
INTO TABLE node_tags
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
LOAD DATA LOCAL INFILE 'F:/One_drive_Gmail/OneDrive/Dropbox_transfers/Udacity/Nano_Degree_Data_analysis/Project_4_Data_wrangling/Final_project/ways.csv'
INTO TABLE ways
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
LOAD DATA LOCAL INFILE 'F:/One_drive_Gmail/OneDrive/Dropbox_transfers/Udacity/Nano_Degree_Data_analysis/Project_4_Data_wrangling/Final_project/way_tags.csv'
INTO TABLE way_tags
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
LOAD DATA LOCAL INFILE 'F:/One_drive_Gmail/OneDrive/Dropbox_transfers/Udacity/Nano_Degree_Data_analysis/Project_4_Data_wrangling/Final_project/way_nodes.csv'
INTO TABLE way_nodes
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
## dropping tables if nessary
#DROP TABLE node_tags;
#DROP TABLE milledgeville_dbs.nodes;
#DROP TABLE ways;
#DROP TABLE way_nodes;
#DROP TABLE way_tags;
SELECT v, k
FROM node_tags
WHERE k = 'addr:street';
## Working with zipcodes
SELECT v, COUNT(v) AS zipcodes
FROM node_tags
WHERE k LIKE '%post%'
GROUP BY v
ORDER BY zipcodes DESC;
SELECT v, COUNT(v) AS zipcodes
FROM way_tags
WHERE k LIKE '%post%'
GROUP BY v
ORDER BY zipcodes DESC;
## Unique users
## Top 10 zip codes
SELECT (v) as Zipcode, COUNT(v) AS counts
FROM (SELECT v
FROM node_tags
WHERE k LIKE '%post%'
UNION ALL
SELECT v
FROM way_tags
WHERE k LIKE '%post%') zipcds
GROUP BY Zipcode
ORDER BY counts DESC
LIMIT 10;
## Inappropriate zip codess
SELECT (v) as Zipcode, COUNT(v) AS counts
FROM (SELECT v
FROM node_tags
WHERE k LIKE '%post%' and v not like '%3%'
UNION ALL
SELECT v
FROM way_tags
WHERE k LIKE '%post%' and v not like '%3%') zipcds
GROUP BY Zipcode;
## User count for nodes
SELECT user_nm, COUNT(user_nm) AS user_count
FROM nodes
GROUP BY user_nm
ORDER BY user_count DESC
LIMIT 15;
## User count for ways
SELECT user_nm, COUNT(user_nm) AS user_count
FROM ways
GROUP BY user_nm
ORDER BY user_count DESC
LIMIT 15;
## Number of nodes
SELECT COUNT(user_nm) AS way_count
FROM nodes;
## Number of way nodes
SELECT COUNT(user_nm) AS way_count
FROM ways;
## Select elementary schools in the area
SELECT v as Elemtry_School
FROM node_tags
WHERE k = 'name' AND v LIKE '%Elementary School%'
ORDER BY Elemtry_School;
## User count for both ways and nodes
SELECT user_nm, SUM(usr_cunt) AS user_count
FROM (SELECT user_nm, COUNT(user_nm) AS usr_cunt
FROM ways
GROUP BY user_nm
UNION ALL
SELECT user_nm, COUNT(user_nm) AS usr_cunt
FROM nodes
GROUP BY user_nm) ways_nodes
GROUP BY user_nm
ORDER BY user_count DESC
LIMIT 20;
## User count for both ways and nodes and save into a table
CREATE TABLE user_summary AS
SELECT user_nm, SUM(usr_cunt) AS user_count
FROM (SELECT user_nm,COUNT(user_nm) AS usr_cunt
FROM ways
GROUP BY user_nm
UNION ALL
SELECT user_nm, COUNT(user_nm) AS usr_cunt
FROM nodes
GROUP BY user_nm) ways_nodes
GROUP BY user_nm
ORDER BY user_count DESC;
## User count and percentage for both ways and nodes
SELECT user_nm, user_count, (user_count/tot)*100 AS percentage
FROM user_summary
CROSS JOIN (SELECT SUM(user_count) AS tot
FROM user_summary) total
LIMIT 20;
## Unique user count
SELECT COUNT(username) as usercount
FROM (SELECT DISTINCT(user_nm) as username
FROM user_summary) usecont;