-
Notifications
You must be signed in to change notification settings - Fork 5
/
FilterData_readcsv.R
168 lines (144 loc) · 6.85 KB
/
FilterData_readcsv.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Capture the start time so the total run time can be computed from `ptm`.
ptm <- proc.time()

# Load dependencies with library() rather than require(): library() stops
# immediately with a clear error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(stringr)
library(utils)
library(data.table)
library(dplyr)
# library(readr)
# ---------------------------------------------------------------------------
# Download, merge and filter the New Jersey accident / driver files per year.
#
# NOTE: this section previously contained unresolved merge-conflict markers
# (HEAD looped over 2014 only and had no download step; origin/master
# downloaded and processed 2001-2014). The origin/master side is kept because
# it is a superset of HEAD. Narrow `years` to process fewer years.
# ---------------------------------------------------------------------------
years <- 2001:2014

# Download (if missing) and unzip (if missing) one "NewJersey<yr><kind>" file.
# `kind` is "Accidents" or "Drivers". Returns the expected .txt name invisibly.
fetch_nj_file <- function(yr, kind) {
  zip_file <- paste0("NewJersey", yr, kind, ".zip")
  # fixed = TRUE: treat ".zip" literally instead of as a regular expression.
  txt_file <- sub(".zip", ".txt", zip_file, fixed = TRUE)
  if (!file.exists(zip_file)) {
    download.file(
      url = paste0(
        "http://www.state.nj.us/transportation/refdata/accident/",
        yr, "/", zip_file
      ),
      destfile = zip_file,
      mode = "wb" # zip archives are binary; be explicit on every platform
    )
  }
  if (!file.exists(txt_file)) {
    unzip(zip_file)
  }
  invisible(txt_file)
}

# Locate the single extracted text file for a year in the working directory.
# list.files() takes a regular expression (not a glob), so the name is
# matched with an anchored regex; anything other than exactly one match is
# reported explicitly instead of failing obscurely inside read.csv().
find_year_file <- function(yr, kind) {
  hits <- list.files(pattern = paste0(yr, kind, "\\.txt$"))
  if (length(hits) != 1L) {
    stop(
      "Expected exactly one ", kind, " file for year ", yr,
      " but found ", length(hits), call. = FALSE
    )
  }
  hits
}

# Same per-year order as before: accidents first, then drivers.
for (yr in years) {
  fetch_nj_file(yr, "Accidents")
  fetch_nj_file(yr, "Drivers")
}

# Column names are loop-invariant, so they are defined once up front.
# The spellings (e.g. "Crash.Tyep.Code") are kept exactly as they were because
# they end up in the output files.
accident_cols <- c(
  "Case.Number", "County.Name", "Municipality.Name",
  "Crash.Date", "Crash.Day.Of.Week", "Crash.Time",
  "Police.Dept.Code", "Police.Department",
  "Police.Station", "Total.Killed", "Total.Injured",
  "Pedestrians.Killed", "Pedestrians.Injured",
  "Severity", "Intersection", "Alcohol.Involved",
  "HazMat.Involved", "Crash.Tyep.Code",
  "Total.Vehicles.Involve", "Crash.Location",
  "Location.Direction", "Route", "Route.Suffix",
  "SRI", "MilePost", "Road.System", "Road.Character",
  "Road.Surface.Type", "Surface.Condition",
  "Light.Condition", "Environmental.Condition",
  "Road.Divided.By", "Temporary.Traffic.Control.Zone",
  "Distance.To.Cross.Street", "Unit.Of.Measurement",
  "Directn.From.Cross.Street", "Cross.Street.Name",
  "Is.Ramp", "Ramp.To.From.Route.Name",
  "Ramp.To.From.Route.Direction", "Posted.Speed",
  "Posted.Speed.Cross.Street", "Latitude", "Longitude",
  "Cell.Phone.In.Use.Flag", "Other.Property.Damage",
  "Reporting.Badge.No"
)

driver_cols <- c(
  "Case.Number",
  "Vehicle.Number",
  "Driver.City",
  "Driver.State",
  "Driver.Zip.Code",
  "Driver.License.State",
  "Driver.DOB",
  "Driver.Sex",
  "Alcohol.Test.Given",
  "Alcohol.Test.Type",
  "Alcohol.Test.Results",
  "Charge",
  "Summons",
  "Multi.Charge.Flag",
  "Driver.Physical.Status"
)

# Columns kept in the filtered output. Commented-out entries are deliberately
# excluded; they are left in place to document what is being dropped.
selectCols <- c(
  "Case.Number", "County.Name", "Municipality.Name",
  "Crash.Date", "Crash.Day.Of.Week", "Crash.Time",
  "Police.Dept.Code", "Police.Department",
  "Police.Station", "Total.Killed", "Total.Injured",
  "Pedestrians.Killed", "Pedestrians.Injured",
  "Severity", "Intersection", "Alcohol.Involved",
  "HazMat.Involved", "Crash.Tyep.Code",
  "Total.Vehicles.Involve", "Crash.Location",
  # "Location.Direction",
  "Route",
  # "Route.Suffix",
  # "SRI",
  "MilePost", "Road.System", "Road.Character",
  "Road.Surface.Type", "Surface.Condition",
  "Light.Condition", "Environmental.Condition",
  "Road.Divided.By", "Temporary.Traffic.Control.Zone",
  "Distance.To.Cross.Street", "Unit.Of.Measurement",
  "Directn.From.Cross.Street", "Cross.Street.Name",
  # "Is.Ramp",
  # "Ramp.To.From.Route.Name",
  # "Ramp.To.From.Route.Direction", "Posted.Speed",
  "Posted.Speed.Cross.Street", "Latitude", "Longitude",
  "Cell.Phone.In.Use.Flag", "Other.Property.Damage",
  "Reporting.Badge.No",
  "Vehicle.Number",
  "Driver.City",
  "Driver.State",
  "Driver.Zip.Code",
  "Driver.License.State",
  "Driver.DOB",
  "Driver.Sex",
  "Alcohol.Test.Given",
  "Alcohol.Test.Type",
  "Alcohol.Test.Results",
  "Charge",
  # "Summons",
  # "Multi.Charge.Flag",
  "Driver.Physical.Status"
)
# Alternative, narrower selection kept for reference:
# selectCols <- c("Case.Number","Driver.Zip.Code","age","Driver.Sex","alcohol",
#                 "County.Name","Municipality.Name",
#                 "Crash.Date","Crash.Day.Of.Week","Crash.Time",
#                 "Road.Surface.Type", "Surface.Condition",
#                 "Light.Condition", "Environmental.Condition",
#                 "Road.Divided.By", "Temporary.Traffic.Control.Zone",
#                 "Distance.To.Cross.Street", "Unit.Of.Measurement",
#                 "Posted.Speed.Cross.Street", "Latitude", "Longitude",
#                 "Cell.Phone.In.Use.Flag")

# Author-specific absolute path for an extra copy of the filtered data. The
# copy is overwritten on every iteration, so it ends up holding only the last
# processed year.
local_copy <- "C:\\Users\\Fireseraph\\Desktop\\Work Bench\\Github\\uncrash\\uncrash\\FilteredData.Rdata"

for (yr in years) {
  print(paste("Begin processing files for year", yr))

  # --- Accidents -----------------------------------------------------------
  accident <- read.csv(
    find_year_file(yr, "Accidents"),
    header = FALSE, sep = ",", quote = ""
  )
  colnames(accident) <- accident_cols
  # Zero-pad the crash time to four digits (HHMM), then reformat as "HH:MM".
  accident$Crash.Time <- sprintf("%04d", accident$Crash.Time)
  accident$Crash.Time <- format(
    strptime(accident$Crash.Time, format = "%H%M"),
    format = "%H:%M"
  )
  print(paste0("*", yr, " Accidents file extracted and cleaned"))

  # --- Drivers -------------------------------------------------------------
  drivers <- read.csv(
    find_year_file(yr, "Drivers"),
    header = FALSE, sep = ",", quote = ""
  )
  colnames(drivers) <- driver_cols
  # drivers$year <- substr(drivers$Case.Number,1,4) %>% as.numeric
  # drivers$age = difftime(as.Date(paste0(drivers$year,"-06-30")),drivers$Driver.DOB %>% as.Date(format = "%m/%d/%Y"),units = "weeks")/52.14
  # Flag rows whose Charge text matches one of the listed patterns.
  # TODO (carried over from the original): confirm which charge codes should
  # count as alcohol-related. Note that "alcohol" is not in the active
  # selectCols, so this column is currently dropped from the output.
  drivers$alcohol <- !is.na(drivers$Charge) &
    grepl("39:4-50|39-4-50|39:4:50|39:4 50|DWI|DUI", drivers$Charge)
  print(paste0("*", yr, " Drivers file extracted and cleaned"))

  # --- Merge and filter ----------------------------------------------------
  AllData <- merge(drivers, accident, by = "Case.Number")
  print(paste("Merged accident and driver file for year", yr))

  # Keep the selected columns in their existing (merged) order.
  col.num <- which(colnames(AllData) %in% selectCols)
  FilteredData <- AllData[, col.num, drop = FALSE]

  # --- Outputs -------------------------------------------------------------
  # Only write the author-specific copy when its directory exists, so the
  # script does not abort on machines that lack that path.
  if (dir.exists(dirname(local_copy))) {
    save(FilteredData, file = local_copy)
  } else {
    message("Skipping local copy; directory not found: ", dirname(local_copy))
  }

  outFile <- paste0("FilteredData", yr, ".csv")
  write.csv(FilteredData, file = outFile)
  print(paste("Generated", outFile, "file"))

  outFile <- paste0("FilteredData", yr, ".Rda")
  save(FilteredData, file = outFile)
  print(paste("Generated", outFile, "file"))

  # .Rds files are written with saveRDS() so that readRDS() can read them;
  # save() produces the .Rda format regardless of the file extension.
  outFile <- paste0("FilteredData", yr, ".Rds")
  saveRDS(FilteredData, file = outFile)
  print(paste("Generated", outFile, "file"))
}
# Elapsed time since `ptm` was captured at the top of the script.
proc.time() - ptm