-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMechaCarChallenge.R
252 lines (191 loc) · 8.91 KB
/
MechaCarChallenge.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# Load the packages used below: dplyr for the pipe/summaries, ggplot2 for plots
library(dplyr)
library(ggplot2)
# Read in the MechaCar data. check.names = FALSE keeps the CSV header names
# as written rather than rewriting them into syntactic R names.
mechaCar_table <- read.csv(
  file = "MechaCar_mpg.csv",
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Examine the first rows of the data
head(mechaCar_table)
vehicle_length vehicle_weight spoiler_angle ground_clearance AWD mpg
1 14.69710 6407.946 48.78998 14.64098 1 49.04918
2 12.53421 5182.081 90.00000 14.36668 1 36.76606
3 20.00000 8337.981 78.63232 12.25371 0 80.00000
4 13.42849 9419.671 55.93903 12.98936 1 18.94149
5 15.44998 3772.667 26.12816 15.10396 1 63.82457
6 14.45357 7286.595 30.58568 13.10695 0 48.54268
# Multiple linear regression on the MechaCar data: mpg modelled on the five
# other columns. The fitted coefficients are printed.
lm(
  mpg ~ vehicle_length + vehicle_weight + spoiler_angle +
    ground_clearance + AWD,
  data = mechaCar_table
)
Call:
lm(formula = mpg ~ vehicle_length + vehicle_weight + spoiler_angle +
ground_clearance + AWD, data = mechaCar_table)
Coefficients:
(Intercept) vehicle_length vehicle_weight spoiler_angle ground_clearance
-1.040e+02 6.267e+00 1.245e-03 6.877e-02 3.546e+00
AWD
-3.411e+00
# Summarize the same multiple regression to obtain the per-coefficient
# p-values and the multiple R-squared.
summary(
  lm(
    mpg ~ vehicle_length + vehicle_weight + spoiler_angle +
      ground_clearance + AWD,
    data = mechaCar_table
  )
)
Call:
lm(formula = mpg ~ vehicle_length + vehicle_weight + spoiler_angle +
ground_clearance + AWD, data = mechaCar_table)
Residuals:
Min 1Q Median 3Q Max
-19.4701 -4.4994 -0.0692 5.4433 18.5849
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.040e+02 1.585e+01 -6.559 5.08e-08 ***
vehicle_length 6.267e+00 6.553e-01 9.563 2.60e-12 ***
vehicle_weight 1.245e-03 6.890e-04 1.807 0.0776 .
spoiler_angle 6.877e-02 6.653e-02 1.034 0.3069
ground_clearance 3.546e+00 5.412e-01 6.551 5.21e-08 ***
AWD -3.411e+00 2.535e+00 -1.346 0.1852
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8.774 on 44 degrees of freedom
Multiple R-squared: 0.7149, Adjusted R-squared: 0.6825
F-statistic: 22.07 on 5 and 44 DF, p-value: 5.35e-11
# Scatterplots: mpg on the x axis against each of the other columns.
# These calls need ggplot2 to be attached.

# mpg by vehicle_length
plt <- ggplot(mechaCar_table, aes(x = mpg, y = vehicle_length))
plt +
  geom_point(size = 2) +
  labs(x = "Fuel Efficiency (MPG)", y = "Vehicle Length")

# mpg by vehicle_weight
plt <- ggplot(mechaCar_table, aes(x = mpg, y = vehicle_weight))
plt +
  geom_point(size = 2) +
  labs(x = "Fuel Efficiency (MPG)", y = "Vehicle Weight")

# mpg by spoiler_angle
plt <- ggplot(mechaCar_table, aes(x = mpg, y = spoiler_angle))
plt +
  geom_point(size = 2) +
  labs(x = "Fuel Efficiency (MPG)", y = "Spoiler Angle")

# mpg by ground_clearance
plt <- ggplot(mechaCar_table, aes(x = mpg, y = ground_clearance))
plt +
  geom_point(size = 2) +
  labs(x = "Fuel Efficiency (MPG)", y = "Ground Clearance")

# mpg by AWD
plt <- ggplot(mechaCar_table, aes(x = mpg, y = AWD))
plt +
  geom_point(size = 2) +
  labs(x = "Fuel Efficiency (MPG)", y = "All Wheel Drive")
# For comparison: summarize the single-predictor model mpg ~ vehicle_weight
summary(lm(mpg ~ vehicle_weight, data = mechaCar_table))
Call:
lm(formula = mpg ~ vehicle_weight, data = mechaCar_table)
Residuals:
Min 1Q Median 3Q Max
-35.816 -11.252 -2.121 8.573 33.201
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.042e+01 7.784e+00 5.193 4.17e-06 ***
vehicle_weight 7.649e-04 1.213e-03 0.631 0.531
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.67 on 48 degrees of freedom
Multiple R-squared: 0.008223, Adjusted R-squared: -0.01244
F-statistic: 0.398 on 1 and 48 DF, p-value: 0.5311
# For comparison: summarize the single-predictor model mpg ~ vehicle_length
summary(lm(formula = mpg ~ vehicle_length, data = mechaCar_table))
Call:
lm(formula = mpg ~ vehicle_length, data = mechaCar_table)
Residuals:
Min 1Q Median 3Q Max
-26.303 -7.160 -1.231 9.374 26.670
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -25.0622 13.2960 -1.885 0.0655 .
vehicle_length 4.6733 0.8774 5.326 2.63e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 12.47 on 48 degrees of freedom
Multiple R-squared: 0.3715, Adjusted R-squared: 0.3584
F-statistic: 28.37 on 1 and 48 DF, p-value: 2.632e-06
# For comparison: summarize the single-predictor model mpg ~ ground_clearance
summary(lm(formula = mpg ~ ground_clearance, data = mechaCar_table))
Call:
lm(formula = mpg ~ ground_clearance, data = mechaCar_table)
Residuals:
Min 1Q Median 3Q Max
-28.788 -9.990 -1.615 7.332 35.803
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 19.4175 10.8662 1.787 0.0803 .
ground_clearance 2.0222 0.8385 2.412 0.0198 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 14.86 on 48 degrees of freedom
Multiple R-squared: 0.1081, Adjusted R-squared: 0.08949
F-statistic: 5.816 on 1 and 48 DF, p-value: 0.01975
# For comparison: summarize the single-predictor model mpg ~ spoiler_angle
summary(lm(formula = mpg ~ spoiler_angle, data = mechaCar_table))
Call:
lm(formula = mpg ~ spoiler_angle, data = mechaCar_table)
Residuals:
Min 1Q Median 3Q Max
-35.024 -11.772 -1.469 9.451 35.228
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 46.07616 6.92823 6.650 2.53e-08 ***
spoiler_angle -0.01659 0.11488 -0.144 0.886
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.73 on 48 degrees of freedom
Multiple R-squared: 0.0004343, Adjusted R-squared: -0.02039
F-statistic: 0.02086 on 1 and 48 DF, p-value: 0.8858
# For comparison: summarize the single-predictor model mpg ~ AWD
summary(lm(formula = mpg ~ AWD, data = mechaCar_table))
Call:
lm(formula = mpg ~ AWD, data = mechaCar_table)
Residuals:
Min 1Q Median 3Q Max
-32.945 -12.627 0.363 8.503 32.687
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 47.313 3.115 15.189 <2e-16 ***
AWD -4.368 4.405 -0.992 0.326
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.57 on 48 degrees of freedom
Multiple R-squared: 0.02007, Adjusted R-squared: -0.0003449
F-statistic: 0.9831 on 1 and 48 DF, p-value: 0.3264
# Deliverable 2:
# Read in the Suspension_Coil data as coil_table.
coil_table <- read.csv(
  file = "Suspension_Coil.csv",
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Summarize the suspension coil's PSI continuous variable across all
# manufacturing lots as total_summary. The input is not grouped, so no
# .groups argument is needed.
total_summary <- coil_table %>%
  summarize(
    Mean = mean(PSI),
    Median = median(PSI),
    Variance = var(PSI),
    SD = sd(PSI)
  )

# Summarize the same PSI metrics (mean, median, variance, standard deviation)
# for each lot as lot_summary. .groups = "keep" leaves the result grouped by
# Manufacturing_Lot; call ungroup() before any further whole-table work.
lot_summary <- coil_table %>%
  group_by(Manufacturing_Lot) %>%
  summarize(
    Mean = mean(PSI),
    Median = median(PSI),
    Variance = var(PSI),
    SD = sd(PSI),
    .groups = "keep"
  )
# Deliverable 3:
# One-sample t-test of PSI across all lots against a mean of 1500 psi.
# all_psi is a one-column data frame; [["PSI"]] extracts the numeric vector.
all_psi <- coil_table["PSI"]
t.test(all_psi[["PSI"]], mu = 1500)
One Sample t-test
data: all_psi[["PSI"]]
t = -1.8931, df = 149, p-value = 0.06028
alternative hypothesis: true mean is not equal to 1500
95 percent confidence interval:
1497.507 1500.053
sample estimates:
mean of x
1498.78
# One-sample t-test of PSI for Lot1 against a mean of 1500 psi
lot1_psi <- subset(coil_table, Manufacturing_Lot == "Lot1")
t.test(lot1_psi[["PSI"]], mu = 1500)
One Sample t-test
data: lot1_psi[["PSI"]]
t = 0, df = 49, p-value = 1
alternative hypothesis: true mean is not equal to 1500
95 percent confidence interval:
1499.719 1500.281
sample estimates:
mean of x
1500
# One-sample t-test of PSI for Lot2 against a mean of 1500 psi
lot2_psi <- subset(coil_table, Manufacturing_Lot == "Lot2")
t.test(lot2_psi[["PSI"]], mu = 1500)
One Sample t-test
data: lot2_psi[["PSI"]]
t = 0.51745, df = 49, p-value = 0.6072
alternative hypothesis: true mean is not equal to 1500
95 percent confidence interval:
1499.423 1500.977
sample estimates:
mean of x
1500.2
# One-sample t-test of PSI for Lot3 against a mean of 1500 psi
lot3_psi <- subset(coil_table, Manufacturing_Lot == "Lot3")
t.test(lot3_psi[["PSI"]], mu = 1500)
One Sample t-test
data: lot3_psi[["PSI"]]
t = -2.0916, df = 49, p-value = 0.04168
alternative hypothesis: true mean is not equal to 1500
95 percent confidence interval:
1492.431 1499.849
sample estimates:
mean of x
1496.14