-
Notifications
You must be signed in to change notification settings - Fork 1
/
.Rhistory
512 lines (512 loc) · 32.7 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
# Fetch the zipped activity data set (only when it is not already on disk),
# extract it, and read the CSV into `activity`.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
zipFile <- "activity.zip"
csvFile <- "activity.csv"
if (!file.exists(zipFile)) {
  message(paste("Downloading", zipFile))
  # mode = "wb" keeps the binary archive intact on Windows; the default
  # download method is used so no external curl binary is required.
  download.file(fileUrl, destfile = zipFile, mode = "wb")
} else {
  message(paste("File exists;", zipFile))
}
# Extract whenever the CSV is missing. Previously unzip() only ran right after
# a fresh download, so an existing zip without an extracted CSV made the
# read.csv() call below fail.
if (!file.exists(csvFile)) {
  unzip(zipFile)
}
# Plain `<-` is sufficient at top level; `<<-` is not needed here.
activity <- read.csv(csvFile)
#head(activity)
# list the objects in the current environment (ls() does not list files)
ls()
# Mean and median of the summed step counts held in `totalstepsperday`,
# ignoring missing values.
# The statistics are taken over the `steps` column: `interval` is the grouping
# key of that table (see the aggregate(steps ~ interval, ...) calls in this
# history), so averaging it says nothing about step counts.
mutspd <- round(mean(totalstepsperday$steps, na.rm = TRUE), 8)
mdtspd <- round(median(totalstepsperday$steps, na.rm = TRUE), 8)
# Report both values. paste0() has no `sep` argument; passing one merely
# appended a stray trailing space to the message, so it is dropped.
paste0("The daily mean number of steps per day omiting 'NAs'is; ", mutspd)
paste0("The daily median number of steps per day omiting 'NAs'is: ", mdtspd)
# Set the CRAN entry of the `repos` option so package installs work
# non-interactively. local() keeps the temporary variable out of the global
# environment. The mirror is addressed over HTTPS rather than plain HTTP.
local({
  repos <- getOption("repos")
  repos["CRAN"] <- "https://cran.r-project.org"
  options(repos = repos)
})
# set document options: make echo = TRUE the default knitr chunk option
# (presumably so each chunk's source is shown in the rendered report)
knitr::opts_chunk$set(echo = TRUE)
# Fetch the zipped activity data set (only when it is not already on disk),
# extract it, and read the CSV into `activity`.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Factivity.zip"
zipFile <- "activity.zip"
csvFile <- "activity.csv"
if (!file.exists(zipFile)) {
  message(paste("Downloading", zipFile))
  # mode = "wb" keeps the binary archive intact on Windows; the default
  # download method is used so no external curl binary is required.
  download.file(fileUrl, destfile = zipFile, mode = "wb")
} else {
  message(paste("File exists;", zipFile))
}
# Extract whenever the CSV is missing. Previously unzip() only ran right after
# a fresh download, so an existing zip without an extracted CSV made the
# read.csv() call below fail.
if (!file.exists(csvFile)) {
  unzip(zipFile)
}
# Plain `<-` is sufficient at top level; `<<-` is not needed here.
activity <- read.csv(csvFile)
#head(activity)
# list the objects in the current environment (ls() does not list files)
ls()
library(lubridate)
# Record a timestamp to report alongside the data download.
# NOTE(review): this is the time the statement runs, not necessarily the time
# the archive was fetched -- the download is skipped when the zip already
# exists. Confirm that is acceptable for the report.
# dated <- today()
timeN <- now()
# paste0() has no `sep` argument; passing one only appended a trailing space.
paste0("Date downloaded: ", timeN)
# convert the raw activity dates to Date class
activity$date <- as.Date(activity$date)
# Lower-case weekday name for every observation.
# The earlier call passed `abbreviate = TRUE` to as.Date(), where it is
# silently ignored, so the names were never abbreviated. The no-op argument
# and the redundant second as.Date() are dropped; full names are kept so the
# resulting values are unchanged.
weekdays <- tolower(weekdays(activity$date))
# Sum `steps` within each level of `interval` (aggregate() is a stats
# function, not a dplyr one).
# NOTE(review): the formula groups by `interval`, while the plot labels below
# talk about per-day totals -- confirm whether `steps ~ date` was intended.
totalstepsperday <- aggregate(steps ~ interval, data = activity, FUN = sum)
# earlier attempts, left disabled
# mntspd <- round(mean(totalstepsperday$interval),6)
# mntspd
# mdtspd <- round(median(totalstepsperday$interval),6)
# mdtspd
# one plotting panel with compact margins
par(mfrow = c(1, 1), mar = c(4, 4, 2, 1))
# base-graphics histogram of the summed step counts
hist(
  totalstepsperday$steps,
  main = "Total Steps Per Day (Oct & Nov 2012)",
  xlab = "Total - Steps per Day",
  ylab = "Frequency - Days",
  col = "brown"
)
# Mean and median of the summed step counts in `totalstepsperday`, ignoring
# missing values. Both statistics are taken over the `steps` column; the
# earlier `$interval` reference averaged the grouping key instead of the
# step counts.
mutspd <- round(mean(totalstepsperday$steps, na.rm = TRUE), 8)
mdtspd <- round(median(totalstepsperday$steps, na.rm = TRUE), 8)
# paste0() has no `sep` argument; passing one only appended a trailing space.
paste0("The daily mean number of steps per day omiting 'NAs'is; ", mutspd)
paste0("The daily median number of steps per day omiting 'NAs'is: ", mdtspd)
# Same computation, reported with the reworded messages.
mutspd <- round(mean(totalstepsperday$steps, na.rm = TRUE), 8)
mdtspd <- round(median(totalstepsperday$steps, na.rm = TRUE), 8)
paste0("The mean daily number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily number of steps omiting 'NAs'is: ", mdtspd)
# Successive refinements of the mean/median report. In every report the
# `sep = " "` argument is removed: paste0() has no `sep`, so it only appended
# a stray trailing space to the message.

# mean and median of the summed step counts, 8 decimal places
mutspd <- round(mean(na.omit(totalstepsperday$steps)), 8)
mdtspd <- round(median(na.omit(totalstepsperday$steps)), 8)
paste0("The mean daily number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily number of steps omiting 'NAs'is: ", mdtspd)

# same statistics rounded to 2 decimal places
mutspd <- round(mean(na.omit(totalstepsperday$steps)), 2)
mdtspd <- round(median(na.omit(totalstepsperday$steps)), 2)
paste0("The mean daily total number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily total number of steps omiting 'NAs'is: ", mdtspd)

# statistics over the raw `activity$steps` observations
# NOTE(review): these are per-record values, not totals, although the message
# still says "daily total" -- the next step switches back to the aggregate.
mutspd <- round(mean(na.omit(activity$steps)), 2)
mdtspd <- round(median(na.omit(activity$steps)), 2)
paste0("The mean daily total number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily total number of steps omiting 'NAs'is: ", mdtspd)

# back to the aggregated totals; these are the values left in mutspd / mdtspd
mutspd <- round(mean(na.omit(totalstepsperday$steps)), 2)
mdtspd <- round(median(na.omit(totalstepsperday$steps)), 2)
paste0("The mean daily total number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily total number of steps omiting 'NAs'is: ", mdtspd)
# Build two copies of `activity` whose missing `steps` are filled in, one with
# the mean and one with the median, then aggregate the mean-imputed copy.
# The aggregation used to come first, before `imputedDatamean` had been
# created; it now follows the imputation it depends on.
library(Hmisc)
library(dplyr)
# work on copies so the original data set is preserved
imputedDatamean <- activity
imputedDatamedian <- activity
# replace missing step values with the mean of the observed values
imputedDatamean$steps <- with(imputedDatamean, impute(steps, mean))
# replace missing step values with the median of the observed values
imputedDatamedian$steps <- with(imputedDatamedian, impute(steps, median))
# sum the mean-imputed steps within each level of `interval`
totalstepsperdayImp1 <- aggregate(steps ~ interval, imputedDatamean, sum)
# Sum the imputed steps within each level of `interval`, once for the
# mean-imputed copy and once for the median-imputed copy.
totalstepsperdayImp1 <- aggregate(steps ~ interval, data = imputedDatamean, FUN = sum)
totalstepsperdayImp2 <- aggregate(steps ~ interval, data = imputedDatamedian, FUN = sum)
# disabled earlier attempt at the summary statistics
# mtspdImp <- round(mean(totalstepsperdayImp1$interval),2)
# mdspdImp <- round(median(totalstepsperdayImp2$interval),2)
# return the message stating the value of the mean
# paste0("The mean total daily steps when imputing 'NAs' with the mean of total daily steps is; ", mtspdImp, sep = " ")
# two panels side by side
par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
# one histogram per imputation strategy
hist(
  totalstepsperdayImp1$steps,
  main = "Steps Per Day (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using the Imputed Mean for NA values",
  ylab = "Frequency",
  col = "brown"
)
hist(
  totalstepsperdayImp2$steps,
  main = "Steps Per Day (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using the Imputed Median for NA values",
  ylab = "Frequency",
  col = "brown"
)
# Second pass: same aggregation and layout, with shortened titles and labels.
totalstepsperdayImp1 <- aggregate(steps ~ interval, data = imputedDatamean, FUN = sum)
totalstepsperdayImp2 <- aggregate(steps ~ interval, data = imputedDatamedian, FUN = sum)
# disabled earlier attempt at the summary statistics
# mtspdImp <- round(mean(totalstepsperdayImp1$interval),2)
# mdspdImp <- round(median(totalstepsperdayImp2$interval),2)
# return the message stating the value of the mean
# paste0("The mean total daily steps when imputing 'NAs' with the mean of total daily steps is; ", mtspdImp, sep = " ")
par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
hist(
  totalstepsperdayImp1$steps,
  main = "Daily Steps (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Mean for NA values",
  ylab = "Frequency",
  col = "brown"
)
hist(
  totalstepsperdayImp2$steps,
  main = "Daily Steps (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Median for NA values",
  ylab = "Frequency",
  col = "brown"
)
# Compare the statistics computed with NAs omitted (mutspd / mdtspd) against
# those from the imputed aggregates. `mntspd3` is the mean of the mean-imputed
# totals and `mdtspd4` the median of the median-imputed totals.
# In every report the `sep = " "` argument is removed: paste0() has no `sep`,
# so it only appended a stray trailing space to the message.

# first pass: 8 decimal places
mntspd3 <- round(mean(totalstepsperdayImp1$steps), 8)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 8)
paste0("The daily mean steps when omiting NAs is: ", mutspd, ", compared to the daily mean steps when imputing NAs: ", mntspd3)
paste0("The daily median steps when omitinf NAs is: ", mdtspd, ", compared to the daily median steps when imputing NAs: ", mdtspd4)

# second pass: 2 decimal places
mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
paste0("The daily mean steps when omiting NAs is: ", mutspd, ", compared to the daily mean steps when imputing NAs: ", mntspd3)
paste0("The daily median steps when omiting NAs is: ", mdtspd, ", compared to the daily median steps when imputing NAs: ", mdtspd4)

# third pass: messages name the imputation strategy
mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
paste0("The daily mean steps when omiting NAs is: ", mutspd, ", compared to the daily mean steps when imputing NAs using the mean of the step values: ", mntspd3)
paste0("The daily median steps when omiting NAs is: ", mdtspd, ", compared to the daily median steps when imputing NAs using the medina of the step values: ", mdtspd4)
# Further rewordings of the omitted-vs-imputed comparison. `mntspd3` is the
# mean of the mean-imputed totals and `mdtspd4` the median of the
# median-imputed totals, both rounded to 2 decimal places.
# In every report the `sep = " "` argument is removed: paste0() has no `sep`,
# so it only appended a stray trailing space to the message.

mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
paste0("The daily mean value for the steps variable when omiting NAs is: ", mutspd, ", compared to the daily mean steps when imputing NAs using the mean of the step values: ", mntspd3)
paste0("The daily median value for the steps variable when omiting NAs is: ", mdtspd, ", compared to the daily median steps when imputing NAs using the medina of the step values: ", mdtspd4)

mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
paste0("The total number of steps taken per day when omiting NAs is: ", mutspd, ", compared total number of steps per day when imputing the missing or NA values with the mean of the step value is: ", mntspd3)
paste0("The daily median value for the steps variable when omiting NAs is: ", mdtspd, ", compared to the daily median steps when imputing NAs using the medina of the step values: ", mdtspd4)

mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
paste0("The mean total number of steps taken per day when omiting NAs is: ", mutspd, ", compared to the mean total number of steps taken per day when imputing NAs with the mean of the step value is: ", mntspd3)
paste0("The median total number of steps taken per day when omiting NAs is: ", mdtspd, ", compared to the median total number of steps taken per day when imputing NAs with the median of the step values: ", mdtspd4)
# Final rewordings of the omitted-vs-imputed comparison. `mntspd3` is the mean
# of the mean-imputed totals and `mdtspd4` the median of the median-imputed
# totals, both rounded to 2 decimal places.
# In every report the `sep = " "` argument is removed: paste0() has no `sep`,
# so it only appended a stray trailing space to the message.

mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
paste0("The MEAN total number of steps taken per day when omiting NAs was calculated to be: ", mutspd, ", compared to the MEAN total number of steps taken per day when imputing NAs with the mean of the step variable which is: ", mntspd3)
paste0("The MEDIAN total number of steps taken per day when omiting NAs is: ", mdtspd, ", compared to the MEDIAN total number of steps taken per day when imputing NAs with the median of the step values: ", mdtspd4)

mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
paste0("The MEAN total number of steps taken per day when omiting NAs was calculated to be: ", mutspd, ", compared to the MEAN total number of steps taken per day when imputing the missing step values or NAs with the mean of the step variable resulting in: ", mntspd3)
paste0("The MEDIAN total number of steps taken per day when omiting NAs is: ", mdtspd, ", compared to the MEDIAN total number of steps taken per day when imputing NAs with the median of the step values: ", mdtspd4)
# Interactive inspection of the imputed aggregates and their statistics.
totalstepsperdayImp1
# Closing parenthesis restored: the unterminated call swallowed the following
# lines and made them unparseable.
mean(totalstepsperdayImp1$steps)
# median of the median-imputed totals, rounded to 2 decimal places
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
mean(totalstepsperdayImp1$steps)
# mean() of a whole data frame returns NA with a warning, so the `steps`
# column is selected explicitly.
mean(totalstepsperdayImp2$steps)
mean(totalstepsperdayImp2$steps)
mean(totalstepsperdayImp1$steps)
mntspd3
mdtspd4
# temporarily store the *mean* of the median-imputed totals for comparison
mdtspd4 <- round(mean(totalstepsperdayImp2$steps), 2)
mdtspd4
mntspd3
mdtspd4
# restore the median so `mdtspd4` ends up holding what its name promises
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
mdtspd4
# Statistics of the imputed aggregates, shown on the console.
# (disabled re-aggregation attempts kept below)
# totalstepsperday3 <- aggregate(steps ~ interval, totalstepsperdayImp1, sum)
# totalstepsperday4 <- aggregate(steps ~ interval, totalstepsperdayImp2, sum)
# mean of the mean-imputed totals, 2 decimal places
mntspd3 <- round(mean(totalstepsperdayImp1[["steps"]]), digits = 2)
# unrounded mean for comparison
print(mean(totalstepsperdayImp1[["steps"]]))
# median of the median-imputed totals, 2 decimal places
mdtspd4 <- round(median(totalstepsperdayImp2[["steps"]]), digits = 2)
print(mntspd3)
print(mdtspd4)
# Second pass: recompute and show the rounded values only.
# totalstepsperday3 <- aggregate(steps ~ interval, totalstepsperdayImp1, sum)
# totalstepsperday4 <- aggregate(steps ~ interval, totalstepsperdayImp2, sum)
mntspd3 <- round(mean(totalstepsperdayImp1[["steps"]]), digits = 2)
mdtspd4 <- round(median(totalstepsperdayImp2[["steps"]]), digits = 2)
print(mntspd3)
print(mdtspd4)
# Sum `steps` within each level of `interval` (aggregate() is a stats
# function, not a dplyr one).
# NOTE(review): the formula groups by `interval`, while the plot labels below
# talk about per-day totals -- confirm whether `steps ~ date` was intended.
totalstepsperday <- aggregate(steps ~ interval, data = activity, FUN = sum)
# earlier attempts, left disabled
# mntspd <- round(mean(totalstepsperday$interval),6)
# mntspd
# mdtspd <- round(median(totalstepsperday$interval),6)
# mdtspd
# one plotting panel with compact margins
par(mfrow = c(1, 1), mar = c(4, 4, 2, 1))
# base-graphics histogram of the summed step counts
hist(
  totalstepsperday$steps,
  main = "Total Steps Per Day (Oct & Nov 2012)",
  xlab = "Total - Steps per Day",
  ylab = "Frequency - Days",
  col = "brown"
)
# Mean and median of the summed step counts, NAs ignored. The three passes
# differ only in how many decimal places the median is rounded to (4, 6, 2).
# In every report the `sep = " "` argument is removed: paste0() has no `sep`,
# so it only appended a stray trailing space to the message.

mutspd <- round(mean(na.omit(totalstepsperday$steps)), 2)
mdtspd <- round(median(na.omit(totalstepsperday$steps)), 4)
paste0("The mean daily total number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily total number of steps omiting 'NAs'is: ", mdtspd)

mutspd <- round(mean(na.omit(totalstepsperday$steps)), 2)
mdtspd <- round(median(na.omit(totalstepsperday$steps)), 6)
paste0("The mean daily total number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily total number of steps omiting 'NAs'is: ", mdtspd)

mutspd <- round(mean(na.omit(totalstepsperday$steps)), 2)
mdtspd <- round(median(na.omit(totalstepsperday$steps)), 2)
paste0("The mean daily total number of steps omiting 'NAs'is; ", mutspd)
paste0("The median daily total number of steps omiting 'NAs'is: ", mdtspd)
# Sum the imputed steps within each level of `interval` and draw the two
# histograms stacked vertically (2 rows, 1 column).
totalstepsperdayImp1 <- aggregate(steps ~ interval, data = imputedDatamean, FUN = sum)
totalstepsperdayImp2 <- aggregate(steps ~ interval, data = imputedDatamedian, FUN = sum)
par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
hist(
  totalstepsperdayImp1$steps,
  main = "Daily Steps (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Mean for NA values",
  ylab = "Frequency",
  col = "brown"
)
hist(
  totalstepsperdayImp2$steps,
  main = "Daily Steps (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Median for NA values",
  ylab = "Frequency",
  col = "brown"
)
# Second pass: identical aggregation and plots.
totalstepsperdayImp1 <- aggregate(steps ~ interval, data = imputedDatamean, FUN = sum)
totalstepsperdayImp2 <- aggregate(steps ~ interval, data = imputedDatamedian, FUN = sum)
par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
hist(
  totalstepsperdayImp1$steps,
  main = "Daily Steps (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Mean for NA values",
  ylab = "Frequency",
  col = "brown"
)
hist(
  totalstepsperdayImp2$steps,
  main = "Daily Steps (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Median for NA values",
  ylab = "Frequency",
  col = "brown"
)
# Third pass: same data, more descriptive titles.
totalstepsperdayImp1 <- aggregate(steps ~ interval, data = imputedDatamean, FUN = sum)
totalstepsperdayImp2 <- aggregate(steps ~ interval, data = imputedDatamedian, FUN = sum)
par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
hist(
  totalstepsperdayImp1$steps,
  main = "Histogram of Total Daily Step Frequency (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Mean for NA values",
  ylab = "Frequency",
  col = "brown"
)
hist(
  totalstepsperdayImp2$steps,
  main = "Histogram of Total Daily Step Frequency (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Median for NA values",
  ylab = "Frequency",
  col = "brown"
)
# Mean number of steps for each 5-minute `interval`, computed on the rows of
# `activity` that contain no missing values (the grouping is by interval, not
# by day of the week).
mudailysteps <- aggregate(steps ~ interval, data = na.omit(activity), FUN = mean)
# line plot of the mean steps against the interval identifier
with(
  mudailysteps,
  plot(
    interval, steps,
    type = "l",
    main = "Average Daily Steps in 5-Minute Intervals (Oct - Nov 2012)",
    xlab = "Intervals / time of day ~ in 5-Minute Increments",
    ylab = " Average Daily Steps"
  )
)
# Mean steps for every interval / day-type combination, as input for the
# weekday-vs-weekend comparison plot.
# NOTE(review): `Impv2` is not created anywhere in the visible history --
# confirm where it comes from (later steps use `imputedDatamean` instead).
dailystepsmean <- aggregate(steps ~ interval + typeday, data = Impv2, FUN = mean)
# the same statement was issued a second time
dailystepsmean <- aggregate(steps ~ interval + typeday, data = Impv2, FUN = mean)
library(dplyr)
library(lattice)
# keep a copy of the imputed data as it was before the day-type column is added
imputedDatamean2 <- imputedDatamean
# Label every observation as "weekend" or "weekday".
# as.POSIXlt()$wday is numeric (0 = Sunday, 6 = Saturday), so the test no
# longer depends on the session locale. Matching weekdays() against the
# English names "Saturday"/"Sunday" classified every row as "weekday" under
# a non-English locale.
imputedDatamean$typeday <- as.factor(
  ifelse(as.POSIXlt(imputedDatamean$date)$wday %in% c(0, 6), "weekend", "weekday")
)
# mean steps for every interval / day-type combination
dailystepsmean <- aggregate(steps ~ interval + typeday, imputedDatamean, mean)
# one line-plot panel per day type
xyplot(steps ~ interval | factor(typeday), data = dailystepsmean, aspect = .8, type = "l")
# Mean of the mean-imputed totals and median of the median-imputed totals,
# both rounded to 2 decimal places.
mntspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
mntspd3
mdtspd4
# Effect of the imputation on the mean.
# The first two attempts at this report did not parse (missing comma before
# the value, a back-quoted expression, and the undefined name `mntspd`); they
# are replaced by the single working form.
diff1 <- mntspd3 - mutspd
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1)
# Effect of the imputation on the median. The message previously described
# this value as the *mean* difference.
diff2 <- mdtspd4 - mdtspd
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)
# Differences between the imputed and NA-omitted statistics.
# `diff1` is the change in the mean, `diff2` the change in the median. The
# second message of each pass now says so: it previously described `diff2`
# as a "mean" difference, and later as using "imputed mean values".

mutspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
diff1 <- mutspd3 - mutspd
diff2 <- mdtspd4 - mdtspd
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1)
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)

mutspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
diff1 <- mutspd3 - mutspd
diff2 <- mdtspd4 - mdtspd
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1)
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)

mutspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
diff1 <- mutspd3 - mutspd
diff2 <- mdtspd4 - mdtspd
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1)
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)
# relative change of the mean (a fraction, not yet scaled to percent)
percentDiff <- diff1/mutspd
percentDiff
# Differences between the imputed and NA-omitted statistics, with the change
# in the mean also expressed as a percentage.
mutspd3 <- round(mean(totalstepsperdayImp1$steps), 2)
mdtspd4 <- round(median(totalstepsperdayImp2$steps), 2)
diff1 <- mutspd3 - mutspd
diff2 <- mdtspd4 - mdtspd
# relative change of the mean as a fraction
percentDiff <- diff1/mutspd
percentDiff
# Scale to percent before reporting: the fraction itself used to be printed
# next to a "%" sign.
percentDiff <- round(percentDiff, 2) * 100
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1, "or", percentDiff,"%")
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)

# Second pass. The mistyped `round(percentDiff.2)` (an undefined name) is
# removed, and the rounded fraction is multiplied by 100 so the "%" is correct.
percentDiff <- diff1/mutspd
percentDiff <- round(percentDiff, 2) * 100
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1, "or", percentDiff,"%")

# Third pass: show the raw fraction, then report it as a percentage.
percentDiff <- diff1/mutspd
percentDiff
percentDiff <- round(percentDiff, 2) * 100
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1, "or", percentDiff,"%")
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)
# Imputed statistics, their differences from the NA-omitted values, and the
# change in the mean as a whole-number percentage.
# mean of the mean-imputed totals / median of the median-imputed totals
mutspd3 <- round(mean(totalstepsperdayImp1[["steps"]]), digits = 2)
mdtspd4 <- round(median(totalstepsperdayImp2[["steps"]]), digits = 2)
diff1 <- mutspd3 - mutspd
diff2 <- mdtspd4 - mdtspd
# show the raw fraction, then convert it to percent
percentDiff <- diff1 / mutspd
print(percentDiff)
percentDiff <- round(percentDiff, digits = 2) * 100
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1, "or", percentDiff, "%")
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)
# Second pass: same computation without printing the intermediate fraction.
mutspd3 <- round(mean(totalstepsperdayImp1[["steps"]]), digits = 2)
mdtspd4 <- round(median(totalstepsperdayImp2[["steps"]]), digits = 2)
diff1 <- mutspd3 - mutspd
diff2 <- mdtspd4 - mdtspd
percentDiff <- round(diff1 / mutspd, digits = 2) * 100
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using imputed mean values for the NAs is:", diff1, "or", percentDiff, "%")
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using imputed median values for the NAs is:", diff2)
# count the observations whose `steps` value is missing
totalnas <- sum(is.na(activity[["steps"]]))
# report the count
paste0("The total number of rows containing 'NA' values is: ", totalnas)
# Sum the imputed steps within each level of `interval` and draw the two
# histograms side by side (1 row, 2 columns).
totalstepsperdayImp1 <- aggregate(steps ~ interval, data = imputedDatamean, FUN = sum)
totalstepsperdayImp2 <- aggregate(steps ~ interval, data = imputedDatamedian, FUN = sum)
par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
hist(
  totalstepsperdayImp1$steps,
  main = "Histogram of Total Daily Step Frequency (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Mean for NA values",
  ylab = "Frequency",
  col = "brown"
)
hist(
  totalstepsperdayImp2$steps,
  main = "Histogram of Total Daily Step Frequency (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Median for NA values",
  ylab = "Frequency",
  col = "brown"
)
# Second pass: same data and layout with a reworded title.
totalstepsperdayImp1 <- aggregate(steps ~ interval, data = imputedDatamean, FUN = sum)
totalstepsperdayImp2 <- aggregate(steps ~ interval, data = imputedDatamedian, FUN = sum)
par(mfrow = c(1, 2), mar = c(4, 4, 2, 1))
hist(
  totalstepsperdayImp1$steps,
  main = "Histogram of Total Daily Steps vs Frequency (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Mean for NA values",
  ylab = "Frequency",
  col = "brown"
)
hist(
  totalstepsperdayImp2$steps,
  main = "Histogram of Total Daily Steps vs Frequency (Oct & Nov 2012)",
  xlab = "Sum of Steps per Day using Imputed Median for NA values",
  ylab = "Frequency",
  col = "brown"
)
# Imputed statistics, their differences from the NA-omitted values, and the
# change in the mean as a whole-number percentage.
# mean of the mean-imputed totals / median of the median-imputed totals
mutspd3 <- round(mean(totalstepsperdayImp1[["steps"]]), digits = 2)
mdtspd4 <- round(median(totalstepsperdayImp2[["steps"]]), digits = 2)
diff1 <- mutspd3 - mutspd
diff2 <- mdtspd4 - mdtspd
# relative change of the mean, rounded and scaled to percent
percentDiff <- round(diff1 / mutspd, digits = 2) * 100
paste("The difference between the mean total daily steps ingoring the NA values and the mean total daily steps using the imputed mean values for the NAs is:", diff1, "or", percentDiff, "%")
paste("The difference between the median total daily steps ingoring the NA values and the median total daily steps using the imputed median values for the NAs is:", diff2)
library(dplyr)
library(lattice)
# keep a copy of the imputed data as it was before the day-type column is added
imputedDatamean2 <- imputedDatamean
# Label every observation as "weekend" or "weekday".
# as.POSIXlt()$wday is numeric (0 = Sunday, 6 = Saturday), so the test no
# longer depends on the session locale. Matching weekdays() against the
# English names "Saturday"/"Sunday" classified every row as "weekday" under
# a non-English locale.
imputedDatamean$typeday <- as.factor(
  ifelse(as.POSIXlt(imputedDatamean$date)$wday %in% c(0, 6), "weekend", "weekday")
)
# mean steps for every interval / day-type combination
dailystepsmean <- aggregate(steps ~ interval + typeday, imputedDatamean, mean)
# one line-plot panel per day type
xyplot(steps ~ interval | factor(typeday), data = dailystepsmean, aspect = .8, type = "l")
# Capturing the results of the randomForest prediction on the test data set as results:
# a two-column data frame pairing each problem id with its predicted class.
# NOTE(review): `testing2` and `predictionstesting2` are not created in the
# visible history -- they must already exist in the session.
results <- data.frame(problem_id= testing2$X.problem_id., classe = predictionstesting2)
#rm(list=ls())
# Set the CRAN entry of the `repos` option (over HTTPS rather than plain
# HTTP); local() keeps the temporary variable out of the global environment.
local({
  r <- getOption("repos")
  r["CRAN"] <- "https://cran.r-project.org"
  options(repos = r)
})
# NOTE(review): hard-coded, machine-specific working directory; setwd() fails
# when the path does not exist. Left unchanged because the relative file
# names below depend on it.
setwd("~/Desktop/Coursera_R/Completed_Courses/8_PracticalMachineLearning/PracticalMachineLearning")
# Data
# The training data for this project are available here:
# https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv
# The test data are available here:
# https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv
# The data for this project come from this source:
# http://groupware.les.inf.puc-rio.br/har.
# Download the training data set (only when it is not already on disk), using
# the HTTPS address documented above.
trainUrl <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"
csvTrainingFile <- "pml-training.csv"
if (!file.exists(csvTrainingFile)) {
  message(paste("Downloading", csvTrainingFile))
  # the destination reuses `csvTrainingFile` so the existence check and the
  # download cannot drift apart; the default download method is used so no
  # external curl binary is required
  download.file(trainUrl, destfile = csvTrainingFile)
} else {
  message(paste("File exists;", csvTrainingFile))
}
# NOTE(review): this records the time the statement runs, even when the
# download above was skipped.
dateDownLoaded <- date()
#rm(list=ls())
# Set the CRAN entry of the `repos` option (over HTTPS rather than plain
# HTTP); local() keeps the temporary variable out of the global environment.
local({
  r <- getOption("repos")
  r["CRAN"] <- "https://cran.r-project.org"
  options(repos = r)
})
# NOTE(review): hard-coded, machine-specific working directory; setwd() fails
# when the path does not exist. Left unchanged because the relative file
# names below depend on it.
setwd("~/Desktop/Coursera_R/Completed_Courses/8_PracticalMachineLearning/PracticalMachineLearning")
# Data
# The training data for this project are available here:
# https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv
# The test data are available here:
# https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv
# The data for this project come from this source:
# http://groupware.les.inf.puc-rio.br/har.
# Download the training data set (only when it is not already on disk), using
# the HTTPS address documented above.
trainUrl <- "https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"
csvTrainingFile <- "pml-training.csv"
if (!file.exists(csvTrainingFile)) {
  message(paste("Downloading", csvTrainingFile))
  # the destination reuses `csvTrainingFile` so the existence check and the
  # download cannot drift apart; the default download method is used so no
  # external curl binary is required
  download.file(trainUrl, destfile = csvTrainingFile)
} else {
  message(paste("File exists;", csvTrainingFile))
}
# NOTE(review): this records the time the statement runs, even when the
# download above was skipped.
dateDownLoaded <- date()
# Capturing the results of the randomForest prediction on the test data set as results:
# a two-column data frame pairing each problem id with its predicted class.
# NOTE(review): `testing2` and `predictionstesting2` are not created in the
# visible history -- they must already exist in the session.
results <- data.frame(problem_id= testing2$X.problem_id., classe = predictionstesting2)
# switch the working directory to another course project
# NOTE(review): hard-coded, machine-specific path; setwd() fails when it does not exist.
setwd("~/Desktop/Coursera_R/Completed_Courses/9_DevelopingDataProducts/DDP_CourseProject")