-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
56 lines (46 loc) · 3.02 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#########################################################
#03 week 1 assignment
# April 26. 2014, Rae Woong Park
#########################################################
library(data.table)
#setwd("C:/Users/Administrator/Documents/GitHub/GettingandCleaningData/")
#setwd("C:/Users/Administrator/Documents/data/UCI HAR Dataset/")
# 1. Merges the training and the test sets to create one data set.
# & 3. Uses descriptive activity names to name the activities in the data set
# & 4. Appropriately labels the data set with descriptive activity names.
# 3. Uses descriptive activity names to name the activities in the data set
# Read the activity id -> label lookup table and the per-observation activity
# ids of the training and test sets. File names are spelled in lower case so
# they are also found on case-sensitive file systems.
activity_labels <- fread("activity_labels.txt", sep = "auto", header = FALSE)
trainYData <- fread("./train/y_train.txt", sep = "auto", header = FALSE)
testYData <- fread("./test/y_test.txt", sep = "auto", header = FALSE)
# Look up the label of every observation. The y tables are passed as `i`, so
# the result keeps their original row order. They must NOT be keyed/sorted:
# the rows are later combined positionally (cbind) with the subject and X
# tables, and sorting by activity id would attach labels to the wrong rows.
trainYData <- activity_labels[trainYData, on = "V1"]
testYData <- activity_labels[testYData, on = "V1"]
# Column 1 is the activity id, column 2 (V2) the label text; the label column
# is renamed and later selected by position.
setnames(trainYData, "V2", "activity")
setnames(testYData, "V2", "activity")
# Feature names from the dataset become the column names of the X tables.
# The file name is spelled in lower case, like the other paths below, so it is
# also found on case-sensitive file systems.
col.names <- read.table("features.txt")
# Replace "(" and ")" with "_" so that e.g. "mean()" becomes "mean__".
# Other characters that are invalid in R names (such as "-") are not touched
# here; read.table() adjusts them itself because check.names defaults to TRUE.
col.namesModified <- gsub("[()]", "_", col.names$V2)

# Training set: subject ids and the measurement table.
trainSubjectData <- read.table(
  "./train/subject_train.txt",
  col.names = "subject", sep = "", header = FALSE
)
trainXData <- read.table(
  "./train/X_train.txt",
  sep = "", col.names = col.namesModified, header = FALSE
)
# 4. Appropriately labels the data set with descriptive activity names.
# Columns are bound by row position: subject, activity label (column 2 of the
# y table), then all measurements.
trainData <- cbind(trainSubjectData, trainYData[, 2, with = FALSE], trainXData)

# Test set: same layout as the training set.
testSubjectData <- read.table(
  "./test/subject_test.txt",
  col.names = "subject", sep = "", header = FALSE
)
testXData <- read.table(
  "./test/X_test.txt",
  sep = "", col.names = col.namesModified, header = FALSE
)
# 4. Appropriately labels the data set with descriptive activity names.
testData <- cbind(testSubjectData, testYData[, 2, with = FALSE], testXData)

# 1. Merges the training and the test sets to create one data set.
mergedData <- rbind(trainData, testData)
# Save the combined train + test table as a tab-separated, unquoted text file
# with a header row and no row names.
write.table(
  mergedData,
  file = "mergedData.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
# 2. Extracts only the measurements on the mean and standard deviation for each measurement.
# Keeps the subject and activity columns plus every column whose name contains
# "mean__" or "std__" (case-sensitive match), and stores the result as a
# data.table.
meanStdData <- data.table(
  mergedData[
    ,
    grep("subject|activity|mean__|std__", names(mergedData), value = TRUE)
  ]
)
# 5. Creates a second, independent tidy data set with the average of each variable for each activity and each subject.
# The expression "each activity and each subject" is confusing!
# I treated it as a grouping problem, that is, 'grouping by both subject and activity'.
# Thus each subject can have up to six rows (activities).
# Average every measurement column for each (subject, activity) pair and write
# the result as a tab-separated file with a header row.
setkey(meanStdData, subject, activity)
# lapply() needs the function object `mean`; writing `mean()` would call it
# with no argument and raise an error instead of computing the averages.
summaryDataWrite <- meanStdData[
  ,
  lapply(.SD, mean),
  by = c("subject", "activity")
]
write.table(
  summaryDataWrite,
  file = "tidyData.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
#########################################################
# End of Assignment
#########################################################