-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1-Explore.R
57 lines (36 loc) · 1.11 KB
/
1-Explore.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#1-Explore.R
# ------- Summarize dataset
# Size of the data as rows x columns (expected: 120 rows, 5 columns)
dim(dataset)
# Class of every column
sapply(dataset, class)
# Peek at the first few rows
head(dataset)
# Levels of the factor column; three levels are expected, one per
# species of iris
levels(dataset$Species)
# Class distribution: absolute counts next to their share in percent
species_counts <- table(dataset$Species)
percentage <- prop.table(species_counts) * 100
cbind(freq = species_counts, percentage = percentage)
# Summary statistics for every attribute
summary(dataset)
# ------- Visualize dataset
# It's useful to be able to look at the input attributes separately from
# the output.
x <- dataset[, 1:4]
y <- dataset[, 5]
# Boxplot for each of the measurements, side by side in a single row.
# par() returns the previous settings; keep them so the layout can be
# restored afterwards, otherwise the class plot below would be squeezed
# into one cell of the 1 x n grid.
old_par <- par(mfrow = c(1, ncol(x)))
for (i in seq_along(x)) {
  # Title each panel from the plotted data itself rather than from the
  # built-in iris data set, so titles stay correct if the columns differ.
  boxplot(x[[i]], main = names(x)[i])
}
par(old_par)
# Bar plot for the class breakdown
plot(y)
# ------- Multivariate plots
# Scatterplot matrix (caret's "ellipse" plot type)
featurePlot(x = x, y = y, plot = "ellipse")

# Box-and-whisker plots for each attribute
featurePlot(x = x, y = y, plot = "box")

# Density plots for each attribute by class value; both axes use
# relation = "free" so their scales are not forced to be shared
scales <- setNames(rep(list(list(relation = "free")), 2), c("x", "y"))
featurePlot(x = x, y = y, plot = "density", scales = scales)