-
Notifications
You must be signed in to change notification settings - Fork 0
/
EDA_project2.Rmd
85 lines (62 loc) · 3.1 KB
/
EDA_project2.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
---
title: "EDA Project"
output: html_document
---
```{r setup}
library(ggplot2)
library(data.table)
# Load the three source tables. In the two sales files "N/A" and empty
# strings are read as NA.
videogame_ratings <- fread("ign.csv")
videogame_sales <- fread("vgrating.csv", na.strings = c("N/A", ""))
videogame_ranked_sales <- fread("vgsales.csv", na.strings = c("N/A", ""))

# Inspect the structure of each table as loaded.
str(videogame_ranked_sales)
str(videogame_sales)
str(videogame_ratings)

# Rename the rating-table key columns BEFORE taking the subset below, so that
# title_rating carries the same key names (Name, Platform, Year_of_Release)
# as title_year_sales. Renaming after the subset would leave title_rating
# with its original column names and make the merge below impossible.
setnames(
  videogame_ratings,
  old = c(3, 5, 9),
  new = c("Name", "Platform", "Year_of_Release")
)

# Column subsets are selected by position.
# NOTE(review): positions are assumed to match the current CSV layouts;
# confirm against the str() output above.
title_year_sales <- videogame_sales[, c(1, 2, 3, 6, 7, 8, 9, 10)]
title_rating <- videogame_ratings[, c(3, 5, 6, 9)]

title_year_sales$Year_of_Release <- as.integer(title_year_sales$Year_of_Release)

# Convert user scores with as.numeric(): as.integer() would truncate any
# fractional score (e.g. "8.3" becomes 8).
# NOTE(review): assumes unscored games may be marked "tbd"; those are set to
# NA explicitly instead of relying on a coercion warning. Confirm against the
# str() output above.
user_score <- videogame_sales$User_Score
user_score[user_score %in% "tbd"] <- NA
videogame_sales$User_Score <- as.numeric(user_score)

str(title_year_sales)
#merged <- merge(title_year_sales, title_rating, by= c("Name", "Platform", "Year_of_Release"))

# determining best selling games.
# The unnamed sum() in j produces a column called V1.
global_sales_per_platform <- videogame_sales[, sum(Global_Sales), by = "Platform"]
global_sales_per_platform[order(-V1)]

# Same ranking restricted to releases from 2008 onwards. The year is compared
# as an integer; comparing against the string "2008" would coerce the column
# to character and compare lexicographically.
sales_since_08 <- subset(
  videogame_sales,
  as.integer(Year_of_Release) >= 2008L
)
globalsales_since_08 <- sales_since_08[, sum(Global_Sales), by = "Platform"]
globalsales_since_08[order(-V1)]
```
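The plots below show global sales by year, by platform, and by genre, followed by the mean user score per release year.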
```{r}
# Shared base plot: release year on x, global sales on y. Each plot below adds
# exactly one bar layer to this base, so no plot draws its bars twice.
sales_by_year <- ggplot(
  data = videogame_sales,
  aes(x = Year_of_Release, y = Global_Sales)
)

# sales by year
ploty <- sales_by_year + geom_col()
ploty

# by console
# Columns are referenced by bare name inside aes() so they are looked up in
# the layer's data; `videogame_sales$Platform` would bypass that lookup.
nex <- sales_by_year + geom_col(aes(fill = Platform))
nex

# by genre
genre <- sales_by_year + geom_col(aes(fill = Genre)) + coord_flip()
genre

# by rating
# Mean user score per release year. Using bare column names with `data =`
# gives the result the plain column names Year_of_Release and User_Score.
mean_rating <- aggregate(
  User_Score ~ Year_of_Release,
  data = videogame_sales,
  FUN = mean
)
#rating <- ploty + geom_line(data = mean_rating, aes(x = Year_of_Release, y = User_Score))
rating_metacritic <- ggplot(
  data = mean_rating,
  aes(x = Year_of_Release, y = User_Score)
) +
  geom_col()
rating_metacritic
```
```{r num_publishers_per_year}
# change to data.frame
sales <- data.frame(videogame_sales)
str(sales)

# remove anything with year of 2017 or greater
# Rows with a missing year are excluded explicitly: a logical row index that
# contains NA makes `[.data.frame` insert an all-NA row for each NA.
known_before_2017 <- !is.na(sales$Year_of_Release) &
  sales$Year_of_Release < 2017
sales <- sales[known_before_2017, ]
sales$Year_of_Release <- factor(sales$Year_of_Release)

# Distinct publishers per release year, and how many there are.
publishers_per_year <- aggregate(
  Publisher ~ Year_of_Release,
  data = sales,
  FUN = unique
)
num_publishers_per_year <- aggregate(
  Publisher ~ Year_of_Release,
  data = sales,
  FUN = function(x) length(unique(x))
)

# Year_of_Release is a factor here, so it is converted back to its integer
# year via character for a continuous x axis.
ggplot(
  na.omit(num_publishers_per_year),
  aes(x = as.integer(as.character(Year_of_Release)), y = Publisher)
) +
  geom_point(aes(color = Publisher)) +
  xlab("Year") +
  ylab("Number of Active Publishers")
```
```{r num_developers_per_year}
# Distinct developers per release year, taken from the `sales` data frame.
developers_per_year <- aggregate(
  Developer ~ Year_of_Release,
  data = sales,
  FUN = unique
)

# Number of distinct developers per release year.
num_developers_per_year <- aggregate(
  Developer ~ Year_of_Release,
  data = sales,
  FUN = function(devs) length(unique(devs))
)

# Scatter of developer count against year; the year is converted to integer
# via character for the x axis, and the count also drives point colour.
ggplot(
  na.omit(num_developers_per_year),
  aes(x = as.integer(as.character(Year_of_Release)), y = Developer)
) +
  geom_point(aes(color = Developer)) +
  labs(x = "Year", y = "Number of Active Developers")
```