forked from CrumpLab/LabJournalWebsite
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Final.Rmd
144 lines (111 loc) · 5.27 KB
/
Final.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
---
title: "Final Project"
output:
  pdf_document: default
  html_document: default
---
```{r setup, include=FALSE}
library(usmap)
library(ggplot2)
# Read the two input tables from CSV files in the working directory.
# Later chunks refer to them as `table11` and `table12`.
table11 <- read.csv("table11.csv")
table12 <- read.csv("table12.csv")
```
## R Markdown
```{r}
# Summary of table12 as loaded, for comparison with the cleaned version below.
summary(table12)

# These columns have "," stripped and are then stored as integers.
comma_formatted <- c(
  "Number.of.participating.agencies",
  "Population.covered",
  "Total.number.of.incidents.reported"
)
for (column in comma_formatted) {
  table12[[column]] <- as.integer(gsub(",", "", table12[[column]]))
}

# Summary again, now that the three columns are integer.
summary(table12)
```
First, check whether the number of participating agencies is correlated with
the population covered.
```{r pressure, echo=FALSE}
# Strongly penalise scientific notation when numbers are printed or used as
# axis tick labels.
options(scipen = 999)

population <- table12$Population.covered
agency_count <- table12$Number.of.participating.agencies

# Scatter plot over the full population range.
plot(population, agency_count,
  xlab = "Population", ylab = "Number of Agencies"
)

# Same scatter plot with the x axis limited to 0 - 12,000,000.
plot(population, agency_count,
  xlab = "Population", ylab = "Number of Agencies",
  xlim = c(0, 12000000)
)

# Correlation between population covered and number of agencies.
cor(population, agency_count)
```
Next, see how many of the participating agencies actually submit incident reports.
The data show that only a small share of participating agencies submit reports.
```{r}
# Per-state share of participating agencies that submitted incident reports.
#
# The columns are given syntactic names up front. The previous version set
# spaced labels with `colnames<-` and then called transform(), which rebuilds
# the frame through data.frame(check.names = TRUE) and silently rewrote those
# labels to the dotted names used here. The resulting data frame is unchanged.
percent_participate_table <- data.frame(
  States = table12$Participating.state.Federal,
  Agencies.Submitting.Reports = table12$Agencies.submitting.incident.reports,
  Agencies.Participating = table12$Number.of.participating.agencies
)

# freq: agencies submitting reports as a percentage of participating agencies.
percent_participate_table$freq <-
  percent_participate_table$Agencies.Submitting.Reports /
  percent_participate_table$Agencies.Participating * 100
```
There is a correlation, so let's see if we can predict the number of incidents.
First, let's see whether the number of agencies is correlated with the number of incidents reported.
```{r}
# Relationship between the number of participating agencies (x) and the
# number of incidents reported (y).
agency_count <- table12$Number.of.participating.agencies
incident_count <- table12$Total.number.of.incidents.reported

# Full range. The axis labels now describe the plotted variables; they
# previously read "Population" / "Number of Agencies", left over from the
# population plot.
plot(agency_count, incident_count,
  xlab = "Number of agencies",
  ylab = "Number of incidents"
)

# Same plot with the x axis limited to 0 - 800 agencies.
plot(agency_count, incident_count,
  xlab = "Number of agencies",
  ylab = "Number of incidents",
  xlim = c(0, 800)
)

# Correlation between agency count and incident count.
cor(agency_count, incident_count)
```
Let's see whether an increase in the number of agencies means an increase in the number of reports.
```{r}
# Working copies of the two count columns; `partagent` is reused by the
# residual plot in a later chunk.
reported <- table12[["Total.number.of.incidents.reported"]]
partagent <- table12[["Number.of.participating.agencies"]]

# Simple linear regression with agency count as the response and incident
# count as the predictor.
# NOTE(review): the surrounding text talks about agencies driving reports,
# which would be `reported ~ partagent` - confirm the intended direction.
# `data = table12` is kept so the call echoed by summary() is unchanged.
lm_agencies_report <- lm(formula = partagent ~ reported, data = table12)

summary(lm_agencies_report)
```
Checking the linear regression assumptions.
```{r}
# Residuals against the model's predictor.
#
# The x values and the x-axis label are taken from the model frame stored in
# the fitted object (column 1 is the response, column 2 the predictor), so the
# plot always matches the model that was fitted. The previous version plotted
# against `partagent`, which is the response of `partagent ~ reported`, and
# labelled the axis "Scaled speed", a leftover from unrelated code.
model_data <- lm_agencies_report$model
predictor_name <- names(model_data)[2]

plot(model_data[[2]], lm_agencies_report$residuals,
  main = "Residuals vs. x",
  xlab = predictor_name,
  ylab = "Residuals"
)

# Dashed reference line at a residual of zero.
abline(h = 0, lty = "dashed")
```
```{r}
# Third built-in lm diagnostic plot (Scale-Location).
plot(x = lm_agencies_report, which = 3)
```
```{r}
# Two more built-in lm diagnostics, drawn in this order:
# 5 = Residuals vs Leverage, 2 = Normal Q-Q.
for (diagnostic in c(5, 2)) {
  plot(lm_agencies_report, which = diagnostic)
}
```
The regression assumptions are not met, so we cannot conclude that each additional participating agency yields more reports.
Now look at the number of offenses reported in each state.
```{r}
# Column-by-column overview of the offenses table.
summary(object = table11)
```
Total offenses per state, shown next to the reporting percentage (`freq`) computed from table 12.
```{r}
# State, total offenses (table 11) and the reporting percentage computed
# earlier from table 12, side by side.
# NOTE(review): the columns are combined by row position, which assumes
# table11 and table12 list the same states in the same order - confirm.
table11_total <- data.frame(
  table11$Participating.state.Federal,
  table11$Total.offenses,
  percent_participate_table$freq
)
```
Most common crime in each state.
```{r}
# Columns 3 to 15 of table11 are compared row by row.
# presumably these are the per-offense count columns; verify against the CSV.
offense_counts <- table11[3:15]

# For every row, the position of the largest value (first one wins on ties).
top_offense_idx <- max.col(offense_counts, ties.method = "first")

# One row per state: the largest value and the name of the column holding it.
table11_maxperstate <- data.frame(table11$Participating.state.Federal)
table11_maxperstate["max"] <- apply(offense_counts, 1, max)
table11_maxperstate["highest occuring crimes"] <-
  colnames(offense_counts)[top_offense_idx]

# Print both tables.
table11_total
table11_maxperstate
```
```{r}
# Map of the total number of incidents reported, shaded white to dark blue.
#
# The state column is named `state` for plot_usmap(); the commas are stripped
# from the count column before it is converted to numeric.
incidents <- data.frame(
  table12$Participating.state.Federal,
  as.numeric(gsub(",", "", table12$Total.number.of.incidents.reported))
)
colnames(incidents) <- c("state", "Total number of incidents")

# `labels` is spelled out in full; the previous `label =` only worked through
# partial argument matching.
plot_usmap(data = incidents, values = "Total number of incidents") +
  scale_fill_continuous(
    name = "Number of Incidents",
    low = "white",
    high = "darkblue",
    labels = scales::comma
  ) +
  theme(legend.position = "right")
```
```{r}
# Map of the percentage of participating agencies that submitted reports.
#
# The table is rebuilt with a `state` column for plot_usmap(). Columns are
# given syntactic names directly: the previous version set spaced labels and
# then called transform(), which rebuilds the frame through
# data.frame(check.names = TRUE) and rewrote them to these same dotted names.
percent_participate_table <- data.frame(
  state = table12$Participating.state.Federal,
  Agencies.Submitting.Reports = table12$Agencies.submitting.incident.reports,
  Agencies.Participating = table12$Number.of.participating.agencies
)

# freq: agencies submitting reports as a percentage of participating agencies.
percent_participate_table$freq <-
  percent_participate_table$Agencies.Submitting.Reports /
  percent_participate_table$Agencies.Participating * 100

# `labels` is spelled out in full; the previous `label =` only worked through
# partial argument matching.
plot_usmap(data = percent_participate_table, values = "freq") +
  scale_fill_continuous(
    name = "Percent of Participating Agencies",
    low = "white",
    high = "darkblue",
    labels = scales::comma
  ) +
  theme(legend.position = "right")
```
```{r}
# Rename the columns so the state column is called `state` and the
# top-offense column can be referred to as "incidents".
names(table11_maxperstate) <- c("state", "max", "incidents")

# Map each state by its most frequent offense, using a qualitative palette.
plot_usmap(data = table11_maxperstate, values = "incidents") +
  theme(legend.position = "right") +
  scale_fill_brewer(
    name = "Incidents with the highest occurrence",
    type = "qual",
    palette = 1
  )
```