-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSentimentAnalysis.R
117 lines (97 loc) · 2.76 KB
/
SentimentAnalysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Package setup ----
# Every package this script attaches, in attach order.
required_packages <- c(
  "syuzhet", "tm", "wordcloud", "wordcloud2",
  "lubridate", "scales", "reshape2", "dplyr"
)

# Install only what is missing, so re-running the script does not contact
# CRAN and reinstall every package each time.
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

library(syuzhet)
library(tm)
library(wordcloud)
library(wordcloud2)
library(lubridate)
library(scales)
library(reshape2)
library(dplyr)
### Import Data
# Prompt for the tweets CSV; the code below reads its `handle` and `text`
# columns.
tweets <- read.csv(file.choose(), header = TRUE)
str(tweets)
# Only the tweets from Trump
# `%in%` never yields NA, so a row whose handle is missing stays a real row
# instead of becoming an all-NA row, which is what indexing with `==` does.
tweets <- tweets[!(tweets$handle %in% "HillaryClinton"), ]
# Tweets per remaining handle. table() counts character and factor columns
# alike; summary() on a character column only reports length/class/mode.
table(tweets$handle)
# Build corpus
library(tm) # text mining
# "utf-8-mac" is a macOS-specific encoding name that iconv on other
# platforms generally rejects; plain "UTF-8" is portable. sub = "" drops
# bytes that cannot be converted instead of turning the whole tweet into NA.
tweet_text <- iconv(tweets$text, to = "UTF-8", sub = "")
corpus <- Corpus(VectorSource(tweet_text))
inspect(corpus[1:5])
# Clean text
# tolower is a plain character function, so it is wrapped in
# content_transformer() to keep the corpus documents intact.
corpus <- tm_map(corpus, content_transformer(tolower)) # all lowercase
inspect(corpus[1:5])
corpus <- tm_map(corpus, removePunctuation)
inspect(corpus[1:5])
corpus <- tm_map(corpus, removeNumbers)
inspect(corpus[1:5])
# Filler words like "the" and "and"
cleanset <- tm_map(corpus, removeWords, stopwords("english"))
inspect(cleanset[1:5])
# Takes out the link at the end of tweets. This runs after removePunctuation,
# by which point a link should have collapsed into a single "http..."
# alphanumeric token that the pattern below matches.
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
cleanset <- tm_map(cleanset, content_transformer(removeURL))
inspect(cleanset[1:5])
cleanset <- tm_map(cleanset, stripWhitespace)
inspect(cleanset[1:5])
# Term document matrix
tdm <- TermDocumentMatrix(cleanset)
tdm
# Dense matrix form, as used by rowSums() below.
tdm <- as.matrix(tdm)
# Preview the top-left corner (up to 10 rows by 20 columns), clamped to the
# matrix size so a small corpus does not raise "subscript out of bounds".
tdm[
  seq_len(min(10, nrow(tdm))),
  seq_len(min(20, ncol(tdm))),
  drop = FALSE
]
# Bar plot
# Total occurrences of each term across all documents; keep terms seen at
# least 25 times.
w <- rowSums(tdm)
w <- w[w >= 25]
if (length(w) > 0) {
  barplot(
    w,
    las = 2,
    col = rainbow(50)
  )
} else {
  # barplot() errors on an empty vector, so report and skip the plot instead.
  message("No term occurs at least 25 times; skipping the bar plot.")
}
# Word cloud
library(wordcloud)
# Term totals, most frequent first.
w <- sort(rowSums(tdm), decreasing = TRUE)
set.seed(222) # fixed seed so repeated runs draw the same cloud
wordcloud(
  words = names(w),
  freq = w,
  max.words = 150,
  random.order = FALSE, # spelled out: `F` is an ordinary, reassignable variable
  min.freq = 5,
  colors = brewer.pal(8, "Dark2"),
  scale = c(5, 0.3),
  rot.per = 0.7
)
library(wordcloud2)
# wordcloud2() is given a two-column data frame: the word and its frequency.
w <- data.frame(word = names(w), freq = w)
wordcloud2(
  w,
  size = 0.7,
  shape = "triangle",
  rotateRatio = 0.5,
  minSize = 1
)
# Sentiment analysis for Trump - could do the same for Hillary and compare
# sentiments
# Prompts for the tweets CSV again and re-applies the handle filter, so this
# section does not depend on objects created earlier in the script.
tweets2 <- read.csv(file.choose(), header = TRUE)
# `%in%` never yields NA, so rows with a missing handle do not turn into
# all-NA rows.
tweets2 <- tweets2[!(tweets2$handle %in% "HillaryClinton"), ]
# "utf-8-mac" is a macOS-specific encoding name; "UTF-8" is portable, and
# sub = "" drops unconvertible bytes rather than yielding NA for the tweet.
trump <- iconv(tweets2$text, to = "UTF-8", sub = "")
# Obtain sentiment scores
s <- get_nrc_sentiment(trump)
head(s)
# Spot checks: one tweet's text, and the scores for a single word.
trump[5]
get_nrc_sentiment("debate")
# Bar plot
# Column totals of the score table, one bar per score column.
barplot(
  colSums(s),
  las = 2,
  col = rainbow(10),
  ylab = "Count",
  main = "Sentiment Scores for Trump Tweets"
)
### Now Sentiment for Hillary
# Prompts for the tweets CSV and drops the Trump account's rows.
tweets3 <- read.csv(file.choose(), header = TRUE)
# `%in%` never yields NA, so rows with a missing handle do not turn into
# all-NA rows.
tweets3 <- tweets3[!(tweets3$handle %in% "realDonaldTrump"), ]
# "utf-8-mac" is a macOS-specific encoding name; "UTF-8" is portable, and
# sub = "" drops unconvertible bytes rather than yielding NA for the tweet.
clinton <- iconv(tweets3$text, to = "UTF-8", sub = "")
# Obtain sentiment scores
s2 <- get_nrc_sentiment(clinton)
head(s2)
# Bar plot
# Column totals of the score table, one bar per score column.
barplot(
  colSums(s2),
  las = 2,
  col = rainbow(10),
  ylab = "Count",
  main = "Sentiment Scores for Clinton Tweets"
)