-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathword_cloud.R
41 lines (35 loc) · 1.03 KB
/
word_cloud.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
## load rtweet
library(rtweet)
## load ggplot2
library(ggplot2)
## Read in the streamed data: parse the JSON file at data/stream.json
## (a path relative to the current working directory) and keep the result in `d`
d <- parse_stream("data/stream.json")
## Clean tweet text and build a word-frequency table.
##
## Arguments:
##   data        object with a `text` element (e.g. a data frame of tweets);
##               `data$text` is passed through plain_tweets().
##   hashtag     literal string removed from the text before tokenising,
##               matched case-insensitively. Defaults to "#nca17".
##   stopword_p  English rows of `stopwordslangs` whose `p` is greater than
##               this value are treated as stop words. Defaults to .99.
##
## Returns a table of word counts sorted from most to least frequent.
clean_text_table <- function(data, hashtag = "#nca17", stopword_p = .99) {
  txt <- tolower(plain_tweets(data$text))
  ## drop every ampersand (presumably left-overs of HTML entities -- confirm)
  txt <- gsub("&", "", txt, fixed = TRUE)
  ## the text is already lower-cased, so a fixed match against the
  ## lower-cased hashtag is case-insensitive
  if (length(hashtag) == 1L && !is.na(hashtag) && nzchar(hashtag)) {
    txt <- gsub(tolower(hashtag), "", txt, fixed = TRUE)
  }
  ## split on runs of any whitespace so tabs and line breaks separate words
  ## too, not only single spaces
  txt <- unlist(strsplit(txt, "[[:space:]]+"))
  ## strip punctuation from the start and end of each token
  txt <- gsub("^[[:punct:]]{1,}|[[:punct:]]{1,}$", "", txt)
  txt <- trimws(txt)
  txt <- txt[txt != ""]
  ## remove English stop words above the requested threshold
  is_stop <- stopwordslangs$lang == "en" & stopwordslangs$p > stopword_p
  swds <- stopwordslangs$word[is_stop]
  txt <- txt[!txt %in% swds]
  sort(table(txt), decreasing = TRUE)
}
## create frequency table of popular words from the streamed tweets that
## were read into `d` earlier in this script
wds <- clean_text_table(d)
## stop before opening the graphics device if there is nothing to plot
stopifnot(length(wds) > 0)
## calc min freq for word cloud: the 75th percentile of the word counts
minfreq <- quantile(as.double(wds), .75)
## save word cloud as an 8 x 8 inch PNG at 300 dpi, one directory above the
## current working directory
png("../nca17-wc.png", width = 8, height = 8, units = "in", res = 300)
par(bg = "black")
## NOTE(review): gg_cols() is not defined in this file -- confirm which
## attached package (or session object) provides it
wordcloud::wordcloud(
  words = names(wds),
  freq = as.integer(wds),
  min.freq = minfreq,
  random.color = FALSE,
  random.order = FALSE,
  colors = gg_cols(6)
)
## close the PNG device so the file is written to disk
dev.off()