-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.R
45 lines (39 loc) · 1.42 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Analyzing the song lyrics of two artists, using one album from each
# STEP 1 - Add libraries
library(geniusR)
library(tidytext)
library(tidyverse)
# STEP 2 - Downloading the lyrics of one album per artist
# The later steps read a `lyric` column from each of these tables.
Taylor_Swift <- genius_album(artist = "Taylor Swift", album = "Reputation")
Ed_Sheeran <- genius_album(artist = "Ed Sheeran", album = "Divide")
# Show what was downloaded. print() is used rather than cat() because cat()
# only handles atomic vectors and stops with an error on a data frame.
print(Taylor_Swift)
print(Ed_Sheeran)
# STEP 3 - Cleaning data (tokenize lyrics, remove stop words, count words)
# Each pipeline splits the `lyric` column into one word per row, drops every
# word listed in `stop_words`, and counts what is left, most frequent first.
# The join key is spelled out so the anti-join does not depend on whichever
# column names the two tables happen to share (and prints no "Joining" message).
tidy_Taylor <- Taylor_Swift %>%
  unnest_tokens(word, lyric) %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)
tidy_Ed <- Ed_Sheeran %>%
  unnest_tokens(word, lyric) %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)
head(tidy_Taylor)
head(tidy_Ed)
# STEP 4 - Putting both datasets on a common scale
# Raw counts depend on how many words each album contains, so every count is
# also expressed as a proportion of that artist's total counted words.
tidy_Taylor <- tidy_Taylor %>%
  rename(swift = n) %>%
  mutate(swift_prop = swift / sum(swift))
# Start from the Ed Sheeran word counts built in STEP 3 (`tidy_Ed`).
tidy_Ed <- tidy_Ed %>%
  rename(ed = n) %>%
  mutate(ed_prop = ed / sum(ed))
head(tidy_Taylor)
head(tidy_Ed)
# STEP 5 - Comparing both lyrics
# Merge the two word tables on `word`. A full join keeps words found in
# either table; a word missing from one table gets NA in that table's columns.
compare_words <- full_join(tidy_Taylor, tidy_Ed, by = "word")
summary(compare_words)
# STEP 6 - Plotting graph for visualization
# Each word is placed at (Taylor Swift proportion, Ed Sheeran proportion).
# geom_abline() draws the y = x reference line: words close to it are used at
# a similar rate by both artists.
# Words used by only one artist have an NA proportion after the full join in
# STEP 5, so ggplot2 leaves them out of the plot (with a warning).
ggplot(compare_words, aes(x = swift_prop, y = ed_prop)) +
  geom_abline() +
  geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +
  labs(x = "Taylor Swift", y = "Ed Sheeran") +
  theme_classic()