-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsentimentScoreFunc.R
50 lines (45 loc) · 2.81 KB
/
sentimentScoreFunc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#import libraries to work with
library(plyr)
library(stringr)
library(e1071)
#load up word polarity list and format it
afinn_list <- read.delim(file='AFINN/AFINN-111.txt', header=FALSE, stringsAsFactors=FALSE)
names(afinn_list) <- c('word', 'score')
afinn_list$word <- tolower(afinn_list$word)
#categorize words as very negative to very positive and add some movie-specific words
vNegTerms <- afinn_list$word[afinn_list$score==-5 | afinn_list$score==-4]
negTerms <- c(afinn_list$word[afinn_list$score==-3 | afinn_list$score==-2 | afinn_list$score==-1], "second-rate", "moronic", "third-rate", "flawed", "juvenile", "boring", "distasteful", "ordinary", "disgusting", "senseless", "static", "brutal", "confused", "disappointing", "bloody", "silly", "tired", "predictable", "stupid", "uninteresting", "trite", "uneven", "outdated", "dreadful", "bland")
posTerms <- c(afinn_list$word[afinn_list$score==3 | afinn_list$score==2 | afinn_list$score==1], "first-rate", "insightful", "clever", "charming", "comical", "charismatic", "enjoyable", "absorbing", "sensitive", "intriguing", "powerful", "pleasant", "surprising", "thought-provoking", "imaginative", "unpretentious")
vPosTerms <- c(afinn_list$word[afinn_list$score==5 | afinn_list$score==4], "uproarious", "riveting", "fascinating", "dazzling", "legendary")
#function to calculate number of words in each category within a sentence
sentimentScore <- function(sentences, vNegTerms, negTerms, posTerms, vPosTerms){
final_scores <- matrix('', 0, 5)
scores <- laply(sentences, function(sentence, vNegTerms, negTerms, posTerms, vPosTerms){
initial_sentence <- sentence
#remove unnecessary characters and split up by word
sentence <- gsub('[[:punct:]]', '', sentence)
sentence <- gsub('[[:cntrl:]]', '', sentence)
sentence <- gsub('\\d+', '', sentence)
sentence <- tolower(sentence)
wordList <- str_split(sentence, '\\s+')
words <- unlist(wordList)
#build vector with matches between sentence and each category
vPosMatches <- match(words, vPosTerms)
posMatches <- match(words, posTerms)
vNegMatches <- match(words, vNegTerms)
negMatches <- match(words, negTerms)
#sum up number of words in each category
vPosMatches <- sum(!is.na(vPosMatches))
posMatches <- sum(!is.na(posMatches))
vNegMatches <- sum(!is.na(vNegMatches))
negMatches <- sum(!is.na(negMatches))
score <- c(vNegMatches, negMatches, posMatches, vPosMatches)
#add row to scores table
newrow <- c(initial_sentence, score)
final_scores <- rbind(final_scores, newrow)
return(final_scores)
}, vNegTerms, negTerms, posTerms, vPosTerms)
return(scores)
}
Result <- as.data.frame(sentimentScore(sample, vNegTerms, negTerms, posTerms, vPosTerms))
colnames(Result) <- c('sentence', 'vNeg', 'neg', 'pos', 'vPos')