Skip to content

Latest commit

 

History

History
270 lines (229 loc) · 15.5 KB

performance.md

File metadata and controls

270 lines (229 loc) · 15.5 KB

Analysis

Data

library(tidyverse)
library(here)
source(here("src/lib/functions.R"))
gold = read_csv(here("data/intermediate/gold.csv")) %>% rename(gold=value)
scores = read_csv(here("data/intermediate/combined_predictions.csv"), col_types = "iccccddd")
scores = scores %>% filter(is.na(language) | language != "google", variable != "recessie", variable != "boukes")  %>% mutate(value=trichotomize(value))
names = read_csv(here("src/analysis/report_names.csv"))
sections = setNames(nm=unique(names$section))
variables = setNames(nm=unique(names$variable))

Overall performance of various methods

Compute scores per method x repetition, take average score per method:

perf = inner_join(scores, gold) %>% 
  mutate(correct=gold == value)%>% 
  group_by(method, language, variable, repetition) %>% 
  summarize(acc=mean(correct), #cor=cor(gold, value), 
            alpha=alpha(gold, value), 
            pos_precision=precision(value, gold, 1), pos_recall=recall(value, gold, 1), pos_f1=f1(pos_precision, pos_recall),
            neut_precision=precision(value, gold, 0), neut_recall=recall(value, gold, 0), neut_f1=f1(neut_precision, neut_recall),
            neg_precision=precision(value, gold, -1), neg_recall=recall(value, gold, -1), neg_f1=f1(neg_precision, neg_recall)) %>%  
  summarize_at(vars(acc:last_col()), mean) %>% 
  arrange(match(method, c("dictionary", "ml", "crowd", "manual")), language, variable ) %>% 
  ungroup() %>% left_join(names) %>% select(section, name, acc:neg_f1)
table2 = knitr::kable(perf %>% mutate(section=str_remove_all(section, "\\(.*")), digits=2)
table2
section name acc alpha pos_precision pos_recall pos_f1 neut_precision neut_recall neut_f1 neg_precision neg_recall neg_f1
English Dictionaries AFINN 0.43 0.27 0.35 0.38 0.37 0.40 0.50 0.45 0.58 0.38 0.46
English Dictionaries DamstraBoukes 0.42 0.07 0.67 0.08 0.15 0.40 0.98 0.57 1.00 0.02 0.04
English Dictionaries GenInq 0.41 0.26 0.31 0.37 0.34 0.38 0.38 0.38 0.54 0.47 0.51
English Dictionaries HuLiu 0.46 0.34 0.40 0.30 0.34 0.42 0.62 0.50 0.65 0.40 0.50
English Dictionaries LoughranMcDonald 0.50 0.29 0.50 0.14 0.22 0.46 0.79 0.58 0.62 0.43 0.51
English Dictionaries LSD 0.46 0.33 0.39 0.40 0.39 0.42 0.54 0.48 0.62 0.41 0.50
English Dictionaries Muddiman 0.48 0.27 0.48 0.38 0.43 0.46 0.71 0.55 0.57 0.30 0.39
English Dictionaries NRC 0.42 0.23 0.34 0.62 0.44 0.43 0.32 0.37 0.57 0.39 0.46
English Dictionaries RID 0.42 0.06 0.00 0.00 0.00 0.41 0.97 0.57 0.82 0.09 0.16
Dictionaries DANEW 0.42 0.10 0.75 0.08 0.15 0.40 0.97 0.57 0.80 0.04 0.08
Dictionaries DamstraBoukes 0.41 0.05 0.83 0.07 0.13 0.40 0.99 0.57 0.00 0.00 0.00
Dictionaries Muddiman 0.49 0.31 0.53 0.38 0.44 0.46 0.64 0.53 0.53 0.39 0.45
Dictionaries NRC 0.47 0.32 0.39 0.53 0.45 0.46 0.44 0.45 0.59 0.46 0.52
Dictionaries Pattern 0.39 0.07 0.43 0.08 0.14 0.39 0.90 0.54 0.38 0.03 0.06
Dictionaries Polyglot 0.42 0.26 0.38 0.32 0.34 0.39 0.55 0.45 0.53 0.33 0.41
Machine Learning CNN 0.63 0.50 0.68 0.49 0.56 0.58 0.78 0.66 0.72 0.57 0.63
Machine Learning NB 0.58 0.39 0.74 0.34 0.47 0.52 0.83 0.64 0.65 0.47 0.55
Machine Learning SVM 0.57 0.41 0.69 0.37 0.48 0.52 0.79 0.62 0.64 0.48 0.55
Crowd-Coding Single Coder 0.72 0.75 0.69 0.84 0.76 0.69 0.58 0.63 0.78 0.78 0.78
Crowd-Coding Vote (3 Coders) 0.77 0.81 0.73 0.89 0.80 0.74 0.65 0.69 0.83 0.81 0.82
Crowd-Coding Vote (5 Coders) 0.77 0.81 0.73 0.90 0.81 0.73 0.65 0.69 0.84 0.80 0.82
Manual Coding Single Coder 0.82 0.82 0.88 0.86 0.87 0.76 0.81 0.78 0.84 0.80 0.82
Manual Coding Vote (3 Coders) 0.88 0.90 0.97 0.91 0.94 0.82 0.88 0.85 0.87 0.84 0.86

Make markdown table for the appendix.

fn = here("report/figures/table_performance.md")
cat("# Table 2: Overall Performance of the Tested Sentiment Analysis Approaches\n\n", file=fn)
cat(table2, file=fn, sep="\n", append=T)

Make the latex table for the manuscript. I’m using jinja2 template table.tex.j2 which is called with a json string containing the data.

methodnames = setNames(as.list(names$name), names$variable)
table = purrr::map(sections, ~filter(perf, section == .) %>% select(name:neg_f1)) 
table$`Gold Standard` = NULL
render_j2("src/analysis/table_performance.tex.j2", "report/figures/table_performance.tex", data=list(data=table, methods=methodnames))

Bivariate correlations between dictionaries

Create a wide data frame with all methods as a column

wide_scores = scores %>% filter(method=="dictionary") %>%
  mutate(value=trichotomize(value), 
         variable = ifelse(!is.na(repetition), paste(variable, repetition, sep=":"), variable)) %>%
   select(id, variable, value) %>% bind_rows(gold %>% add_column(variable="gold") %>% rename(value=gold)) %>% 
  pivot_wider(id_cols="id", names_from = "variable", values_from = "value")

Compute bivariate correlations, pivot to longer, and average scores with repetition, and pivot back to wide

cors = cor(wide_scores %>% select(-id), use = "pairwise") %>% as_tibble(rownames = "method") %>% 
  pivot_longer(-method, names_to="method2") %>% 
  separate(method, sep=":", into=c("method", "repetition"), fill = "right") %>%
  separate(method2, sep=":", into=c("method2", "repetition2"), fill = "right") %>%
  group_by(method, method2) %>% summarize(value=mean(value)) 
# Surely there is a more R-onic way to convert a key1,key2,value data table into a nested list...?
cors_dict = purrr::map(variables, function(x1) purrr::map(variables, function(x2) cors$value[cors$method==x1 & cors$method2 == x2]))

Create latex table with j2 template

used_names = names %>% filter(variable %in% cors$method)
used_sections = sections[sections %in% used_names$section]
varnames = purrr::map(used_sections, ~filter(names, section == .))
render_j2("src/analysis/table_corr.tex.j2", "report/figures/table_corr.tex", data=list(data=cors_dict, names=varnames))

Correlation between uncertainty and accuracy

For crowd, get number of coders and majority size, compute accuracy per ncoder x majority, and compute cumulative accuracy and coverage:

crowd_cov = scores  %>% inner_join(gold) %>% filter(method == "crowd") %>% 
  mutate(ncoder=as.numeric(str_remove(variable, "crowd")), 
         support=ifelse(ncoder==1, 1, confidence),
         correct=as.numeric(gold==value)) %>% 
  filter(support != 0) %>% select(id, ncoder, support, correct, repetition) %>%
  group_by(ncoder, support) %>% summarize(n=n(), ncorrect=sum(correct), nrep=replace_na(max(repetition), 1)) %>% 
  arrange(ncoder, desc(support))  %>%  
  mutate(accuracy=cumsum(ncorrect)/cumsum(n), coverage=cumsum(n)/(284*nrep), label=str_c(support, ncoder, sep="/"))
knitr::kable(crowd_cov, digits = 2)
ncoder support n ncorrect nrep accuracy coverage label
1 1 1420 1021 5 0.72 1.00 1/1
3 3 1565 1413 10 0.90 0.55 3/3
3 2 1185 708 10 0.77 0.97 2/3
5 5 115 111 1 0.97 0.40 5/5
5 4 86 68 1 0.89 0.71 4/5
5 3 71 31 1 0.77 0.96 3/5

For ML, bin into 10 equal confidence bins per method x repetition, average over repetitions, and compute cumulative accuracy per bin

ml_cov = scores %>% inner_join(gold) %>% filter(method == "ml") %>% 
  mutate(repetition=replace_na(repetition, 0), correct=as.numeric(gold==value)) %>% 
  select(id, variable, repetition, confidence, correct) %>% 
  group_by(variable, repetition) %>% arrange(variable, repetition, confidence) %>% 
  mutate(bin=cut_number(seq_along(id), n=10, labels=1:10)) %>% 
  group_by(variable, bin, repetition) %>% summarize(n=n(), ncorrect=sum(correct)) %>% 
  summarize(n=mean(n), ncorrect=mean(ncorrect)) %>%
  arrange(variable, desc(bin)) %>%
  mutate(accuracy=cumsum(ncorrect) / cumsum(n), coverage=cumsum(n)/284) 
knitr::kable(ml_cov, digits=2)
variable bin n ncorrect accuracy coverage
cnn 10 29 26.6 0.92 0.1
cnn 9 28 21.4 0.84 0.2
cnn 8 28 18.9 0.79 0.3
cnn 7 29 16.9 0.74 0.4
cnn 6 28 13.9 0.69 0.5
cnn 5 28 15.2 0.66 0.6
cnn 4 29 17.7 0.66 0.7
cnn 3 28 16.9 0.65 0.8
cnn 2 28 17.6 0.65 0.9
cnn 1 29 14.2 0.63 1.0
nb 10 29 26.0 0.90 0.1
nb 9 28 20.0 0.81 0.2
nb 8 28 16.0 0.73 0.3
nb 7 29 17.0 0.69 0.4
nb 6 28 14.0 0.65 0.5
nb 5 28 13.0 0.62 0.6
nb 4 29 19.0 0.63 0.7
nb 3 28 14.0 0.61 0.8
nb 2 28 12.0 0.59 0.9
nb 1 29 14.0 0.58 1.0
svm 10 29 23.0 0.79 0.1
svm 9 28 20.0 0.75 0.2
svm 8 28 16.0 0.69 0.3
svm 7 29 15.0 0.65 0.4
svm 6 28 17.0 0.64 0.5
svm 5 28 17.0 0.64 0.6
svm 4 29 14.0 0.61 0.7
svm 3 28 13.0 0.59 0.8
svm 2 28 15.0 0.59 0.9
svm 1 29 13.0 0.57 1.0

Combine into a single plot:

ml_cov2 = ml_cov %>% ungroup() %>% mutate(method=toupper(variable), variable=toupper(variable)) %>% 
  select(method, variable, accuracy, coverage)
crowd_cov2 = crowd_cov %>% ungroup() %>% mutate(variable="Crowd Coding", method=paste0("Crowd (", ncoder, " Coder)")) %>%
  select(method, variable, accuracy, coverage, label)
combined = bind_rows(ml_cov2, crowd_cov2) %>% mutate(variable=fct_reorder(variable, -accuracy))


plot = ggplot(combined, aes(x=coverage, y=accuracy)) + 
  geom_line(aes(group=method, lty=variable, size=ifelse(variable=="Crowd Coding", .5, .75))) + 
  geom_point(aes(shape=variable)) + 
  geom_text(data=combined %>% filter(!label %in% c("3/5", "1/1")), aes(label=label), nudge_y=.008, nudge_x=.004) + 
  geom_text(data=combined %>% filter(label == "3/5"), aes(label=label), nudge_y=-.008, nudge_x=-.004) +
  geom_text(data=combined %>% filter(label == "1/1"), aes(label=label), nudge_y=-.008, nudge_x=-.004) +
  scale_linetype_manual(name="Method", values=c(CNN=4, SVM=2, NB=5, "Crowd Coding"=1)) +
  scale_size_identity() + scale_shape(name="Method")  + 
  xlab("Coverage") + ylab("Accuracy") + 
  scale_x_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1))+
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(.5,1))+
  ggthemes::theme_clean() + theme(legend.position = "top", legend.background = element_blank(),
                                  plot.background  = element_blank())
plot

Save plot to report folder

ggsave(plot=plot, filename="report/figures/fig_coverage.pdf")

Learning curve

curve = rbind(read_csv("data/intermediate/cnn_curve.csv") %>% add_column(method="CNN", .before=1),
              read_csv("data/intermediate/svm_curve.csv") %>% add_column(method="SVM", .before=1))
curve = curve %>% group_by(method, perc) %>% summarize(n=mean(n), acc=mean(acc))

plot = ggplot(curve, aes(x=n, y=acc, group=method, lty=method)) + geom_line() + 
  scale_linetype(name="Method") +
  xlab("Number of training examples") + ylab("Accuracy") + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))+
  ggthemes::theme_clean() + theme(legend.position = "top", legend.background = element_blank(),
                                  plot.background  = element_blank())
plot

Save plot to report folder

ggsave(plot=plot, filename="report/figures/fig_curve.pdf")

Add asymptotic fit (not used in paper):

get_asympt = function(data) coef(nls(acc ~ SSasymp(n, Asym, R0, lrc), data=data))["Asym"]
asym_cnn = get_asympt(filter(curve, method=="CNN"))
asym_svm = get_asympt(filter(curve, method=="SVM"))

ggplot(curve, aes(x=n, y=acc, group=method, lty=method)) + 
  geom_segment(x=0, xend=max(curve$n), y=asym_cnn, yend=asym_cnn, color="grey") +
  geom_smooth(color="grey", lwd=.5, data=filter(curve, method=="CNN"), method='nls', formula=y~SSasymp(x, Asym, R0, lrc), se=F) + 
  geom_segment(x=0, xend=max(curve$n), y=asym_svm, yend=asym_svm, color="grey", lty=2) + 
  geom_smooth(color="grey", lwd=.5, lty=2, data=filter(curve, method=="SVM"), method='nls', formula=y~SSasymp(x, Asym, R0, lrc), se=F) + 
  geom_line()