-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculateCoocStatistics.R
47 lines (42 loc) · 1.51 KB
/
calculateCoocStatistics.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Rank all terms by how strongly they co-occur with a target term.
#
# Arguments:
#   coocTerm  A single string: the column name of the target term in binDTM.
#   binDTM    Document-term matrix (documents in rows, terms in columns, with
#             column names), given as a base matrix or a Matrix {Matrix}
#             object. Entries greater than 1 are clamped to 1.
#   measure   "DICE", "LOGLIK" or "MI"; any other string selects the raw
#             co-occurrence counts.
#
# Returns:
#   A named numeric vector with one entry per term other than coocTerm,
#   sorted in decreasing order of the chosen statistic.
calculateCoocStatistics <- function(coocTerm, binDTM, measure) {
  # Matrix {Matrix} supplies the methods needed for sparse input. It is
  # attached whenever it is available; its absence is only fatal when the
  # input is not a plain base matrix.
  matrix_attached <- suppressWarnings(
    require("Matrix", quietly = TRUE, character.only = TRUE)
  )
  if (!matrix_attached && !is.matrix(binDTM)) {
    stop(
      "Package 'Matrix' is required when 'binDTM' is not a base matrix.",
      call. = FALSE
    )
  }

  if (!is.character(coocTerm) || length(coocTerm) != 1L ||
        !coocTerm %in% colnames(binDTM)) {
    stop("'coocTerm' must be a single column name of 'binDTM'.", call. = FALSE)
  }

  # Ensure binary DTM
  if (any(binDTM > 1)) {
    binDTM[binDTM > 1] <- 1
  }

  # Numbers needed for the statistics:
  #   k   number of documents
  #   kj  number of documents containing each term
  #   ki  number of documents containing the target term
  #   kij number of documents containing both the target term and each term
  k <- nrow(binDTM)
  kj <- colSums(binDTM)
  names(kj) <- colnames(binDTM)
  ki <- kj[[coocTerm]]
  # Only the target term's row of t(binDTM) %*% binDTM is needed, so compute
  # that single row instead of the full terms x terms matrix.
  kij <- crossprod(binDTM[, coocTerm, drop = FALSE], binDTM)[1, ]
  names(kij) <- colnames(binDTM)

  # x * log(x) with the convention 0 * log(0) = 0; the plain expression gives
  # NaN for zero counts and would poison the log-likelihood score.
  x_log_x <- function(x) {
    out <- x * log(x)
    out[which(x == 0)] <- 0
    out
  }
  sort_desc <- function(x) {
    x[order(x, decreasing = TRUE)]
  }

  sig <- switch(measure,
    DICE = sort_desc(2 * kij / (ki + kj)),
    LOGLIK = sort_desc(
      2 * (x_log_x(k) - x_log_x(ki) - x_log_x(kj) + x_log_x(kij) +
             x_log_x(k - ki - kj + kij) +
             x_log_x(ki - kij) + x_log_x(kj - kij) -
             x_log_x(k - ki) - x_log_x(k - kj))
    ),
    MI = sort_desc(log(k * kij / (ki * kj))),
    # Fallback: raw co-occurrence counts
    sort(kij, decreasing = TRUE)
  )

  # Drop the target term's score against itself
  sig[-match(coocTerm, names(sig))]
}