# utils.R
# Write Pajek .clu or .vec files, prefixed with the header line
#   *Vertices n
# where n is the number of vertices and x is the vector to write.
writePajek <- function(x, file = 'file') {
  # write the vector first, then overwrite the automatic column-name header
  # with the Pajek '*Vertices n' line
  write.table(x, file = file, row.names = FALSE, quote = FALSE)
  dimx <- length(x)
  f <- readLines(file)
  f[1] <- paste0('*Vertices ', dimx)
  write.table(f, file = file, row.names = FALSE, quote = FALSE, col.names = FALSE)
}
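# Minimal usage sketch (kept inside `if (FALSE)` so sourcing this file has no
# side effects). The membership vector and the file name 'membership.clu' are
# invented for illustration; the resulting file starts with "*Vertices 5",
# followed by one value per line.
if (FALSE) {
  writePajek(c(1, 1, 2, 3, 2), file = 'membership.clu')
}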
# All unique pairs of elements from x and y, each unordered pair appearing
# once; set include.equals = TRUE to also keep self-pairs.
expand.grid.unique <- function(x, y, include.equals = FALSE) {
  x <- unique(x)
  y <- unique(y)
  g <- function(i) {
    # pair x[i] with the y values not already covered by earlier x elements
    z <- setdiff(y, x[seq_len(i - include.equals)])
    if (length(z)) cbind(x[i], z, deparse.level = 0)
  }
  as.data.frame(do.call(rbind, lapply(seq_along(x), g)))
}
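# Hypothetical usage sketch (not run on source): each unordered pair appears
# exactly once and self-pairs are dropped, so this returns the rows
# A-B, A-C and B-C.
if (FALSE) {
  expand.grid.unique(c('A', 'B', 'C'), c('A', 'B', 'C'))
}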
# Build an author-collaboration network for the papers of one group, keeping
# only authors with at least `internal_citations` internal citations.
# Returns a list with node and edge tibbles.
get_authors_network <- function(M,
                                m_groups,
                                hubs_full_info,
                                keep_group = 'g01',
                                internal_citations = 2) {
  # papers (SR identifiers) belonging to the requested group(s)
  m_groups |>
    dplyr::filter(.data$group %in% keep_group) |>
    dplyr::pull(SR) ->
    g0
  # author-collaboration adjacency matrix for those papers
  M |>
    dplyr::filter(SR %in% g0) |>
    {\(x) bibliometrix::biblioNetwork(x, analysis = "collaboration", network = "authors", sep = ";")}() ->
    NetMatrix
  # graph from the adjacency matrix; simplify() drops loops and multiple edges
  igraph::graph_from_adjacency_matrix(NetMatrix) |>
    igraph::simplify() |>
    tidygraph::as_tbl_graph() ->
    net
  # total papers per author within the group
  M |>
    dplyr::filter(SR %in% g0) |>
    tidyr::separate_rows(AU, sep = ';') |>
    dplyr::count(AU, sort = TRUE, name = 'total_papers') |>
    dplyr::rename(name = AU) ->
    total_papers
  # internal citations (ki) per author, summed over their papers
  hubs_full_info |>
    dplyr::filter(SR %in% g0) |>
    dplyr::select(AU, ki) |>
    tidyr::separate_rows(AU, sep = ';') |>
    dplyr::group_by(AU) |>
    dplyr::summarise(ki = sum(ki)) |>
    dplyr::arrange(dplyr::desc(ki)) |>
    dplyr::rename(name = AU) ->
    top_cited
  # combine both statistics; kin = papers per internal citation (0 when ki is 0)
  dplyr::full_join(total_papers, top_cited, by = 'name') |>
    dplyr::mutate(kin = total_papers / ki) |>
    dplyr::mutate(kin = ifelse(kin == Inf, 0, kin)) ->
    tt
  # attach the author statistics to the graph nodes
  net |>
    tidygraph::activate(nodes) |>
    dplyr::left_join(tt, by = 'name') ->
    net
  # keep only authors with enough internal citations
  net |>
    tidygraph::activate(nodes) |>
    dplyr::filter(ki >= internal_citations) ->
    net2
  # community detection: label-propagation membership for each author
  igraph::V(net2)$group <- igraph::cluster_label_prop(net2)$membership
  # node table shaped for visNetwork (id, label, value, title)
  net2 |>
    igraph::as_data_frame(what = 'vertices') |>
    dplyr::mutate(label = name, value = ki) |>
    dplyr::mutate(title = paste0('Papers ', total_papers, '; Cited ', ki)) |>
    dplyr::mutate(title = paste(label, title, sep = '; ')) |>
    dplyr::rename(id = name) ->
    nodes
  # edge table, then return both parts as tibbles
  net2 |>
    igraph::as_data_frame(what = 'edges') ->
    edges
  list(nodes = tibble::tibble(nodes), edges = tibble::tibble(edges))
}
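# Hypothetical usage sketch (not run on source). `M` is assumed to be a
# bibliometrix data frame (e.g. from bibliometrix::convert2df), `m_groups` a
# table mapping each paper (SR) to a group, and `hubs_full_info` a table with
# per-paper internal-citation counts `ki`; none of these inputs are defined in
# this file. The id/label/value/title node columns match what visNetwork
# expects, so the result can be drawn directly.
if (FALSE) {
  net_parts <- get_authors_network(M, m_groups, hubs_full_info,
                                   keep_group = 'g01',
                                   internal_citations = 2)
  visNetwork::visNetwork(net_parts$nodes, net_parts$edges)
}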
# text-cleaning helper used before LDA topic modelling
textcleaner_lda <- function(x) {
  x <- as.character(x)
  x <- x |>
    stringr::str_to_lower() |>                      # convert to lower case
    textclean::replace_contraction() |>             # expand contractions to their multi-word forms
    # textclean::replace_internet_slang() |>        # replace internet slang with normal words
    # textclean::replace_emoji() |>                 # replace emoji with words
    # textclean::replace_emoticon() |>              # replace emoticons with words
    textclean::replace_hash(replacement = "") |>    # remove hashtags
    textclean::replace_word_elongation() |>         # normalise informal word elongation (e.g. "sooo")
    textclean::replace_number(remove = TRUE) |>     # remove numbers
    textclean::replace_date(replacement = "") |>    # remove dates
    textclean::replace_time(replacement = "") |>    # remove times
    stringr::str_remove_all(pattern = "[[:punct:]]") |>  # remove punctuation
    # stringr::str_remove_all(pattern = "[^\\s]*[0-9][^\\s]*") |> # remove tokens mixing letters and digits
    stringr::str_squish() |>                        # collapse repeated internal whitespace
    stringr::str_trim()                             # trim leading and trailing whitespace
  return(as.data.frame(x))
}
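# Hypothetical usage sketch (not run on source): clean a small invented vector
# of abstracts before building a document-term matrix for LDA.
if (FALSE) {
  cleaned <- textcleaner_lda(c("We're testing #topics on 2 docs!!",
                               "Sooo goood results at 10:30 AM :)"))
  cleaned$x  # lower-cased, de-punctuated strings ready for tokenisation
}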