-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.R
364 lines (304 loc) · 17.1 KB
/
app.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
suppressPackageStartupMessages(require(googleVis))
suppressPackageStartupMessages(library(shiny))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(RColorBrewer))
suppressPackageStartupMessages(require(ggplot2))
suppressPackageStartupMessages(library(shinyWidgets))
suppressPackageStartupMessages(library(lattice))
#functions for string maneuvering in Sankey
source("functions_Sankey.R")
# Data #
load(file.path("data", "Sim_Data_Sankey_Long.RData")) # data_long
ageRange <- range(data_long$Age)
Networks <- unique(data_long$Network)
Networks <- Networks[order(Networks)]
Ethnicity <- unique(data_long$ethnic)
# Define UI for application that draws a histogram
ui <- fluidPage(
fluidRow(
# Application title
titlePanel("Example Sankey Plot Using Fake Data"),
# Copy the line below to make a slider bar
wellPanel(column(11,
column(3, sliderInput("age", label = "Desired Age Range:", min = ageRange[1],
max = ageRange[2], value = c(ageRange[1],ageRange[2]))),
column(3, pickerInput("network","Specific Networks:", Networks, selected = Networks, options = list(`actions-box` = TRUE,`selected-text-format`= "count > 5",
`count-selected-text` = "All Networks"), multiple = TRUE)),
column(3, pickerInput("eth","Specified Ethnicities:", Ethnicity, selected = Ethnicity, options = list(`actions-box` = TRUE,`selected-text-format`= "count > 5",
`count-selected-text` = "All Ethnicities"), multiple = TRUE)),
offset = 2)),
column(10, uiOutput(HTML("timelabs")), offset = 3),
column(12, column(3, plotOutput("legend")), column(9, "", column(9,"", htmlOutput("distPlot")))),
#column(12, column(2, h3("Total Proportions at Each Time Point:")), column(10, "", fixedRow(10, plotOutput("stacked")))),
column(12, column(2, h3("Total Proportions at Each Time Point:")), column(10, "", fixedRow(column(10, htmlOutput("barchart"))))),
column(12, column(2, h3("Age Spread at each Time Point:")), column(10, "", fixedRow(column(10, "", plotOutput("ageD"))))),
column(12, column(2, h3("Ethnicity Frequencies at each Time Point:")), column(10, "", fixedRow(column(10, "", plotOutput("ethChart"))))),
column(12, column(2, h3("Network Frequencies at each Time Point:")), column(10, "", fixedRow(column(10, "", plotOutput("NChart")))))
)
)
# Define server logic required to draw a histogram
server <- function(input, output) {
#manipulate data (summarize)
getsub <- reactive({
shiny::validate(
need(input$eth, "Please select at least one ethnicity"),
need(input$network, "Please select at least one network")#,
)
data_long2 <- data_long %>%
filter(Age <= input$age[2] & Age >= input$age[1]) %>%
filter(Network %in% input$network) %>%
filter(ethnic %in% input$eth) #%>%
# creating Sankey Data #
# time point should be the same for each data set if longitudinal
times <- as.character(unique(data_long2$Time))
pairtime <- t(sapply(1:4, function(i) c(times[i], times[i+1])))
# how can I tell the column for the groupings of patients?
# not every data set will have "drug"...
# if all of them have eth, netowrk, age, and time, then just get
# that name then that's left out
#names(data_long2)
levs <- as.character(unique(data_long2$Drug))
levs <- levs[order(levs)]
data_long2$Drug <- factor(data_long2$Drug, levels = levs)
pairsnum <- lapply(1:4, function(x) table(data_long2$Drug[data_long2$Time == pairtime[x,1]],
data_long2$Drug[data_long2$Time == pairtime[x,2]]))
uniqueDrugs <- unique(data_long2$Drug)
uniqueDrugs <- uniqueDrugs[order(uniqueDrugs)]
numD <- length(uniqueDrugs)
numTime <- length(times) - 1
frame1 <- as.character(rep(uniqueDrugs, numD))
frame2 <- c(sapply(1:numD, function(x) rep(uniqueDrugs[x], numD)))
frame <- cbind(frame1, frame2)
frame <- do.call(rbind, replicate(numTime, frame, simplify=FALSE))
unlistwithin <- lapply(1:numTime, function(x) as.numeric(pairsnum[[x]]))
unlistwithin <- as.data.frame(cbind(unlist(unlistwithin), frame))
names(unlistwithin) <- c("Patients", "from", "to")
unlistwithin$from <- paste(sort(rep(times[1:numTime], numD^2)), unlistwithin$from,
sep = " ")
unlistwithin$to <- paste(sort(rep(times[2:(numTime+1)], numD^2)), unlistwithin$to,
sep = " ")
# slowly getting the data into sankey format
# these last couple of lines creates the sourceNodes and targetNodes
# labels, that will be used later
# also, the time comes first, then the Drug initial for these node names
# this will have to change (the substring lines) for however long the drug names are
unlistwithin$Patients <- as.numeric(as.character(unlistwithin$Patients))
#unlistwithin$from <- as.factor(unlistwithin$from);
#unlistwithin$to <- as.factor(unlistwithin$to);
unlistwithin <- unlistwithin[,c(3,2,1)]
# thsi line will have to have supplementary data
# such that we can use the extra data, which will have labels
# for times and drug names, to obtain the proper substrings
# in teh sankey formed data, as for now... it's fine haha
unlistwithin$bigtime <- substring(unlistwithin$from, 1, 6);
unlistwithin$drugname <- substring(unlistwithin$from, 8)
# gets the drug name and time name
timeto <- substring(unlistwithin$to, 1, 6)
drugto <- substring(unlistwithin$to, 8)
# drug name and time name for the target
unlistwithin$from <- paste(unlistwithin$drugname, unlistwithin$bigtime, sep = "-")
unlistwithin$to <- paste(drugto, timeto, sep = "-")
unlistwithin <- unlistwithin %>% arrange(bigtime,drugname)
#unlistwithin2 <- unlistwithin[,c(4, 5)]
#unlistwithin2$from <- factor(unlistwithin2$from, levels = (unique(unlistwithin2$from)))
#unlistwithin2$to <- factor(unlistwithin2$to, levels = (unique(unlistwithin2$to)))
unlistwithin2 <- unlistwithin[,c(2,1,3,4,5)]
#forSankey <- forSankey %>% jsonlite::toJSON()
names(unlistwithin2)[c(1,2,3)] <- c("sourceNode", "targetNode", "Patients")
# creates summary numbers for trajectories
# but not total patients in a node at a certaint ime point
source_ns <- unlistwithin2 %>%
group_by(sourceNode) %>%
summarise(sourceN = sum(Patients)) %>%
ungroup()
target_ns <- unlistwithin2 %>%
group_by(targetNode) %>%
summarise(targetN = sum(Patients)) %>%
ungroup()
unlistwithin2$Patients <- as.numeric(unlistwithin2$Patients)
# googleVis preparation #
dat2_reordered <- unlistwithin2
uniqueTarget <- unique(dat2_reordered$targetNode)
uniqueSource <- unique(dat2_reordered$sourceNode)
# now creating data for total counts per node for ggplot2 to
# have a stacked bar chart with matching colors as the sankey chart
dat2_reordered <- dat2_reordered %>%
inner_join(source_ns, by = 'sourceNode') %>%
inner_join(target_ns, by = 'targetNode') %>%
mutate(
to_from = paste0('<b>', sourceNode, " ⇒ ", targetNode, ':</b>'))
dat3 <- dat2_reordered %>% select(bigtime, targetNode, targetN) %>%
mutate(targetTime = substring(targetNode,3,8)) %>% unique()
dat4 <- dat2_reordered %>% select(bigtime, sourceNode, sourceN) %>% unique()
dat4 <- dat4[1:3,]
dat4$perce <- dat4$sourceN/sum(dat4$sourceN)
dat3$perce <- unlist(tapply(dat3$targetN, dat3$targetTime, function(x) x/sum(x)))
dat3 <- dat3 %>% mutate(Drug = substring(targetNode, 1,1)) %>%
select(targetTime, Drug, targetN, perce)
dat4 <- dat4 %>% mutate(Drug = substring(sourceNode, 1,1)) %>%
select(bigtime, Drug, sourceN, perce)
names(dat3)[1] <- "bigtime"
names(dat3)[3] <- names(dat4)[3] <- "Count"
alldat <- rbind(dat4, dat3)
# automatic color choices that are far away from each other
paints <- brewer.pal(length(uniqueDrugs),"Set1")
# for gg plot
nodestringlist <- paste(unlistwithin2$sourceNode,unlistwithin2$targetNode, collapse = ' ')
# Split them up
nodestringvector <- strsplit(nodestringlist, split =' ')
# Find the unique nodes in order they appear
node_order <- unique(nodestringvector[[1]])
cols <- numeric(length(node_order));
# find the drug name that they have
# make sure the drug name length begins at the start
# of the nodes.
# for automatic creation of sankey plots, I think this step would
# have to be a separate data set
drugords <- substring(node_order, 1,1)
for(i in 1:length(uniqueDrugs)){
cols[drugords == uniqueDrugs[i]] <- paints[i[]]
}
drugcols <- cbind(cols, drugords)
drugcols <- unique(drugcols)
#cols[drugords == uniqueDrugs[1]] <- '#1A237E'
#cols[drugords == "S"] <- '#FF6F00'
#cols[drugords == "C"] <- '#1B5E20'
my_colors <- unique(cols)
colurs <- cols; colurs <- factor(as.character(colurs))
names(colurs) <- drugords
#names(my_colors) <- levels(factor(c(levels(as.factor(alldat$Drug)), levels(as.factor(alldat$Drug)))))
my_scale <- scale_fill_manual(name = "Drug", values = my_colors)
key.trans <- list(title=names(data_long2)[4],
space="top", columns=1, rows = 2, #2
text=list(drugcols[,2]),
lines=list(col= drugcols[,1]),
cex.title=1, cex=.9)
# for densities of age
ageDens <- lattice::densityplot(~Age|Time,
par.settings = list(superpose.line = list(col = drugcols[,1])),
groups = Drug, data = data_long2,layout = c(5,1), plot.points = F,
key = key.trans, from = ageRange[1], to = ageRange[2],
scales=list(y=list(rot=45), x=list(rot=45)))
# frequency plots for ethnicity by time and drug
tab4eth <- table(data_long2$ethnic, data_long2$Time,data_long2$Drug)
ethnicHist <- lattice::barchart(tab4eth, groups = T,
horizontal = F,
par.settings = simpleTheme(col=drugcols[,1]),
layout = c(5,1),
auto.key= T,
scales=list(y=list(rot=45), x=list(rot=45)),
xlab = names(data_long2)[5], ylab = "Frequency")
# frequency plots for network by time and drug
tab4net <- table(data_long2$Network, data_long2$Time,data_long2$Drug)
netHist <- lattice::barchart(tab4net, groups = T,
horizontal = F,
par.settings = simpleTheme(col=drugcols[,1]),
layout = c(5,1),
auto.key= T,
scales=list(y=list(rot=45), x=list(rot=45)),
xlab = names(data_long2)[2], ylab = "Frequency"
)
# automatically makes colors for nodes in the order
# provided by the data set
colsjson <- paste(cols, collapse= "', '")
ncharpar <- nchar(colsjson)
colsjson2 <- insert_str(colsjson[1], c("'", "'"), c(1, ncharpar+1))
colsjson2 <- paste0("[",colsjson2,"]")
#googleVis stuff
begin <- "{link: { colorMode: 'gradient' }, node:{colors:"
end <- ", label: { fontSize: 0.000001, bold: true}, nodePadding: 20, interactivity: true, labelPadding: 10}, iterations: 0}"
forSankey <- paste0(begin, colsjson2, end)
dat2_reordered <- dat2_reordered %>% mutate(
brain.tooltip = ## Target nodes where 100% of patients came from same source:
## N + % (n) of source
ifelse(targetNode %in% uniqueTarget,
paste0(to_from, '<br>N = ', Patients,
'<br>', round((Patients / sourceN) * 100),
'% of ', sourceN, ': ', sourceNode, ' ⇒ ',
'<br>', round((Patients / targetN) * 100),
'% of ', targetN, ': ', targetNode),
## Source nodes where 100% of patients go to same target:
## N + % (n) of target
ifelse(sourceNode %in% uniqueSource,
paste0(to_from, '<br>N = ', Patients, '<br>',
round((Patients / targetN) * 100), '% of ',
targetN, ': ', targetNode),
## Otherwise, add N, % (n) of source, and % (n) of target
paste0(to_from, '<br>N = ', Patients,
'<br>', round((Patients / sourceN) * 100),
'% of ', sourceN, ': ', sourceNode, ' ⇒ ',
'<br>', round((Patients / targetN) * 100),
'% of ', targetN, ': ', targetNode)))
)
drugcols <- as.data.frame(drugcols);
names(drugcols) <- c("Color", "Drug")
drugcols$Color <- as.character(drugcols$Color)
dat2_reordered <- dat2_reordered %>% select(-c(bigtime, drugname))
#alldat$perce.html.tooltip <- paste0('N = ',alldat$Count,'<br>',alldat$perce*100,'%')
widedat <- reshape(alldat, timevar = "Drug", direction = "wide",
idvar = c("bigtime"), drop = c('perce'))
list("my_scale" = my_scale, "alldat" = alldat, "SankeyDat" = dat2_reordered,
"forSankey" = forSankey, "ageD" = ageDens, "ethChart" = ethnicHist,
"NChart" = netHist, "drugcols" = drugcols, "times" = times,
"widedat" = widedat)
})
output$legend <- renderPlot({
par(xpd = FALSE)
plot(NULL ,xaxt='n',yaxt='n',bty='n',ylab='',xlab='', xlim=c(0, 1), ylim=c(0,1))
legend(0, 1, legend=getsub()$drugcols[,2], fill=getsub()$drugcols[,1], title=names(getsub()$drugcols)[2],
horiz = F, cex = 1.2, bty = "n", y.intersp = 2)
}, width = 200, height = 500)
output$timelabs <- renderUI({
times <- paste(getsub()$times, collapse = " ")
div(tags$div(HTML(times)))
})
#output$labels <- renderText({})
output$stacked <- renderPlot({
ggplot(getsub()$alldat, aes(fill=Drug, y=perce, x=bigtime)) +
geom_bar(position="fill", stat="identity",
width = .4) + ylab("Percent of Patients") +
xlab("Time Point") +
geom_text(aes(label = paste0(round(perce * 100), '% \n',
"N = ", Count)),
position = position_stack(vjust = 0.5), color = "white") +
theme_classic() + getsub()$my_scale +
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14,face="bold"),
axis.title.x = element_text(size = 14, face= "bold"))
}, height = 400, width = 730)
output$barchart <- renderGvis({
drug <- unique(getsub()$alldat$Drug)
culs <- getsub()$drugcols[,1]; culs <- rev(culs);
culs <- paste(culs, collapse= "', '")
ncharpar <- nchar(culs)
culs <- insert_str(culs[1], c("'", "'"), c(1, ncharpar+1))
culs <- paste0("[",culs,"]")
wides <- getsub()$widedat; colnames(wides)[-1] <- as.character(drug)
wides[,2:ncol(wides)] <- wides[,ncol(wides):2]
colnames(wides)[2:ncol(wides)] <- colnames(wides)[ncol(wides):2]
gvisColumnChart(wides, xvar = "bigtime", yvar = colnames(wides)[-1],
options=list(tooltip="{isHtml:'true'}",
isStacked = 'percent',
width = 800,
height = 400,
legend = "{position: 'right',
alignment: 'center'}",
maxLines = 3,
bar = "{groupWidth: '65%'}",
colors = culs))
})
output$distPlot <- renderGvis({
# generate bins based on input$bins from ui.R
gvisSankey(getsub()$SankeyDat, from = "sourceNode",
to = "targetNode",
weight = "Patients",
options=list(height = 400, width = 550,
tooltip = "{isHtml:'True'}",
sankey = getsub()$forSankey))
})
output$ageD <- renderPlot({getsub()$ageD}, height = 400, width = 730)
output$ethChart <- renderPlot({getsub()$ethChart}, height = 400, width = 730)
output$NChart <- renderPlot({getsub()$NChart},height = 400, width = 730)
}
# Run the application
shinyApp(ui = ui, server = server)