From a5bf54003e843d121123fc40f1ef4bd719898df3 Mon Sep 17 00:00:00 2001 From: apigap01 Date: Wed, 20 Nov 2024 12:02:44 +0000 Subject: [PATCH 1/5] Data prep revision to incorporate new PHS publication. Run to 2021 --- app.R | 94 ++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/app.R b/app.R index 621f33a..d935b41 100755 --- a/app.R +++ b/app.R @@ -8,49 +8,57 @@ library(dplyr) #data manipulation library(plotly) #charts -library(shiny) - -#Preparing data - not needed unless new data coming through -# library(tidyr) -# library(readr) -# -# cl_out_pop <- "/conf/linkage/output/lookups/Unicode/Populations/Estimates/" -# -# hep_c <- read_csv("data/hepatitisc_board.csv") %>% -# mutate_if(is.character, factor) %>% #converting characters into factors -# setNames(tolower(names(.))) %>% #variable names to lower case -# gather(year, number, -nhsboard) %>% # to long format -# mutate(year = as.numeric(gsub("y", "", year))) #taking out y from year -# -# # Bringing population to calculate rates -# pop_lookup <- readRDS(paste0(cl_out_pop, "HB2019_pop_est_1981_2018.rds")) %>% -# setNames(tolower(names(.))) %>% #variables to lower case -# subset(year>2008) %>% #select only 2002+ -# # Aggregating to get hb totals -# rename(code = hb2019) %>% select(code, year, pop) %>% group_by(code, year) %>% -# summarise(denominator = sum(pop)) %>% ungroup %>% group_by(year) %>% -# # Adding Scotland totals -# bind_rows(summarise_all(., list(~if(is.numeric(.)) sum(.) else "S00000001"))) %>% -# ungroup() -# -# #Codes and names for areas -# names_lookup <- readRDS("/PHI_conf/ScotPHO/Profiles/Data/Lookups/Geography/HBdictionary.rds") %>% -# mutate(areaname = gsub("NHS ", "", areaname), -# areaname = gsub(" and ", " & ", areaname)) -# -# # merging with codes -# hep_c <- left_join(hep_c, names_lookup, by = c("nhsboard" = "areaname")) %>% -# mutate(code = case_when(nhsboard == "Scotland" ~ "S00000001", TRUE ~ code)) -# -# hep_c <- left_join(hep_c, pop_lookup, c("code", "year")) %>% -# mutate(rate = round(number/denominator*100000, 1)) %>% # calculate rate -# select(-denominator, -code) %>% -# gather(measure, value, c(-nhsboard, -year)) %>% -# mutate(measure = recode(measure, "number" = "Number", "rate" = "Rate")) -# -# saveRDS(hep_c, "data/hepatitisc_board.rds") - -hep_c <- readRDS("data/hepatitisc_board.rds") #reading data for app +library(shiny) #shiny app +library(tidyr) +library(readr) #for reading in csv +library(janitor) #for data cleaning + + +#Set filepaths +cl_out_pop <- "/conf/linkage/output/lookups/Unicode/Populations/Estimates/" #population lookups to calculate rates +filepath <- "/PHI_conf/ScotPHO/Website/Charts/Health Conditions/Hepatitis C/shiny_data" #shiny data + +#read in currently deployed data +current_data <- read_csv(paste0(filepath, "/hepatitisc_data_to_2018.csv")) + +#read in new data +hep_c <- read_csv(paste0(filepath, "/hepatitisc_data_2021.csv")) |> + mutate_if(is.character, factor) |> #converting characters into factors + clean_names() #variable names to lower case + +#bring in population to calculate rates +pop_lookup <- readRDS(paste0(cl_out_pop, "HB2019_pop_est_1981_2022.rds")) |> + clean_names() |> #variables to lower case + subset(year=="2021") |> #select only new year to be appended + # Aggregating to get hb totals + rename(code = hb2019) |> select(code, year, pop) |> group_by(code, year) |> + summarise(denominator = sum(pop)) |> ungroup() |> group_by(year) |> + # Adding Scotland totals + adorn_totals("row", name = "S00000001") |> + mutate(year = case_when(code == "S00000001" ~ 2021, TRUE ~ year)) #Update this line with newest year to match other rows + +#Codes and names for areas +names_lookup <- readRDS("/PHI_conf/ScotPHO/Profiles/Data/Lookups/Geography/HBdictionary.rds") |> + mutate(areaname = gsub("NHS ", "", areaname), + areaname = gsub(" and ", " & ", areaname)) + +# merging with codes +hep_c <- left_join(hep_c, names_lookup, by = c("nhsboard" = "areaname")) |> + mutate(code = case_when(nhsboard == "Scotland" ~ "S00000001", TRUE ~ code)) + +hep_c <- left_join(hep_c, pop_lookup, c("code", "year")) |> + mutate(rate = round(number/denominator*100000, 1)) |> # calculate rate + select(-denominator, -code) |> + gather(measure, value, c(-nhsboard, -year)) |> + mutate(measure = recode(measure, "number" = "Number", "rate" = "Rate")) + +#append new data onto current data +hep_c <- rbind(current_data, hep_c) + + +saveRDS(hep_c, paste0(filepath, "_hepatitisc_board.rds")) + +hep_c <- readRDS(paste0(filepath, "_hepatitisc_board.rds")) #reading data for app #Use for selection of areas board_list <- sort(unique(hep_c$nhsboard[hep_c$nhsboard != "Scotland"])) From e6aaeffbe0108b5b4b8db6d21a1e21169c630f92 Mon Sep 17 00:00:00 2001 From: apigap01 Date: Wed, 20 Nov 2024 13:06:42 +0000 Subject: [PATCH 2/5] Run for 2022 data --- app.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/app.R b/app.R index d935b41..80a62a0 100755 --- a/app.R +++ b/app.R @@ -19,23 +19,23 @@ cl_out_pop <- "/conf/linkage/output/lookups/Unicode/Populations/Estimates/" #pop filepath <- "/PHI_conf/ScotPHO/Website/Charts/Health Conditions/Hepatitis C/shiny_data" #shiny data #read in currently deployed data -current_data <- read_csv(paste0(filepath, "/hepatitisc_data_to_2018.csv")) +current_data <- read_csv(paste0(filepath, "/hepatitisc_data_to_2021.csv")) #read in new data -hep_c <- read_csv(paste0(filepath, "/hepatitisc_data_2021.csv")) |> +hep_c <- read_csv(paste0(filepath, "/hepatitisc_data_2022.csv")) |> mutate_if(is.character, factor) |> #converting characters into factors clean_names() #variable names to lower case #bring in population to calculate rates pop_lookup <- readRDS(paste0(cl_out_pop, "HB2019_pop_est_1981_2022.rds")) |> clean_names() |> #variables to lower case - subset(year=="2021") |> #select only new year to be appended + subset(year=="2022") |> #select only new year to be appended # Aggregating to get hb totals rename(code = hb2019) |> select(code, year, pop) |> group_by(code, year) |> summarise(denominator = sum(pop)) |> ungroup() |> group_by(year) |> # Adding Scotland totals adorn_totals("row", name = "S00000001") |> - mutate(year = case_when(code == "S00000001" ~ 2021, TRUE ~ year)) #Update this line with newest year to match other rows + mutate(year = case_when(code == "S00000001" ~ 2022, TRUE ~ year)) #Update this line with newest year to match other rows #Codes and names for areas names_lookup <- readRDS("/PHI_conf/ScotPHO/Profiles/Data/Lookups/Geography/HBdictionary.rds") |> @@ -55,10 +55,12 @@ hep_c <- left_join(hep_c, pop_lookup, c("code", "year")) |> #append new data onto current data hep_c <- rbind(current_data, hep_c) +#save files +saveRDS(hep_c, paste0(filepath, "/shiny_data_hepatitisc_board.rds")) -saveRDS(hep_c, paste0(filepath, "_hepatitisc_board.rds")) +write.csv(hep_c, paste0(filepath, "/hepatitisc_data_to_2022.csv"), row.names = FALSE) -hep_c <- readRDS(paste0(filepath, "_hepatitisc_board.rds")) #reading data for app +hep_c <- readRDS(paste0(filepath, "/shiny_data_hepatitisc_board.rds")) #reading data for app #Use for selection of areas board_list <- sort(unique(hep_c$nhsboard[hep_c$nhsboard != "Scotland"])) From ada988abf64b100a4e0febd23d3cc367499d8b80 Mon Sep 17 00:00:00 2001 From: apigap01 Date: Thu, 21 Nov 2024 15:43:30 +0000 Subject: [PATCH 3/5] Update chart to highcharter from Plotly --- app.R | 77 ++++++++++++++++++++++++----------------------------------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/app.R b/app.R index 80a62a0..c6dc65e 100755 --- a/app.R +++ b/app.R @@ -7,7 +7,8 @@ ##Packages library(dplyr) #data manipulation -library(plotly) #charts +library(highcharter) #charts +library(phsstyles) #for chart colors library(shiny) #shiny app library(tidyr) library(readr) #for reading in csv @@ -65,12 +66,6 @@ hep_c <- readRDS(paste0(filepath, "/shiny_data_hepatitisc_board.rds")) #reading #Use for selection of areas board_list <- sort(unique(hep_c$nhsboard[hep_c$nhsboard != "Scotland"])) -#ScotPHO logo. -#Needs to be https address or if local in code 64 (the latter does not work with 4.7 plotly) -scotpho_logo <- list(source ="https://raw.githubusercontent.com/ScotPHO/plotly-charts/master/scotpho.png", - xref = "paper", yref = "paper", - x= -0.09, y= 1.2, sizex = 0.22, sizey = 0.18, opacity = 1) - ############################. ## Visual interface ---- ############################. @@ -88,13 +83,15 @@ ui <- fluidPage(style="width: 650px; height: 500px; ", choices = board_list)) ), div(style= "width:100%; float: left;", #Main panel - plotlyOutput("chart", width = "100%", height = "350px"), + highchartOutput("line_chart"), + #plotlyOutput("chart", width = "100%", height = "350px"), p(div(style = "width: 25%; float: left;", #Footer - HTML("Source: HPS")), + HTML("Source: HPS (2009-18)"), + HTML("Source: PHS (2021-)")), div(style = "width: 25%; float: left;", downloadLink('download_data', 'Download data')), div(style = "width: 50%; float: left;", - "Note: Year of earliest positive specimen.") + "Notes: Year of earliest positive specimen. Publication of 2019 and 2020 data was prevented by the COVID-19 pandemic.") ) ) ) @@ -109,48 +106,36 @@ server <- function(input, output) { filename = 'hepatitisc_data.csv', content = function(file) { write.csv(hep_c, file, row.names=FALSE) }) - ############################. - #Visualization - output$chart <- renderPlotly({ - #For Island plots and rates plot an empty chart - if (input$area == "Island Boards" & input$measure == "Rate") { - text_na <- list(x = 5, y = 5, text = "Rates are not published for the island boards" , - xref = "x", yref = "y", showarrow = FALSE, size=15) - plot_ly() %>% - layout(annotations = text_na, - yaxis = list(showline = FALSE, showticklabels = FALSE, showgrid = FALSE, fixedrange=TRUE), - xaxis = list(showline = FALSE, showticklabels = FALSE, showgrid = FALSE, fixedrange=TRUE), - font = list(family = 'Arial, sans-serif')) %>% - config( displayModeBar = FALSE) # taking out plotly logo and collaborate button - } else { - - #Data for Scotland line - data_scot <- hep_c %>% subset(nhsboard=="Scotland" & measure==input$measure) - #Data for Health board line - data_board <- hep_c %>% subset(nhsboard==input$area & measure==input$measure) + #Creating chart + output$line_chart <- renderHighchart({ + + #Data for plot + data_chart <- hep_c |> subset(measure == input$measure & nhsboard == input$area) + + #Separate Scotland data out for separate series + data_scot <- hep_c |> subset(measure == input$measure & nhsboard == "Scotland") #y axis title - yaxistitle <- ifelse(input$measure == "Rate", "Rate per 100,000", "Number of diagnosis") + yaxistitle <- ifelse(input$measure == "Number", "Number of diagnoses", + "Rate per 100,000") + + #Creating dynamic text for if Island Board rates selected + validate( + need((input$area != "Island Boards" | input$measure == "Number"), "Rates are not published for the island boards")) - plot <- plot_ly(data=data_board, x=~year, y = ~value, - type = "scatter", mode = 'lines', line = list(color = '#08519c'), - name = unique(data_board$nhsboard), width = 650, height = 350) %>% - add_lines(data = data_scot, y = ~value, mode = 'lines', - name = "Scotland", line = list(color = '#000000')) %>% - #Layout - layout(annotations = list(), #It needs this because of a buggy behaviour - yaxis = list(title = yaxistitle, rangemode="tozero", fixedrange=TRUE), - xaxis = list(title = "Year", fixedrange=TRUE), - font = list(family = 'Arial, sans-serif'), #font - margin = list(pad = 4, t = 50), #margin-paddings - hovermode = 'false', # to get hover compare mode as default - images = scotpho_logo) %>% - config(displayModeBar= T, displaylogo = F, collaborate=F, editable =F) # taking out plotly logo and collaborate button - } - }) + highchart() |> + hc_add_series(data_chart, "line", hcaes(y = value, x = year), name = input$area) |> + hc_add_series(data_scot, "line", hcaes(y = value, x = year), name = "Scotland") |> + hc_xAxis(title = list(text = "Year")) |> + hc_yAxis(title = list(text = yaxistitle)) |> + hc_legend(align = "left", verticalAlign = "top") + + }) } # end of server part + + ############################. ## Calling app ---- ############################. From 4965200311da989dd3c8c4b86dc4f2d8b4c7dd23 Mon Sep 17 00:00:00 2001 From: Monica McGibbon Date: Mon, 25 Nov 2024 10:47:07 +0000 Subject: [PATCH 4/5] uncomment data prep, create data folder within repo, add notes for analysts --- app.R | 164 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 68 deletions(-) diff --git a/app.R b/app.R index c6dc65e..2f6fccd 100755 --- a/app.R +++ b/app.R @@ -1,74 +1,107 @@ -#Code to create chart of hepatitis c by board. - -############################. -## Global ---- -############################. -############################. -##Packages - -library(dplyr) #data manipulation +######################. +### Analyst notes ---- +######################. + +# This script prepares a data file and a shiny app to be embedded on the following page of the scotpho website: https://www.scotpho.org.uk/health-conditions/hepatitis-c/data/scotland-and-uk/ + +# instructions: +# Source new data from [add instructions] and save it in the following folder: /PHI_conf/ScotPHO/Website/Charts/Health Conditions/Hepatitis C/shiny_data +# Un-comment the code in part 1 below and update the following: +# a. current_data filepath to [add instructions] +# b. the filepath for reading in the hep_c data +# c. the population lookups file and year + +# This will prepare a data file within this repository to use to create the chart in the shiny app +# Make sure to comment the code back out again when deploying the shiny app, otherwise it won't work properly! + + +# Part 1 - data prep +# Part 2 - shiny app + + +###########################. +### Part 1 - data prep ---- +###########################. + +# packages +# library(tidyr) +# library(readr) # for reading in csv +# library(janitor) # for data cleaning +# library(dplyr) # data manipulation + +# #Set filepaths +# cl_out_pop <- "/conf/linkage/output/lookups/Unicode/Populations/Estimates/" #population lookups to calculate rates +# filepath <- "/PHI_conf/ScotPHO/Website/Charts/Health Conditions/Hepatitis C/shiny_data" #shiny data +# +# #read in currently deployed data +# current_data <- read_csv(paste0(filepath, "/hepatitisc_data_to_2021.csv")) +# +# #read in new data +# hep_c <- read_csv(paste0(filepath, "/hepatitisc_data_2022.csv")) |> +# mutate_if(is.character, factor) |> #converting characters into factors +# clean_names() #variable names to lower case +# +# #bring in population to calculate rates +# pop_lookup <- readRDS(paste0(cl_out_pop, "HB2019_pop_est_1981_2022.rds")) |> +# clean_names() |> #variables to lower case +# subset(year=="2022") |> #select only new year to be appended +# # Aggregating to get hb totals +# rename(code = hb2019) |> +# select(code, year, pop) |> +# group_by(code, year) |> +# summarise(denominator = sum(pop)) |> ungroup() |> group_by(year) |> +# # Adding Scotland totals +# adorn_totals("row", name = "S00000001") |> +# mutate(year = case_when(code == "S00000001" ~ 2022, TRUE ~ year)) #Update this line with newest year to match other rows +# +# #Codes and names for areas +# names_lookup <- readRDS("/PHI_conf/ScotPHO/Profiles/Data/Lookups/Geography/HBdictionary.rds") |> +# mutate(areaname = gsub("NHS ", "", areaname), +# areaname = gsub(" and ", " & ", areaname)) +# +# # merging with codes +# hep_c <- left_join(hep_c, names_lookup, by = c("nhsboard" = "areaname")) |> +# mutate(code = case_when(nhsboard == "Scotland" ~ "S00000001", TRUE ~ code)) +# +# hep_c <- left_join(hep_c, pop_lookup, c("code", "year")) |> +# mutate(rate = round(number/denominator*100000, 1)) |> # calculate rate +# select(-denominator, -code) |> +# gather(measure, value, c(-nhsboard, -year)) |> +# mutate(measure = recode(measure, "number" = "Number", "rate" = "Rate")) +# +# #append new data onto current data +# hep_c <- rbind(current_data, hep_c) +# +# +# write.csv(hep_c, paste0(filepath, "/hepatitisc_data_to_2022.csv"), row.names = FALSE) +# +# # uncomment to create data folder in repository if running for first time +# #dir.create("data/") +# +# # save rds data in data folder +# saveRDS(hep_c, "data/shiny_data_hepatitisc_board.rds") +# + + +#########################. +# Part 2 - shiny app ---- +########################. + +# packages library(highcharter) #charts library(phsstyles) #for chart colors library(shiny) #shiny app library(tidyr) -library(readr) #for reading in csv -library(janitor) #for data cleaning - - -#Set filepaths -cl_out_pop <- "/conf/linkage/output/lookups/Unicode/Populations/Estimates/" #population lookups to calculate rates -filepath <- "/PHI_conf/ScotPHO/Website/Charts/Health Conditions/Hepatitis C/shiny_data" #shiny data - -#read in currently deployed data -current_data <- read_csv(paste0(filepath, "/hepatitisc_data_to_2021.csv")) - -#read in new data -hep_c <- read_csv(paste0(filepath, "/hepatitisc_data_2022.csv")) |> - mutate_if(is.character, factor) |> #converting characters into factors - clean_names() #variable names to lower case - -#bring in population to calculate rates -pop_lookup <- readRDS(paste0(cl_out_pop, "HB2019_pop_est_1981_2022.rds")) |> - clean_names() |> #variables to lower case - subset(year=="2022") |> #select only new year to be appended - # Aggregating to get hb totals - rename(code = hb2019) |> select(code, year, pop) |> group_by(code, year) |> - summarise(denominator = sum(pop)) |> ungroup() |> group_by(year) |> - # Adding Scotland totals - adorn_totals("row", name = "S00000001") |> - mutate(year = case_when(code == "S00000001" ~ 2022, TRUE ~ year)) #Update this line with newest year to match other rows - -#Codes and names for areas -names_lookup <- readRDS("/PHI_conf/ScotPHO/Profiles/Data/Lookups/Geography/HBdictionary.rds") |> - mutate(areaname = gsub("NHS ", "", areaname), - areaname = gsub(" and ", " & ", areaname)) - -# merging with codes -hep_c <- left_join(hep_c, names_lookup, by = c("nhsboard" = "areaname")) |> - mutate(code = case_when(nhsboard == "Scotland" ~ "S00000001", TRUE ~ code)) +library(dplyr) -hep_c <- left_join(hep_c, pop_lookup, c("code", "year")) |> - mutate(rate = round(number/denominator*100000, 1)) |> # calculate rate - select(-denominator, -code) |> - gather(measure, value, c(-nhsboard, -year)) |> - mutate(measure = recode(measure, "number" = "Number", "rate" = "Rate")) -#append new data onto current data -hep_c <- rbind(current_data, hep_c) - -#save files -saveRDS(hep_c, paste0(filepath, "/shiny_data_hepatitisc_board.rds")) - -write.csv(hep_c, paste0(filepath, "/hepatitisc_data_to_2022.csv"), row.names = FALSE) - -hep_c <- readRDS(paste0(filepath, "/shiny_data_hepatitisc_board.rds")) #reading data for app +# data prepared and saved from part 1 +hep_c <- readRDS("data/shiny_data_hepatitisc_board.rds") # reading data for app #Use for selection of areas board_list <- sort(unique(hep_c$nhsboard[hep_c$nhsboard != "Scotland"])) -############################. -## Visual interface ---- -############################. +# UI #Height and widths as percentages to allow responsiveness #Using divs as issues with classing css ui <- fluidPage(style="width: 650px; height: 500px; ", @@ -96,9 +129,7 @@ ui <- fluidPage(style="width: 650px; height: 500px; ", ) ) -############################. -## Server ---- -############################. +# Server server <- function(input, output) { # Allowing user to download data @@ -136,10 +167,7 @@ server <- function(input, output) { -############################. -## Calling app ---- -############################. - +# calling app shinyApp(ui = ui, server = server) ##END From dbad8c0e19aa6018253071864b250120d7f39bad Mon Sep 17 00:00:00 2001 From: Monica McGibbon Date: Mon, 25 Nov 2024 10:55:51 +0000 Subject: [PATCH 5/5] add comments about url to deploy to --- app.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app.R b/app.R index 2f6fccd..4a73042 100755 --- a/app.R +++ b/app.R @@ -14,6 +14,8 @@ # This will prepare a data file within this repository to use to create the chart in the shiny app # Make sure to comment the code back out again when deploying the shiny app, otherwise it won't work properly! +# Once the data/app is ready and has been checked, deploy to the following URL to update the shiny app on +# the webpage: https://scotland.shinyapps.io/scotpho-hepatitisc-board/ # Part 1 - data prep # Part 2 - shiny app