diff --git a/.Rbuildignore b/.Rbuildignore index d598482..8cab5a2 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -17,3 +17,6 @@ cran-comments.md ^cran-comments\.md$ ^CRAN-RELEASE$ ^codecov\.yml$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.gitignore b/.gitignore index 1678d63..939a92f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ inst/doc DEV/ .binder/ google-analytics.html +docs diff --git a/DESCRIPTION b/DESCRIPTION index 2f3351e..71d4d51 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: BayesianReasoning Type: Package Title: Plot Positive and Negative Predictive Values for Medical Tests -Version: 0.3.1 -Date: 2020-06-29 +Version: 0.3.2 +Date: 2020-07-03 Authors@R: c(person("Gorka", "Navarrete", email = "gorkang@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7678-8656"))) Maintainer: Gorka Navarrete Description: Functions to plot and help understand positive and negative predictive values (PPV and NPV), and their relationship with sensitivity, specificity, and prevalence. See Akobeng, A.K. (2007) for a theoretical overview of the technical concepts and Navarrete et al. (2015) for a practical explanation about the importance of their understanding . @@ -14,9 +14,7 @@ Imports: dplyr, tidyr, magrittr, tibble, - ggforce, - shinythemes, - shinyjs + ggforce License: CC0 RoxygenNote: 7.1.1 URL: https://github.com/gorkang/BayesianReasoning @@ -26,5 +24,6 @@ Suggests: testthat, knitr, rmarkdown, - covr + covr, + patchwork VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index 5674972..c26eab7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,16 @@ # README +### v0.3.2 Clean up, test, rinse and repeat + +Clean up old comments, add more tests... + +* 100% code coverage +* min_possible_prevalence() is now much more efficient +* Improvements to overlay = "line" en PPV_heatmap(), now using {ggforce} for labels +* Changed color palette for NPV +* Tweaked color palette for PPV + + ### v0.3.1 Testing, testing Corrected issues raised in CRAN revision @@ -8,7 +19,7 @@ Corrected issues raised in CRAN revision * Do not capitalize things in the Description text * Added references to Description explaining main concepts * Replaced cat() with message() -* Aded folder parameter to PPV_diagnostic_vs_screening() and PPV_heatmap() functions +* Added folder parameter to PPV_diagnostic_vs_screening() and PPV_heatmap() functions Added tests diff --git a/R/PPV_diagnostic_vs_screening.R b/R/PPV_diagnostic_vs_screening.R index 38adb7d..90de937 100644 --- a/R/PPV_diagnostic_vs_screening.R +++ b/R/PPV_diagnostic_vs_screening.R @@ -35,13 +35,6 @@ PPV_diagnostic_vs_screening <- function(Max_FP = 10, Sensitivity = 100, prevalen # save_plot = FALSE, folder = "") { - # PARAMETERS -------------------------------------------------------------- - # Max_FP = 10 - # Sensitivity = 100 - # prevalence_screening_group = 100 - # prevalence_diagnostic_group = 2 - # labels_prevalence = c("Screening", "Diagnostic") - # FIXED parameters -------------------------------------------------------- @@ -53,6 +46,7 @@ PPV_diagnostic_vs_screening <- function(Max_FP = 10, Sensitivity = 100, prevalen Min_FP = 0 #Step_size_FP #0 FP = seq(Min_FP, Max_FP, Step_size_FP) + # Calculate PPVs ---------------------------------------------------------- Real_Prevalence_PPV = list() diff --git a/R/PPV_heatmap.R b/R/PPV_heatmap.R index 8cd410b..d922813 100644 --- a/R/PPV_heatmap.R +++ b/R/PPV_heatmap.R @@ -56,52 +56,45 @@ PPV_heatmap <- # Check dimensions ----------------------------------------------------------- - + + if (Min_Prevalence > Max_Prevalence) { + message("[WARNING]: Min_Prevalence (", Min_Prevalence , ") is > than Max_Prevalence (", Max_Prevalence, "). [EXPECTED]: Min_Prevalence should be smaller than Max_Prevalence. [CHANGED]: Min_Prevalence = Max_Prevalence/2") + Min_Prevalence = Max_Prevalence/2 + } # If the dimensions of the overlay are bigger, adjust Max_FP and Max_Prevalence if (overlay == "area") { if (overlay_position_FP > Max_FP) { - # if (overlay_position_FP_FN > Max_FP) { - message("Changing Max_FP to overlay_position_FP to fit overlay") - # Max_FP = overlay_position_FP_FN - Max_FP = overlay_position_FP + message("[WARNING]: overlay_position_FP (", overlay_position_FP , ") is > than Max_FP (", Max_FP, "). [EXPECTED]: overlay_position_FP should be smaller than Max_FP [CHANGED]: Max_FP = overlay_position_FP") + Max_FP = overlay_position_FP } if (overlay_prevalence_2 > Max_Prevalence) { - message("Changing Max_Prevalence to overlay_prevalence_2 to fit overlay") + message("[WARNING]: overlay_prevalence_2 (", overlay_prevalence_2 , ") is > than Max_Prevalence (", Max_Prevalence, "). [EXPECTED]: overlay_prevalence_2 should be smaller than Max_Prevalence [CHANGED]: Max_Prevalence = overlay_prevalence_2") Max_Prevalence = overlay_prevalence_2 } - # if(overlay_position_FP_FN > (100 - Sensitivity)) { if(overlay_position_FN > (100 - Sensitivity)) { + message("[WARNING]: overlay_position_FN (", overlay_position_FN , ") is > than (100 - Sensitivity) (", (100 - Sensitivity), "). [EXPECTED]: overlay_position_FN should be smaller than (100 - Sensitivity) [CHANGED]: Sensitivity = 100 - overlay_position_FN") Sensitivity = 100 - overlay_position_FN } - if (overlay_prevalence_1/overlay_prevalence_2 < Min_Prevalence/Max_Prevalence) { - - message("Overlay impossible to fit in plot: overlay_prevalence_1/overlay_prevalence_2 < Min_Prevalence/Max_Prevalence: Changing Min_Prevalence to (overlay_prevalence_1/overlay_prevalence_2) * Max_Prevalence to fit overlay") - - # Min Prevalence adjusted to fit overlay - Min_Prevalence = (overlay_prevalence_1/overlay_prevalence_2) * Max_Prevalence - + message("[WARNING]: overlay_prevalence_1/overlay_prevalence_2 (", overlay_prevalence_1/overlay_prevalence_2 , ") is > than Min_Prevalence/Max_Prevalence (", Min_Prevalence/Max_Prevalence, "). [EXPECTED]: Prevalence for overlay should be smaller than Prevalence [CHANGED]: Changing Min_Prevalence to (overlay_prevalence_1/overlay_prevalence_2) * Max_Prevalence to fit overlay") + Min_Prevalence = (overlay_prevalence_1/overlay_prevalence_2) * Max_Prevalence # Min Prevalence adjusted to fit overlay } } - - -# Check overlay prevalence ------------------------------------------------ + # Check overlay prevalence ------------------------------------------------ if (length(overlay_prevalence_1) == 1) { if (overlay_prevalence_1 > overlay_prevalence_2) { - - message(overlay_prevalence_1, " is > than ", overlay_prevalence_2) - overlay_prevalence_1 = overlay_prevalence_2 - + message("[WARNING]: overlay_prevalence_1 (", overlay_prevalence_1 , ") is > than overlay_prevalence_2 (", overlay_prevalence_2, "). [EXPECTED]: overlay_prevalence_1 should be smaller than overlay_prevalence_2 [CHANGED]: overlay_prevalence_1 = overlay_prevalence_2/2") + overlay_prevalence_1 = overlay_prevalence_2/2 } } else if (length(overlay_prevalence_1) > 1) { if (DEBUG != 0) message("> 1 overlay") @@ -110,9 +103,6 @@ PPV_heatmap <- # SYSTEM parameters ------------------------------------------------------- - #GRAPHIC Parameters ************* - - # modifier_text_overlay_position = (Max_Prevalence/75) if (overlay != "no") { filename_overlay = paste0("_", overlay) } else { @@ -169,6 +159,7 @@ PPV_heatmap <- p = .plot_overlay_line( PPV_melted = PPV_melted, + uncertainty_prevalence = uncertainty_prevalence, Min_Prevalence = Min_Prevalence, Max_Prevalence = Max_Prevalence, Max_FP = Max_FP, @@ -176,7 +167,6 @@ PPV_heatmap <- overlay_prevalence_2 = overlay_prevalence_2, - # overlay_position_FP_FN = overlay_position_FP_FN, overlay_position_FP = overlay_position_FP, overlay_position_FN = overlay_position_FN, @@ -196,6 +186,7 @@ PPV_heatmap <- } else if (overlay == "area") { + p = .plot_overlay_area( PPV_melted, uncertainty_prevalence = uncertainty_prevalence, @@ -204,12 +195,10 @@ PPV_heatmap <- Sensitivity = Sensitivity, Min_FP = Min_FP, Max_FP = Max_FP, - # Step_size_FP = Step_size_FP, overlay_labels = overlay_labels, overlay_prevalence_1 = overlay_prevalence_1, overlay_prevalence_2 = overlay_prevalence_2, - # overlay_position_FP_FN = overlay_position_FP_FN, overlay_position_FP = overlay_position_FP, overlay_position_FN = overlay_position_FN, @@ -228,15 +217,14 @@ PPV_heatmap <- ) } else { - # if (overlay == "no") { - + p = .plot_creation( PPV_melted = PPV_melted, Min_Prevalence = Min_Prevalence, Sensitivity = Sensitivity, Min_FP = Min_FP, Max_FP = Max_FP, - # Step_size_FP = Step_size_FP, + decimals_x = decimals_x, decimals_y = decimals_y, @@ -250,8 +238,7 @@ PPV_heatmap <- PPV_NPV = PPV_NPV ) - - + } @@ -260,7 +247,6 @@ PPV_heatmap <- if (folder != "") { print(p) - # plot_name = here::here(paste0("outputs/PPV_heatmap/", PPV_NPV, "_", Min_Prevalence, "_", Max_Prevalence, "_", Sensitivity, "_", Max_FP, filename_overlay, "_", Language, ".png")) plot_name = paste0(folder, "/", PPV_NPV, "_", Min_Prevalence, "_", Max_Prevalence, "_", Sensitivity, "_", Max_FP, filename_overlay, "_", Language, ".png") ggsave(plot_name, p, dpi = 300, width = 14, height = 10) message("\n Plot created in: ", plot_name, "\n") diff --git a/R/helper_functions.R b/R/helper_functions.R index 6a429bf..3143c1a 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -5,10 +5,9 @@ #' @param Min_Prevalence [x] out of y prevalence of disease #' @param Max_Prevalence x out of [y] prevalence of disease #' @param Sensitivity Sensitivity of test -#' @param Max_FP Maximum False Positives ratio -#' @param Min_FP . -#' @param PPV_NPV . -#' @param steps_matrix . +#' @param Max_FP Maximum False Positive ratio +#' @param Min_FP Minimum False Positive ratio +#' @param steps_matrix How big the matrix should be (probably better to leave as it is: 100) #' #' @return A DF called PPV #' @importFrom reshape2 melt @@ -19,26 +18,10 @@ Sensitivity = 100, Min_FP = 0, Max_FP = 10, - PPV_NPV = "PPV", steps_matrix = 100) { - # library(reshape2) - - # DEBUG ------------------------------------------------------------------- - - # Min_Prevalence = 1 - # Max_Prevalence = 500 - # Sensitivity = 90 - # Min_FP = 0 - # Max_FP = 5 - # library(tidyverse) - # PPV_NPV = "NPV" # NPV/PPV - # label_title = "" - # label_subtitle = "" - # steps_matrix = 100 - - # TEST Parameters ************** + # TEST Parameters --- # False Positives (x axis) Steps_FP <- steps_matrix @@ -55,7 +38,7 @@ Sensitivity_range = seq(Min_FN, Max_FN, Step_size_FN) - #CONDITION Parameters *********** + # CONDITION Parameters --- Min_Prevalence <- Min_Prevalence Prevalence_x <- Min_Prevalence @@ -64,15 +47,15 @@ range_prevalence = (Max_Prevalence - Min_Prevalence) Step_size_Prevalence <- range_prevalence / Steps_Prevalence Prevalence <- round(seq(Min_Prevalence, (Max_Prevalence), Step_size_Prevalence), 4) #With (1 + Max_Prevalence) we get 101. If we use Max_Prevalence we get 100 - + # PPV Calculation ------------------------------------------------------------- - # We calculate the 100x100 PPV matrix using %o% (outer) + # We calculate a 100x100 PPV matrix using %o% (outer) PPV <- round((Sensitivity * Prevalence_x) / ((Sensitivity * Prevalence_x) + ((Prevalence - 1) %o% FP)), 2) NPV <- round(((Prevalence - Min_Prevalence) * (100 - Max_FP)) / (((Prevalence - Min_Prevalence) * (100 - Max_FP)) + (Prevalence %o% Sensitivity_range)), 2) - # ( Healty * Specificity) / ( Healty * Specificity) + Sick * FN + # (Healthy * Specificity) / (Healthy * Specificity) + (Sick * FN) #Label columns and rows of matrix colnames(PPV) = FP @@ -89,7 +72,6 @@ cbind(PPV_melted_NPV) # Give names to variables - # names(PPV_melted) = c("melted_Prevalence", "melted_FP", "melted_PPV", "melted_Prevalence2", "melted_XXX", "melted_NPV") names(PPV_melted) = c("Prevalence", "FP", "PPV", "Prevalence2", "FN", "NPV") PPV_melted %>% @@ -99,24 +81,19 @@ dplyr::mutate(prevalence_pct = prevalence_1/Prevalence) %>% dplyr::as_tibble() - # hist(PPV_melted$prevalence_pct, breaks = 100) - } - - - #' .get_point_ppv_npv #' -#' Description +#' Get PPV or NPV for the overlay #' -#' @param PPV_melted . -#' @param PPV_NPV . -#' @param Sensitivity . -#' @param overlay_prevalence_1 . -#' @param overlay_prevalence_2 . +#' @param PPV_melted DF out of .createPPVmatrix() +#' @param PPV_NPV Should calculate PPV or NPV? +#' @param Sensitivity Sensitivity of the test +#' @param overlay_prevalence_1 [x] out of y prevalence of disease +#' @param overlay_prevalence_2 x out of [y] prevalence of disease #' @param overlay_position_FP . #' @param overlay_position_FN . #' @param overlay_labels . @@ -135,7 +112,6 @@ overlay_prevalence_2, overlay_labels, - # overlay_position_FP_FN, overlay_position_FP, overlay_position_FN, @@ -146,13 +122,6 @@ decimals_x, decimals_y) { - # # BUG: THIS SHOULDN'T BE HERE. SHOULD READ FROM THE APPROPR - # # prevalence_label = " / " - # # decimals_x = 1 - # if (exists("overlay_labels") == FALSE) { overlay_labels = ""} - # if (exists("prevalence_label") == FALSE) { prevalence_label = ""} - # if (exists("decimals_x") == FALSE) { decimals_x = 1} - # X The variable that defines axis position depends on PPV_NPV if (PPV_NPV == "PPV") { @@ -167,18 +136,13 @@ dplyr::filter( # Closest value to overlay_prevalence_2 & overlay_position_FP_FN abs(Prevalence - point_Prevalence) == min(abs(Prevalence - point_Prevalence)) & - # abs(FP - overlay_position_FP_FN) == min(abs(FP - overlay_position_FP_FN))) - # abs(FP - overlay_FP_FN) == min(abs(FP - overlay_FP_FN))) abs(FP - overlay_position_FP) == min(abs(FP - overlay_position_FP))) DF_point_PPV_NPV = DF_point_PPV_NPV[1,] if (PPV_NPV == "NPV") { - - # browser() - - # calculated_NPV = paste0(round( ( (100 - overlay_position_FP_FN) * overlay_prevalence_2) / (((100 - overlay_position_FP_FN) * overlay_prevalence_2) + (overlay_prevalence_1) * Sensitivity), 2) * 100, "%") + calculated_NPV = paste0( round( ((100 - overlay_position_FP) * (overlay_prevalence_2 - overlay_prevalence_1)) / @@ -186,20 +150,14 @@ (((100 - overlay_position_FP) * (overlay_prevalence_2 - overlay_prevalence_1)) + (overlay_prevalence_1 * overlay_position_FN)) , 2) * 100, "%") - # NPV <- round(((Prevalence - Min_Prevalence) * (100 - Max_FP)) / (((Prevalence - Min_Prevalence) * (100 - Max_FP)) + (Prevalence %o% Sensitivity_range)), 2) + Details_point_PPV_NPV = paste0( overlay_labels, - # "\n ", Min_Prevalence, " ", prevalence_label, " ", point_Prevalence, "\n", overlay_prevalence_1, " ", prevalence_label, " ", overlay_prevalence_2, - # "\nFN = ", paste0(overlay_position_FP_FN, - "\nFN = ", paste0(overlay_FP_FN, - - # round(DF_point_PPV_NPV$FN, decimals_x) # BUG - "%"), + "\nFN = ", paste0(overlay_FP_FN, "%"), "\nNPV = ", calculated_NPV - # paste0(round(DF_point_PPV_NPV$NPV, 2) * 100, "%") - ) + ) point_PPV_NPV = DF_point_PPV_NPV %>% mutate(NPV = round(NPV * 100, 2)) %>% dplyr::pull(NPV) @@ -207,19 +165,15 @@ calculated_PPV = paste0( round( - # (Sensitivity * overlay_prevalence_1) / ((Sensitivity * overlay_prevalence_1) + (overlay_prevalence_2 - overlay_prevalence_1) * overlay_position_FP_FN), (Sensitivity * overlay_prevalence_1) / ((Sensitivity * overlay_prevalence_1) + (overlay_prevalence_2 - overlay_prevalence_1) * overlay_FP_FN), - 2) * 100, "%") Details_point_PPV_NPV = paste0( overlay_labels, - # "\n ", Min_Prevalence, " ", prevalence_label, " ", point_Prevalence, "\n", overlay_prevalence_1, " ", prevalence_label, " ", overlay_prevalence_2, "\nFP = ", paste0(round(DF_point_PPV_NPV$FP, decimals_x), "%"), "\nPPV = ", calculated_PPV - # paste0(round(DF_point_PPV_NPV$PPV, 2) * 100, "%") - ) + ) point_PPV_NPV = DF_point_PPV_NPV %>% dplyr::mutate(PPV = round(PPV * 100, 2)) %>% dplyr::pull(PPV) @@ -235,22 +189,19 @@ - - - #' .number_decimals_plot_axis +#' +#' The number of decimal places in the x and y axis label depends on how wide the range is. #' #' @param PPV_NPV . #' @param Min_FP . #' @param Max_FP . #' @param Min_FN . #' @param Max_FN . -#' @param Min_Prevalence . -#' @param Max_Prevalence . +#' @param Min_Prevalence [x] out of y prevalence of disease +#' @param Max_Prevalence x out of [y] prevalence of disease .number_decimals_plot_axis <- function(PPV_NPV = "PPV", Min_FP = 0, Max_FP, Min_FN, Max_FN, Min_Prevalence, Max_Prevalence) { - - # The number of decimal places in the x and y axis label depends on how wide the range is # The vars to calculate range depend on PPV NPV if (PPV_NPV == "PPV") { @@ -268,8 +219,6 @@ decimals_x = 1 } else if (Max_FP_FN - Min_FP_FN > 5) { decimals_x = 0 - } else { - decimals_x = 0 } @@ -278,8 +227,6 @@ decimals_y = 1 } else if (Max_Prevalence - Min_Prevalence >= 9) { decimals_y = 0 - } else { - decimals_y = 0 } @@ -287,25 +234,14 @@ list("decimals_x" = decimals_x, "decimals_y" = decimals_y) - # decimals_x <- decimals_x - # decimals_y <- decimals_y - - # decimals = c(decimals_x, decimals_y) - # decimals } - - - - - - - - #' .plot_creation #' +#' Function to create the main heatmap plot +#' #' @param PPV_melted . #' @param Sensitivity . #' @param PPV_NPV . @@ -330,7 +266,6 @@ Min_Prevalence, Sensitivity, PPV_NPV = "PPV", - # Min_Prevalence = 1, Min_FP = 0, Max_FP, steps_matrix = 100, @@ -344,31 +279,22 @@ x_axis_label, y_axis_label) { - # DEBUG ------------------------------------------------------------------- - - # label_title = "" - # label_subtitle = "" - # label_caption = "" - - # # DEBUG ------------------------------------------------------------------- - # if (exists("DEBUG") == FALSE) {DEBUG = 0} - # - # if (DEBUG == 1) { - # message("\n*** .plot_creation() *** ") - # message("Max_FP: ", Max_FP) - # message("Step_size_FP: ", Step_size_FP) - # message("decimals_x: ", decimals_x) - # message("prevalence_label: ", prevalence_label) - # message("\n*** END *** ") - # - # } - # # ************************************************************************** - # Global variables ------------------------------------------------------- - # Colors, breaks and steps for Prevalence and FP axis - Paleta_DV = c("white", "grey", "gray30", "yellowgreen", "chartreuse4") + # Colors PPV + # https://www.google.com/search?q=color+picker + if (PPV_NPV == "PPV") { + + Paleta_DV = c("white", "grey", "1b2610", "yellowgreen", "chartreuse4") #Original + + } else if (PPV_NPV == "NPV") { + + Paleta_DV = c("#ffffff", "grey", "#190d24","#bd7afa", "#420080") # Violet + + } + + # Breaks and labels for PPV/NPV legend breaks_DV = c(0, 0.25, 0.5, 0.75, 1) labels_DV = c(0, 25, 50, 75, 100) @@ -392,21 +318,19 @@ # Create plot p = ggplot2::ggplot(PPV_melted, ggplot2::aes(FP, (Prevalence))) - # USE PPV_melted to get this!!!! + # [TODO] Can USE PPV_melted to get this? breaks_x = round(seq(from = Min_FP, to = Max_FP, by = Step_size_FP * 10), decimals_x) labels_x = paste0(breaks_x, "%") breaks_y = round(unique(PPV_melted$Prevalence)[c(seq(1, steps_matrix, 10), 101)], decimals_y) labels_y = paste(Min_Prevalence, prevalence_label, breaks_y) - # breaks_y = round(unique(PPV_melted$Prevalence)[c(seq(1, steps_matrix, 10), max(PPV_melted$Prevalence))], decimals_y) - # labels_y = paste(Min_Prevalence, prevalence_label, round(unique(PPV_melted$Prevalence)[c(seq(1, 100, 10), 101)], 0)) + # PPV tiles p = p + ggplot2::geom_tile(ggplot2::aes(fill = PPV), colour = "white") - - # NPV --------------------------------------------------------------------- + # NPV --------------------------------------------------------------------- } else if (PPV_NPV == "NPV") { @@ -415,27 +339,21 @@ # Create plot p = ggplot2::ggplot(PPV_melted, ggplot2::aes(FN, (Prevalence))) - # USE PPV_melted to get this!!!! + # [TODO] Can USE PPV_melted to get this? breaks_x = round(seq(Min_FN, Max_FN, Step_size_FN * 10), decimals_x) labels_x = paste0(breaks_x, "%") breaks_y = round(unique(PPV_melted$Prevalence)[c(seq(1, steps_matrix, 10), 101)], decimals_y) labels_y = paste(Min_Prevalence, prevalence_label, breaks_y) - # labels_y = paste(Min_Prevalence, prevalence_label, round(unique(PPV_melted$Prevalence)[c(seq(1, 100, 10), 101)], 0)) - + # NPV tiles p = p + ggplot2::geom_tile(ggplot2::aes(fill = NPV), colour = "white") } - - # if (!exists("label_title")) label_title <- "" - # if (!exists("label_subtitle")) label_subtitle <- "" - - # add p = p + ggplot2::scale_x_continuous(breaks = breaks_x, labels = labels_x, expand = c(0,0)) + - ggplot2::scale_y_continuous(breaks = breaks_y, labels = labels_y, expand = c(0,0)) + + ggplot2::scale_y_continuous(breaks = breaks_y, labels = labels_y, expand = c(0,0)) + ggplot2::scale_fill_gradientn(colours = Paleta_DV, na.value = "transparent", breaks = breaks_DV, labels = labels_DV, limits = c(0,1), name = legend_label) + ggplot2::theme(text = ggplot2::element_text(size = 20), plot.caption = ggplot2::element_text(size = 16, color = "darkgrey"), @@ -444,7 +362,6 @@ ggplot2::labs(title = label_title, subtitle = label_subtitle, caption = label_caption, - # fill = legend_label, x = x_axis_label, y = y_axis_label) @@ -457,23 +374,21 @@ - - #' .plot_overlay_area #' -#' Description +#' Add area overlay to PPV_heatmap plot #' #' @param PPV_melted . #' @param uncertainty_prevalence . -#' @param Min_Prevalence . -#' @param Max_Prevalence . +#' @param Min_Prevalence [x] out of y prevalence of disease +#' @param Max_Prevalence x out of [y] prevalence of disease #' @param Sensitivity . #' @param Min_FP . #' @param Max_FP . #' @param overlay_labels . #' @param PPV_NPV . -#' @param overlay_prevalence_1 . -#' @param overlay_prevalence_2 . +#' @param overlay_prevalence_1 [x] out of y prevalence of disease for the overlay +#' @param overlay_prevalence_2 x out of [y] prevalence of disease for the overlay #' @param decimals_x . #' @param decimals_y . #' @param prevalence_label . @@ -500,7 +415,6 @@ overlay_prevalence_1, overlay_prevalence_2, - # overlay_position_FP_FN, overlay_position_FP, overlay_position_FN, @@ -515,15 +429,10 @@ y_axis_label ) { - # DEBUG ------------------------------------------------------------------- - # uncertainty_prevalence = "high" - # overlay_labels = "" - # browser() # Calculate point prevalence ---------------------------------------------- # Calculates y as in Min_Prevalence out of y - # point_Prevalence_temp = Min_Prevalence / (overlay_prevalence_1 / overlay_prevalence_2) point_Prevalence_temp = Min_Prevalence / (overlay_prevalence_1 / overlay_prevalence_2) # Looks for closer value in the Prevalence column @@ -536,7 +445,7 @@ # Get PPV or NPV value ---------------------------------------------------- -# browser() + list_point_PPV = .get_point_ppv_npv( PPV_melted = PPV_melted, PPV_NPV = PPV_NPV, @@ -545,7 +454,6 @@ overlay_prevalence_2 = overlay_prevalence_2, overlay_labels = overlay_labels, - # overlay_position_FP_FN = overlay_position_FP_FN, overlay_position_FP = overlay_position_FP, overlay_position_FN = overlay_position_FN, @@ -556,27 +464,27 @@ prevalence_label = prevalence_label, x_axis_label = x_axis_label, y_axis_label = y_axis_label - ) #, overlay_labels = overlay_labels, decimals_x = decimals_x, prevalence_label = prevalence_label + ) - Details_point_PPV_NPV = list_point_PPV$Details_point_PPV_NPV - point_PPV_NPV = list_point_PPV$point_PPV_NPV - size_overlay_text = list_point_PPV$size_overlay_text + Details_point_PPV_NPV = list_point_PPV$Details_point_PPV_NPV + point_PPV_NPV = list_point_PPV$point_PPV_NPV + size_overlay_text = list_point_PPV$size_overlay_text # Add overlay ------------------------------------------------------------- - ## If overlay outside old matrix, we need to do this - # if (DEBUG == 1) warning("\n\n *Recalculate PPVMatrix: ", Min_Prevalence, " ", Max_Prevalence, " ", Sensitivity, " ", Max_FP) + # Size of geom_mark_rect() if (uncertainty_prevalence == "high") { uncertainty_prevalence_num = .05 } else { uncertainty_prevalence_num = .02 } + # X The variable that defines axis position depends on PPV_NPV if (PPV_NPV == "PPV") { x_axis_position = overlay_position_FP - } else { + } else if (PPV_NPV == "NPV") { x_axis_position = overlay_position_FN } @@ -586,7 +494,6 @@ Min_Prevalence = Min_Prevalence, Sensitivity = Sensitivity, Max_FP = Max_FP, - # Step_size_FP = Step_size_FP, decimals_x = decimals_x, decimals_y = decimals_y, prevalence_label = prevalence_label, @@ -605,23 +512,20 @@ # Overlay center ggplot2::annotate("point", color = "red", alpha = .5, size = 1, - # x = overlay_position_FP_FN, x = x_axis_position, y = point_Prevalence) + # Text + rectangle - ggforce::geom_mark_rect( - label.colour = "black", - alpha = .04, - expand = uncertainty_prevalence_num, - aes( - # x = overlay_position_FP_FN, - x = x_axis_position, - y = point_Prevalence), - # label = plot_text, - fill = "red", - # show.legend = FALSE, - description = paste0(Details_point_PPV_NPV)) + ggforce::geom_mark_rect(label.colour = "black", + alpha = .04, + expand = uncertainty_prevalence_num, + aes( + x = x_axis_position, + y = point_Prevalence), + fill = "red", + description = paste0(Details_point_PPV_NPV), + # con.border = "none", + con.size = .2) @@ -633,14 +537,15 @@ - #' .plot_overlay_line +#' +#' Add line overlay to PPV_heatmap plot #' #' @param PPV_melted DF -#' @param Min_Prevalence . -#' @param Max_Prevalence MAX prevalence for plot -#' @param overlay_prevalence_1 vector with x/prevalence values -#' @param overlay_prevalence_2 vector with prevalence/x values +#' @param Min_Prevalence [x] out of y prevalence of disease +#' @param Max_Prevalence x out of [y] prevalence of disease +#' @param overlay_prevalence_1 vector with [x] out of y prevalence of disease +#' @param overlay_prevalence_2 vector with x out of [y] prevalence of disease #' @param overlay_labels vector with labels for each overlay point #' @param Max_FP . #' @param Sensitivity . @@ -655,11 +560,13 @@ #' @param overlay_position_FN . #' @param PPV_NPV . #' @param legend_label . +#' @param uncertainty_prevalence How big the uncertainty area should be: ["low" or "high"] #' #' @importFrom ggplot2 annotate .plot_overlay_line <- function(PPV_melted, + uncertainty_prevalence = "low", PPV_NPV, Min_Prevalence, Max_Prevalence, @@ -668,7 +575,6 @@ overlay_prevalence_1, overlay_prevalence_2, - # overlay_position_FP_FN, overlay_position_FP, overlay_position_FN, @@ -685,37 +591,23 @@ x_axis_label, y_axis_label) { - # We made the modifiers proportional to the parameters (Max_Prevalence, Max_FP) - if (exists("size_uncertainty_area") == FALSE) {size_uncertainty_area = 0} - - if (abs(Max_Prevalence - max(overlay_prevalence_2)) > 10) { - - modifier_text_overlay_position = (Max_Prevalence * size_uncertainty_area + 1) - } else { + - modifier_text_overlay_position = -(Max_Prevalence * size_uncertainty_area + 1) + # Size of geom_mark_rect() + if (uncertainty_prevalence == "high") { + uncertainty_prevalence_num = .02 + } else if (uncertainty_prevalence == "low"){ + uncertainty_prevalence_num = .01 + } - } - - # overlay_labels = c("80", "70", "60", "50", "40", "30 y.o.") - # overlay_position_FP_FN = c(7, 8, 9, 12, 14) - # overlay_prevalence_2 = c(26, 29, 44, 69, 227) - - # overlay_position_x_end = c(overlay_position_FP_FN[1], overlay_position_FP_FN[-length(overlay_position_FP_FN)]) - overlay_position_x_end = c(overlay_position_FP[1], overlay_position_FP[-length(overlay_position_FP)]) - overlay_position_y_end = c(overlay_prevalence_2[1], overlay_prevalence_2[-length(overlay_prevalence_2)]) - # Create plot after adjusting overlay dimensions - # Should re-create ppv/npv matrix first? - # .plot_creation(PPV_melted) p = .plot_creation( PPV_melted = PPV_melted, Min_Prevalence = Min_Prevalence, Max_FP = Max_FP, Sensitivity = Sensitivity, - # Step_size_FP = Step_size_FP, decimals_x = decimals_x, decimals_y = decimals_y, prevalence_label = prevalence_label, @@ -723,39 +615,71 @@ x_axis_label = x_axis_label, y_axis_label = y_axis_label, + legend_label = legend_label, label_subtitle = label_subtitle, label_title = label_title, label_caption = paste0("Sensitivity = ", Sensitivity, "%")) + # X The variable that defines axis position depends on PPV_NPV if (PPV_NPV == "PPV") { x_axis_position = overlay_position_FP + overlay_position_FN = NA } else { x_axis_position = overlay_position_FN + overlay_position_FP = NA } + overlay_position_x_end = c(x_axis_position[1], x_axis_position[-length(x_axis_position)]) + overlay_position_y_end = c(overlay_prevalence_2[1], overlay_prevalence_2[-length(overlay_prevalence_2)]) + + + # Plot Overlay ------------------------------------------------------------ - p = p + ggplot2::annotate("segment", - # x = overlay_position_FP_FN, + + # DF for ggforce::geom_mark_rect() + DF_X = data.frame(x_axis_position = x_axis_position, + overlay_prevalence_2 = overlay_prevalence_2, + overlay_labels = overlay_labels) + + p = p + ggplot2::annotate("segment", x = x_axis_position, xend = overlay_position_x_end, y = overlay_prevalence_2, yend = overlay_position_y_end, color = "red", alpha = .1, size = 3) + - ggplot2::annotate("text", x = x_axis_position, y = overlay_prevalence_2, label = overlay_labels, size = 4) - # ggplot2::annotate("text", x = overlay_position_FP_FN, y = overlay_prevalence_2, label = overlay_labels, size = 4) + + ggplot2::annotate("point", color = "red", alpha = .5, size = .8, + x = x_axis_position, + y = overlay_prevalence_2) + + + ggforce::geom_mark_rect(data = DF_X, + label.colour = "black", + alpha = .04, + expand = uncertainty_prevalence_num, + aes( + x = x_axis_position, + y = overlay_prevalence_2, + group = overlay_labels, + description = overlay_labels), + fill = "red", + # con.border = "none", + con.size = .2) + + + # Output vars ------------------------------------------------------------- return(p) + } - - - #' .translate_labels +#' +#' Supports showing plot labels in Spanish (sp) or English (default) #' -#' @param Language . +#' @param Language Can be Spanish "sp" or English (default) #' @param Sensitivity . #' @param Max_FP . #' @param PPV_NPV . @@ -823,5 +747,5 @@ prevalence_label = prevalence_label, legend_label = legend_label ) + } - diff --git a/R/min_possible_prevalence.R b/R/min_possible_prevalence.R index 7098e8a..fda9317 100644 --- a/R/min_possible_prevalence.R +++ b/R/min_possible_prevalence.R @@ -23,41 +23,33 @@ #' you need a prevalence of at least 1 out of 21" min_possible_prevalence <- function(Sensitivity, FP_test, min_PPV_desired) { - #TEST Parameters ************** - #FP - Max_FP = 100 - Steps_FP = 1000 - Step_size_FP = Max_FP / Steps_FP - Min_FP = 0 - FP = seq(Min_FP, Max_FP, Step_size_FP) - - #CONDITION Parameters *********** - - #Prevalence_y - x out of y - Prevalence_x = 1 + + # Fixed parameters -------------------------------------------------------- + Min_Prevalence = 1 Max_Prevalence = 10000 # CHANGE ME Steps_Prevalence = 10000 Step_size_Prevalence = Max_Prevalence / Steps_Prevalence Prevalence = seq(Min_Prevalence, (1 + Max_Prevalence), Step_size_Prevalence) - # **************************************************************************************** + + # Calculation ------------------------------------------------------------- # We calculate the 100x100 PPV matrix - PPV = (Sensitivity * Prevalence_x) / ( (Sensitivity * Prevalence_x) + ( (Prevalence - 1) %o% FP) ) - #Label columns and rows of matrix - colnames(PPV) = FP - rownames(PPV) = Prevalence + PPV = (Sensitivity * Min_Prevalence) / ( (Sensitivity * Min_Prevalence) + ( (Prevalence - 1) * FP_test) ) - # Long format para ggplot Heatmap - PPV_melted = reshape2::melt(PPV) + # Long format + PPV_melted = PPV %>% as.data.frame() %>% mutate(Prevalence = 1:length(.)) # Rename columns - names(PPV_melted) = c("melted_Prevalence", "melted_FP", "melted_PPV") + names(PPV_melted) = c("melted_PPV", "melted_Prevalence") # Calculate prevalence - output_prevalence = max(PPV_melted$melted_Prevalence[PPV_melted$melted_PPV > (min_PPV_desired / 100) & PPV_melted$melted_FP == FP_test]) + output_prevalence = max(PPV_melted$melted_Prevalence[PPV_melted$melted_PPV > (min_PPV_desired / 100)]) + # PPV_melted %>% filter(abs(melted_PPV - (min_PPV_desired / 100)) == min(abs(melted_PPV - (min_PPV_desired / 100)))) # Keep closest value to min_PPV_desired - # Function output! + + # Function output -------------------------------------------------------- message("To reach a PPV of ", min_PPV_desired, "% when using a test with ", Sensitivity, "% Sensitivity and ", FP_test, "% False Positive Rate, you need a prevalence of at least 1 out of ", output_prevalence) + } diff --git a/README.md b/README.md index cae32d3..f8bc9bc 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ -# BayesianReasoning +# BayesianReasoning + +[![CRAN status](https://www.r-pkg.org/badges/version/BayesianReasoning)](https://cran.r-project.org/package=BayesianReasoning) [![Codecov test coverage](https://codecov.io/gh/gorkang/BayesianReasoning/branch/master/graph/badge.svg)](https://codecov.io/gh/gorkang/BayesianReasoning?branch=master) - ## Bayesian reasoning in medical contexts @@ -23,9 +24,9 @@ The BayesianReasoning package has three main functions: --- -If you want to install the package can use: `remotes::install_github("gorkang/BayesianReasoning")`. Please report any problems you find in the [Issues Github page](https://github.com/gorkang/BayesianReasoning/issues). +You can install the stable (CRAN) version of the package with `install.packages("BayesianReasoning")` or development version with `remotes::install_github("gorkang/BayesianReasoning@dev")`. Please report any problems you find in the [Issues Github page](https://github.com/gorkang/BayesianReasoning/issues). -There is a [shiny app implementation](https://gorkang.shinyapps.io/BayesianReasoning/) with most of the main features available. +There is a [shiny app implementation](https://gorkang.shinyapps.io/BayesianReasoning/) with most of the main features of the PPV_heatmap() function available. --- diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..e69de29 diff --git a/cran-comments.md b/cran-comments.md index 382a5d4..8a39869 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -10,18 +10,16 @@ 0 errors | 0 warnings | 1 note -* This is a new release. +* Days since last update: 3 -## Resubmission +## New minor version -This is a resubmission. In this version I have: +This is new minor version to correct a few NOTES that appeared in the CRAN checks. -Corrected issues raised in CRAN revision: +I also added more tests, cleaned up old comments and improved a couple functions: -* Reduced title to less than 65 characters -* Do not capitalize things in the Description text -* Added references to Description explaining main concepts -* Replaced cat() with message() -* Aded folder parameter to PPV_diagnostic_vs_screening() and PPV_heatmap() functions - -I also added tests for all main functions, deleted old comments and slightly improve the documentation \ No newline at end of file +* 100% code coverage +* min_possible_prevalence() is now much more efficient +* Improvements to overlay = "line" en PPV_heatmap(), now using {ggforce} for labels +* Changed color palette for NPV +* Tweaked color palette for PPV \ No newline at end of file diff --git a/docs/404.html b/docs/404.html index ccc0306..3ca54d0 100644 --- a/docs/404.html +++ b/docs/404.html @@ -8,6 +8,13 @@ Page not found (404) • BayesianReasoning + + + + + + + @@ -40,6 +47,7 @@ + @@ -71,7 +79,7 @@ BayesianReasoning - 0.3.1 + 0.3.2 @@ -93,6 +101,9 @@