Skip to content

Commit

Permalink
Merge pull request #5 from gorkang/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
gorkang authored Sep 2, 2018
2 parents f982897 + 9bcabdb commit 1aa2397
Show file tree
Hide file tree
Showing 11 changed files with 188 additions and 148 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: BayesianReasoning
Title: Plot and help understand Positive Predictive Values, and their relationship with Sensitivity, Specificity and Prevalence.
Title: Plot and help understand Positive and Negative Predictive Values, and their relationship with Sensitivity, Specificity and Prevalence.
Version: 0.1
Authors@R: person("Gorka", "Navarrete", email = "gorkang@gmail.com",
role = c("aut", "cre"))
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# README

### v0.1 It barely works

Initial release, includes 3 functions:

* **PPV_heatmap.R**: Plot PPV heatmaps
* **PPV_diagnostic_vs_screening.R**: Plot PPV for a diagnostic and a screening group
* **min_possible_prevalence.R**: Show minimum possible prevalence given the test characteristics
73 changes: 73 additions & 0 deletions R/.createPPVmatrix.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
.createPPVmatrix <- function(Max_Prevalence, Sensitivity, Max_FP) {

# Libraries ---------------------------------------------------------------
# if (!require('dplyr')) install.packages('dplyr'); library('dplyr')
if (!require('reshape2')) install.packages('reshape2'); library('reshape2')


# DEBUG -------------------------------------------------------------------

# Max_Prevalence = 4
# Sensitivity = 90
# Max_FP = 10
# PPV_NPV = "PPV" # NPV/PPV

#TEST Parameters **************

# False Positives (x axis)
Steps_FP <<- 100
Step_size_FP <<- Max_FP/Steps_FP
Min_FP <<- 0
FP = seq(Min_FP, Max_FP, Step_size_FP) #With (Max_FP-Step_size_FP) we get 100 FPs. If we use Max_FP instead we have 101 (because we start at 0!)

#CONDITION Parameters ***********

#Prevalence_y - x out of y
Prevalence_x <<- 1
Min_Prevalence <<- 1
Steps_Prevalence <<- 100
Step_size_Prevalence <<- Max_Prevalence/Steps_Prevalence
Prevalence = seq(Min_Prevalence, (1 + Max_Prevalence), Step_size_Prevalence) #With (1 + Max_Prevalence) we get 101. If we use Max_Prevalence we get 100


# PPV Calculation -------------------------------------------------------------

# We calculate the 100x100 PPV matrix using %o% (outer)
PPV <<- (Sensitivity * Prevalence_x) / ((Sensitivity * Prevalence_x) + ((Prevalence - 1) %o% FP) )

#Label columns and rows of matrix
colnames(PPV) = FP
rownames(PPV) = Prevalence

# Long format para ggplot Heatmap
PPV_melted = melt(PPV)

# Give names to variables
names(PPV_melted) = c("melted_Prevalence", "melted_FP", "melted_PPV")

PPV_melted <<- PPV_melted


# NPV Calculation -------------------------------------------------------------

# As NPV is more dependent on Sensitivity, the x axis should show Sensitivity, and FP should be fixed
NPV <<- ((Prevalence - 1) %o% (100 - FP)) / (((Prevalence - 1) %o% (100 - FP)) + (Prevalence_x * (100 - Sensitivity)))

#Label columns and rows of matrix
colnames(NPV) = FP
rownames(NPV) = Prevalence

# Long format para ggplot Heatmap
NPV_melted = melt(NPV)

# Give names to variables
names(NPV_melted) = c("melted_Prevalence", "melted_FP", "melted_NPV")

NPV_melted <<- NPV_melted

# HACK to easily switch betweeen NPV and PPV
if (PPV_NPV == "NPV") {
PPV_melted = NPV_melted
}

}
78 changes: 39 additions & 39 deletions R/PPV_diagnostic_vs_screening.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Plot PPV for a diagnostic and a screening group
#'
#'Plot PPV associated to different levels of FP and a specific Sensitivity, for two different Prevalence groups.
#' Plot PPV associated to different levels of FP and a specific Sensitivity, for two different Prevalence groups.
#'
#' @param Max_FP False positive rate (1-Specificity) [0-100].
#' @param Sensitivity Sensitivity of the test [0-100].
Expand All @@ -23,13 +23,17 @@
#' PPV_diagnostic_vs_screening(Max_FP = 10, Sensitivity = 100,
#' prevalence_screening_group = 1667,
#' prevalence_diagnostic_group = 44,
# ' labels_prevalence = c("20 y.o.", "50 y.o."))
#' labels_prevalence = c("20 y.o.", "50 y.o."))
PPV_diagnostic_vs_screening <- function(Max_FP = 10, Sensitivity = 100, prevalence_screening_group = 100, prevalence_diagnostic_group = 2,
labels_prevalence = c("Screening", "Diagnostic"),
save_plot = TRUE) {
# Prevalences (1 out of x)
save_plot = FALSE) {

# Libraries ---------------------------------------------------------------
if (!require('dplyr')) install.packages('dplyr'); library('dplyr')
if (!require('tidyr')) install.packages('tidyr'); library('tidyr')
if (!require('ggplot2')) install.packages('ggplot2'); library('ggplot2')
if (!require('reshape2')) install.packages('reshape2'); library('reshape2')

library(tidyverse)

# PARAMETERS --------------------------------------------------------------
# Max_FP = 10
Expand Down Expand Up @@ -69,42 +73,38 @@ PPV_diagnostic_vs_screening <- function(Max_FP = 10, Sensitivity = 100, prevalen

# Plot --------------------------------------------------------------------

# Labels_plot = c(paste0("Real: ", (1/prevalence_screening_group) * 100, "%"), paste0("Study: ", (1/(prevalence_diagnostic_group) * 100), "%"))
Labels_plot = c(paste0(labels_prevalence[1], " prevalence: 1 out of ", prevalence_screening_group), paste0(labels_prevalence[2], " prevalence: 1 out of ", prevalence_diagnostic_group))

p = ggplot(data = FINAL, aes(x = FP, y = PPV, colour = Prevalence)) +
geom_line(size = 1.5) +
scale_colour_hue(l = 50, labels = Labels_plot) +
theme_minimal() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = function(x) paste0(x, "%")) +
scale_y_continuous(name = "Positive Predictive Value", limits = c(0, 100), labels = function(x) paste0(x, "%")) +
theme(legend.position = "bottom") +
labs(title = "",
subtitle = paste0("Sensitivity = ", Sensitivity, "%" ),
x = "False Positive rate",
color = "")
# labs(caption = "(based on data from ...)") +
# theme(plot.caption = element_text(size = 10))
# guides(color=guide_legend("Prevalence of the "))

print(p)

# Parameters = paste0("FP", Max_FP, "_Sens", Sensitivity, "_PReal", prevalence_screening_group, "_PStudy", prevalence_diagnostic_group)
# ggsave(paste0("outputs/diagnostic_vs_screening/", Parameters, ".svg"), Plot_PPV, dpi = 300, width = 14, height = 10)
# ggsave(paste0("outputs/diagnostic_vs_screening/", Parameters, ".png"), Plot_PPV, dpi = 300, width = 14, height = 10)

if (save_plot == TRUE) {

print(p)
plot_name = paste0("outputs/diagnostic_vs_screening/", "FP_", Max_FP, "_sens_", Sensitivity, "_screening_", prevalence_screening_group, "_diagnostic_", prevalence_diagnostic_group, ".png")
ggsave(plot_name, p, dpi = 300, width = 14, height = 10)
cat("\n Plot created in: ", plot_name, "\n")
Labels_plot = c(paste0(labels_prevalence[1], " prevalence: 1 out of ", prevalence_screening_group), paste0(labels_prevalence[2], " prevalence: 1 out of ", prevalence_diagnostic_group))

} else {
p = ggplot(data = FINAL, aes(x = FP, y = PPV, colour = Prevalence)) +
geom_line(size = 1.5) +
scale_colour_hue(l = 50, labels = Labels_plot) +
theme_minimal() +
theme(text = element_text(size = 20)) +
scale_x_continuous(labels = function(x) paste0(x, "%")) +
scale_y_continuous(name = "Positive Predictive Value", limits = c(0, 100), labels = function(x) paste0(x, "%")) +
theme(legend.position = "bottom") +
labs(title = "",
subtitle = paste0("Sensitivity = ", Sensitivity, "%" ),
x = "False Positive rate",
color = "")
# labs(caption = "(based on data from ...)") +
# theme(plot.caption = element_text(size = 10))
# guides(color=guide_legend("Prevalence of the "))

print(p)

}

}
if (save_plot == TRUE) {

print(p)
plot_name = paste0("outputs/diagnostic_vs_screening/", "FP_", Max_FP, "_sens_", Sensitivity, "_screening_", prevalence_screening_group, "_diagnostic_", prevalence_diagnostic_group, ".png")
ggsave(plot_name, p, dpi = 300, width = 14, height = 10)
cat("\n Plot created in: ", plot_name, "\n")

} else {

print(p)

}

}
57 changes: 14 additions & 43 deletions R/PPV_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,26 @@
#' overlay_labels = c("40 y.o.", "35 y.o.", "30 y.o.", "25 y.o.", "20 y.o."),
#' overlay_position_FP = c(4.8, 4.8, 4.8, 4.8, 4.8),
#' overlay_position_Prevalence = c(68, 249, 626, 946, 1068))
PPV_heatmap <- function(Max_Prevalence, Sensitivity, Max_FP,
PPV_heatmap <- function(Max_Prevalence, Sensitivity, Max_FP,
overlay = FALSE, overlay_labels, overlay_position_FP, overlay_position_Prevalence,
label_title = "", label_subtitle = "",
Language = "en", save_plot = TRUE) {
Language = "en", save_plot = FALSE,
PPV_NPV = "PPV") {

# Libraries ---------------------------------------------------------------

if (!require('pacman')) install.packages('pacman'); library('pacman')
p_load(tidyverse, reshape2)

# if (!require('dplyr')) install.packages('dplyr'); library('dplyr')
if (!require('ggplot2')) install.packages('ggplot2'); library('ggplot2')
if (!require('reshape2')) install.packages('reshape2'); library('reshape2')
source("R/.createPPVmatrix.R", local = TRUE)


# DEBUG -------------------------------------------------

# overlay = TRUE # TRUE / FALSE
# Language = "en" # "sp" / "en"
# Sensitivity = 99 # [0-100]
# Max_FP = 10 # FP (1-Specificity): [0-100]
# Max_Prevalence = 1667.5 # Prevalence (1 out of X): [1-Inf?]
# Max_Prevalence = 4 # Prevalence (1 out of X): [1-Inf?]
# Sensitivity = 90 # [0-100]
# Max_FP = 5 # FP (1-Specificity): [0-100]
# label_subtitle = "PPV of Mammogram for Breast Cancer by Age"
# overlay = TRUE
# overlay_labels = c("80", "70", "60", "50", "40", "30", "20 y.o.")
Expand Down Expand Up @@ -112,42 +114,11 @@ PPV_heatmap <- function(Max_Prevalence, Sensitivity, Max_FP,
}
}


#TEST Parameters **************

# False Positives (x axis)
Steps_FP = 100
Step_size_FP = Max_FP/Steps_FP
Min_FP = 0
FP = seq(Min_FP, Max_FP, Step_size_FP) #With (Max_FP-Step_size_FP) we get 100 FPs. If we use Max_FP instead we have 101 (because we start at 0!)

#CONDITION Parameters ***********

#Prevalence_y - x out of y
Prevalence_x = 1
Min_Prevalence = 1
Steps_Prevalence = 100
Step_size_Prevalence = Max_Prevalence/Steps_Prevalence
Prevalence = seq(Min_Prevalence, (1 + Max_Prevalence), Step_size_Prevalence) #With (1 + Max_Prevalence) we get 101. If we use Max_Prevalence we get 100


# Calculation -------------------------------------------------------------
# Create PPV matrix -------------------------------------------------------
.createPPVmatrix(Max_Prevalence, Sensitivity, Max_FP)

# We calculate the 100x100 PPV matrix using %o% (outer)
PPV = (Sensitivity * Prevalence_x) / ((Sensitivity * Prevalence_x) + ((Prevalence - 1) %o% FP) )
# NPV = (Sensitivity * Prevalence_x) / ((Sensitivity * Prevalence_x) + ((Prevalence - 1) %o% FP) )

#Label columns and rows of matrix
colnames(PPV) = FP
rownames(PPV) = Prevalence

# Long format para ggplot Heatmap
PPV_melted = melt(PPV)

# Give names to variables
names(PPV_melted) = c("melted_Prevalence", "melted_FP", "melted_PPV")



# PLOT --------------------------------------------------------------------

Expand All @@ -160,7 +131,7 @@ PPV_heatmap <- function(Max_Prevalence, Sensitivity, Max_FP,
breaks_x = seq(0, Max_FP, Step_size_FP * 10)
labels_x = paste0(seq(Min_FP, Max_FP, Step_size_FP * 10), "%")
breaks_y = seq(0, Max_Prevalence, Step_size_Prevalence * 10)
labels_y = paste(prevalence_label, round(seq(Min_Prevalence - 1, Max_Prevalence, Step_size_Prevalence * 10), 0))[-1]
labels_y = paste(prevalence_label, round(seq(Min_Prevalence - 1, Max_Prevalence, Step_size_Prevalence * 10),0))[-1]
labels_y = c(paste(prevalence_label, "1"), labels_y) #We want the legend to start on 1 out of 1


Expand Down
33 changes: 18 additions & 15 deletions R/min_possible_prevalence.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,29 @@
#' Show minimum possible prevalence
#' Show minimum possible prevalence given the test characteristics
#'
#'Given a FP and a desired PPV, what is the Minimum Prevalence of a Condition
#'
#' @param Sensitivity Sensitivity of the test.
#' @param FP_test False positive rate (1-Specificity) [0-100].
#' @param min_PPV_desired Which PPV is what you consider the minimum to trust a positive result in the test.
#' @param Sensitivity Sensitivity of the test: [0-100]
#' @param FP_test False positive rate (1-Specificity): [0-100]
#' @param min_PPV_desired Which PPV is what you consider the minimum to trust a positive result in the test: [0-100]
#'
#' @return A description showing the minimum necessary prevalence.
#' @export
#'
#' @examples
#'
#' # Example 1
#' min_possible_prevalence(Sensitivity = 99.9, FP_test = 0.2, min_PPV_desired = 0.9)
#' > To get a PPV of 0.9 with a test with 99.9 % Sensitivity and 0.2 % False Positive Rate, you need a prevalence of at least 1 out of 56
#' > min_possible_prevalence(Sensitivity = 99.9, FP_test = .1, min_PPV_desired = 70)
#' To reach a PPV of 70 when using a test with 99.9 % Sensitivity and 0.1 % False Positive Rate, you need a prevalence of at least 1 out of 429
#'
#' # Example 2
#' min_possible_prevalence(100, 0.1, 0.98)
#' > To get a PPV of 0.98 with a test with 100 % Sensitivity and 0.1 % False Positive Rate, you need a prevalence of at least 1 out of 21
#' > min_possible_prevalence(100, 0.1, 98)
#' To reach a PPV of 98 when using a test with 100 % Sensitivity and 0.1 % False Positive Rate, you need a prevalence of at least 1 out of 21
min_possible_prevalence <- function(Sensitivity, FP_test, min_PPV_desired) {

if (!require('pacman')) install.packages('pacman'); library('pacman')
p_load(tidyverse, reshape2)

# Libraries ---------------------------------------------------------------
# if (!require('dplyr')) install.packages('dplyr'); library('dplyr')
if (!require('reshape2')) install.packages('reshape2'); library('reshape2')

#TEST Parameters **************
#FP
Max_FP = 100
Expand All @@ -35,12 +36,11 @@ min_possible_prevalence <- function(Sensitivity, FP_test, min_PPV_desired) {

#Prevalence_y - x out of y
Prevalence_x = 1

Min_Prevalence = 1
Max_Prevalence = 10000 # CHANGE ME
Steps_Prevalence = 10000
Step_size_Prevalence = Max_Prevalence/Steps_Prevalence
Prevalence = seq(Min_Prevalence, (1 + Max_Prevalence), Step_size_Prevalence) #With (1 + Max_Prevalence) we get 101. If we use Max_Prevalence we get 100
Prevalence = seq(Min_Prevalence, (1 + Max_Prevalence), Step_size_Prevalence)

# ****************************************************************************************

Expand All @@ -55,7 +55,10 @@ min_possible_prevalence <- function(Sensitivity, FP_test, min_PPV_desired) {
#Por algun motivo, melt a veces cambia nombres de variables. De este modo los fijamos
names(PPV_melted) = c("melted_Prevalence", "melted_FP", "melted_PPV")


output_prevalence = max(PPV_melted$melted_Prevalence[PPV_melted$melted_PPV > (min_PPV_desired/100) & PPV_melted$melted_FP == FP_test])


# Function output!
cat("To get a PPV of", min_PPV_desired, "with a test with", Sensitivity, "% Sensitivity and", FP_test, "% False Positive Rate, you need a prevalence of at least 1 out of", max(PPV_melted$melted_Prevalence[PPV_melted$melted_PPV > min_PPV_desired & PPV_melted$melted_FP == FP_test]))

cat("To reach a PPV of", min_PPV_desired, "when using a test with", Sensitivity, "% Sensitivity and", FP_test, "% False Positive Rate, you need a prevalence of at least 1 out of", output_prevalence)
}
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,27 @@ PPV_diagnostic_vs_screening(Max_FP = 10,
![](outputs/diagnostic_vs_screening/FP_10_sens_100_screening_1000_diagnostic_3.png)


---


## min_possible_prevalence

Imagine you would like to use a test in a population and have a 98% PPV, that is, *if* a positive result comes out in the test, you would like a 98% certainty that it is a true positive. How high should be the prevalence of the disease in that group?

``` r
min_possible_prevalence(100, 0.1, 98)
```

`To reach a PPV of 98 when using a test with 100 % Sensitivity and 0.1 % False Positive Rate, you need a prevalence of at least 1 out of 21`

---

Another example, with a very good test, and lower expectations:

``` r
min_possible_prevalence(Sensitivity = 99.9, FP_test = .1, min_PPV_desired = 70)
```

$ To reach a PPV of 70 when using a test with 99.9 % Sensitivity and 0.1 % False Positive Rate, you need a prevalence of at least 1 out of 429

---
Loading

0 comments on commit 1aa2397

Please sign in to comment.