From b38515a0c63b159549faeeb38420282c9472e9b0 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Tue, 12 Sep 2023 10:55:28 +0200 Subject: [PATCH 01/42] added plot function --- R/plot.R | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 R/plot.R diff --git a/R/plot.R b/R/plot.R new file mode 100644 index 0000000..63b53b0 --- /dev/null +++ b/R/plot.R @@ -0,0 +1,58 @@ + +plot.ulsif <- function(object, type = "univariate", var1 = NULL, var2, ...) { + + # Data handling + data <- rbind(object$df_numerator, object$df_denominator) + data$dr <- predict(object, newdata = data) + + # Univariate plot + + if(type == "univariate") { + if(is.null(var1)){ + cor <- cor(data)[1:ncol(data)-1,ncol(data)] + var <- names(cor[which.max(cor)]) + } + else { + var <- var1 + } + + plot <- + ggplot(data, aes(x = .data[[var]], y = .data[["dr"]])) + + geom_point(aes(col = dr)) + + theme_bw() + + labs(y = "Density ratio") + + labs(title = "Scatter plot of individual values and density ratio") + + scale_colour_viridis_c(option = "B", name ="Density ratio") + + } + + # Histogram + if(type == "histogram") { + plot <- + ggplot(data, aes(x = dr)) + + geom_histogram(fill = "grey25", color = "black", + bins = max(data$dr)) + + scale_x_continuous(breaks = seq(round(min(data$dr), 0),round(max(data$dr), 0))) + + theme_bw() + + labs( + x = "Density ratio", + y = "Count" + ) + + labs(title = "Distribution of density ratio estimates") + } + + # Bivariate + if(type == "bivariate") { + plot <- + ggplot(data, mapping = aes(x = .data[[var1]], y = .data[[var2]])) + + geom_point(aes(col = dr)) + + scale_colour_viridis_c(option = "B", name ="Density ratio") + + theme_bw() + + labs(x = var1, + y = var2) + + labs(title = "Density ratio estimates for combinations of values") + + } + + return(plot) +} From 3f0752edefa8cb7e0e43055ab73eb87f110c89df Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 2 Oct 2023 15:49:36 +0200 Subject: [PATCH 02/42] separete plot functions Separetes plot functions into three functions 1. Default: Plots density ratio histograms of both samples 2. Univariate: Plot density ratio value against value of one variable. 3. Bivariate: Plot density ratio value (in colors), for combinations of two variables --- R/plot.R | 134 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 95 insertions(+), 39 deletions(-) diff --git a/R/plot.R b/R/plot.R index 63b53b0..f588d43 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,58 +1,114 @@ -plot.ulsif <- function(object, type = "univariate", var1 = NULL, var2, ...) { +plot.ulsif <- function(object, sample = "both") { + + data <- rbind(object$df_numerator, object$df_denominator, make.row.names = TRUE) + data$dr <- predict(object, newdata = data) + + data$sample[1:nrow(object$df_numerator)] <- "Numerator" + data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" + + if(sample == "numerator"){ + data <- data %>% filter(sample == "Numerator") + } + if(sample == "denominator"){ + data <- data %>% filter(sample == "Denominator") + } + if(sample == "both"){ + data <- data + } + + plot <- + ggplot(data, aes(x = dr)) + + geom_histogram(aes(fill = sample), alpha = .75, + color = "black", + position = "identity", + bins = max(data$dr)) + + scale_x_continuous(breaks = seq(round(min(data$dr), 0),round(max(data$dr), 0))) + + scale_fill_manual(values = c("firebrick", "steelblue")) + + theme_bw() + + labs( + x = "Density ratio", + y = "Count" + ) + + labs(title = "Distribution of density ratio estimates", + fill = "Sample") + + return(plot) +} + + +plot_univariate <- function(object, vars, sample = "both") { # Data handling data <- rbind(object$df_numerator, object$df_denominator) data$dr <- predict(object, newdata = data) - # Univariate plot + data$sample[1:nrow(object$df_numerator)] <- "Numerator" + data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" + + if(sample == "numerator"){ + data <- data %>% filter(sample == "Numerator") + } + if(sample == "denominator"){ + data <- data %>% filter(sample == "Denominator") + } + if(sample == "both"){ + data <- data + } + - if(type == "univariate") { - if(is.null(var1)){ - cor <- cor(data)[1:ncol(data)-1,ncol(data)] - var <- names(cor[which.max(cor)]) - } - else { - var <- var1 - } + plots <- list() + plot_onevariable <- function(var, shape = "sample"){ plot <- - ggplot(data, aes(x = .data[[var]], y = .data[["dr"]])) + - geom_point(aes(col = dr)) + + ggplot(data, aes(x = .data[[var]], y = dr )) + + geom_point(aes(col = dr, shape = sample)) + theme_bw() + labs(y = "Density ratio") + - labs(title = "Scatter plot of individual values and density ratio") + - scale_colour_viridis_c(option = "B", name ="Density ratio") + labs(title = "Scatter plot of individual values and density ratio", + shape = "Sample") + + scale_colour_viridis_c(option = "B", name ="Density ratio") + + scale_shape_manual(values = c(16, 3)) + + return(plot) } - # Histogram - if(type == "histogram") { - plot <- - ggplot(data, aes(x = dr)) + - geom_histogram(fill = "grey25", color = "black", - bins = max(data$dr)) + - scale_x_continuous(breaks = seq(round(min(data$dr), 0),round(max(data$dr), 0))) + - theme_bw() + - labs( - x = "Density ratio", - y = "Count" - ) + - labs(title = "Distribution of density ratio estimates") - } - - # Bivariate - if(type == "bivariate") { - plot <- - ggplot(data, mapping = aes(x = .data[[var1]], y = .data[[var2]])) + - geom_point(aes(col = dr)) + - scale_colour_viridis_c(option = "B", name ="Density ratio") + - theme_bw() + - labs(x = var1, - y = var2) + - labs(title = "Density ratio estimates for combinations of values") + for(var in vars){ + plots[[var]] <- plot_onevariable(var) } + return(plots) +} + +plot_bivariate <- function(object, vars, sample = "both") { + + data <- rbind(object$df_numerator, object$df_denominator) + data$dr <- predict(object, newdata = data) + + data$sample[1:nrow(object$df_numerator)] <- "Numerator" + data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" + + if(sample == "numerator"){ + data <- data %>% filter(sample == "Numerator") + } + if(sample == "denominator"){ + data <- data %>% filter(sample == "Denominator") + } + if(sample == "both"){ + data <- data + } + + + plot <- + ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + + geom_point(aes(col = dr, shape = sample)) + + scale_colour_viridis_c(option = "B", name ="Density ratio") + + theme_bw() + + labs(title = "Density ratio estimates for combinations of values", + shape = "Sample") + + scale_shape_manual(values = c(16, 3)) + return(plot) } From 109d385870132e933eaca90cc104df0cb849495f Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 2 Oct 2023 16:47:14 +0200 Subject: [PATCH 03/42] small fix plots --- R/plot.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/plot.R b/R/plot.R index f588d43..a2873a0 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,9 +1,10 @@ plot.ulsif <- function(object, sample = "both") { - data <- rbind(object$df_numerator, object$df_denominator, make.row.names = TRUE) + data <- rbind(object$df_numerator, object$df_denominator) data$dr <- predict(object, newdata = data) + data$sample <- NA data$sample[1:nrow(object$df_numerator)] <- "Numerator" data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" @@ -43,6 +44,7 @@ plot_univariate <- function(object, vars, sample = "both") { data <- rbind(object$df_numerator, object$df_denominator) data$dr <- predict(object, newdata = data) + data$sample <- NA data$sample[1:nrow(object$df_numerator)] <- "Numerator" data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" @@ -87,6 +89,7 @@ plot_bivariate <- function(object, vars, sample = "both") { data <- rbind(object$df_numerator, object$df_denominator) data$dr <- predict(object, newdata = data) + data$sample <- NA data$sample[1:nrow(object$df_numerator)] <- "Numerator" data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" From 94310641334902829186ad5f35b22c2502142660 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 9 Oct 2023 16:09:04 +0200 Subject: [PATCH 04/42] update plot functions implemented several changes in plot functions (separe objects, simply code, use base pipe, use default bins, separate bins visually, add argument to plot x with logscale, add argument for shapes) --- .gitignore | 1 + R/plot.R | 86 +++++++++++++++++++++++++++++------------------------- 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/.gitignore b/.gitignore index 5b6a065..0d206b9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .Rhistory .RData .Ruserdata +\playground diff --git a/R/plot.R b/R/plot.R index a2873a0..29d2088 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,30 +1,35 @@ -plot.ulsif <- function(object, sample = "both") { +plot.ulsif <- function(object, sample = "both", logscale = FALSE) { data <- rbind(object$df_numerator, object$df_denominator) - data$dr <- predict(object, newdata = data) - - data$sample <- NA - data$sample[1:nrow(object$df_numerator)] <- "Numerator" - data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" - if(sample == "numerator"){ - data <- data %>% filter(sample == "Numerator") + if("dr" %in% names(data) | "sample" %in% names(data)) { + stop("Variables in your dataframe cannot have name 'dr' or 'sample'. Please rename your variable(s)") } - if(sample == "denominator"){ - data <- data %>% filter(sample == "Denominator") + + data$dr <- predict(object, newdata = data) + + if(logscale){ + data$dr <- log(data$dr) } - if(sample == "both"){ - data <- data + + obsclass <- rep(c("numerator", "denominator"), + c(nrow(object$df_numerator), nrow(object$df_denominator))) + + data$sample <- obsclass + + obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + + if (obsselect != "both") { + data <- filter(data, obsclass == obsselect) } plot <- ggplot(data, aes(x = dr)) + geom_histogram(aes(fill = sample), alpha = .75, color = "black", - position = "identity", - bins = max(data$dr)) + - scale_x_continuous(breaks = seq(round(min(data$dr), 0),round(max(data$dr), 0))) + + position = position_dodge2(preserve = "single", + padding = 0.2)) + scale_fill_manual(values = c("firebrick", "steelblue")) + theme_bw() + labs( @@ -42,20 +47,20 @@ plot_univariate <- function(object, vars, sample = "both") { # Data handling data <- rbind(object$df_numerator, object$df_denominator) + + if("dr" %in% names(data)) { + stop("Variables in your dataframe cannot have name 'dr'. Please rename your variable") + } + data$dr <- predict(object, newdata = data) - data$sample <- NA - data$sample[1:nrow(object$df_numerator)] <- "Numerator" - data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" + obsclass <- rep(c("numerator", "denominator"), + c(nrow(object$df_numerator), nrow(object$df_denominator))) - if(sample == "numerator"){ - data <- data %>% filter(sample == "Numerator") - } - if(sample == "denominator"){ - data <- data %>% filter(sample == "Denominator") - } - if(sample == "both"){ - data <- data + obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + + if (obsselect != "both") { + data <- filter(data, obsclass == obsselect) } @@ -84,29 +89,30 @@ plot_univariate <- function(object, vars, sample = "both") { return(plots) } -plot_bivariate <- function(object, vars, sample = "both") { +plot_bivariate <- function(object, vars, sample = "both", show.samples = FALSE) { data <- rbind(object$df_numerator, object$df_denominator) + + if("dr" %in% names(data) | "sample" %in% names(data)) { + stop("Variables in your dataframe cannot have name 'dr' or 'sample'. Please rename your variable(s)") + } + data$dr <- predict(object, newdata = data) - data$sample <- NA - data$sample[1:nrow(object$df_numerator)] <- "Numerator" - data$sample[(nrow(object$df_numerator) + 1):nrow(data)] <- "Denominator" + obsclass <- rep(c("numerator", "denominator"), + c(nrow(object$df_numerator), nrow(object$df_denominator))) - if(sample == "numerator"){ - data <- data %>% filter(sample == "Numerator") - } - if(sample == "denominator"){ - data <- data %>% filter(sample == "Denominator") - } - if(sample == "both"){ - data <- data - } + data$sample <- obsclass + obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + + if (obsselect != "both") { + data <- filter(data, obsclass == obsselect) + } plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(aes(col = dr, shape = sample)) + + geom_point(aes(col = dr, shape = if (show.samples) sample else NULL)) + scale_colour_viridis_c(option = "B", name ="Density ratio") + theme_bw() + labs(title = "Density ratio estimates for combinations of values", From 2f38355046ae1148e9c6f4be8283b0e29b2abd01 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 9 Oct 2023 17:38:53 +0200 Subject: [PATCH 05/42] implement plot checks checks in all plots for object type, names not overriden, names in original dataset --- R/checks.R | 18 ++++++++++++++++++ R/plot.R | 32 +++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/R/checks.R b/R/checks.R index 562f7be..a40b0f6 100644 --- a/R/checks.R +++ b/R/checks.R @@ -257,3 +257,21 @@ check.newdata <- function(object, newdata) { } newdata } + +check.var.names <- function(vars){ + if(!all(vars %in% names(data))) { + stop("Indicate variable (s) are not present in object. Check variable names are correct") + } +} + +check.overriden.names <- function(data){ + if("dr" %in% names(data) | "sample" %in% names(data)) { + stop("Variables in your dataframe cannot have name 'dr' or 'sample'. Please rename your variable(s)") + } +} + +check.object.type <- function(object) { + if(!any(c("ulsif", "kliep") == attr(output, "class"))) { + stop("Objects should be of class 'ulsif' or 'kliep'") + } +} diff --git a/R/plot.R b/R/plot.R index 29d2088..be390d5 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,11 +1,11 @@ -plot.ulsif <- function(object, sample = "both", logscale = FALSE) { +plot.histogram <- function(object, sample = "both", logscale = FALSE) { + + check.object.type(object) data <- rbind(object$df_numerator, object$df_denominator) - if("dr" %in% names(data) | "sample" %in% names(data)) { - stop("Variables in your dataframe cannot have name 'dr' or 'sample'. Please rename your variable(s)") - } + check.overriden.names(data) data$dr <- predict(object, newdata = data) @@ -42,15 +42,25 @@ plot.ulsif <- function(object, sample = "both", logscale = FALSE) { return(plot) } +plot.ulsif <- function(object, sample = "both", logscale = FALSE) { + plot.histogram(object, sample = "both", logscale = FALSE) +} + +plot.kliep <- function(object, sample = "both", logscale = FALSE) { + plot.histogram(object, sample = "both", logscale = FALSE) +} + + plot_univariate <- function(object, vars, sample = "both") { + check.object.type(object) + # Data handling data <- rbind(object$df_numerator, object$df_denominator) - if("dr" %in% names(data)) { - stop("Variables in your dataframe cannot have name 'dr'. Please rename your variable") - } + check.overriden.names(data) + check.var.names(vars) data$dr <- predict(object, newdata = data) @@ -91,13 +101,13 @@ plot_univariate <- function(object, vars, sample = "both") { plot_bivariate <- function(object, vars, sample = "both", show.samples = FALSE) { + check.object.type(object) + data <- rbind(object$df_numerator, object$df_denominator) - if("dr" %in% names(data) | "sample" %in% names(data)) { - stop("Variables in your dataframe cannot have name 'dr' or 'sample'. Please rename your variable(s)") - } + check.overriden.names(data) + check.var.names(vars) - data$dr <- predict(object, newdata = data) obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) From d46b9c948bba78eb9743283457c94719a3e5b215 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 20 Oct 2023 15:21:47 +0200 Subject: [PATCH 06/42] fix checks --- R/checks.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/checks.R b/R/checks.R index a40b0f6..9156529 100644 --- a/R/checks.R +++ b/R/checks.R @@ -258,9 +258,9 @@ check.newdata <- function(object, newdata) { newdata } -check.var.names <- function(vars){ +check.var.names <- function(vars, data){ if(!all(vars %in% names(data))) { - stop("Indicate variable (s) are not present in object. Check variable names are correct") + stop("Indicated variable (s) are not present in object. Check variable names are correct") } } @@ -271,7 +271,7 @@ check.overriden.names <- function(data){ } check.object.type <- function(object) { - if(!any(c("ulsif", "kliep") == attr(output, "class"))) { + if(all(c("ulsif", "kliep") != attr(output, "class"))) { stop("Objects should be of class 'ulsif' or 'kliep'") } } From e74cffc4ee428306a36e140a55a2949770dd60de Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 20 Oct 2023 15:24:21 +0200 Subject: [PATCH 07/42] update plots - Bivariate plot fuctions plots bivariate plots of chosen variables - Adjust some aesthethics (fill, colour, alpha, shape) to improve visibility - Add roxigeny2 to include documentation (+export) --- R/plot.R | 79 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 9 deletions(-) diff --git a/R/plot.R b/R/plot.R index be390d5..be3c422 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,11 +1,10 @@ - plot.histogram <- function(object, sample = "both", logscale = FALSE) { check.object.type(object) data <- rbind(object$df_numerator, object$df_denominator) - check.overriden.names(data) + check.overriden.names(vars) data$dr <- predict(object, newdata = data) @@ -42,16 +41,46 @@ plot.histogram <- function(object, sample = "both", logscale = FALSE) { return(plot) } +#' Title +#' +#' @param object +#' @param sample +#' @param logscale +#' +#' @return +#' @export +#' +#' @examples plot.ulsif <- function(object, sample = "both", logscale = FALSE) { - plot.histogram(object, sample = "both", logscale = FALSE) + plot.histogram(object, sample = sample, logscale = logscale) } +#' Title +#' +#' @param object +#' @param sample +#' @param logscale +#' +#' @return +#' @export +#' +#' @examples plot.kliep <- function(object, sample = "both", logscale = FALSE) { - plot.histogram(object, sample = "both", logscale = FALSE) + plot.histogram(object, sample = sample, logscale = logscale) } +#' Title +#' +#' @param object +#' @param vars +#' @param sample +#' +#' @return +#' @export +#' +#' @examples plot_univariate <- function(object, vars, sample = "both") { check.object.type(object) @@ -60,7 +89,7 @@ plot_univariate <- function(object, vars, sample = "both") { data <- rbind(object$df_numerator, object$df_denominator) check.overriden.names(data) - check.var.names(vars) + check.var.names(vars, data) data$dr <- predict(object, newdata = data) @@ -99,15 +128,30 @@ plot_univariate <- function(object, vars, sample = "both") { return(plots) } -plot_bivariate <- function(object, vars, sample = "both", show.samples = FALSE) { +#' Title +#' +#' @param object +#' @param vars +#' @param sample +#' @param show.samples +#' +#' @return +#' @export +#' +#' @examples +plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = TRUE) { check.object.type(object) data <- rbind(object$df_numerator, object$df_denominator) check.overriden.names(data) - check.var.names(vars) + vars <- c(var.x, var.y) + + check.var.names(vars, data) + + data$dr <- predict(object, newdata = data) obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) @@ -120,14 +164,31 @@ plot_bivariate <- function(object, vars, sample = "both", show.samples = FALSE) data <- filter(data, obsclass == obsselect) } + plots <- list() + + var_combinations <- expand.grid(var.x, var.y) + var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) + var_combinations <- var_combinations %>% filter(Var1 != Var2) + var_combinations <- as.matrix(var_combinations) + + # helper function to plot two variables + plot_twovariables <- function(data, vars, showsamples = show.samples){ plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(aes(col = dr, shape = if (show.samples) sample else NULL)) + + geom_point(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), + alpha = 0.5) + + scale_fill_viridis_c(option = "B", name ="Density ratio") + scale_colour_viridis_c(option = "B", name ="Density ratio") + theme_bw() + labs(title = "Density ratio estimates for combinations of values", shape = "Sample") + - scale_shape_manual(values = c(16, 3)) + scale_shape_manual(values = c(21, 24)) return(plot) + } + + for(i in 1:nrow(var_combinations)){ + plots[[i]] <- plot_twovariables(data = data, vars = var_combinations[i,]) + } + return(plots) } From 8c0e86e404ff9cee3bf2b129163cd17f55fa3909 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 17 Nov 2023 10:17:37 +0100 Subject: [PATCH 08/42] fix small bug --- R/checks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/checks.R b/R/checks.R index 9156529..734d932 100644 --- a/R/checks.R +++ b/R/checks.R @@ -271,7 +271,7 @@ check.overriden.names <- function(data){ } check.object.type <- function(object) { - if(all(c("ulsif", "kliep") != attr(output, "class"))) { + if(all(c("ulsif", "kliep") != attr(object, "class"))) { stop("Objects should be of class 'ulsif' or 'kliep'") } } From 8651a65f9be8b36fd8539d7d5e27539d06db88f3 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 17 Nov 2023 16:53:31 +0100 Subject: [PATCH 09/42] update plots Solve a couple of small bugs,added informative messages, added histogram binwidth as an argument --- R/plot.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/plot.R b/R/plot.R index be3c422..639e403 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,4 +1,4 @@ -plot.histogram <- function(object, sample = "both", logscale = FALSE) { +plot.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { check.object.type(object) @@ -9,7 +9,9 @@ plot.histogram <- function(object, sample = "both", logscale = FALSE) { data$dr <- predict(object, newdata = data) if(logscale){ + data$dr[data$dr < 0] <- 10e-8 data$dr <- log(data$dr) + warning("Negative estimated density ratios converted to 10e-0.8 before applying logarithmic transformation") } obsclass <- rep(c("numerator", "denominator"), @@ -25,8 +27,10 @@ plot.histogram <- function(object, sample = "both", logscale = FALSE) { plot <- ggplot(data, aes(x = dr)) + - geom_histogram(aes(fill = sample), alpha = .75, + geom_histogram(aes(fill = sample), + alpha = .75, color = "black", + binwidth = if (!is.null(binwidth)) binwidth else NULL, position = position_dodge2(preserve = "single", padding = 0.2)) + scale_fill_manual(values = c("firebrick", "steelblue")) + @@ -51,8 +55,8 @@ plot.histogram <- function(object, sample = "both", logscale = FALSE) { #' @export #' #' @examples -plot.ulsif <- function(object, sample = "both", logscale = FALSE) { - plot.histogram(object, sample = sample, logscale = logscale) +plot.ulsif <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { + plot.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) } #' Title @@ -65,8 +69,8 @@ plot.ulsif <- function(object, sample = "both", logscale = FALSE) { #' @export #' #' @examples -plot.kliep <- function(object, sample = "both", logscale = FALSE) { - plot.histogram(object, sample = sample, logscale = logscale) +plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { + plot.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) } From f663b416f7aa9e38f530029388a08b42e636e0f3 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 17 Nov 2023 18:02:20 +0100 Subject: [PATCH 10/42] update plot --- R/plot.R | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/R/plot.R b/R/plot.R index 639e403..60f0b2b 100644 --- a/R/plot.R +++ b/R/plot.R @@ -85,7 +85,7 @@ plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NUL #' @export #' #' @examples -plot_univariate <- function(object, vars, sample = "both") { +plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { check.object.type(object) @@ -106,6 +106,18 @@ plot_univariate <- function(object, vars, sample = "both") { data <- filter(data, obsclass == obsselect) } + if (logscale) { + if(any(data$dr < 0)){ + warning("Negative estimated density ratios converted to 10e-8 before applying logarithmic transformation") + data$dr[data$dr < 0] <- 10e-8 + } + + data$dr <- log(data$dr) + y_lab <- "Log(Density Ratio)" + + } else { + y_lab <- "Density Ratio" + } plots <- list() @@ -114,11 +126,13 @@ plot_univariate <- function(object, vars, sample = "both") { ggplot(data, aes(x = .data[[var]], y = dr )) + geom_point(aes(col = dr, shape = sample)) + theme_bw() + - labs(y = "Density ratio") + labs(title = "Scatter plot of individual values and density ratio", - shape = "Sample") + + shape = "Sample", + y = y_lab) + + geom_hline(yintercept = 0, linetype = "dashed")+ scale_colour_viridis_c(option = "B", name ="Density ratio") + - scale_shape_manual(values = c(16, 3)) + scale_shape_manual(values = c(16, 3)) + + scale_y_continuous(breaks = c(-15,-10,-5,0,1,2,3,4, 8)) return(plot) @@ -128,7 +142,6 @@ plot_univariate <- function(object, vars, sample = "both") { for(var in vars){ plots[[var]] <- plot_onevariable(var) } - return(plots) } @@ -143,7 +156,7 @@ plot_univariate <- function(object, vars, sample = "both") { #' @export #' #' @examples -plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = TRUE) { +plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = TRUE, output = "assembled") { check.object.type(object) @@ -194,5 +207,11 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = for(i in 1:nrow(var_combinations)){ plots[[i]] <- plot_twovariables(data = data, vars = var_combinations[i,]) } + + if(output == "assembled"){ + plots_assembly <- patchwork::wrap_plots(plots, guides = "collect") + return(plots_assembly) + } else { return(plots) + } } From d775ce65e4f8500532c213611d4dc7c8cf44bacd Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 20 Nov 2023 15:31:46 +0100 Subject: [PATCH 11/42] update plots --- R/plot.R | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/R/plot.R b/R/plot.R index 60f0b2b..0383631 100644 --- a/R/plot.R +++ b/R/plot.R @@ -167,7 +167,6 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = vars <- c(var.x, var.y) check.var.names(vars, data) - data$dr <- predict(object, newdata = data) obsclass <- rep(c("numerator", "denominator"), @@ -190,7 +189,7 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = # helper function to plot two variables plot_twovariables <- function(data, vars, showsamples = show.samples){ - plot <- + plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + geom_point(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), alpha = 0.5) + @@ -209,9 +208,78 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = } if(output == "assembled"){ - plots_assembly <- patchwork::wrap_plots(plots, guides = "collect") + plots_assembly <- patchwork::wrap_plots(plots, guides = "collect", byrow = TRUE,ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) + plots_assembly <- plots_assembly + plot_annotation(title = "Density ratio estimates for combinations of values") return(plots_assembly) } else { return(plots) } } + +plot_bivariate_heatmap <- function(object, var.x,var.y, sample = "both", show.samples = TRUE, output = "assembled") { + + check.object.type(object) + + data <- rbind(object$df_numerator, object$df_denominator) + + check.overriden.names(data) + + vars <- c(var.x, var.y) + + check.var.names(vars, data) + data$dr <- predict(object, newdata = data) + + obsclass <- rep(c("numerator", "denominator"), + c(nrow(object$df_numerator), nrow(object$df_denominator))) + + data$sample <- obsclass + + obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + + if (obsselect != "both") { + data <- filter(data, obsclass == obsselect) + } + + plots <- list() + + var_combinations <- expand.grid(var.x, var.y) + var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) + var_combinations <- var_combinations %>% filter(Var1 != Var2) + var_combinations <- as.matrix(var_combinations) + + # helper function to plot two variables + plot_twovariables <- function(data, vars, showsamples = show.samples){ + + grid_var1 <- seq(min(data[[vars[1]]]), max(data[[vars[1]]])) + grid_var2 <- seq(min(data[[vars[2]]]), max(data[[vars[2]]])) + + grid_data <- expand.grid(grid_var1, grid_var2) + + grid_data$dr <- predict(object, newdata = grid_data) + + + ggplot(grid_data, mapping = aes(x = .grid_data[[vars[1]]], y = .grid_data[[vars[2]]])) + + geom_point(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), + alpha = 0.5) + + scale_fill_viridis_c(option = "B", name ="Density ratio") + + scale_colour_viridis_c(option = "B", name ="Density ratio") + + theme_bw() + + labs(title = "Density ratio estimates for combinations of values", + shape = "Sample") + + scale_shape_manual(values = c(21, 24)) + + return(plot) + } + + for(i in 1:nrow(var_combinations)){ + plots[[i]] <- plot_twovariables(data = data, vars = var_combinations[i,]) + } + + if(output == "assembled"){ + plots_assembly <- patchwork::wrap_plots(plots, guides = "collect", byrow = TRUE,ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) + plots_assembly <- plots_assembly + plot_annotation(title = "Density ratio estimates for combinations of values") + return(plots_assembly) + } else { + return(plots) + } +} From a4d36c8b5d2443b5ae3475ab1ced99316f39d410 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 24 Nov 2023 15:31:54 +0100 Subject: [PATCH 12/42] update plotting functions Remove duplicate variables, add heatmap --- R/plot.R | 53 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/R/plot.R b/R/plot.R index 0383631..e881291 100644 --- a/R/plot.R +++ b/R/plot.R @@ -182,11 +182,32 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = plots <- list() + + var_combinations <- expand.grid(var.x, var.y) var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) var_combinations <- var_combinations %>% filter(Var1 != Var2) var_combinations <- as.matrix(var_combinations) + # Write function to remove duplicate rows + remove_duplicate_rows <- function(data_matrix) { + # Convert matrix to data frame + data_frame <- as.data.frame(data_matrix) + + # Sort elements within each row + sorted_data_frame <- t(apply(data_frame, 1, function(row) sort(row))) + + # Convert back to matrix + sorted_matrix <- as.matrix(sorted_data_frame) + + # Remove duplicate rows + unique_matrix <- unique(sorted_matrix) + + return(unique_matrix) + } + + var_combinations <- remove_duplicate_rows(var_combinations) + # helper function to plot two variables plot_twovariables <- function(data, vars, showsamples = show.samples){ plot <- @@ -247,32 +268,46 @@ plot_bivariate_heatmap <- function(object, var.x,var.y, sample = "both", show.sa var_combinations <- var_combinations %>% filter(Var1 != Var2) var_combinations <- as.matrix(var_combinations) + # helper function to plot two variables - plot_twovariables <- function(data, vars, showsamples = show.samples){ + plot_twovariables <- function(object, data, vars, showsamples = show.samples){ grid_var1 <- seq(min(data[[vars[1]]]), max(data[[vars[1]]])) grid_var2 <- seq(min(data[[vars[2]]]), max(data[[vars[2]]])) grid_data <- expand.grid(grid_var1, grid_var2) - grid_data$dr <- predict(object, newdata = grid_data) + colnames(grid_data) <- c(vars[[1]], vars[[2]]) + #browser() + # Identify variables in data that are not in vars + other_vars <- setdiff(names(data), vars) + # Assign the mean value of each other variable to the corresponding variable in grid_data + for (var in other_vars) { + grid_data[[var]] <- mean(data[[var]]) + } + + grid_data <- as.data.frame(grid_data) + + grid_data <- grid_data[, names(data)] # reorder names + + + grid_data$dr <- predict(object, newdata = grid_data) - ggplot(grid_data, mapping = aes(x = .grid_data[[vars[1]]], y = .grid_data[[vars[2]]])) + - geom_point(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), - alpha = 0.5) + + plot <- + ggplot(grid_data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + + geom_raster(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), + alpha = 0.5) + scale_fill_viridis_c(option = "B", name ="Density ratio") + scale_colour_viridis_c(option = "B", name ="Density ratio") + theme_bw() + labs(title = "Density ratio estimates for combinations of values", shape = "Sample") + scale_shape_manual(values = c(21, 24)) - return(plot) } - - for(i in 1:nrow(var_combinations)){ - plots[[i]] <- plot_twovariables(data = data, vars = var_combinations[i,]) + for (i in 1:nrow(var_combinations)) { + plots[[i]] <- plot_twovariables(object = object, data = data, vars = as.character(var_combinations[i,])) } if(output == "assembled"){ From 463c149b1b81d7d915e95ac9bceb80ca02b0233b Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Wed, 10 Jan 2024 12:46:31 +0100 Subject: [PATCH 13/42] update plots changed histogram function, change warning called, changed value transformation --- R/plot.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/plot.R b/R/plot.R index e881291..ad09528 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,4 +1,4 @@ -plot.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { +dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { check.object.type(object) @@ -9,9 +9,10 @@ plot.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = data$dr <- predict(object, newdata = data) if(logscale){ - data$dr[data$dr < 0] <- 10e-8 + data$dr[data$dr < 0] <- 10e-6 data$dr <- log(data$dr) - warning("Negative estimated density ratios converted to 10e-0.8 before applying logarithmic transformation") + warning("Negative estimated density ratios converted to 10e-0.6 before applying logarithmic transformation", + .call = FALSE) } obsclass <- rep(c("numerator", "denominator"), @@ -56,7 +57,7 @@ plot.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = #' #' @examples plot.ulsif <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { - plot.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) + dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) } #' Title @@ -70,7 +71,7 @@ plot.ulsif <- function(object, sample = "both", logscale = FALSE, binwidth = NUL #' #' @examples plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { - plot.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) + dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) } @@ -108,8 +109,9 @@ plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { if (logscale) { if(any(data$dr < 0)){ - warning("Negative estimated density ratios converted to 10e-8 before applying logarithmic transformation") - data$dr[data$dr < 0] <- 10e-8 + data$dr[data$dr < 0] <- 10e-6 + warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + .call = FALSE) } data$dr <- log(data$dr) From 6b9595409f96dee1f21c83c5639acb07175849a4 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 19 Jan 2024 15:05:10 +0100 Subject: [PATCH 14/42] fix histogram plot --- R/plot.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/plot.R b/R/plot.R index ad09528..2b1df15 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,31 +1,35 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { + # Checks check.object.type(object) - - data <- rbind(object$df_numerator, object$df_denominator) - check.overriden.names(vars) + # Create data object and estimate density ratio + data <- rbind(object$df_numerator, object$df_denominator) data$dr <- predict(object, newdata = data) if(logscale){ + # Convert negative predicted density ratios to 10e-0.6, so log can be computed data$dr[data$dr < 0] <- 10e-6 data$dr <- log(data$dr) warning("Negative estimated density ratios converted to 10e-0.6 before applying logarithmic transformation", - .call = FALSE) + call. = FALSE) # to avoid printing the whole call } + # Create a sample index variable (denominator or numerator) obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) - data$sample <- obsclass + # Create a object selection variable (both, numerator, denominator) obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + # If not both, subset data (only num or only den) if (obsselect != "both") { data <- filter(data, obsclass == obsselect) } + # Plot plot <- ggplot(data, aes(x = dr)) + geom_histogram(aes(fill = sample), @@ -111,7 +115,7 @@ plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { if(any(data$dr < 0)){ data$dr[data$dr < 0] <- 10e-6 warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", - .call = FALSE) + call. = FALSE) } data$dr <- log(data$dr) From acdde6884d5a3cb83dc1a60b6502f240914952c5 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 19 Jan 2024 15:25:52 +0100 Subject: [PATCH 15/42] update univariate plot --- R/plot.R | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/R/plot.R b/R/plot.R index 2b1df15..3ce3474 100644 --- a/R/plot.R +++ b/R/plot.R @@ -92,61 +92,76 @@ plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NUL #' @examples plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { + # Check object type check.object.type(object) - # Data handling + # Create data object data <- rbind(object$df_numerator, object$df_denominator) + # Check names in data and variable names check.overriden.names(data) check.var.names(vars, data) + # Estimate density ratio data$dr <- predict(object, newdata = data) + # Create a sample index variable (denominator or numerator) obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) + # Create a object selection variable (both, numerator, denominator) obsselect <- match.arg(sample, c("both", "numerator", "denominator")) if (obsselect != "both") { data <- filter(data, obsclass == obsselect) } + if (logscale) { + if(any(data$dr < 0)){ + # Convert negative predicted density ratios to 10e-6, so log can be computed data$dr[data$dr < 0] <- 10e-6 warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", call. = FALSE) - } + } data$dr <- log(data$dr) + + # Assign correct y and legend labels y_lab <- "Log(Density Ratio)" + colour_name <- "Log (Density ratio)" } else { y_lab <- "Density Ratio" + colour_name <- "Density ratio" } + # Create list storage for plots object (for iteration) plots <- list() - plot_onevariable <- function(var, shape = "sample"){ + # Write the function to make one plot, for one variable + one_plot <- function(var, shape = "sample"){ plot <- - ggplot(data, aes(x = .data[[var]], y = dr )) + + ggplot(data, aes(x = .data[[var]], y = dr)) + geom_point(aes(col = dr, shape = sample)) + theme_bw() + labs(title = "Scatter plot of individual values and density ratio", shape = "Sample", y = y_lab) + geom_hline(yintercept = 0, linetype = "dashed")+ - scale_colour_viridis_c(option = "B", name ="Density ratio") + + scale_colour_viridis_c(option = "B", name = colour_name) + scale_shape_manual(values = c(16, 3)) + - scale_y_continuous(breaks = c(-15,-10,-5,0,1,2,3,4, 8)) - + scale_y_continuous(breaks = seq( + from = floor(min(data$dr)), + to = ceiling(max(data$dr)))) return(plot) } for(var in vars){ - plots[[var]] <- plot_onevariable(var) + plots[[var]] <- one_plot(var) } return(plots) } From 7cfcdce94863a077c5c4e64ef8c56ce51b483728 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 19 Jan 2024 17:18:25 +0100 Subject: [PATCH 16/42] fix iteration heatmap 1 --- R/plot.R | 118 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 55 deletions(-) diff --git a/R/plot.R b/R/plot.R index 3ce3474..00b0577 100644 --- a/R/plot.R +++ b/R/plot.R @@ -179,57 +179,48 @@ plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { #' @examples plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = TRUE, output = "assembled") { + # Check object type check.object.type(object) + # Create data object and check variable names data <- rbind(object$df_numerator, object$df_denominator) - check.overriden.names(data) + # Create variables vector and check variable names vars <- c(var.x, var.y) - check.var.names(vars, data) + + # Estimate density ratio data$dr <- predict(object, newdata = data) + # Create a sample index variable (denominator or numerator) obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) - data$sample <- obsclass - + # Create a object selection variable (both, numerator, denominator) obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + # Filter data based on object selection if (obsselect != "both") { data <- filter(data, obsclass == obsselect) } - plots <- list() - + # Create a grid of variable combinations + var_combinations <- expand.grid(var.x, var.y) + ## Remove duplicate combinations + ## Start by sorting elements within each row + ## This makes duplicate rows with different order of variables identical + var_combinations <- t(apply(var_combinations, 1, sort)) + var_combinations <- unique(var_combinations) # retain unique rows only - var_combinations <- expand.grid(var.x, var.y) + # Remove rows where both variables are the same var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) - var_combinations <- var_combinations %>% filter(Var1 != Var2) + var_combinations <- var_combinations %>% filter(V1 != V2) var_combinations <- as.matrix(var_combinations) - # Write function to remove duplicate rows - remove_duplicate_rows <- function(data_matrix) { - # Convert matrix to data frame - data_frame <- as.data.frame(data_matrix) - - # Sort elements within each row - sorted_data_frame <- t(apply(data_frame, 1, function(row) sort(row))) - - # Convert back to matrix - sorted_matrix <- as.matrix(sorted_data_frame) - - # Remove duplicate rows - unique_matrix <- unique(sorted_matrix) - - return(unique_matrix) - } - var_combinations <- remove_duplicate_rows(var_combinations) - - # helper function to plot two variables + # Define function to make bivariate plot plot_twovariables <- function(data, vars, showsamples = show.samples){ plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + @@ -245,10 +236,15 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = return(plot) } + + # Iterate over grid of variable combinations + # Create a list to store plots + plots <- list() for(i in 1:nrow(var_combinations)){ plots[[i]] <- plot_twovariables(data = data, vars = var_combinations[i,]) } + if(output == "assembled"){ plots_assembly <- patchwork::wrap_plots(plots, guides = "collect", byrow = TRUE,ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) plots_assembly <- plots_assembly + plot_annotation(title = "Density ratio estimates for combinations of values") @@ -258,63 +254,70 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = } } -plot_bivariate_heatmap <- function(object, var.x,var.y, sample = "both", show.samples = TRUE, output = "assembled") { +plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.samples = TRUE, output = "assembled") { + # Check object type check.object.type(object) + # Create data object and check variable names data <- rbind(object$df_numerator, object$df_denominator) - check.overriden.names(data) + # Create variables vector and check variable names vars <- c(var.x, var.y) - check.var.names(vars, data) - data$dr <- predict(object, newdata = data) + # Create a sample index variable (denominator or numerator) obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) - data$sample <- obsclass - + # Create a object selection variable (both, numerator, denominator) obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + # Filter data based on object selection if (obsselect != "both") { data <- filter(data, obsclass == obsselect) } - plots <- list() - + # Create a grid of variable combinations var_combinations <- expand.grid(var.x, var.y) - var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) - var_combinations <- var_combinations %>% filter(Var1 != Var2) - var_combinations <- as.matrix(var_combinations) - - # helper function to plot two variables - plot_twovariables <- function(object, data, vars, showsamples = show.samples){ + ## Remove duplicate combinations + ## Start by sorting elements within each row + ## This makes duplicate rows with different order of variables identical + var_combinations <- t(apply(var_combinations, 1, sort)) + var_combinations <- unique(var_combinations) # retain unique rows only - grid_var1 <- seq(min(data[[vars[1]]]), max(data[[vars[1]]])) - grid_var2 <- seq(min(data[[vars[2]]]), max(data[[vars[2]]])) + # Remove rows where both variables are the same + var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) + names(var_combinations) <- c("V1", "V2") # In some cases, var names are changed @TO-DO. Why? + var_combinations <- var_combinations %>% filter(V1 != V2) + var_combinations <- as.matrix(var_combinations) - grid_data <- expand.grid(grid_var1, grid_var2) + object2 <- object + # Define function to make bivariate plot + plot_twovariables_heatmap <- function(object = object2, data, vars, showsamples = show.samples){ + # Create a 100x100 grid of values for the two variables, in the range of the data + seq_var1 <- seq(min(data[[vars[1]]]), max(data[[vars[1]]]), length.out = 100) + seq_var2 <- seq(min(data[[vars[2]]]), max(data[[vars[2]]]), length.out = 100) + grid_data <- expand.grid(seq_var1, seq_var2) colnames(grid_data) <- c(vars[[1]], vars[[2]]) - #browser() - # Identify variables in data that are not in vars - other_vars <- setdiff(names(data), vars) - # Assign the mean value of each other variable to the corresponding variable in grid_data + # Add the rest of data variables to the grid, inputting its mean value + # First, identify variables in data that are not in vars + other_vars <- setdiff(names(data), vars) for (var in other_vars) { grid_data[[var]] <- mean(data[[var]]) } + # Predict density ratio for each combination of values + # Assign to dataframe and reorder column in the same order as data, so that predict works grid_data <- as.data.frame(grid_data) - - grid_data <- grid_data[, names(data)] # reorder names - - + grid_data <- grid_data[, names(data)] grid_data$dr <- predict(object, newdata = grid_data) + # Plot plot <- ggplot(grid_data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + geom_raster(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), @@ -327,12 +330,17 @@ plot_bivariate_heatmap <- function(object, var.x,var.y, sample = "both", show.sa scale_shape_manual(values = c(21, 24)) return(plot) } - for (i in 1:nrow(var_combinations)) { - plots[[i]] <- plot_twovariables(object = object, data = data, vars = as.character(var_combinations[i,])) + + # Iterate over grid of variable combinations + # Create a list to store plots + plots <- list() + for(i in 1:nrow(var_combinations)){ + plots[[i]] <- plot_twovariables_heatmap(data = data, vars = var_combinations[i,]) } + if(output == "assembled"){ - plots_assembly <- patchwork::wrap_plots(plots, guides = "collect", byrow = TRUE,ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) + plots_assembly <- patchwork::wrap_plots(plots, guides = "collect", byrow = TRUE,ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) plots_assembly <- plots_assembly + plot_annotation(title = "Density ratio estimates for combinations of values") return(plots_assembly) } else { From 9c99749a3cd9b8640bcc4a5ae7ede85bd4c2bdfc Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 22 Jan 2024 15:03:35 +0100 Subject: [PATCH 17/42] update bivariate plots Fix small bug related to variable names in variable grid. Make both bivariates plot show logarithm(density ratio) by default, and ajust scales based on it --- R/plot.R | 73 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 12 deletions(-) diff --git a/R/plot.R b/R/plot.R index 00b0577..7f0684f 100644 --- a/R/plot.R +++ b/R/plot.R @@ -177,7 +177,8 @@ plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { #' @export #' #' @examples -plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = TRUE, output = "assembled") { +plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = TRUE, + output = "assembled", logscale = TRUE) { # Check object type check.object.type(object) @@ -193,6 +194,28 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = # Estimate density ratio data$dr <- predict(object, newdata = data) + # Determine if DR is shown in logscale (default) or not + if (logscale) { + + if(any(data$dr < 0)){ + # Convert negative predicted density ratios to 10e-6, so log can be computed + data$dr[data$dr < 0] <- 10e-6 + warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + call. = FALSE) + } + + data$dr <- log(data$dr) + + # Assign correct y and legend labels + y_lab <- "Log(Density Ratio)" + colour_name <- "Log (Density ratio)" + + } else { + y_lab <- "Density Ratio" + colour_name <- "Density ratio" + } + + # Create a sample index variable (denominator or numerator) obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) @@ -216,7 +239,8 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = # Remove rows where both variables are the same var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) - var_combinations <- var_combinations %>% filter(V1 != V2) + names(var_combinations) <- c("Var1", "Var2") + var_combinations <- var_combinations %>% filter(Var1 != Var2) var_combinations <- as.matrix(var_combinations) @@ -226,11 +250,12 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + geom_point(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), alpha = 0.5) + - scale_fill_viridis_c(option = "B", name ="Density ratio") + - scale_colour_viridis_c(option = "B", name ="Density ratio") + + scale_fill_viridis_c(option = "B", name = colour_name) + + scale_colour_viridis_c(option = "B", name = colour_name) + theme_bw() + labs(title = "Density ratio estimates for combinations of values", - shape = "Sample") + + shape = "Sample", + y = y_lab) + scale_shape_manual(values = c(21, 24)) return(plot) @@ -254,7 +279,8 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = } } -plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.samples = TRUE, output = "assembled") { +plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.samples = TRUE, + output = "assembled", log.scale = TRUE) { # Check object type check.object.type(object) @@ -290,13 +316,14 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s # Remove rows where both variables are the same var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) - names(var_combinations) <- c("V1", "V2") # In some cases, var names are changed @TO-DO. Why? - var_combinations <- var_combinations %>% filter(V1 != V2) + names(var_combinations) <- c("Var1", "Var2") + var_combinations <- var_combinations %>% filter(Var1 != Var2) + var_combinations <- as.matrix(var_combinations) object2 <- object # Define function to make bivariate plot - plot_twovariables_heatmap <- function(object = object2, data, vars, showsamples = show.samples){ + plot_twovariables_heatmap <- function(object = object2, data, vars, showsamples = show.samples, logscale = log.scale){ # Create a 100x100 grid of values for the two variables, in the range of the data seq_var1 <- seq(min(data[[vars[1]]]), max(data[[vars[1]]]), length.out = 100) @@ -317,16 +344,38 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s grid_data <- grid_data[, names(data)] grid_data$dr <- predict(object, newdata = grid_data) + # Determine if DR is shown in logscale (default) or not + if (logscale) { + + if(any(data$dr < 0)){ + # Convert negative predicted density ratios to 10e-6, so log can be computed + data$dr[data$dr < 0] <- 10e-6 + warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + call. = FALSE) + } + + grid_data$dr <- log(grid_data$dr) + + # Assign correct y and legend labels + y_lab <- "Log(Density Ratio)" + colour_name <- "Log (Density ratio)" + + } else { + y_lab <- "Density Ratio" + colour_name <- "Density ratio" + } + # Plot plot <- ggplot(grid_data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + geom_raster(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), alpha = 0.5) + - scale_fill_viridis_c(option = "B", name ="Density ratio") + - scale_colour_viridis_c(option = "B", name ="Density ratio") + + scale_fill_viridis_c(option = "B", name = colour_name) + + scale_colour_viridis_c(option = "B", name = colour_name) + theme_bw() + labs(title = "Density ratio estimates for combinations of values", - shape = "Sample") + + shape = "Sample", + y = y_lab) + scale_shape_manual(values = c(21, 24)) return(plot) } From 90fdb30b3a5ab04103dffe4bbad6a346a515261e Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 22 Jan 2024 15:46:47 +0100 Subject: [PATCH 18/42] update bivariate plot small bug fix with heatmap related to negative predicted density ratios --- R/plot.R | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/R/plot.R b/R/plot.R index 7f0684f..eaf09ec 100644 --- a/R/plot.R +++ b/R/plot.R @@ -344,6 +344,9 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s grid_data <- grid_data[, names(data)] grid_data$dr <- predict(object, newdata = grid_data) + # Estimate density ration in the original datapoints (for superposing) + data$dr <- predict(object, newdata = data) + # Determine if DR is shown in logscale (default) or not if (logscale) { @@ -354,7 +357,15 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s call. = FALSE) } + if(any(grid_data$dr < 0)){ + # Convert negative predicted HEATMAP density ratios to 10e-6, so log can be computed + grid_data$dr[grid_data$dr < 0] <- 10e-6 + warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + call. = FALSE) + } + grid_data$dr <- log(grid_data$dr) + data$dr <- log(data$dr) # Assign correct y and legend labels y_lab <- "Log(Density Ratio)" @@ -368,10 +379,13 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s # Plot plot <- ggplot(grid_data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + + geom_point(data = data, + colour = "grey40", + alpha = 1, + shape = 1) + geom_raster(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), - alpha = 0.5) + + alpha = 0.85) + scale_fill_viridis_c(option = "B", name = colour_name) + - scale_colour_viridis_c(option = "B", name = colour_name) + theme_bw() + labs(title = "Density ratio estimates for combinations of values", shape = "Sample", @@ -389,7 +403,9 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s if(output == "assembled"){ - plots_assembly <- patchwork::wrap_plots(plots, guides = "collect", byrow = TRUE,ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) + plots_assembly <- patchwork::wrap_plots(plots, + guides = "collect", byrow = TRUE, + ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) plots_assembly <- plots_assembly + plot_annotation(title = "Density ratio estimates for combinations of values") return(plots_assembly) } else { From 193cee568543a48554a11b36d9cc054e6a81d677 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 26 Jan 2024 15:54:54 +0100 Subject: [PATCH 19/42] change arrangement bivariate plot panel --- R/plot.R | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/R/plot.R b/R/plot.R index eaf09ec..16e8771 100644 --- a/R/plot.R +++ b/R/plot.R @@ -228,6 +228,8 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = data <- filter(data, obsclass == obsselect) } + + if(output == "individual"){ # Create a grid of variable combinations var_combinations <- expand.grid(var.x, var.y) @@ -243,7 +245,6 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = var_combinations <- var_combinations %>% filter(Var1 != Var2) var_combinations <- as.matrix(var_combinations) - # Define function to make bivariate plot plot_twovariables <- function(data, vars, showsamples = show.samples){ plot <- @@ -261,21 +262,41 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = return(plot) } - # Iterate over grid of variable combinations # Create a list to store plots plots <- list() for(i in 1:nrow(var_combinations)){ plots[[i]] <- plot_twovariables(data = data, vars = var_combinations[i,]) } + return(plots) + } + if (output == "assembled") { + long_data <- data %>% + pivot_longer(cols = c("x3", "x4", "x5")) - if(output == "assembled"){ - plots_assembly <- patchwork::wrap_plots(plots, guides = "collect", byrow = TRUE,ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) - plots_assembly <- plots_assembly + plot_annotation(title = "Density ratio estimates for combinations of values") - return(plots_assembly) - } else { - return(plots) + plot_data <- inner_join(df, df, by = "dr", multiple = "all") %>% + filter(name.x %in% var.x, + name.y %in% var.y) %>% + mutate(dr = ifelse(name.x == name.y, NA, dr), + value.x = ifelse(name.x == name.y, NA, value.x), + value.y = ifelse(name.x == name.y, NA, value.y)) + plot <- + ggplot(plot_data, mapping = aes(x = value.x, y = value.y)) + + geom_point(aes(colour = dr, fill = dr), + alpha = 0.5) + + facet_grid(rows = vars(name.y), cols = vars(name.x), scales = "free_y", + switch = "both") + + scale_fill_viridis_c(option = "B", name = colour_name) + + scale_colour_viridis_c(option = "B", name = colour_name) + + scale_y_continuous(position = "right") + + scale_x_continuous(position = "top") + + theme_bw() + + labs(title = "Density ratio estimates for combinations of values", + shape = "Sample") + + scale_shape_manual(values = c(21, 24)) + + return(plot) } } @@ -401,12 +422,13 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s plots[[i]] <- plot_twovariables_heatmap(data = data, vars = var_combinations[i,]) } - + # Assemble plots if(output == "assembled"){ plots_assembly <- patchwork::wrap_plots(plots, - guides = "collect", byrow = TRUE, - ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) - plots_assembly <- plots_assembly + plot_annotation(title = "Density ratio estimates for combinations of values") + guides = "collect") + # , byrow = TRUE, + # axes = "collect", + # ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) return(plots_assembly) } else { return(plots) From 030e9e1dc46fb48f41b9673bab27d9b28cd1ee20 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 26 Jan 2024 16:36:11 +0100 Subject: [PATCH 20/42] solve minor bug --- R/plot.R | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/R/plot.R b/R/plot.R index 16e8771..2d01394 100644 --- a/R/plot.R +++ b/R/plot.R @@ -275,7 +275,7 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = long_data <- data %>% pivot_longer(cols = c("x3", "x4", "x5")) - plot_data <- inner_join(df, df, by = "dr", multiple = "all") %>% + plot_data <- inner_join(long_data, long_data, by = "dr", multiple = "all") %>% filter(name.x %in% var.x, name.y %in% var.y) %>% mutate(dr = ifelse(name.x == name.y, NA, dr), @@ -326,6 +326,7 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s data <- filter(data, obsclass == obsselect) } + # Create a grid of variable combinations var_combinations <- expand.grid(var.x, var.y) @@ -424,11 +425,11 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s # Assemble plots if(output == "assembled"){ + plots_assembly <- patchwork::wrap_plots(plots, - guides = "collect") - # , byrow = TRUE, - # axes = "collect", - # ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) + guides = "collect", byrow = TRUE, + axes = "collect", + ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) return(plots_assembly) } else { return(plots) From 21873c0f0a3ba741c5efb1b7e6265493a59a4a61 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 29 Jan 2024 14:24:54 +0100 Subject: [PATCH 21/42] fix small bugs --- R/plot.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/R/plot.R b/R/plot.R index 2d01394..271f099 100644 --- a/R/plot.R +++ b/R/plot.R @@ -14,6 +14,10 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N data$dr <- log(data$dr) warning("Negative estimated density ratios converted to 10e-0.6 before applying logarithmic transformation", call. = FALSE) # to avoid printing the whole call + + x_lab <- "Log (Density Ratio)" + } else { + x_lab <- "Density Ratio" } # Create a sample index variable (denominator or numerator) @@ -41,7 +45,7 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N scale_fill_manual(values = c("firebrick", "steelblue")) + theme_bw() + labs( - x = "Density ratio", + x = x_lab, y = "Count" ) + labs(title = "Distribution of density ratio estimates", @@ -207,11 +211,9 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = data$dr <- log(data$dr) # Assign correct y and legend labels - y_lab <- "Log(Density Ratio)" colour_name <- "Log (Density ratio)" } else { - y_lab <- "Density Ratio" colour_name <- "Density ratio" } @@ -255,8 +257,7 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = scale_colour_viridis_c(option = "B", name = colour_name) + theme_bw() + labs(title = "Density ratio estimates for combinations of values", - shape = "Sample", - y = y_lab) + + shape = "Sample") + scale_shape_manual(values = c(21, 24)) return(plot) @@ -273,11 +274,11 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = if (output == "assembled") { long_data <- data %>% - pivot_longer(cols = c("x3", "x4", "x5")) + pivot_longer(cols = -dr) plot_data <- inner_join(long_data, long_data, by = "dr", multiple = "all") %>% filter(name.x %in% var.x, - name.y %in% var.y) %>% + name.y %in% var.y) %>% mutate(dr = ifelse(name.x == name.y, NA, dr), value.x = ifelse(name.x == name.y, NA, value.x), value.y = ifelse(name.x == name.y, NA, value.y)) @@ -285,6 +286,7 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = ggplot(plot_data, mapping = aes(x = value.x, y = value.y)) + geom_point(aes(colour = dr, fill = dr), alpha = 0.5) + + geom_hline(yintercept = 0, linetype = "dashed") + facet_grid(rows = vars(name.y), cols = vars(name.x), scales = "free_y", switch = "both") + scale_fill_viridis_c(option = "B", name = colour_name) + @@ -296,6 +298,8 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = shape = "Sample") + scale_shape_manual(values = c(21, 24)) + return(suppressWarnings(print(plot))) + return(plot) } } @@ -390,11 +394,9 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s data$dr <- log(data$dr) # Assign correct y and legend labels - y_lab <- "Log(Density Ratio)" colour_name <- "Log (Density ratio)" } else { - y_lab <- "Density Ratio" colour_name <- "Density ratio" } @@ -410,8 +412,7 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s scale_fill_viridis_c(option = "B", name = colour_name) + theme_bw() + labs(title = "Density ratio estimates for combinations of values", - shape = "Sample", - y = y_lab) + + shape = "Sample") + scale_shape_manual(values = c(21, 24)) return(plot) } From ffb08501a52d1ba5c56fd62739d3425facc2a52d Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 29 Jan 2024 15:03:28 +0100 Subject: [PATCH 22/42] Update plot.R --- R/plot.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/plot.R b/R/plot.R index 271f099..041e58c 100644 --- a/R/plot.R +++ b/R/plot.R @@ -273,15 +273,16 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = } if (output == "assembled") { - long_data <- data %>% - pivot_longer(cols = -dr) - plot_data <- inner_join(long_data, long_data, by = "dr", multiple = "all") %>% + plot_data <- inner_join(data, data, by = "dr") %>% + pivot_longer(cols = ends_with(".x"), names_to = "name.x", values_to = "value.x") %>% + pivot_longer(cols = ends_with(".y"), names_to = "name.y", values_to = "value.y") %>% + mutate(name.x = stringr::str_remove(name.x, ".x"), + name.y = stringr::str_remove(name.y, ".y")) %>% filter(name.x %in% var.x, name.y %in% var.y) %>% mutate(dr = ifelse(name.x == name.y, NA, dr), - value.x = ifelse(name.x == name.y, NA, value.x), - value.y = ifelse(name.x == name.y, NA, value.y)) + value.x = ifelse(name.x == name.y, NA, value.x)) plot <- ggplot(plot_data, mapping = aes(x = value.x, y = value.y)) + geom_point(aes(colour = dr, fill = dr), @@ -300,7 +301,6 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = return(suppressWarnings(print(plot))) - return(plot) } } From 883b5adcfd969e71271167a839cdcb3768503bdf Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Tue, 13 Feb 2024 12:48:20 +0100 Subject: [PATCH 23/42] update Plot.R added bins argument for histogram; made logarithmic transformation warning conditional on negative values present, mapped colours to sample in univariate plot --- R/plot.R | 93 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/R/plot.R b/R/plot.R index 041e58c..6640dc2 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,4 +1,4 @@ -dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { +dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL, bins = NULL,...) { # Checks check.object.type(object) @@ -6,16 +6,21 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) - data$dr <- predict(object, newdata = data) + data$dr <- predict(object, newdata = data, ...) + + if (logscale) { - if(logscale){ - # Convert negative predicted density ratios to 10e-0.6, so log can be computed - data$dr[data$dr < 0] <- 10e-6 - data$dr <- log(data$dr) - warning("Negative estimated density ratios converted to 10e-0.6 before applying logarithmic transformation", - call. = FALSE) # to avoid printing the whole call + if(any(data$dr <= 0)){ + # Convert negative predicted density ratios to 10e-3, so log can be computed + count <- length(data$dr[data$dr <= 0]) + data$dr[data$dr <= 0] <- 10e-3 + warning( + paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), + call. = FALSE) + } - x_lab <- "Log (Density Ratio)" + data$dr <- log(data$dr) + x_lab <- "Log (Density Ratio)" } else { x_lab <- "Density Ratio" } @@ -40,6 +45,7 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N alpha = .75, color = "black", binwidth = if (!is.null(binwidth)) binwidth else NULL, + bins = if(!is.null(bins)) bins else NULL, position = position_dodge2(preserve = "single", padding = 0.2)) + scale_fill_manual(values = c("firebrick", "steelblue")) + @@ -64,8 +70,10 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N #' @export #' #' @examples -plot.ulsif <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { - dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) +plot.ulsif <- function(object, sample = "both", logscale = FALSE, binwidth = NULL, + bins = NULL) { + dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth, + bins = bins) } #' Title @@ -78,8 +86,10 @@ plot.ulsif <- function(object, sample = "both", logscale = FALSE, binwidth = NUL #' @export #' #' @examples -plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NULL) { - dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth) +plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NULL, + bins = NULL) { + dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth, + bins = bins) } @@ -94,7 +104,7 @@ plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NUL #' @export #' #' @examples -plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { +plot_univariate <- function(object, vars, samples = "both", logscale = TRUE) { # Check object type check.object.type(object) @@ -109,24 +119,26 @@ plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { # Estimate density ratio data$dr <- predict(object, newdata = data) - # Create a sample index variable (denominator or numerator) - obsclass <- rep(c("numerator", "denominator"), - c(nrow(object$df_numerator), nrow(object$df_denominator))) + # Creta sample identifier + data$sample <- rep(c("numerator", "denominator"), + c(nrow(object$df_numerator), nrow(object$df_denominator))) # Create a object selection variable (both, numerator, denominator) - obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + obsselect <- match.arg(samples, c("both", "numerator", "denominator")) if (obsselect != "both") { - data <- filter(data, obsclass == obsselect) + data <- filter(data, sample == obsselect) } if (logscale) { - if(any(data$dr < 0)){ - # Convert negative predicted density ratios to 10e-6, so log can be computed - data$dr[data$dr < 0] <- 10e-6 - warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + if(any(data$dr <= 0)){ + # Convert negative predicted density ratios to 10e-3, so log can be computed + count <- length(data$dr[data$dr <= 0]) + data$dr[data$dr <= 0] <- 10e-3 + warning( + paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) } @@ -134,27 +146,26 @@ plot_univariate <- function(object, vars, sample = "both", logscale = TRUE) { # Assign correct y and legend labels y_lab <- "Log(Density Ratio)" - colour_name <- "Log (Density ratio)" } else { y_lab <- "Density Ratio" - colour_name <- "Density ratio" } # Create list storage for plots object (for iteration) plots <- list() # Write the function to make one plot, for one variable - one_plot <- function(var, shape = "sample"){ + one_plot <- function(var){ plot <- ggplot(data, aes(x = .data[[var]], y = dr)) + - geom_point(aes(col = dr, shape = sample)) + + geom_point(aes(col = sample), + alpha = .6) + theme_bw() + labs(title = "Scatter plot of individual values and density ratio", - shape = "Sample", + color = "Sample", y = y_lab) + geom_hline(yintercept = 0, linetype = "dashed")+ - scale_colour_viridis_c(option = "B", name = colour_name) + + scale_color_manual(values = c("firebrick", "steelblue")) + scale_shape_manual(values = c(16, 3)) + scale_y_continuous(breaks = seq( from = floor(min(data$dr)), @@ -201,10 +212,10 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = # Determine if DR is shown in logscale (default) or not if (logscale) { - if(any(data$dr < 0)){ - # Convert negative predicted density ratios to 10e-6, so log can be computed - data$dr[data$dr < 0] <- 10e-6 - warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + if(any(data$dr <= 0)){ + # Convert negative predicted density ratios to 10e-3, so log can be computed + data$dr[data$dr <= 0] <- 10e-3 + warning("Negative estimated density ratios converted to 10e-3 before applying logarithmic transformation", call. = FALSE) } @@ -376,17 +387,17 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s # Determine if DR is shown in logscale (default) or not if (logscale) { - if(any(data$dr < 0)){ - # Convert negative predicted density ratios to 10e-6, so log can be computed - data$dr[data$dr < 0] <- 10e-6 - warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + if(any(data$dr <= 0)){ + # Convert negative predicted density ratios to 10e-3, so log can be computed + data$dr[data$dr <= 0] <- 10e-3 + warning("Negative estimated density ratios converted to 10e-3 before applying logarithmic transformation", call. = FALSE) } - if(any(grid_data$dr < 0)){ - # Convert negative predicted HEATMAP density ratios to 10e-6, so log can be computed - grid_data$dr[grid_data$dr < 0] <- 10e-6 - warning("Negative estimated density ratios converted to 10e-6 before applying logarithmic transformation", + if(any(grid_data$dr <= 0)){ + # Convert negative predicted HEATMAP density ratios to 10e-3, so log can be computed + grid_data$dr[grid_data$dr <= 0] <- 10e-3 + warning("Negative estimated density ratios converted to 10e-3 before applying logarithmic transformation", call. = FALSE) } From cfa6a38d4a4c42a096b863e8fd3de54ccb782ef8 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Tue, 13 Feb 2024 16:00:16 +0100 Subject: [PATCH 24/42] updated plots improved warning messages; make bivariate plot only show the lower diagonal --- R/plot.R | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/R/plot.R b/R/plot.R index 6640dc2..4b7a663 100644 --- a/R/plot.R +++ b/R/plot.R @@ -192,7 +192,7 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE) { #' @export #' #' @examples -plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = TRUE, +plot_bivariate <- function(object, vars, sample = "both", show.samples = TRUE, output = "assembled", logscale = TRUE) { # Check object type @@ -202,8 +202,7 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = data <- rbind(object$df_numerator, object$df_denominator) check.overriden.names(data) - # Create variables vector and check variable names - vars <- c(var.x, var.y) + # Check variable names check.var.names(vars, data) # Estimate density ratio @@ -214,9 +213,11 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = if(any(data$dr <= 0)){ # Convert negative predicted density ratios to 10e-3, so log can be computed + count <- length(data$dr[data$dr <= 0]) data$dr[data$dr <= 0] <- 10e-3 - warning("Negative estimated density ratios converted to 10e-3 before applying logarithmic transformation", - call. = FALSE) + warning( + paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), + call. = FALSE) } data$dr <- log(data$dr) @@ -241,10 +242,8 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = data <- filter(data, obsclass == obsselect) } - - if(output == "individual"){ # Create a grid of variable combinations - var_combinations <- expand.grid(var.x, var.y) + var_combinations <- expand.grid(vars, vars) ## Remove duplicate combinations ## Start by sorting elements within each row @@ -258,6 +257,7 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = var_combinations <- var_combinations %>% filter(Var1 != Var2) var_combinations <- as.matrix(var_combinations) + if(output == "individual"){ # Define function to make bivariate plot plot_twovariables <- function(data, vars, showsamples = show.samples){ plot <- @@ -272,7 +272,7 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = scale_shape_manual(values = c(21, 24)) return(plot) - } + } # Iterate over grid of variable combinations # Create a list to store plots @@ -285,21 +285,27 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = if (output == "assembled") { - plot_data <- inner_join(data, data, by = "dr") %>% + # plot_data <- data %>% + # pivot_longer(cols = c("V1", "V2"), names_to = "variable", values_to = "value") + # browser() + combinations <- paste0(var_combinations[,1], "-", var_combinations[,2]) + + plot_data <- + inner_join(data, data, by = "dr") %>% pivot_longer(cols = ends_with(".x"), names_to = "name.x", values_to = "value.x") %>% pivot_longer(cols = ends_with(".y"), names_to = "name.y", values_to = "value.y") %>% mutate(name.x = stringr::str_remove(name.x, ".x"), - name.y = stringr::str_remove(name.y, ".y")) %>% - filter(name.x %in% var.x, - name.y %in% var.y) %>% - mutate(dr = ifelse(name.x == name.y, NA, dr), - value.x = ifelse(name.x == name.y, NA, value.x)) + name.y = stringr::str_remove(name.y, ".y"), + combination = paste0(name.x, "-", name.y)) %>% + filter(combination %in% combinations) + + plot <- ggplot(plot_data, mapping = aes(x = value.x, y = value.y)) + geom_point(aes(colour = dr, fill = dr), alpha = 0.5) + geom_hline(yintercept = 0, linetype = "dashed") + - facet_grid(rows = vars(name.y), cols = vars(name.x), scales = "free_y", + facet_grid(rows = vars(name.y), cols = vars(name.x), scales = "free", switch = "both") + scale_fill_viridis_c(option = "B", name = colour_name) + scale_colour_viridis_c(option = "B", name = colour_name) + @@ -307,7 +313,8 @@ plot_bivariate <- function(object, var.x,var.y, sample = "both", show.samples = scale_x_continuous(position = "top") + theme_bw() + labs(title = "Density ratio estimates for combinations of values", - shape = "Sample") + + x = NULL, + y = NULL) + scale_shape_manual(values = c(21, 24)) return(suppressWarnings(print(plot))) @@ -389,15 +396,17 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s if(any(data$dr <= 0)){ # Convert negative predicted density ratios to 10e-3, so log can be computed + count <- length(data$dr[data$dr <= 0]) data$dr[data$dr <= 0] <- 10e-3 - warning("Negative estimated density ratios converted to 10e-3 before applying logarithmic transformation", - call. = FALSE) + warning( + paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), + call. = FALSE) } if(any(grid_data$dr <= 0)){ # Convert negative predicted HEATMAP density ratios to 10e-3, so log can be computed grid_data$dr[grid_data$dr <= 0] <- 10e-3 - warning("Negative estimated density ratios converted to 10e-3 before applying logarithmic transformation", + warning("Negative estimated density ratios in the heatmap converted to 10e-3 before applying logarithmic transformation", call. = FALSE) } From 226602bd67c90dbd1dd149c68ff30c0130bf7fc1 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Wed, 14 Feb 2024 10:36:03 +0100 Subject: [PATCH 25/42] update bivariate heatmap minor edit in colours bivariate heatmap --- R/plot.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/plot.R b/R/plot.R index 4b7a663..f42f199 100644 --- a/R/plot.R +++ b/R/plot.R @@ -424,11 +424,11 @@ plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.s plot <- ggplot(grid_data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + geom_point(data = data, - colour = "grey40", + colour = "grey60", alpha = 1, shape = 1) + geom_raster(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), - alpha = 0.85) + + alpha = 0.6) + scale_fill_viridis_c(option = "B", name = colour_name) + theme_bw() + labs(title = "Density ratio estimates for combinations of values", From 9413f1f1bc493b2c5f8771ac18b08b519d000e4e Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Fri, 23 Feb 2024 18:17:57 +0100 Subject: [PATCH 26/42] Update plots Solve small bug warning, rearrageged numerator and denominator labels univariate plot, create assembled outplot in univariate plots, implemented "smart" labels on y axis --- R/plot.R | 66 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/R/plot.R b/R/plot.R index f42f199..5aa15cf 100644 --- a/R/plot.R +++ b/R/plot.R @@ -15,7 +15,7 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N count <- length(data$dr[data$dr <= 0]) data$dr[data$dr <= 0] <- 10e-3 warning( - paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), + paste("Negative estimated density ratios for", count, "observation(s) converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) } @@ -48,7 +48,9 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N bins = if(!is.null(bins)) bins else NULL, position = position_dodge2(preserve = "single", padding = 0.2)) + - scale_fill_manual(values = c("firebrick", "steelblue")) + + scale_fill_manual(values = c("firebrick", "steelblue"), + breaks = c("numerator", "denominator"), + labels = c("Numerator", "Denominator")) + theme_bw() + labs( x = x_lab, @@ -92,7 +94,25 @@ plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NUL bins = bins) } +# Write the function to make one plot, for one variable +individual_uni_plot <- function(data, var, y_lab){ + + y_max <- max(1,data$dr) + y_min <- min(-1, data$dr) + plot <- + ggplot(data, aes(x = .data[[var]], y = dr)) + + geom_point(aes(col = sample), + alpha = .6) + + theme_bw() + + labs(title = "Scatter plot of individual values and density ratio", + color = "Sample", + y = y_lab) + + geom_hline(yintercept = 0, linetype = "dashed")+ + scale_color_manual(values = c("firebrick", "steelblue")) + + scale_y_continuous(limits = c(y_min, y_max)) + return(plot) +} #' Title #' @@ -104,7 +124,8 @@ plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NUL #' @export #' #' @examples -plot_univariate <- function(object, vars, samples = "both", logscale = TRUE) { +plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, + output = "individual") { # Check object type check.object.type(object) @@ -138,7 +159,7 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE) { count <- length(data$dr[data$dr <= 0]) data$dr[data$dr <= 0] <- 10e-3 warning( - paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), + paste("Negative estimated density ratios for", count, "observation(s) converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) } @@ -151,34 +172,39 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE) { y_lab <- "Density Ratio" } + if(output == "individual"){ # Create list storage for plots object (for iteration) plots <- list() + for(var in vars){ + plots[[var]] <- individual_uni_plot(data, var, y_lab) + } + return(plots) - # Write the function to make one plot, for one variable - one_plot <- function(var){ - plot <- - ggplot(data, aes(x = .data[[var]], y = dr)) + - geom_point(aes(col = sample), + } + if (output == "assembled"){ + data <- data %>% + pivot_longer(cols = vars, + names_to = "variable", + values_to = "value") + + y_max <- max(1,data$dr) + y_min <- min(-1, data$dr) + + plot <- ggplot(data) + + geom_point(aes(x = value, y = dr, col = sample), alpha = .6) + theme_bw() + labs(title = "Scatter plot of individual values and density ratio", color = "Sample", - y = y_lab) + + y = "Density ratio") + geom_hline(yintercept = 0, linetype = "dashed")+ scale_color_manual(values = c("firebrick", "steelblue")) + - scale_shape_manual(values = c(16, 3)) + - scale_y_continuous(breaks = seq( - from = floor(min(data$dr)), - to = ceiling(max(data$dr)))) + facet_wrap(~variable, scales = "free_x") + + scale_y_continuous(limits = c(y_min, y_max)) - return(plot) } + return(plot) - - for(var in vars){ - plots[[var]] <- one_plot(var) - } - return(plots) } #' Title From 9e178c1740ab284ab9fb546d73521b28847e0c48 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 29 Feb 2024 12:37:57 +0100 Subject: [PATCH 27/42] fix ordering histogram --- R/plot.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/plot.R b/R/plot.R index 5aa15cf..a3a3892 100644 --- a/R/plot.R +++ b/R/plot.R @@ -26,7 +26,7 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N } # Create a sample index variable (denominator or numerator) - obsclass <- rep(c("numerator", "denominator"), + obsclass <- rep(c("Numerator", "Denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) data$sample <- obsclass @@ -47,10 +47,10 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N binwidth = if (!is.null(binwidth)) binwidth else NULL, bins = if(!is.null(bins)) bins else NULL, position = position_dodge2(preserve = "single", - padding = 0.2)) + + padding = 0.2, + reverse = TRUE)) + scale_fill_manual(values = c("firebrick", "steelblue"), - breaks = c("numerator", "denominator"), - labels = c("Numerator", "Denominator")) + + breaks = c("Numerator", "Denominator")) + theme_bw() + labs( x = x_lab, @@ -94,6 +94,7 @@ plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NUL bins = bins) } + # Write the function to make one plot, for one variable individual_uni_plot <- function(data, var, y_lab){ From 6706b443e92138a0325b1eb2ea5b1aa0b892d211 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 29 Feb 2024 12:54:25 +0100 Subject: [PATCH 28/42] update univariate plots fix labels, add possibility of choosing number of rows univariate plot --- R/plot.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R/plot.R b/R/plot.R index a3a3892..61fccff 100644 --- a/R/plot.R +++ b/R/plot.R @@ -26,7 +26,7 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N } # Create a sample index variable (denominator or numerator) - obsclass <- rep(c("Numerator", "Denominator"), + obsclass <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) data$sample <- obsclass @@ -50,7 +50,8 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N padding = 0.2, reverse = TRUE)) + scale_fill_manual(values = c("firebrick", "steelblue"), - breaks = c("Numerator", "Denominator")) + + breaks = c("numerator", "denominator"), + labels = c("Numerator", "Denominator")) + theme_bw() + labs( x = x_lab, @@ -109,7 +110,9 @@ individual_uni_plot <- function(data, var, y_lab){ color = "Sample", y = y_lab) + geom_hline(yintercept = 0, linetype = "dashed")+ - scale_color_manual(values = c("firebrick", "steelblue")) + + scale_colour_manual(values = c("firebrick", "steelblue"), + breaks = c("numerator", "denominator"), + labels = c("Numerator", "Denominator")) + scale_y_continuous(limits = c(y_min, y_max)) return(plot) @@ -126,7 +129,8 @@ individual_uni_plot <- function(data, var, y_lab){ #' #' @examples plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, - output = "individual") { + output = "individual", + nrow = NULL) { # Check object type check.object.type(object) @@ -199,8 +203,10 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, color = "Sample", y = "Density ratio") + geom_hline(yintercept = 0, linetype = "dashed")+ - scale_color_manual(values = c("firebrick", "steelblue")) + - facet_wrap(~variable, scales = "free_x") + + scale_color_manual(values = c("firebrick", "steelblue"), + breaks = c("numerator", "denominator"), + labels = c("Numerator", "Denominator")) + + facet_wrap(~variable, scales = "free_x", nrow = nrow) + scale_y_continuous(limits = c(y_min, y_max)) } From 435cd1d44490a333fdc0b78d8cdb0a499dd1a2a4 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 29 Feb 2024 13:24:57 +0100 Subject: [PATCH 29/42] add facetting option univariate plot --- R/plot.R | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/R/plot.R b/R/plot.R index 61fccff..df82787 100644 --- a/R/plot.R +++ b/R/plot.R @@ -97,7 +97,7 @@ plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NUL # Write the function to make one plot, for one variable -individual_uni_plot <- function(data, var, y_lab){ +individual_uni_plot <- function(data, var, y_lab, facet = TRUE){ y_max <- max(1,data$dr) y_min <- min(-1, data$dr) @@ -115,6 +115,10 @@ individual_uni_plot <- function(data, var, y_lab){ labels = c("Numerator", "Denominator")) + scale_y_continuous(limits = c(y_min, y_max)) + if(facet){ + plot <- plot + facet_wrap(~sample) + } + return(plot) } @@ -129,7 +133,7 @@ individual_uni_plot <- function(data, var, y_lab){ #' #' @examples plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, - output = "individual", + output = "individual", facet = FALSE, nrow = NULL) { # Check object type @@ -181,7 +185,7 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, # Create list storage for plots object (for iteration) plots <- list() for(var in vars){ - plots[[var]] <- individual_uni_plot(data, var, y_lab) + plots[[var]] <- individual_uni_plot(data, var, y_lab, facet) } return(plots) @@ -195,6 +199,7 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, y_max <- max(1,data$dr) y_min <- min(-1, data$dr) + plot <- ggplot(data) + geom_point(aes(x = value, y = dr, col = sample), alpha = .6) + @@ -206,9 +211,19 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, scale_color_manual(values = c("firebrick", "steelblue"), breaks = c("numerator", "denominator"), labels = c("Numerator", "Denominator")) + - facet_wrap(~variable, scales = "free_x", nrow = nrow) + scale_y_continuous(limits = c(y_min, y_max)) + if(facet){ + plot <- plot + + facet_grid(cols = vars(sample), + rows = vars(variable), + scales = "free_x") + + } else { + plot <- plot + + facet_wrap(~variable, scales = "free_x", nrow = nrow) + } + } return(plot) From e5115889f6f4b00e4de1c06ba48bd3fe30b4907a Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 29 Feb 2024 15:51:41 +0100 Subject: [PATCH 30/42] Update plot.R --- R/plot.R | 78 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/R/plot.R b/R/plot.R index df82787..e28307e 100644 --- a/R/plot.R +++ b/R/plot.R @@ -55,10 +55,9 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N theme_bw() + labs( x = x_lab, - y = "Count" - ) + - labs(title = "Distribution of density ratio estimates", - fill = "Sample") + y = "Count", + title = "Distribution of density ratio estimates", + fill = "Sample") return(plot) } @@ -207,7 +206,6 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, labs(title = "Scatter plot of individual values and density ratio", color = "Sample", y = "Density ratio") + - geom_hline(yintercept = 0, linetype = "dashed")+ scale_color_manual(values = c("firebrick", "steelblue"), breaks = c("numerator", "denominator"), labels = c("Numerator", "Denominator")) + @@ -229,6 +227,29 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, } + +# Define function to make bivariate plot +plot_twovariables <- function(data, vars){ + + dr_max <- max(1, data$dr) + dr_min <- min(-1, data$dr) + + plot <- + ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + + geom_point(aes(colour = dr)) + + scale_colour_gradient2(low = "firebrick", + high = "steelblue", + mid = "lightyellow", + limits = c(dr_min, dr_max)) + + theme_bw() + + labs(title = "Scatter plot, with density ratio mapped to colour", + colour = "Log (Density ratio)") + + scale_shape_manual(values = c(21, 24)) + + + return(plot) +} + #' Title #' #' @param object @@ -240,7 +261,7 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, #' @export #' #' @examples -plot_bivariate <- function(object, vars, sample = "both", show.samples = TRUE, +plot_bivariate <- function(object, vars, sample = "both", output = "assembled", logscale = TRUE) { # Check object type @@ -302,31 +323,15 @@ plot_bivariate <- function(object, vars, sample = "both", show.samples = TRUE, # Remove rows where both variables are the same var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) names(var_combinations) <- c("Var1", "Var2") - var_combinations <- var_combinations %>% filter(Var1 != Var2) - var_combinations <- as.matrix(var_combinations) if(output == "individual"){ - # Define function to make bivariate plot - plot_twovariables <- function(data, vars, showsamples = show.samples){ - plot <- - ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), - alpha = 0.5) + - scale_fill_viridis_c(option = "B", name = colour_name) + - scale_colour_viridis_c(option = "B", name = colour_name) + - theme_bw() + - labs(title = "Density ratio estimates for combinations of values", - shape = "Sample") + - scale_shape_manual(values = c(21, 24)) - return(plot) - } + var_combinations <- var_combinations %>% filter(Var1 != Var2) + var_combinations <- as.matrix(var_combinations) - # Iterate over grid of variable combinations - # Create a list to store plots plots <- list() for(i in 1:nrow(var_combinations)){ - plots[[i]] <- plot_twovariables(data = data, vars = var_combinations[i,]) + plots[[i]] <- plot_twovariables(data, vars = var_combinations[i,]) } return(plots) } @@ -347,25 +352,32 @@ plot_bivariate <- function(object, vars, sample = "both", show.samples = TRUE, combination = paste0(name.x, "-", name.y)) %>% filter(combination %in% combinations) + dr_max <- max(1, plot_data$dr) + dr_min <- min(-1, plot_data$dr) + plot <- ggplot(plot_data, mapping = aes(x = value.x, y = value.y)) + - geom_point(aes(colour = dr, fill = dr), - alpha = 0.5) + - geom_hline(yintercept = 0, linetype = "dashed") + + geom_point(aes(colour = dr)) + facet_grid(rows = vars(name.y), cols = vars(name.x), scales = "free", switch = "both") + - scale_fill_viridis_c(option = "B", name = colour_name) + - scale_colour_viridis_c(option = "B", name = colour_name) + + scale_colour_gradient2(low = "firebrick", + high = "steelblue", + mid = "#ffffbf", + limits = c(dr_min, dr_max), + ) + scale_y_continuous(position = "right") + scale_x_continuous(position = "top") + theme_bw() + - labs(title = "Density ratio estimates for combinations of values", + labs(title = "Scatter plots, with density ratio mapped to colour", x = NULL, - y = NULL) + + y = NULL, + colour = colour_name) + scale_shape_manual(values = c(21, 24)) - return(suppressWarnings(print(plot))) + grob <- ggplotGrob(plot) + + return(suppressWarnings(grob)) } } From 887b0a1796930c560477d6dd9b684123d1535a7f Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 7 Mar 2024 14:31:08 +0100 Subject: [PATCH 31/42] update bivariate plot --- R/plot.R | 32 ++++++++++++++++++++++++-------- R/print.R | 15 +++++++++++++++ 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/R/plot.R b/R/plot.R index e28307e..add72a7 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,3 +1,4 @@ + dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL, bins = NULL,...) { # Checks @@ -229,10 +230,10 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, # Define function to make bivariate plot -plot_twovariables <- function(data, vars){ +plot_twovariables <- function(data, vars, logscale){ - dr_max <- max(1, data$dr) - dr_min <- min(-1, data$dr) + dr_max <- ifelse(logscale, max(2, data$dr), max(exp(2), data$dr)) + dr_min <- ifelse(logscale, max(-2, data$dr), min(exp(-2, data$dr))) plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + @@ -331,7 +332,7 @@ plot_bivariate <- function(object, vars, sample = "both", plots <- list() for(i in 1:nrow(var_combinations)){ - plots[[i]] <- plot_twovariables(data, vars = var_combinations[i,]) + plots[[i]] <- plot_twovariables(data, vars = var_combinations[i,], logscale) } return(plots) } @@ -366,18 +367,33 @@ plot_bivariate <- function(object, vars, sample = "both", mid = "#ffffbf", limits = c(dr_min, dr_max), ) + - scale_y_continuous(position = "right") + - scale_x_continuous(position = "top") + + scale_y_continuous(position = "left") + + scale_x_continuous(position = "bottom") + theme_bw() + + theme(strip.placement = "outside") + labs(title = "Scatter plots, with density ratio mapped to colour", x = NULL, y = NULL, colour = colour_name) + scale_shape_manual(values = c(21, 24)) + # Erase upper diagonal + ## Create plot into a grob grob <- ggplotGrob(plot) - - return(suppressWarnings(grob)) + ## Create name of empty panels in the upper diagonal + empty_panels <- expand.grid(seq(1:length(vars)), seq(1:length(vars))) %>% + filter(Var2 > Var1) %>% + mutate(panel = paste0("panel-", Var1, "-", Var2)) %>% + pull(panel) + # Delete panels in upper diagonal, based in their index + idx <- which(grob$layout$name %in% empty_panels) + for (i in idx) grob$grobs[[i]] <- grid::nullGrob() + + out <- grob + class(out) <- "bivariateplot" + return(out) + # grid::grid.newpage() + # grid::grid.draw(grob) } } diff --git a/R/print.R b/R/print.R index 6f81075..4a08014 100644 --- a/R/print.R +++ b/R/print.R @@ -297,3 +297,18 @@ print.summary.naivesubspacedensityratio <- function(x, digits = max(3L, getOptio } invisible(x) } + +#' Print a \code{bivariate.plot} object +#' +#' @rdname print +#' @return \code{NULL} +#' @method print bivariate.plot +#' @importFrom +#' @export +print.bivariateplot <- function(x, newpage = TRUE) { + if (newpage) { + grid::grid.newpage() + } + grid::grid.draw(x) + invisible(x) +} From c1ec758c14a5b5529d6ea128a9fd415e0b849538 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 7 Mar 2024 14:31:33 +0100 Subject: [PATCH 32/42] remove heatmap --- R/plot.R | 135 ------------------------------------------------------- 1 file changed, 135 deletions(-) diff --git a/R/plot.R b/R/plot.R index add72a7..72266d1 100644 --- a/R/plot.R +++ b/R/plot.R @@ -397,138 +397,3 @@ plot_bivariate <- function(object, vars, sample = "both", } } - -plot_bivariate_heatmap <- function(object, var.x, var.y, sample = "both", show.samples = TRUE, - output = "assembled", log.scale = TRUE) { - - # Check object type - check.object.type(object) - - # Create data object and check variable names - data <- rbind(object$df_numerator, object$df_denominator) - check.overriden.names(data) - - # Create variables vector and check variable names - vars <- c(var.x, var.y) - check.var.names(vars, data) - - # Create a sample index variable (denominator or numerator) - obsclass <- rep(c("numerator", "denominator"), - c(nrow(object$df_numerator), nrow(object$df_denominator))) - - # Create a object selection variable (both, numerator, denominator) - obsselect <- match.arg(sample, c("both", "numerator", "denominator")) - - # Filter data based on object selection - if (obsselect != "both") { - data <- filter(data, obsclass == obsselect) - } - - - # Create a grid of variable combinations - var_combinations <- expand.grid(var.x, var.y) - - ## Remove duplicate combinations - ## Start by sorting elements within each row - ## This makes duplicate rows with different order of variables identical - var_combinations <- t(apply(var_combinations, 1, sort)) - var_combinations <- unique(var_combinations) # retain unique rows only - - # Remove rows where both variables are the same - var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) - names(var_combinations) <- c("Var1", "Var2") - var_combinations <- var_combinations %>% filter(Var1 != Var2) - - var_combinations <- as.matrix(var_combinations) - - object2 <- object - # Define function to make bivariate plot - plot_twovariables_heatmap <- function(object = object2, data, vars, showsamples = show.samples, logscale = log.scale){ - - # Create a 100x100 grid of values for the two variables, in the range of the data - seq_var1 <- seq(min(data[[vars[1]]]), max(data[[vars[1]]]), length.out = 100) - seq_var2 <- seq(min(data[[vars[2]]]), max(data[[vars[2]]]), length.out = 100) - grid_data <- expand.grid(seq_var1, seq_var2) - colnames(grid_data) <- c(vars[[1]], vars[[2]]) - - # Add the rest of data variables to the grid, inputting its mean value - # First, identify variables in data that are not in vars - other_vars <- setdiff(names(data), vars) - for (var in other_vars) { - grid_data[[var]] <- mean(data[[var]]) - } - - # Predict density ratio for each combination of values - # Assign to dataframe and reorder column in the same order as data, so that predict works - grid_data <- as.data.frame(grid_data) - grid_data <- grid_data[, names(data)] - grid_data$dr <- predict(object, newdata = grid_data) - - # Estimate density ration in the original datapoints (for superposing) - data$dr <- predict(object, newdata = data) - - # Determine if DR is shown in logscale (default) or not - if (logscale) { - - if(any(data$dr <= 0)){ - # Convert negative predicted density ratios to 10e-3, so log can be computed - count <- length(data$dr[data$dr <= 0]) - data$dr[data$dr <= 0] <- 10e-3 - warning( - paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), - call. = FALSE) - } - - if(any(grid_data$dr <= 0)){ - # Convert negative predicted HEATMAP density ratios to 10e-3, so log can be computed - grid_data$dr[grid_data$dr <= 0] <- 10e-3 - warning("Negative estimated density ratios in the heatmap converted to 10e-3 before applying logarithmic transformation", - call. = FALSE) - } - - grid_data$dr <- log(grid_data$dr) - data$dr <- log(data$dr) - - # Assign correct y and legend labels - colour_name <- "Log (Density ratio)" - - } else { - colour_name <- "Density ratio" - } - - # Plot - plot <- - ggplot(grid_data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(data = data, - colour = "grey60", - alpha = 1, - shape = 1) + - geom_raster(aes(colour = dr, fill = dr, shape = if (showsamples) sample else NULL), - alpha = 0.6) + - scale_fill_viridis_c(option = "B", name = colour_name) + - theme_bw() + - labs(title = "Density ratio estimates for combinations of values", - shape = "Sample") + - scale_shape_manual(values = c(21, 24)) - return(plot) - } - - # Iterate over grid of variable combinations - # Create a list to store plots - plots <- list() - for(i in 1:nrow(var_combinations)){ - plots[[i]] <- plot_twovariables_heatmap(data = data, vars = var_combinations[i,]) - } - - # Assemble plots - if(output == "assembled"){ - - plots_assembly <- patchwork::wrap_plots(plots, - guides = "collect", byrow = TRUE, - axes = "collect", - ncol = length(var.x), nrow = length(var.y)) & labs(title = NULL) - return(plots_assembly) - } else { - return(plots) - } -} From 90233ba992c3cbe6cca5e841cc8b184701648e99 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 14 Mar 2024 10:58:43 +0100 Subject: [PATCH 33/42] updates bivariate plot add show.sample argument for bivariate plots, add printing method --- R/plot.R | 31 ++++++++++++++++--------------- R/print.R | 8 +++----- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/R/plot.R b/R/plot.R index 72266d1..1d557c7 100644 --- a/R/plot.R +++ b/R/plot.R @@ -230,14 +230,14 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, # Define function to make bivariate plot -plot_twovariables <- function(data, vars, logscale){ +plot_twovariables <- function(data, vars, logscale, show.sample){ dr_max <- ifelse(logscale, max(2, data$dr), max(exp(2), data$dr)) - dr_min <- ifelse(logscale, max(-2, data$dr), min(exp(-2, data$dr))) + dr_min <- ifelse(logscale, min(-2, data$dr), min(exp(-2), data$dr)) plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(aes(colour = dr)) + + geom_point(aes(colour = dr, shape = if(show.sample) sample else NULL)) + scale_colour_gradient2(low = "firebrick", high = "steelblue", mid = "lightyellow", @@ -247,7 +247,6 @@ plot_twovariables <- function(data, vars, logscale){ colour = "Log (Density ratio)") + scale_shape_manual(values = c(21, 24)) - return(plot) } @@ -262,8 +261,8 @@ plot_twovariables <- function(data, vars, logscale){ #' @export #' #' @examples -plot_bivariate <- function(object, vars, sample = "both", - output = "assembled", logscale = TRUE) { +plot_bivariate <- function(object, vars, samples = "both", + output = "assembled", logscale = TRUE, show.sample = FALSE) { # Check object type check.object.type(object) @@ -301,15 +300,15 @@ plot_bivariate <- function(object, vars, sample = "both", # Create a sample index variable (denominator or numerator) - obsclass <- rep(c("numerator", "denominator"), + data$sample <- rep(c("numerator", "denominator"), c(nrow(object$df_numerator), nrow(object$df_denominator))) # Create a object selection variable (both, numerator, denominator) - obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + obsselect <- match.arg(samples, c("both", "numerator", "denominator")) # Filter data based on object selection if (obsselect != "both") { - data <- filter(data, obsclass == obsselect) + data <- filter(data, sample == obsselect) } # Create a grid of variable combinations @@ -332,7 +331,7 @@ plot_bivariate <- function(object, vars, sample = "both", plots <- list() for(i in 1:nrow(var_combinations)){ - plots[[i]] <- plot_twovariables(data, vars = var_combinations[i,], logscale) + plots[[i]] <- plot_twovariables(data, vars = var_combinations[i,], logscale, show.sample) } return(plots) } @@ -345,7 +344,7 @@ plot_bivariate <- function(object, vars, sample = "both", combinations <- paste0(var_combinations[,1], "-", var_combinations[,2]) plot_data <- - inner_join(data, data, by = "dr") %>% + inner_join(data, data, by = c("dr", "sample")) %>% # Possible error in case of duplicate DR? pivot_longer(cols = ends_with(".x"), names_to = "name.x", values_to = "value.x") %>% pivot_longer(cols = ends_with(".y"), names_to = "name.y", values_to = "value.y") %>% mutate(name.x = stringr::str_remove(name.x, ".x"), @@ -356,9 +355,9 @@ plot_bivariate <- function(object, vars, sample = "both", dr_max <- max(1, plot_data$dr) dr_min <- min(-1, plot_data$dr) - plot <- - ggplot(plot_data, mapping = aes(x = value.x, y = value.y)) + + ggplot(plot_data, mapping = aes(x = value.x, y = value.y, + shape = if(show.sample) sample else NULL)) + geom_point(aes(colour = dr)) + facet_grid(rows = vars(name.y), cols = vars(name.x), scales = "free", switch = "both") + @@ -374,7 +373,8 @@ plot_bivariate <- function(object, vars, sample = "both", labs(title = "Scatter plots, with density ratio mapped to colour", x = NULL, y = NULL, - colour = colour_name) + + colour = colour_name, + shape = if(show.sample) "Sample" else NULL) + scale_shape_manual(values = c(21, 24)) # Erase upper diagonal @@ -390,7 +390,8 @@ plot_bivariate <- function(object, vars, sample = "both", for (i in idx) grob$grobs[[i]] <- grid::nullGrob() out <- grob - class(out) <- "bivariateplot" + class(out) <- c("bivariateplot", class(grob)) + return(out) # grid::grid.newpage() # grid::grid.draw(grob) diff --git a/R/print.R b/R/print.R index 4a08014..ba9067f 100644 --- a/R/print.R +++ b/R/print.R @@ -305,10 +305,8 @@ print.summary.naivesubspacedensityratio <- function(x, digits = max(3L, getOptio #' @method print bivariate.plot #' @importFrom #' @export -print.bivariateplot <- function(x, newpage = TRUE) { - if (newpage) { - grid::grid.newpage() - } +print.bivariateplot <- function(x, ...) { + grid::grid.newpage() grid::grid.draw(x) - invisible(x) + # invisible(x) } From 8ba533291eb86a2b3313e4259d66f6bc39088e65 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 14 Mar 2024 13:55:46 +0100 Subject: [PATCH 34/42] started documentation --- NAMESPACE | 5 +++ R/plot.R | 94 ++++++++++++++++++++++++++++-------------- man/dr.histogram.Rd | 41 ++++++++++++++++++ man/plot.kliep.Rd | 14 +++++++ man/plot_bivariate.Rd | 21 ++++++++++ man/plot_univariate.Rd | 28 +++++++++++++ man/print.Rd | 7 ++++ 7 files changed, 180 insertions(+), 30 deletions(-) create mode 100644 man/dr.histogram.Rd create mode 100644 man/plot.kliep.Rd create mode 100644 man/plot_bivariate.Rd create mode 100644 man/plot_univariate.Rd diff --git a/NAMESPACE b/NAMESPACE index e6f4efa..4d2266b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,13 @@ # Generated by roxygen2: do not edit by hand +S3method(plot,kliep) +S3method(plot,ulsif) S3method(predict,kliep) S3method(predict,lhss) S3method(predict,naivedensityratio) S3method(predict,naivesubspacedensityratio) S3method(predict,ulsif) +S3method(print,bivariate.plot) S3method(print,kliep) S3method(print,lhss) S3method(print,naivedensityratio) @@ -26,6 +29,8 @@ export(kmm) export(lhss) export(naive) export(naivesubspace) +export(plot_bivariate) +export(plot_univariate) export(ulsif) importFrom(Rcpp,sourceCpp) importFrom(parallel,detectCores) diff --git a/R/plot.R b/R/plot.R index 1d557c7..793fcbc 100644 --- a/R/plot.R +++ b/R/plot.R @@ -1,42 +1,74 @@ -dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = NULL, bins = NULL,...) { +#' A histogram of density ratio estimates +#' +#' Creates a histogram of the density ratio estimates. Useful to understand the +#' distribution of estimated density ratios in each sample, or compare it among +#' samples. It is the default plotting method for density ratio objects. +#' +#' @param object Density ratio object created with e.g., [kliep()], [ulsif()], +#' or [naive()] +#' @param sample Character string indicating whether to plot the 'numerator', +#' 'denominator', or 'both' samples. Default is 'both'. +#' @param logscale Logical indicating whether to plot the density ratio +#' estimates on a log scale. Default is TRUE. +#' @param binwidth Numeric indicating the width of the bins, passed on to +#' `ggplot2`. +#' @param bins Numeric indicating the number of bins. Overriden by binwidth, and +#' passed on to `ggplot2`. +#' @param ... Additional arguments passed on to `predict()`. +#' +#' @return A histogram of density ratio estimates. +#' +#' +#' @examples +dr.histogram <- function(object, + samples = "both", + logscale = TRUE, + binwidth = NULL, + bins = NULL, + ...) { - # Checks + # Check object type check.object.type(object) - check.overriden.names(vars) + # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) + # Check no variable names that will be overriden when plotting. + check.overriden.names(data) + # Create density ratio prediction data$dr <- predict(object, newdata = data, ...) + if (logscale) { + # Convert negative predicted density ratios to 10e-3, so log can be computed if(any(data$dr <= 0)){ - # Convert negative predicted density ratios to 10e-3, so log can be computed - count <- length(data$dr[data$dr <= 0]) data$dr[data$dr <= 0] <- 10e-3 + # Throw warning with number of converted values + count <- length(data$dr[data$dr <= 0]) warning( paste("Negative estimated density ratios for", count, "observation(s) converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) } + # Apply log transformation data$dr <- log(data$dr) x_lab <- "Log (Density Ratio)" } else { - x_lab <- "Density Ratio" + x_lab <- "Density Ratio" } # Create a sample index variable (denominator or numerator) - obsclass <- rep(c("numerator", "denominator"), - c(nrow(object$df_numerator), nrow(object$df_denominator))) - data$sample <- obsclass + data$sample <- rep(c("numerator", "denominator"), + c(nrow(object$df_numerator), nrow(object$df_denominator))) # Create a object selection variable (both, numerator, denominator) - obsselect <- match.arg(sample, c("both", "numerator", "denominator")) + obsselect <- match.arg(samples, c("both", "numerator", "denominator")) # If not both, subset data (only num or only den) if (obsselect != "both") { - data <- filter(data, obsclass == obsselect) + data <- filter(data, sample == obsselect) } # Plot @@ -63,35 +95,34 @@ dr.histogram <- function(object, sample = "both", logscale = FALSE, binwidth = N return(plot) } -#' Title -#' -#' @param object -#' @param sample -#' @param logscale + + +#' @inheritParams dr.histogram +#' @rdname dr.histogram #' #' @return #' @export #' #' @examples -plot.ulsif <- function(object, sample = "both", logscale = FALSE, binwidth = NULL, +plot.ulsif <- function(object, samples = "both", logscale = FALSE, binwidth = NULL, bins = NULL) { - dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth, + dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, bins = bins) } -#' Title -#' -#' @param object -#' @param sample + + +#' @inheritParams dr.histogram +#' @rdname dr.histogram #' @param logscale #' #' @return #' @export #' #' @examples -plot.kliep <- function(object, sample = "both", logscale = FALSE, binwidth = NULL, +plot.kliep <- function(object, samples = "both", logscale = FALSE, binwidth = NULL, bins = NULL) { - dr.histogram(object, sample = sample, logscale = logscale, binwidth = binwidth, + dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, bins = bins) } @@ -122,13 +153,16 @@ individual_uni_plot <- function(data, var, y_lab, facet = TRUE){ return(plot) } -#' Title +#' Scatter plot of density ratios and individual variables #' -#' @param object -#' @param vars -#' @param sample +#' Plot a scatter plot showing the relationship between estimated densityratios and individual variables.Said differently, displays which densityratios are more likely for which values of the individual variables. #' -#' @return +#' @inheritParams dr.histogram +#' @param vars Character vector of variable names to be plotted. +#' @param output Character indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual". +#' @param sample.facet Logical indicating whether to facet the plot by sample, i.e, showing plots separate for each sample, and side to side. Defaults to FALSE. +#' @param nrow Integer indicating the number of rows in the assembled plot. If NULL, the number of rows is automatically calculated. +#' @return Scatter plot of density ratios and individual variables. #' @export #' #' @examples @@ -212,7 +246,7 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, labels = c("Numerator", "Denominator")) + scale_y_continuous(limits = c(y_min, y_max)) - if(facet){ + if(sample.facet){ plot <- plot + facet_grid(cols = vars(sample), rows = vars(variable), diff --git a/man/dr.histogram.Rd b/man/dr.histogram.Rd new file mode 100644 index 0000000..cf4f014 --- /dev/null +++ b/man/dr.histogram.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{dr.histogram} +\alias{dr.histogram} +\alias{plot.ulsif} +\title{A histogram of density ratio estimates} +\usage{ +dr.histogram( + object, + sample = "both", + logscale = TRUE, + binwidth = NULL, + bins = NULL +) + +\method{plot}{ulsif}(object, sample = "both", logscale = FALSE, binwidth = NULL, bins = NULL) +} +\arguments{ +\item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, +or \code{\link[=naive]{naive()}}} + +\item{sample}{Character string indicating whether to plot the 'numerator', +'denominator', or 'both' samples. Default is 'both'.} + +\item{logscale}{Logical indicating whether to plot the density ratio +estimates on a log scale. Default is TRUE.} + +\item{binwidth}{Numeric indicating the width of the bins, passed on to +\code{ggplot2}.} + +\item{bins}{Numeric indicating the number of bins. Overriden by binwidth, and +passed on to \code{ggplot2}.} +} +\value{ +A histogram of density ratio estimates. +} +\description{ +Creates a histogram of the density ratio estimates. Useful to understand the +distribution of estimated density ratios in each sample, or compare it among +samples. It is the default plotting method for density ratio objects. +} diff --git a/man/plot.kliep.Rd b/man/plot.kliep.Rd new file mode 100644 index 0000000..ba97c52 --- /dev/null +++ b/man/plot.kliep.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{plot.kliep} +\alias{plot.kliep} +\title{Title} +\usage{ +\method{plot}{kliep}(object, sample = "both", logscale = FALSE, binwidth = NULL, bins = NULL) +} +\arguments{ +\item{logscale}{} +} +\description{ +Title +} diff --git a/man/plot_bivariate.Rd b/man/plot_bivariate.Rd new file mode 100644 index 0000000..4d1c24b --- /dev/null +++ b/man/plot_bivariate.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{plot_bivariate} +\alias{plot_bivariate} +\title{Title} +\usage{ +plot_bivariate( + object, + vars, + samples = "both", + output = "assembled", + logscale = TRUE, + show.sample = FALSE +) +} +\arguments{ +\item{show.samples}{} +} +\description{ +Title +} diff --git a/man/plot_univariate.Rd b/man/plot_univariate.Rd new file mode 100644 index 0000000..2273fa5 --- /dev/null +++ b/man/plot_univariate.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{plot_univariate} +\alias{plot_univariate} +\title{Scatter plot of density ratios and individual variables} +\usage{ +plot_univariate( + object, + vars, + samples = "both", + logscale = TRUE, + output = "individual", + facet = FALSE, + nrow = NULL +) +} +\arguments{ +\item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, +or \code{\link[=naive]{naive()}}} + +\item{vars}{Character vector of variable names to be plotted.} + +\item{logscale}{Logical indicating whether to plot the density ratio +estimates on a log scale. Default is TRUE.} +} +\description{ +Plot a scatter plot showing the relationship between estimated densityratios and individual variables.Said differently, displays which densityratios are more likely for which values of the individual variables. +} diff --git a/man/print.Rd b/man/print.Rd index 0ae7ec5..47de6dd 100644 --- a/man/print.Rd +++ b/man/print.Rd @@ -11,6 +11,7 @@ \alias{print.naivesubspacedensityratio} \alias{print.summary.naivedensityratio} \alias{print.summary.naivesubspacedensityratio} +\alias{print.bivariateplot} \title{Print a \code{ulsif} object} \usage{ \method{print}{ulsif}(x, digits = max(3L, getOption("digits") - 3L), ...) @@ -32,6 +33,8 @@ \method{print}{summary.naivedensityratio}(x, digits = max(3L, getOption("digits") - 3L), ...) \method{print}{summary.naivesubspacedensityratio}(x, digits = max(3L, getOption("digits") - 3L), ...) + +\method{print}{bivariate.plot}(x, ...) } \arguments{ \item{x}{Object of class \code{ulsif}, \code{summary.ulsif}, \code{kliep} @@ -60,6 +63,8 @@ or \code{summary.kliep}.} \code{NULL} +\code{NULL} + \code{NULL} } \description{ @@ -82,4 +87,6 @@ Print a \code{naivesubspacedensityratio} object Print a \code{summary.naivedensityratio} object Print a \code{summary.naivesubspacedensityratio} object + +Print a \code{bivariate.plot} object } From 7beefc0b73004e177566b7926b92a9b0b565bf65 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 14 Mar 2024 16:53:03 +0100 Subject: [PATCH 35/42] update documentation and solve small bugs --- NAMESPACE | 2 +- R/plot.R | 69 +++++++++++++++++++++++--------------- man/dr.histogram.Rd | 21 +++++++----- man/individual_biv_plot.Rd | 19 +++++++++++ man/individual_uni_plot.Rd | 20 +++++++++++ man/plot.kliep.Rd | 14 -------- man/plot.univariate.Rd | 36 ++++++++++++++++++++ man/plot_bivariate.Rd | 18 ++++++++-- man/plot_univariate.Rd | 28 ---------------- 9 files changed, 146 insertions(+), 81 deletions(-) create mode 100644 man/individual_biv_plot.Rd create mode 100644 man/individual_uni_plot.Rd delete mode 100644 man/plot.kliep.Rd create mode 100644 man/plot.univariate.Rd delete mode 100644 man/plot_univariate.Rd diff --git a/NAMESPACE b/NAMESPACE index 4d2266b..ec4837b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(plot,kliep) S3method(plot,ulsif) +S3method(plot,univariate) S3method(predict,kliep) S3method(predict,lhss) S3method(predict,naivedensityratio) @@ -30,7 +31,6 @@ export(lhss) export(naive) export(naivesubspace) export(plot_bivariate) -export(plot_univariate) export(ulsif) importFrom(Rcpp,sourceCpp) importFrom(parallel,detectCores) diff --git a/R/plot.R b/R/plot.R index 793fcbc..36b5ebc 100644 --- a/R/plot.R +++ b/R/plot.R @@ -127,11 +127,22 @@ plot.kliep <- function(object, samples = "both", logscale = FALSE, binwidth = NU } -# Write the function to make one plot, for one variable -individual_uni_plot <- function(data, var, y_lab, facet = TRUE){ - y_max <- max(1,data$dr) - y_min <- min(-1, data$dr) +#' Indivual univariate plot +#' +#' Scatterplot of individual values and density ratio estimates. Used internally in [plot.univariate()] +#' @param data Data frame with the individual values and density ratio estimates +#' @param var Name of the variable to be plotted +#' @param y_lab Name of the y-axis label ("Density Ratio" or "Log Density Ratio") +#' @param sample.facet Logical indicating whether to facet the plot by sample. Default is TRUE. +#' +#' @return +#' +individual_uni_plot <- function(data, var, y_lab, sample.facet = TRUE){ + + y_max <- max(2,data$dr) + y_min <- min(-2, data$dr) + plot <- ggplot(data, aes(x = .data[[var]], y = dr)) + geom_point(aes(col = sample), @@ -146,7 +157,7 @@ individual_uni_plot <- function(data, var, y_lab, facet = TRUE){ labels = c("Numerator", "Denominator")) + scale_y_continuous(limits = c(y_min, y_max)) - if(facet){ + if(sample.facet){ plot <- plot + facet_wrap(~sample) } @@ -155,7 +166,7 @@ individual_uni_plot <- function(data, var, y_lab, facet = TRUE){ #' Scatter plot of density ratios and individual variables #' -#' Plot a scatter plot showing the relationship between estimated densityratios and individual variables.Said differently, displays which densityratios are more likely for which values of the individual variables. +#' A scatter plot showing the relationship between estimated density ratios and individual variables. #' #' @inheritParams dr.histogram #' @param vars Character vector of variable names to be plotted. @@ -167,7 +178,7 @@ individual_uni_plot <- function(data, var, y_lab, facet = TRUE){ #' #' @examples plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, - output = "individual", facet = FALSE, + output = "individual", sample.facet = FALSE, nrow = NULL) { # Check object type @@ -215,25 +226,27 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, y_lab <- "Density Ratio" } + if(output == "individual"){ # Create list storage for plots object (for iteration) plots <- list() for(var in vars){ - plots[[var]] <- individual_uni_plot(data, var, y_lab, facet) + plots[[var]] <- individual_uni_plot(data, var, y_lab, sample.facet) } return(plots) } + if (output == "assembled"){ data <- data %>% pivot_longer(cols = vars, names_to = "variable", values_to = "value") + # Maximum scale for y y_max <- max(1,data$dr) y_min <- min(-1, data$dr) - plot <- ggplot(data) + geom_point(aes(x = value, y = dr, col = sample), alpha = .6) + @@ -263,8 +276,15 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, } -# Define function to make bivariate plot -plot_twovariables <- function(data, vars, logscale, show.sample){ +#' Bivariate plot +#' +#' @inheritParams individual_uni_plot +#' @param show.sample Logical indicating whether to give different shapes to observations, depending on the sample they come from (numerator or denominator). Defaults to FALSE. +#' +#' @return Bivariate plot +#' +#' @examples +individual_biv_plot <- function(data, vars, logscale, show.sample){ dr_max <- ifelse(logscale, max(2, data$dr), max(exp(2), data$dr)) dr_min <- ifelse(logscale, min(-2, data$dr), min(exp(-2), data$dr)) @@ -284,14 +304,14 @@ plot_twovariables <- function(data, vars, logscale, show.sample){ return(plot) } -#' Title +#' Densityratio in bidimensional plot #' -#' @param object -#' @param vars -#' @param sample -#' @param show.samples +#' Plots a scatterplot of two variables, with densityratio mapped to the colour scale. #' -#' @return +#' @inheritParams plot.univariate +#' @inheritParams individual_biv_plot +#' +#' @return Scatter plot of two variables, with density ratio mapped to the colour scale. #' @export #' #' @examples @@ -316,8 +336,8 @@ plot_bivariate <- function(object, vars, samples = "both", if(any(data$dr <= 0)){ # Convert negative predicted density ratios to 10e-3, so log can be computed - count <- length(data$dr[data$dr <= 0]) data$dr[data$dr <= 0] <- 10e-3 + count <- length(data$dr[data$dr <= 0]) warning( paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) @@ -353,28 +373,24 @@ plot_bivariate <- function(object, vars, samples = "both", ## This makes duplicate rows with different order of variables identical var_combinations <- t(apply(var_combinations, 1, sort)) var_combinations <- unique(var_combinations) # retain unique rows only - - # Remove rows where both variables are the same var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) names(var_combinations) <- c("Var1", "Var2") if(output == "individual"){ - + # Remove rows where both variables are the same var_combinations <- var_combinations %>% filter(Var1 != Var2) var_combinations <- as.matrix(var_combinations) plots <- list() for(i in 1:nrow(var_combinations)){ - plots[[i]] <- plot_twovariables(data, vars = var_combinations[i,], logscale, show.sample) + plots[[i]] <- individual_biv_plot(data, vars = var_combinations[i,], logscale, show.sample) } return(plots) } if (output == "assembled") { - # plot_data <- data %>% - # pivot_longer(cols = c("V1", "V2"), names_to = "variable", values_to = "value") - # browser() + # Give variable combinations in a format we can use later combinations <- paste0(var_combinations[,1], "-", var_combinations[,2]) plot_data <- @@ -427,8 +443,7 @@ plot_bivariate <- function(object, vars, samples = "both", class(out) <- c("bivariateplot", class(grob)) return(out) - # grid::grid.newpage() - # grid::grid.draw(grob) + } } diff --git a/man/dr.histogram.Rd b/man/dr.histogram.Rd index cf4f014..ae662d3 100644 --- a/man/dr.histogram.Rd +++ b/man/dr.histogram.Rd @@ -3,33 +3,38 @@ \name{dr.histogram} \alias{dr.histogram} \alias{plot.ulsif} +\alias{plot.kliep} \title{A histogram of density ratio estimates} \usage{ dr.histogram( object, - sample = "both", + samples = "both", logscale = TRUE, binwidth = NULL, - bins = NULL + bins = NULL, + ... ) -\method{plot}{ulsif}(object, sample = "both", logscale = FALSE, binwidth = NULL, bins = NULL) +\method{plot}{ulsif}(object, samples = "both", logscale = FALSE, binwidth = NULL, bins = NULL) + +\method{plot}{kliep}(object, samples = "both", logscale = FALSE, binwidth = NULL, bins = NULL) } \arguments{ \item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, or \code{\link[=naive]{naive()}}} -\item{sample}{Character string indicating whether to plot the 'numerator', -'denominator', or 'both' samples. Default is 'both'.} - -\item{logscale}{Logical indicating whether to plot the density ratio -estimates on a log scale. Default is TRUE.} +\item{logscale}{} \item{binwidth}{Numeric indicating the width of the bins, passed on to \code{ggplot2}.} \item{bins}{Numeric indicating the number of bins. Overriden by binwidth, and passed on to \code{ggplot2}.} + +\item{...}{Additional arguments passed on to \code{predict()}.} + +\item{sample}{Character string indicating whether to plot the 'numerator', +'denominator', or 'both' samples. Default is 'both'.} } \value{ A histogram of density ratio estimates. diff --git a/man/individual_biv_plot.Rd b/man/individual_biv_plot.Rd new file mode 100644 index 0000000..6ef3e42 --- /dev/null +++ b/man/individual_biv_plot.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{individual_biv_plot} +\alias{individual_biv_plot} +\title{Bivariate plot} +\usage{ +individual_biv_plot(data, vars, logscale, show.sample) +} +\arguments{ +\item{data}{Data frame with the individual values and density ratio estimates} + +\item{show.sample}{Logical indicating whether to give different shapes to observations, depending on the sample they come from (numerator or denominator). Defaults to FALSE.} +} +\value{ +Bivariate plot +} +\description{ +Bivariate plot +} diff --git a/man/individual_uni_plot.Rd b/man/individual_uni_plot.Rd new file mode 100644 index 0000000..a6bcb18 --- /dev/null +++ b/man/individual_uni_plot.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{individual_uni_plot} +\alias{individual_uni_plot} +\title{Indivual univariate plot} +\usage{ +individual_uni_plot(data, var, y_lab, sample.facet = TRUE) +} +\arguments{ +\item{data}{Data frame with the individual values and density ratio estimates} + +\item{var}{Name of the variable to be plotted} + +\item{y_lab}{Name of the y-axis label ("Density Ratio" or "Log Density Ratio")} + +\item{sample.facet}{Logical indicating whether to facet the plot by sample. Default is TRUE.} +} +\description{ +Scatterplot of individual values and density ratio estimates. Used internally in \code{\link[=plot.univariate]{plot.univariate()}} +} diff --git a/man/plot.kliep.Rd b/man/plot.kliep.Rd deleted file mode 100644 index ba97c52..0000000 --- a/man/plot.kliep.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot.R -\name{plot.kliep} -\alias{plot.kliep} -\title{Title} -\usage{ -\method{plot}{kliep}(object, sample = "both", logscale = FALSE, binwidth = NULL, bins = NULL) -} -\arguments{ -\item{logscale}{} -} -\description{ -Title -} diff --git a/man/plot.univariate.Rd b/man/plot.univariate.Rd new file mode 100644 index 0000000..a0be7e7 --- /dev/null +++ b/man/plot.univariate.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.R +\name{plot.univariate} +\alias{plot.univariate} +\title{Scatter plot of density ratios and individual variables} +\usage{ +\method{plot}{univariate}( + object, + vars, + samples = "both", + logscale = TRUE, + output = "individual", + sample.facet = FALSE, + nrow = NULL +) +} +\arguments{ +\item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, +or \code{\link[=naive]{naive()}}} + +\item{vars}{Character vector of variable names to be plotted.} + +\item{logscale}{} + +\item{output}{Character indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual".} + +\item{sample.facet}{Logical indicating whether to facet the plot by sample, i.e, showing plots separate for each sample, and side to side. Defaults to FALSE.} + +\item{nrow}{Integer indicating the number of rows in the assembled plot. If NULL, the number of rows is automatically calculated.} +} +\value{ +Scatter plot of density ratios and individual variables. +} +\description{ +A scatter plot showing the relationship between estimated density ratios and individual variables. +} diff --git a/man/plot_bivariate.Rd b/man/plot_bivariate.Rd index 4d1c24b..0a45ca8 100644 --- a/man/plot_bivariate.Rd +++ b/man/plot_bivariate.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plot.R \name{plot_bivariate} \alias{plot_bivariate} -\title{Title} +\title{Densityratio in bidimensional plot} \usage{ plot_bivariate( object, @@ -14,8 +14,20 @@ plot_bivariate( ) } \arguments{ -\item{show.samples}{} +\item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, +or \code{\link[=naive]{naive()}}} + +\item{vars}{Character vector of variable names to be plotted.} + +\item{output}{Character indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual".} + +\item{logscale}{} + +\item{show.sample}{Logical indicating whether to give different shapes to observations, depending on the sample they come from (numerator or denominator). Defaults to FALSE.} +} +\value{ +Scatter plot of two variables, with density ratio mapped to the colour scale. } \description{ -Title +Plots a scatterplot of two variables, with densityratio mapped to the colour scale. } diff --git a/man/plot_univariate.Rd b/man/plot_univariate.Rd deleted file mode 100644 index 2273fa5..0000000 --- a/man/plot_univariate.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot.R -\name{plot_univariate} -\alias{plot_univariate} -\title{Scatter plot of density ratios and individual variables} -\usage{ -plot_univariate( - object, - vars, - samples = "both", - logscale = TRUE, - output = "individual", - facet = FALSE, - nrow = NULL -) -} -\arguments{ -\item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, -or \code{\link[=naive]{naive()}}} - -\item{vars}{Character vector of variable names to be plotted.} - -\item{logscale}{Logical indicating whether to plot the density ratio -estimates on a log scale. Default is TRUE.} -} -\description{ -Plot a scatter plot showing the relationship between estimated densityratios and individual variables.Said differently, displays which densityratios are more likely for which values of the individual variables. -} From b08695cd00f6b25f669e8dcb3d6038225716d738 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Thu, 28 Mar 2024 18:08:52 +0100 Subject: [PATCH 36/42] started reviewing thom's comments --- R/checks.R | 2 +- R/plot.R | 197 ++++++++++++++++++++++++++--------------------------- R/print.R | 13 ---- 3 files changed, 96 insertions(+), 116 deletions(-) diff --git a/R/checks.R b/R/checks.R index 280cfff..4e9a888 100644 --- a/R/checks.R +++ b/R/checks.R @@ -332,7 +332,7 @@ check.newdata <- function(object, newdata) { check.var.names <- function(vars, data){ if(!all(vars %in% names(data))) { - stop("Indicated variable (s) are not present in object. Check variable names are correct") + stop("Indicated variable (s) are not present in object. Check whether variable names are correct") } } diff --git a/R/plot.R b/R/plot.R index 36b5ebc..191eaed 100644 --- a/R/plot.R +++ b/R/plot.R @@ -34,57 +34,51 @@ dr.histogram <- function(object, # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) - # Check no variable names that will be overriden when plotting. - check.overriden.names(data) - # Create density ratio prediction - data$dr <- predict(object, newdata = data, ...) - + ext <- data.frame(dr = predict(object, data = data, ...), + sample = c(rep("numerator", nrow(object$df_numerator)), + rep("denominator", nrow(object$df_denominator)))) + # Check if logscale is TRUE if (logscale) { # Convert negative predicted density ratios to 10e-3, so log can be computed - if(any(data$dr <= 0)){ - data$dr[data$dr <= 0] <- 10e-3 + if(any(ext$dr <= 0)){ + ext$dr[ext$dr <= 0] <- 10e-3 # Throw warning with number of converted values - count <- length(data$dr[data$dr <= 0]) + count <- length(ext$dr[ext$dr <= 0]) warning( paste("Negative estimated density ratios for", count, "observation(s) converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) } # Apply log transformation - data$dr <- log(data$dr) + ext$dr <- log(ext$dr) x_lab <- "Log (Density Ratio)" } else { x_lab <- "Density Ratio" } - # Create a sample index variable (denominator or numerator) - data$sample <- rep(c("numerator", "denominator"), - c(nrow(object$df_numerator), nrow(object$df_denominator))) - - # Create a object selection variable (both, numerator, denominator) - obsselect <- match.arg(samples, c("both", "numerator", "denominator")) - - # If not both, subset data (only num or only den) - if (obsselect != "both") { - data <- filter(data, sample == obsselect) + # Select data + samples <- match.arg(samples, c("both", "numerator", "denominator")) + if(samples != "both"){ + data <- subset(data, ext$sample == samples) + ext <- subset(ext, ext$sample == samples) } # Plot plot <- - ggplot(data, aes(x = dr)) + + ggplot(ext, aes(x = dr)) + geom_histogram(aes(fill = sample), alpha = .75, color = "black", - binwidth = if (!is.null(binwidth)) binwidth else NULL, - bins = if(!is.null(bins)) bins else NULL, + binwidth = binwidth, + bins = bins, position = position_dodge2(preserve = "single", padding = 0.2, reverse = TRUE)) + - scale_fill_manual(values = c("firebrick", "steelblue"), - breaks = c("numerator", "denominator"), - labels = c("Numerator", "Denominator")) + + scale_fill_viridis_d(option = "cividis", + breaks = c("numerator", "denominator"), + labels = c("Numerator", "Denominator")) + theme_bw() + labs( x = x_lab, @@ -104,7 +98,7 @@ dr.histogram <- function(object, #' @export #' #' @examples -plot.ulsif <- function(object, samples = "both", logscale = FALSE, binwidth = NULL, +plot.ulsif <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, bins = NULL) { dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, bins = bins) @@ -120,7 +114,7 @@ plot.ulsif <- function(object, samples = "both", logscale = FALSE, binwidth = NU #' @export #' #' @examples -plot.kliep <- function(object, samples = "both", logscale = FALSE, binwidth = NULL, +plot.kliep <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, bins = NULL) { dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, bins = bins) @@ -138,23 +132,23 @@ plot.kliep <- function(object, samples = "both", logscale = FALSE, binwidth = NU #' #' @return #' -individual_uni_plot <- function(data, var, y_lab, sample.facet = TRUE){ +create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ - y_max <- max(2,data$dr) - y_min <- min(-2, data$dr) + y_max <- max(2, ext$dr) + y_min <- min(-2, ext$dr) plot <- - ggplot(data, aes(x = .data[[var]], y = dr)) + - geom_point(aes(col = sample), + ggplot(data, aes(x = .data[[var]], y = ext$dr)) + + geom_point(aes(col = ext$sample), alpha = .6) + theme_bw() + labs(title = "Scatter plot of individual values and density ratio", color = "Sample", y = y_lab) + geom_hline(yintercept = 0, linetype = "dashed")+ - scale_colour_manual(values = c("firebrick", "steelblue"), - breaks = c("numerator", "denominator"), - labels = c("Numerator", "Denominator")) + + scale_color_viridis_d(option = "cividis", + breaks = c("numerator", "denominator"), + labels = c("Numerator", "Denominator")) + scale_y_continuous(limits = c(y_min, y_max)) if(sample.facet){ @@ -170,54 +164,53 @@ individual_uni_plot <- function(data, var, y_lab, sample.facet = TRUE){ #' #' @inheritParams dr.histogram #' @param vars Character vector of variable names to be plotted. -#' @param output Character indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual". +#' @param grid Logical indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual". #' @param sample.facet Logical indicating whether to facet the plot by sample, i.e, showing plots separate for each sample, and side to side. Defaults to FALSE. #' @param nrow Integer indicating the number of rows in the assembled plot. If NULL, the number of rows is automatically calculated. +#' @param ... Additional arguments passed to the predict() function. +#' #' @return Scatter plot of density ratios and individual variables. #' @export #' #' @examples plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, - output = "individual", sample.facet = FALSE, - nrow = NULL) { + grid = FALSE, sample.facet = FALSE, + nrow = NULL, ...) { # Check object type check.object.type(object) - # Create data object + # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) - - # Check names in data and variable names - check.overriden.names(data) + ext <- data.frame(dr = predict(object, data = data, ...), + sample = c(rep("numerator", nrow(object$df_numerator)), + rep("denominator", nrow(object$df_denominator)))) + # Check variable names check.var.names(vars, data) # Estimate density ratio - data$dr <- predict(object, newdata = data) - - # Creta sample identifier - data$sample <- rep(c("numerator", "denominator"), - c(nrow(object$df_numerator), nrow(object$df_denominator))) + ext$dr <- predict(object, newdata = data) - # Create a object selection variable (both, numerator, denominator) - obsselect <- match.arg(samples, c("both", "numerator", "denominator")) - - if (obsselect != "both") { - data <- filter(data, sample == obsselect) + # Select data + samples <- match.arg(samples, c("both", "numerator", "denominator")) + if(samples != "both"){ + data <- subset(data, ext$sample == samples) + ext <- subset(ext, ext$sample == samples) } if (logscale) { - if(any(data$dr <= 0)){ + if(any(ext$dr <= 0)){ # Convert negative predicted density ratios to 10e-3, so log can be computed - count <- length(data$dr[data$dr <= 0]) - data$dr[data$dr <= 0] <- 10e-3 + count <- length(ext$dr[ext$dr <= 0]) + ext$dr[ext$dr <= 0] <- 10e-3 warning( paste("Negative estimated density ratios for", count, "observation(s) converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) } - data$dr <- log(data$dr) + ext$dr <- log(ext$dr) # Assign correct y and legend labels y_lab <- "Log(Density Ratio)" @@ -227,36 +220,33 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, } - if(output == "individual"){ - # Create list storage for plots object (for iteration) - plots <- list() - for(var in vars){ - plots[[var]] <- individual_uni_plot(data, var, y_lab, sample.facet) - } - return(plots) + if(!grid){ - } + plot <- lapply(vars, function(var) create_univariate_plot(data, ext, var, y_lab, sample.facet)) - if (output == "assembled"){ - data <- data %>% - pivot_longer(cols = vars, - names_to = "variable", - values_to = "value") + } else { + + values <- data[, vars] |> unlist(use.names = FALSE) + variable <- rep(vars, each = length(values)/length(vars)) + dr <- rep(ext$dr, length(vars)) + sample <- rep(ext$sample, length(vars)) + data <- data.frame(values = values, variable = variable) + ext <- data.frame(dr = dr, sample = sample) # Maximum scale for y - y_max <- max(1,data$dr) - y_min <- min(-1, data$dr) + y_max <- max(1,ext$dr) + y_min <- min(-1, ext$dr) plot <- ggplot(data) + - geom_point(aes(x = value, y = dr, col = sample), + geom_point(aes(x = values, y = ext$dr, col = ext$sample), alpha = .6) + theme_bw() + labs(title = "Scatter plot of individual values and density ratio", color = "Sample", y = "Density ratio") + - scale_color_manual(values = c("firebrick", "steelblue"), - breaks = c("numerator", "denominator"), - labels = c("Numerator", "Denominator")) + + scale_color_viridis_d(option = "cividis", + breaks = c("numerator", "denominator"), + labels = c("Numerator", "Denominator")) + scale_y_continuous(limits = c(y_min, y_max)) if(sample.facet){ @@ -268,11 +258,12 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, } else { plot <- plot + facet_wrap(~variable, scales = "free_x", nrow = nrow) - } + } + } - return(plot) + return(plot) } @@ -284,14 +275,14 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, #' @return Bivariate plot #' #' @examples -individual_biv_plot <- function(data, vars, logscale, show.sample){ +create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ - dr_max <- ifelse(logscale, max(2, data$dr), max(exp(2), data$dr)) - dr_min <- ifelse(logscale, min(-2, data$dr), min(exp(-2), data$dr)) + dr_max <- ifelse(logscale, max(2, ext$dr), max(exp(2), ext$dr)) + dr_min <- ifelse(logscale, min(-2, ext$dr), min(exp(-2), ext$dr)) plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(aes(colour = dr, shape = if(show.sample) sample else NULL)) + + geom_point(aes(colour = ext$dr, shape = if(show.sample) sample else NULL)) + scale_colour_gradient2(low = "firebrick", high = "steelblue", mid = "lightyellow", @@ -309,41 +300,42 @@ individual_biv_plot <- function(data, vars, logscale, show.sample){ #' Plots a scatterplot of two variables, with densityratio mapped to the colour scale. #' #' @inheritParams plot.univariate -#' @inheritParams individual_biv_plot +#' @param logscale Logical indicating whether to plot the density ratio +#' estimates on a log scale. Default is TRUE. +#' #' #' @return Scatter plot of two variables, with density ratio mapped to the colour scale. #' @export #' #' @examples plot_bivariate <- function(object, vars, samples = "both", - output = "assembled", logscale = TRUE, show.sample = FALSE) { + grid = FALSE, logscale = TRUE, show.sample = FALSE, + ...) { # Check object type check.object.type(object) - # Create data object and check variable names + # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) - check.overriden.names(data) - + ext <- data.frame(dr = predict(object, data = data, ...), + sample = c(rep("numerator", nrow(object$df_numerator)), + rep("denominator", nrow(object$df_denominator)))) # Check variable names check.var.names(vars, data) - # Estimate density ratio - data$dr <- predict(object, newdata = data) - # Determine if DR is shown in logscale (default) or not if (logscale) { - if(any(data$dr <= 0)){ + if(any(ext$dr <= 0)){ # Convert negative predicted density ratios to 10e-3, so log can be computed - data$dr[data$dr <= 0] <- 10e-3 - count <- length(data$dr[data$dr <= 0]) + ext$dr[ext$dr <= 0] <- 10e-3 + count <- length(ext$dr[ext$dr <= 0]) warning( paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), call. = FALSE) } - data$dr <- log(data$dr) + ext$dr <- log(ext$dr) # Assign correct y and legend labels colour_name <- "Log (Density ratio)" @@ -376,9 +368,9 @@ plot_bivariate <- function(object, vars, samples = "both", var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) names(var_combinations) <- c("Var1", "Var2") - if(output == "individual"){ + if(!grid){ # Remove rows where both variables are the same - var_combinations <- var_combinations %>% filter(Var1 != Var2) + var_combinations <- var_combinations |> subset(Var1 != Var2) var_combinations <- as.matrix(var_combinations) plots <- list() @@ -388,7 +380,7 @@ plot_bivariate <- function(object, vars, samples = "both", return(plots) } - if (output == "assembled") { + if (grid) { # Give variable combinations in a format we can use later combinations <- paste0(var_combinations[,1], "-", var_combinations[,2]) @@ -397,13 +389,13 @@ plot_bivariate <- function(object, vars, samples = "both", inner_join(data, data, by = c("dr", "sample")) %>% # Possible error in case of duplicate DR? pivot_longer(cols = ends_with(".x"), names_to = "name.x", values_to = "value.x") %>% pivot_longer(cols = ends_with(".y"), names_to = "name.y", values_to = "value.y") %>% - mutate(name.x = stringr::str_remove(name.x, ".x"), - name.y = stringr::str_remove(name.y, ".y"), + mutate(name.x = substr(name.x, 1 ,nchar(name.x)-2), + name.y = substr(name.y, 1 ,nchar(name.y)-2), combination = paste0(name.x, "-", name.y)) %>% filter(combination %in% combinations) - dr_max <- max(1, plot_data$dr) - dr_min <- min(-1, plot_data$dr) + dr_max <- max(1, plot_ext$dr) + dr_min <- min(-1, plot_ext$dr) plot <- ggplot(plot_data, mapping = aes(x = value.x, y = value.y, @@ -440,9 +432,10 @@ plot_bivariate <- function(object, vars, samples = "both", for (i in idx) grob$grobs[[i]] <- grid::nullGrob() out <- grob - class(out) <- c("bivariateplot", class(grob)) + # class(out) <- c("bivariateplot", class(grob)) - return(out) + grid::grid.newpage() + grid::grid.draw(out) } diff --git a/R/print.R b/R/print.R index ba9067f..6f81075 100644 --- a/R/print.R +++ b/R/print.R @@ -297,16 +297,3 @@ print.summary.naivesubspacedensityratio <- function(x, digits = max(3L, getOptio } invisible(x) } - -#' Print a \code{bivariate.plot} object -#' -#' @rdname print -#' @return \code{NULL} -#' @method print bivariate.plot -#' @importFrom -#' @export -print.bivariateplot <- function(x, ...) { - grid::grid.newpage() - grid::grid.draw(x) - # invisible(x) -} From b45070a85415b60b9d470e60ee7acae3169efff2 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 15 Apr 2024 16:48:59 +0200 Subject: [PATCH 37/42] update plot r --- R/plot.R | 61 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/R/plot.R b/R/plot.R index 191eaed..9fafe98 100644 --- a/R/plot.R +++ b/R/plot.R @@ -76,6 +76,7 @@ dr.histogram <- function(object, position = position_dodge2(preserve = "single", padding = 0.2, reverse = TRUE)) + + scale_fill_viridis_d(option = "cividis", breaks = c("numerator", "denominator"), labels = c("Numerator", "Denominator")) + @@ -136,7 +137,6 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ y_max <- max(2, ext$dr) y_min <- min(-2, ext$dr) - plot <- ggplot(data, aes(x = .data[[var]], y = ext$dr)) + geom_point(aes(col = ext$sample), @@ -152,7 +152,7 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ scale_y_continuous(limits = c(y_min, y_max)) if(sample.facet){ - plot <- plot + facet_wrap(~sample) + plot <- plot + facet_wrap(~vars(ext$sample)) } return(plot) @@ -176,7 +176,6 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, grid = FALSE, sample.facet = FALSE, nrow = NULL, ...) { - # Check object type check.object.type(object) @@ -225,7 +224,6 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, plot <- lapply(vars, function(var) create_univariate_plot(data, ext, var, y_lab, sample.facet)) } else { - values <- data[, vars] |> unlist(use.names = FALSE) variable <- rep(vars, each = length(values)/length(vars)) dr <- rep(ext$dr, length(vars)) @@ -250,6 +248,7 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, scale_y_continuous(limits = c(y_min, y_max)) if(sample.facet){ + plot <- plot + facet_grid(cols = vars(sample), rows = vars(variable), @@ -277,15 +276,24 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, #' @examples create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ + dr_max <- ifelse(logscale, max(2, ext$dr), max(exp(2), ext$dr)) dr_min <- ifelse(logscale, min(-2, ext$dr), min(exp(-2), ext$dr)) plot <- ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(aes(colour = ext$dr, shape = if(show.sample) sample else NULL)) + - scale_colour_gradient2(low = "firebrick", - high = "steelblue", - mid = "lightyellow", + geom_point(aes(colour = ext$dr, shape = show.sample), + alpha = 1, + size = 2.0) + + # scale_colour_gradient2(low = "#00204DFF", + # high = "#7D0000", + # mid = "lightyellow", + # midpoint = 0, + # limits = c(dr_min, dr_max)) + + scale_colour_gradient2(low = "#00204DFF", + high = "#FFEA46FF", + mid = "#7C7B78FF", + midpoint = 0, limits = c(dr_min, dr_max)) + theme_bw() + labs(title = "Scatter plot, with density ratio mapped to colour", @@ -309,7 +317,7 @@ create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ #' #' @examples plot_bivariate <- function(object, vars, samples = "both", - grid = FALSE, logscale = TRUE, show.sample = FALSE, + grid = FALSE, logscale = TRUE, show.sample = NULL, ...) { # Check object type @@ -365,28 +373,41 @@ plot_bivariate <- function(object, vars, samples = "both", ## This makes duplicate rows with different order of variables identical var_combinations <- t(apply(var_combinations, 1, sort)) var_combinations <- unique(var_combinations) # retain unique rows only + var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) names(var_combinations) <- c("Var1", "Var2") - if(!grid){ - # Remove rows where both variables are the same + # Remove rows where both variables are the same var_combinations <- var_combinations |> subset(Var1 != Var2) - var_combinations <- as.matrix(var_combinations) + var_combinations <- as.list(as.data.frame(t(var_combinations))) - plots <- list() - for(i in 1:nrow(var_combinations)){ - plots[[i]] <- individual_biv_plot(data, vars = var_combinations[i,], logscale, show.sample) - } - return(plots) + if(!grid){ + + + plot <- lapply(var_combinations, function(vars) create_bivariate_plot(data, ext, vars, logscale, show.sample)) + + return(plot) } if (grid) { - + browser() # Give variable combinations in a format we can use later - combinations <- paste0(var_combinations[,1], "-", var_combinations[,2]) + + combinations <- sapply(var_combinations, \(vars) paste0(vars, collapse = "-")) + + ext <- data.frame(data, ext$dr, ext$sample) + datlist <- lapply(var_combinations, \(x) data.frame(values.x = ext[,x[1]], + values.y = ext[,x[2]], + xvar = rep(x[1], nrow(ext)), + yvar = rep(x[2], nrow(ext)), + sample = ext[, "ext.sample"], + dr = ext[, "ext.dr"])) + + + plot_data2 <- do.call(datlist, what = rbind) plot_data <- - inner_join(data, data, by = c("dr", "sample")) %>% # Possible error in case of duplicate DR? + inner_join(ext, ext, by = c("dr", "sample")) %>% # Possible error in case of duplicate DR? pivot_longer(cols = ends_with(".x"), names_to = "name.x", values_to = "value.x") %>% pivot_longer(cols = ends_with(".y"), names_to = "name.y", values_to = "value.y") %>% mutate(name.x = substr(name.x, 1 ,nchar(name.x)-2), From 5de9b381353d6c788f920c9eaf158876a1e49e5d Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Tue, 16 Apr 2024 13:18:12 +0200 Subject: [PATCH 38/42] address thom PR review --- R/plot.R | 276 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 142 insertions(+), 134 deletions(-) diff --git a/R/plot.R b/R/plot.R index 9fafe98..64a0f0d 100644 --- a/R/plot.R +++ b/R/plot.R @@ -26,6 +26,7 @@ dr.histogram <- function(object, logscale = TRUE, binwidth = NULL, bins = NULL, + tol = 10e-3, ...) { # Check object type @@ -40,15 +41,14 @@ dr.histogram <- function(object, # Check if logscale is TRUE if (logscale) { - - # Convert negative predicted density ratios to 10e-3, so log can be computed - if(any(ext$dr <= 0)){ - ext$dr[ext$dr <= 0] <- 10e-3 - # Throw warning with number of converted values - count <- length(ext$dr[ext$dr <= 0]) - warning( - paste("Negative estimated density ratios for", count, "observation(s) converted to 10e-3 before applying logarithmic transformation"), - call. = FALSE) + # Converte negative values to tol + negdr <- ext$dr < tol + ext$dr[negdr] <- tol + + if(any(negdr)){ + warning( + paste("Negative estimated density ratios for", sum(negdr), "observation(s) converted to", tol, "before applying logarithmic transformation"), + call. = FALSE) } # Apply log transformation @@ -67,21 +67,21 @@ dr.histogram <- function(object, # Plot plot <- - ggplot(ext, aes(x = dr)) + - geom_histogram(aes(fill = sample), + ggplot2::ggplot(data, ggplot2::aes(x = ext$dr)) + + ggplot2::geom_histogram(ggplot2::aes(fill = ext$sample), alpha = .75, color = "black", binwidth = binwidth, bins = bins, - position = position_dodge2(preserve = "single", + position = ggplot2::position_dodge2(preserve = "single", padding = 0.2, reverse = TRUE)) + - scale_fill_viridis_d(option = "cividis", + ggplot2::scale_fill_viridis_d(option = "cividis", breaks = c("numerator", "denominator"), labels = c("Numerator", "Denominator")) + - theme_bw() + - labs( + ggplot2::theme_bw() + + ggplot2::labs( x = x_lab, y = "Count", title = "Distribution of density ratio estimates", @@ -100,9 +100,9 @@ dr.histogram <- function(object, #' #' @examples plot.ulsif <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, - bins = NULL) { + bins = NULL, tol = 10e-3) { dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, - bins = bins) + bins = bins, tol = tol) } @@ -116,9 +116,9 @@ plot.ulsif <- function(object, samples = "both", logscale = TRUE, binwidth = NUL #' #' @examples plot.kliep <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, - bins = NULL) { + bins = NULL, tol = 10e-3) { dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, - bins = bins) + bins = bins, tol = tol) } @@ -138,21 +138,23 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ y_max <- max(2, ext$dr) y_min <- min(-2, ext$dr) plot <- - ggplot(data, aes(x = .data[[var]], y = ext$dr)) + - geom_point(aes(col = ext$sample), + ggplot2::ggplot(data, ggplot2::aes(x = .data[[var]], y = ext$dr)) + + ggplot2::geom_point(ggplot2::aes(col = ext$sample), alpha = .6) + - theme_bw() + - labs(title = "Scatter plot of individual values and density ratio", + ggplot2::theme_bw() + + ggplot2::labs(title = "Scatter plot of individual values and density ratio", color = "Sample", y = y_lab) + - geom_hline(yintercept = 0, linetype = "dashed")+ - scale_color_viridis_d(option = "cividis", + ggplot2::scale_color_viridis_d(option = "cividis", breaks = c("numerator", "denominator"), labels = c("Numerator", "Denominator")) + - scale_y_continuous(limits = c(y_min, y_max)) + ggplot2::scale_y_continuous(limits = c(y_min, y_max)) if(sample.facet){ - plot <- plot + facet_wrap(~vars(ext$sample)) + plot <- plot + ggplot2::facet_wrap(~ext$sample) + + ggplot2::geom_hline(yintercept = ext$yintercept, linetype = "dashed") + } else { + plot <- plot + ggplot2::geom_hline(yintercept = ext$yintercept, linetype = "dashed") } return(plot) @@ -173,9 +175,13 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ #' @export #' #' @examples -plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, +plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TRUE, grid = FALSE, sample.facet = FALSE, - nrow = NULL, ...) { + nrow = NULL, tol = 10e-3, ...) { + + if(is.null(vars)){ + vars <- names(object$df_numerator) + } # Check object type check.object.type(object) @@ -198,24 +204,24 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, } + # Check if logscale is TRUE if (logscale) { - - if(any(ext$dr <= 0)){ - # Convert negative predicted density ratios to 10e-3, so log can be computed - count <- length(ext$dr[ext$dr <= 0]) - ext$dr[ext$dr <= 0] <- 10e-3 + negdr <- ext$dr < tol + ext$dr[negdr] <- tol + if(any(negdr)){ warning( - paste("Negative estimated density ratios for", count, "observation(s) converted to 10e-3 before applying logarithmic transformation"), - call. = FALSE) - } - + paste("Negative estimated density ratios for", sum(negdr), "observation(s) converted to", tol, "before applying logarithmic transformation"), + call. = FALSE) + } + # For the plots + ext$yintercept <- 0 + # Apply log transformation ext$dr <- log(ext$dr) - - # Assign correct y and legend labels - y_lab <- "Log(Density Ratio)" - - } else { + y_lab <- "Log (Density Ratio)" + } + else { y_lab <- "Density Ratio" + ext$yintercept <- 1 } @@ -224,39 +230,41 @@ plot_univariate <- function(object, vars, samples = "both", logscale = TRUE, plot <- lapply(vars, function(var) create_univariate_plot(data, ext, var, y_lab, sample.facet)) } else { + values <- data[, vars] |> unlist(use.names = FALSE) variable <- rep(vars, each = length(values)/length(vars)) dr <- rep(ext$dr, length(vars)) sample <- rep(ext$sample, length(vars)) data <- data.frame(values = values, variable = variable) - ext <- data.frame(dr = dr, sample = sample) + ext <- data.frame(dr = dr, sample = sample, yintercept = rep(ext$yintercept, length(vars))) # Maximum scale for y y_max <- max(1,ext$dr) y_min <- min(-1, ext$dr) - plot <- ggplot(data) + - geom_point(aes(x = values, y = ext$dr, col = ext$sample), + plot <- ggplot2::ggplot(data) + + ggplot2::geom_point(ggplot2::aes(x = values, y = ext$dr, col = ext$sample), alpha = .6) + - theme_bw() + - labs(title = "Scatter plot of individual values and density ratio", + ggplot2::theme_bw() + + ggplot2::geom_hline(ggplot2::aes(yintercept = ext$yintercept), linetype = "dashed") + + ggplot2::labs(title = "Scatter plot of individual values and density ratio", color = "Sample", y = "Density ratio") + - scale_color_viridis_d(option = "cividis", + ggplot2::scale_color_viridis_d(option = "cividis", breaks = c("numerator", "denominator"), labels = c("Numerator", "Denominator")) + - scale_y_continuous(limits = c(y_min, y_max)) + ggplot2::scale_y_continuous(limits = c(y_min, y_max)) if(sample.facet){ plot <- plot + - facet_grid(cols = vars(sample), + ggplot2::facet_grid(cols = vars(sample), rows = vars(variable), scales = "free_x") } else { plot <- plot + - facet_wrap(~variable, scales = "free_x", nrow = nrow) + ggplot2::facet_wrap(~variable, scales = "free_x", nrow = nrow) } @@ -281,24 +289,19 @@ create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ dr_min <- ifelse(logscale, min(-2, ext$dr), min(exp(-2), ext$dr)) plot <- - ggplot(data, mapping = aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + - geom_point(aes(colour = ext$dr, shape = show.sample), + ggplot2::ggplot(data, mapping = ggplot2::aes(x = .data[[vars[1]]], y = .data[[vars[2]]])) + + ggplot2::geom_point(ggplot2::aes(colour = ext$dr, shape = show.sample), alpha = 1, size = 2.0) + - # scale_colour_gradient2(low = "#00204DFF", - # high = "#7D0000", - # mid = "lightyellow", - # midpoint = 0, - # limits = c(dr_min, dr_max)) + - scale_colour_gradient2(low = "#00204DFF", - high = "#FFEA46FF", - mid = "#7C7B78FF", + ggplot2::scale_colour_gradient2(low = "#00204DFF", + high = "#7D0000", + mid = "lightyellow", midpoint = 0, limits = c(dr_min, dr_max)) + - theme_bw() + - labs(title = "Scatter plot, with density ratio mapped to colour", + ggplot2::theme_bw() + + ggplot2::labs(title = "Scatter plot, with density ratio mapped to colour", colour = "Log (Density ratio)") + - scale_shape_manual(values = c(21, 24)) + ggplot2::scale_shape_manual(values = c(21, 24)) return(plot) } @@ -316,11 +319,19 @@ create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ #' @export #' #' @examples -plot_bivariate <- function(object, vars, samples = "both", +plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", grid = FALSE, logscale = TRUE, show.sample = NULL, - ...) { + tol = 10e-3, ...) { - # Check object type + if(length(vars1) == 1 & grid == TRUE){ + grid <- FALSE + warning("A grid cannot be displayed for one plot. Defaulting to grid = FALSE") + } + + if(length(vars1) == 1 & is.null(vars2)){ + stop("For a bivariate plot, two variables are required as input. Please specify more variables in var1, or any variable in var2") + } + # Check object type check.object.type(object) # Create data object and estimate density ratio @@ -328,45 +339,49 @@ plot_bivariate <- function(object, vars, samples = "both", ext <- data.frame(dr = predict(object, data = data, ...), sample = c(rep("numerator", nrow(object$df_numerator)), rep("denominator", nrow(object$df_denominator)))) - # Check variable names - check.var.names(vars, data) # Determine if DR is shown in logscale (default) or not + # Check if logscale is TRUE if (logscale) { - - if(any(ext$dr <= 0)){ - # Convert negative predicted density ratios to 10e-3, so log can be computed - ext$dr[ext$dr <= 0] <- 10e-3 - count <- length(ext$dr[ext$dr <= 0]) + negdr <- ext$dr < tol + ext$dr[negdr] <- tol + if(any(negdr)){ warning( - paste("Negative estimated density ratios for", count, "observations converted to 10e-3 before applying logarithmic transformation"), + paste("Negative estimated density ratios for", sum(negdr), "observation(s) converted to", tol, "before applying logarithmic transformation"), call. = FALSE) } + # Apply log transformation ext$dr <- log(ext$dr) - - # Assign correct y and legend labels - colour_name <- "Log (Density ratio)" - + colour_name <- "Log (Density Ratio)" } else { colour_name <- "Density ratio" } - # Create a sample index variable (denominator or numerator) - data$sample <- rep(c("numerator", "denominator"), - c(nrow(object$df_numerator), nrow(object$df_denominator))) + # Select data + samples <- match.arg(samples, c("both", "numerator", "denominator")) + if(samples != "both"){ + data <- subset(data, ext$sample == samples) + ext <- subset(ext, ext$sample == samples) + } + - # Create a object selection variable (both, numerator, denominator) - obsselect <- match.arg(samples, c("both", "numerator", "denominator")) + if (is.null(vars2)) { + # Check variable names + check.var.names(vars1, data) + var_combinations <- expand.grid(vars1, vars1) + } else { + if(length(vars1) != length(vars2)){ + stop("The number of variables in vars1 and vars2 must be the same") + } + # Check variable names + check.var.names(vars1, data) + check.var.names(vars2, data) - # Filter data based on object selection - if (obsselect != "both") { - data <- filter(data, sample == obsselect) + var_combinations <- expand.grid(vars1, vars2) } - # Create a grid of variable combinations - var_combinations <- expand.grid(vars, vars) ## Remove duplicate combinations ## Start by sorting elements within each row @@ -374,7 +389,12 @@ plot_bivariate <- function(object, vars, samples = "both", var_combinations <- t(apply(var_combinations, 1, sort)) var_combinations <- unique(var_combinations) # retain unique rows only + if(length(vars1) != 1){ var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) + } else { + var_combinations <- as.data.frame(var_combinations) + } + names(var_combinations) <- c("Var1", "Var2") # Remove rows where both variables are the same @@ -390,70 +410,58 @@ plot_bivariate <- function(object, vars, samples = "both", } if (grid) { - browser() - # Give variable combinations in a format we can use later - - combinations <- sapply(var_combinations, \(vars) paste0(vars, collapse = "-")) - ext <- data.frame(data, ext$dr, ext$sample) + ext <- data.frame(data, dr = ext$dr, sample = ext$sample) datlist <- lapply(var_combinations, \(x) data.frame(values.x = ext[,x[1]], values.y = ext[,x[2]], xvar = rep(x[1], nrow(ext)), yvar = rep(x[2], nrow(ext)), - sample = ext[, "ext.sample"], - dr = ext[, "ext.dr"])) + sample = ext[, "sample"], + dr = ext[, "dr"])) + plot_data <- do.call(datlist, what = rbind) - plot_data2 <- do.call(datlist, what = rbind) + dr_max <- max(1, ext$dr) + dr_min <- min(-1, ext$dr) - plot_data <- - inner_join(ext, ext, by = c("dr", "sample")) %>% # Possible error in case of duplicate DR? - pivot_longer(cols = ends_with(".x"), names_to = "name.x", values_to = "value.x") %>% - pivot_longer(cols = ends_with(".y"), names_to = "name.y", values_to = "value.y") %>% - mutate(name.x = substr(name.x, 1 ,nchar(name.x)-2), - name.y = substr(name.y, 1 ,nchar(name.y)-2), - combination = paste0(name.x, "-", name.y)) %>% - filter(combination %in% combinations) - - dr_max <- max(1, plot_ext$dr) - dr_min <- min(-1, plot_ext$dr) - - plot <- - ggplot(plot_data, mapping = aes(x = value.x, y = value.y, - shape = if(show.sample) sample else NULL)) + - geom_point(aes(colour = dr)) + - facet_grid(rows = vars(name.y), cols = vars(name.x), scales = "free", + plot <- + ggplot2::ggplot(plot_data, mapping = ggplot2::aes(x = values.x, y = values.y, + shape = show.sample)) + + ggplot2::geom_point(aes(colour = dr), + size = 2.0) + + ggplot2::facet_grid(rows = vars(yvar), cols = vars(xvar), scales = "free", switch = "both") + - scale_colour_gradient2(low = "firebrick", - high = "steelblue", - mid = "#ffffbf", - limits = c(dr_min, dr_max), - ) + - scale_y_continuous(position = "left") + - scale_x_continuous(position = "bottom") + - theme_bw() + - theme(strip.placement = "outside") + - labs(title = "Scatter plots, with density ratio mapped to colour", + ggplot2::scale_colour_gradient2(low = "#00204DFF", + high = "#7D0000", + mid = "lightyellow", + midpoint = 0, + limits = c(dr_min, dr_max)) + + ggplot2::scale_y_continuous(position = "left") + + ggplot2::scale_x_continuous(position = "bottom") + + ggplot2::theme_bw() + + ggplot2::theme(strip.placement = "outside") + + ggplot2::labs(title = "Scatter plots, with density ratio mapped to colour", x = NULL, y = NULL, colour = colour_name, - shape = if(show.sample) "Sample" else NULL) + - scale_shape_manual(values = c(21, 24)) + shape = show.sample) + + ggplot2::scale_shape_manual(values = c(21, 24)) # Erase upper diagonal ## Create plot into a grob - grob <- ggplotGrob(plot) + grob <- ggplot2::ggplotGrob(plot) + ## Create name of empty panels in the upper diagonal - empty_panels <- expand.grid(seq(1:length(vars)), seq(1:length(vars))) %>% - filter(Var2 > Var1) %>% - mutate(panel = paste0("panel-", Var1, "-", Var2)) %>% - pull(panel) + empty_panels <- expand.grid(seq(1:length(vars1)), seq(1:length(vars1))) |> + subset(Var2 > Var1) + empty_panels$panel <- paste0("panel-", empty_panels$Var1, "-", empty_panels$Var2) + # Delete panels in upper diagonal, based in their index - idx <- which(grob$layout$name %in% empty_panels) + idx <- which(grob$layout$name %in% empty_panels$panel) for (i in idx) grob$grobs[[i]] <- grid::nullGrob() out <- grob - # class(out) <- c("bivariateplot", class(grob)) + grid::grid.newpage() grid::grid.draw(out) From 9e6257cd6fe4af77070103a280e95b020fac0c80 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Tue, 16 Apr 2024 14:38:36 +0200 Subject: [PATCH 39/42] add imports --- R/plot.R | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/R/plot.R b/R/plot.R index 64a0f0d..a99487f 100644 --- a/R/plot.R +++ b/R/plot.R @@ -18,6 +18,13 @@ #' @param ... Additional arguments passed on to `predict()`. #' #' @return A histogram of density ratio estimates. +#' @importFrom ggplot2 ggplot +#' @importFrom ggplot2 aes +#' @importFrom ggplot2 position_dodge2 +#' @importFrom ggplot2 geom_histogram +#' @importFrom ggplot2 scale_fill_viridis_d +#' @importFrom ggplot2 theme_bw +#' @importFrom ggplot2 labs #' #' #' @examples @@ -133,6 +140,17 @@ plot.kliep <- function(object, samples = "both", logscale = TRUE, binwidth = NUL #' #' @return #' +#' @importFrom ggplot2 ggplot +#' @importFrom ggplot2 aes +#' @importFrom ggplot2 geom_point +#' @importFrom ggplot2 theme_bw +#' @importFrom ggplot2 labs +#' @importFrom ggplot2 scale_color_viridis_d +#' @importFrom ggplot2 scale_y_continuous +#' @importFrom ggplot2 facet_wrap +#' @importFrom ggplot2 geom_hline +#' +#' create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ y_max <- max(2, ext$dr) @@ -174,6 +192,17 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ #' @return Scatter plot of density ratios and individual variables. #' @export #' +#' @importFrom ggplot2 ggplot +#' @importFrom ggplot2 aes +#' @importFrom ggplot2 geom_point +#' @importFrom ggplot2 geom_hline +#' @importFrom ggplot2 theme_bw +#' @importFrom ggplot2 labs +#' @importFrom ggplot2 scale_color_viridis_d +#' @importFrom ggplot2 scale_y_continuous +#' @importFrom ggplot2 facet_wrap +#' @importFrom ggplot2 facet_grid +#' #' @examples plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TRUE, grid = FALSE, sample.facet = FALSE, @@ -282,6 +311,15 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR #' @return Bivariate plot #' #' @examples +#' +#' @importFrom ggplot2 ggplot +#' @importFrom ggplot2 aes +#' @importFrom ggplot2 geom_point +#' @importFrom ggplot2 scale_colour_gradient2 +#' @importFrom ggplot2 scale_shape_manual +#' @importFrom ggplot2 theme_bw +#' @importFrom ggplot2 labs +#' create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ @@ -318,6 +356,22 @@ create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ #' @return Scatter plot of two variables, with density ratio mapped to the colour scale. #' @export #' +#' @importFrom ggplot2 ggplot +#' @importFrom ggplot2 aes +#' @importFrom ggplot2 geom_point +#' @importFrom ggplot2 facet_grid +#' @importFrom ggplot2 scale_colour_gradient2 +#' @importFrom ggplot2 scale_y_continuous +#' @importFrom ggplot2 scale_x_continuous +#' @importFrom ggplot2 theme_bw +#' @importFrom ggplot2 theme +#' @importFrom ggplot2 labs +#' @importFrom ggplot2 scale_shape_manual +#' @importFrom ggplot2 ggplotGrob +#' @importFrom grid grid.draw +#' @importFrom grid grid.newpage +#' @importFrom grid nullGrob +#' #' @examples plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", grid = FALSE, logscale = TRUE, show.sample = NULL, From 43310585f05e4388e91f5038fc0cb5ce81eef0e2 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 20 May 2024 17:16:57 +0200 Subject: [PATCH 40/42] update plots Solve final issues before CRAN submission. Concretely: - solve small typos/improve comments - creates function check.logscale() - adds plot.lhss and plot.spectral - minor changes to legiility of code - adds check.var.combinations function --- R/checks.R | 71 ++++++++++++++++-- R/plot.R | 206 +++++++++++++++++++++-------------------------------- 2 files changed, 147 insertions(+), 130 deletions(-) diff --git a/R/checks.R b/R/checks.R index 4e9a888..6b95f1e 100644 --- a/R/checks.R +++ b/R/checks.R @@ -336,14 +336,73 @@ check.var.names <- function(vars, data){ } } -check.overriden.names <- function(data){ - if("dr" %in% names(data) | "sample" %in% names(data)) { - stop("Variables in your dataframe cannot have name 'dr' or 'sample'. Please rename your variable(s)") +check.object.type <- function(object) { + if(all(c("ulsif", "kliep", "lhss", "spectral") != attr(object, "class"))) { + stop("Objects should be of class 'ulsif' or 'kliep'") } } -check.object.type <- function(object) { - if(all(c("ulsif", "kliep") != attr(object, "class"))) { - stop("Objects should be of class 'ulsif' or 'kliep'") +check.logscale <- function(ext, logscale, tol){ + if (logscale) { + # Convert values lower than tolerance to tol + negdr <- ext$dr < tol + ext$dr[negdr] <- tol + if (any(negdr)) { + warning( + paste( + "Negative estimated density ratios for", sum(negdr), + "observation(s) converted to",tol, + "before applying logarithmic transformation" + ), + call. = FALSE + ) + } + + # Apply log transformation + ext$dr <- log(ext$dr) + + # Set y axis intercept to 0 + ext$yintercept <- 0 + } else { + # Set y axis intercept to 1 + ext$yintercept <- 1 } + return(ext) +} + +check.var.combinations <- function(data, vars1, vars2) { + if (is.null(vars2)) { + # Check variable names + check.var.names(vars1, data) + var_combinations <- expand.grid(vars1, vars1) + } else { + if(length(vars1) != length(vars2)){ + stop("The number of variables in vars1 and vars2 must be the same") + } + # Check variable names + check.var.names(vars1, data) + check.var.names(vars2, data) + + var_combinations <- expand.grid(vars1, vars2) + } + + ## Remove duplicate combinations + ## Start by sorting elements within each row + ## This makes duplicate rows with different order of variables identical + var_combinations <- t(apply(var_combinations, 1, sort)) + var_combinations <- unique(var_combinations) # retain unique rows only + + if(length(vars1) != 1){ + var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) + } else { + var_combinations <- as.data.frame(var_combinations) + } + + names(var_combinations) <- c("Var1", "Var2") + + # Remove rows where both variables are the same + var_combinations <- var_combinations |> subset(Var1 != Var2) + var_combinations <- as.list(as.data.frame(t(var_combinations))) + + return(var_combinations) } diff --git a/R/plot.R b/R/plot.R index a99487f..c735193 100644 --- a/R/plot.R +++ b/R/plot.R @@ -7,8 +7,9 @@ #' #' @param object Density ratio object created with e.g., [kliep()], [ulsif()], #' or [naive()] -#' @param sample Character string indicating whether to plot the 'numerator', +#' @param samples Character string indicating whether to plot the 'numerator', #' 'denominator', or 'both' samples. Default is 'both'. +#' @param tol Numeric indicating the tolerance: values below this value will be set to the tolerance value, for legibility of the plots #' @param logscale Logical indicating whether to plot the density ratio #' estimates on a log scale. Default is TRUE. #' @param binwidth Numeric indicating the width of the bins, passed on to @@ -27,7 +28,6 @@ #' @importFrom ggplot2 labs #' #' -#' @examples dr.histogram <- function(object, samples = "both", logscale = TRUE, @@ -39,33 +39,23 @@ dr.histogram <- function(object, # Check object type check.object.type(object) - # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) ext <- data.frame(dr = predict(object, data = data, ...), sample = c(rep("numerator", nrow(object$df_numerator)), rep("denominator", nrow(object$df_denominator)))) - # Check if logscale is TRUE + # If logscale = TRUE, transform density ratio estimates + ext <- check.logscale(ext, logscale, tol) + + # Assign x-axis label if (logscale) { - # Converte negative values to tol - negdr <- ext$dr < tol - ext$dr[negdr] <- tol - - if(any(negdr)){ - warning( - paste("Negative estimated density ratios for", sum(negdr), "observation(s) converted to", tol, "before applying logarithmic transformation"), - call. = FALSE) - } - - # Apply log transformation - ext$dr <- log(ext$dr) x_lab <- "Log (Density Ratio)" } else { x_lab <- "Density Ratio" } - # Select data + # Filter correct subset samples <- match.arg(samples, c("both", "numerator", "denominator")) if(samples != "both"){ data <- subset(data, ext$sample == samples) @@ -102,10 +92,9 @@ dr.histogram <- function(object, #' @inheritParams dr.histogram #' @rdname dr.histogram #' -#' @return +#' @return A histogram of density ratio estimates. #' @export #' -#' @examples plot.ulsif <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, bins = NULL, tol = 10e-3) { dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, @@ -118,27 +107,51 @@ plot.ulsif <- function(object, samples = "both", logscale = TRUE, binwidth = NUL #' @rdname dr.histogram #' @param logscale #' -#' @return +#' @return A histogram of density ratio estimates. #' @export #' -#' @examples + plot.kliep <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, bins = NULL, tol = 10e-3) { dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, bins = bins, tol = tol) } +#' @inheritParams dr.histogram +#' @rdname dr.histogram +#' +#' @return A histogram of density ratio estimates. +#' @export +#' + +plot.spectral <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, + bins = NULL, tol = 10e-3) { + dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, + bins = bins, tol = tol) +} +#' @inheritParams dr.histogram +#' @rdname dr.histogram +#' +#' @return A histogram of density ratio estimates. +#' @export +#' +plot.lhss <- function(object, samples = "both", logscale = TRUE, binwidth = NULL, + bins = NULL, tol = 10e-3) { + dr.histogram(object, samples = samples, logscale = logscale, binwidth = binwidth, + bins = bins, tol = tol) +} #' Indivual univariate plot #' -#' Scatterplot of individual values and density ratio estimates. Used internally in [plot.univariate()] +#' Scatterplot of individual values and density ratio estimates. Used internally in [create_univariate_plot()] #' @param data Data frame with the individual values and density ratio estimates -#' @param var Name of the variable to be plotted -#' @param y_lab Name of the y-axis label ("Density Ratio" or "Log Density Ratio") +#' @param var Name of the variable to be plotted on the x-axis +#' @param y_lab Name of the y-axis label, typically ("Density Ratio" or "Log Density Ratio") +#' @param ext Data frame with the density ratio estimates and sample indicator #' @param sample.facet Logical indicating whether to facet the plot by sample. Default is TRUE. #' -#' @return +#' @return A scatterplot of variable values and density ratio estimates. #' #' @importFrom ggplot2 ggplot #' @importFrom ggplot2 aes @@ -155,6 +168,7 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ y_max <- max(2, ext$dr) y_min <- min(-2, ext$dr) + plot <- ggplot2::ggplot(data, ggplot2::aes(x = .data[[var]], y = ext$dr)) + ggplot2::geom_point(ggplot2::aes(col = ext$sample), @@ -169,12 +183,12 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ ggplot2::scale_y_continuous(limits = c(y_min, y_max)) if(sample.facet){ - plot <- plot + ggplot2::facet_wrap(~ext$sample) + - ggplot2::geom_hline(yintercept = ext$yintercept, linetype = "dashed") - } else { - plot <- plot + ggplot2::geom_hline(yintercept = ext$yintercept, linetype = "dashed") + plot <- plot + ggplot2::facet_wrap(~ext$sample) } + plot <- plot + + ggplot2::geom_hline(yintercept = ext$yintercept, linetype = "dashed") + return(plot) } @@ -203,7 +217,7 @@ create_univariate_plot <- function(data, ext, var, y_lab, sample.facet = TRUE){ #' @importFrom ggplot2 facet_wrap #' @importFrom ggplot2 facet_grid #' -#' @examples + plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TRUE, grid = FALSE, sample.facet = FALSE, nrow = NULL, tol = 10e-3, ...) { @@ -214,7 +228,7 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR # Check object type check.object.type(object) - # Create data object and estimate density ratio + # Create data object, and external object with density ratio and sample indicators data <- rbind(object$df_numerator, object$df_denominator) ext <- data.frame(dr = predict(object, data = data, ...), sample = c(rep("numerator", nrow(object$df_numerator)), @@ -222,10 +236,7 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR # Check variable names check.var.names(vars, data) - # Estimate density ratio - ext$dr <- predict(object, newdata = data) - - # Select data + # Filter appropriate subset samples <- match.arg(samples, c("both", "numerator", "denominator")) if(samples != "both"){ data <- subset(data, ext$sample == samples) @@ -233,27 +244,17 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR } - # Check if logscale is TRUE - if (logscale) { - negdr <- ext$dr < tol - ext$dr[negdr] <- tol - if(any(negdr)){ - warning( - paste("Negative estimated density ratios for", sum(negdr), "observation(s) converted to", tol, "before applying logarithmic transformation"), - call. = FALSE) - } - # For the plots - ext$yintercept <- 0 - # Apply log transformation - ext$dr <- log(ext$dr) + # If logscale is TRUE, transform density ratio estimates + ext <- check.logscale(ext, logscale, tol) + + # Set y axis label + if(logscale) { y_lab <- "Log (Density Ratio)" - } - else { + } else { y_lab <- "Density Ratio" - ext$yintercept <- 1 } - + # Plot either individual plots in a list, or a grid of individual plots if(!grid){ plot <- lapply(vars, function(var) create_univariate_plot(data, ext, var, y_lab, sample.facet)) @@ -264,18 +265,17 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR variable <- rep(vars, each = length(values)/length(vars)) dr <- rep(ext$dr, length(vars)) sample <- rep(ext$sample, length(vars)) - data <- data.frame(values = values, variable = variable) - ext <- data.frame(dr = dr, sample = sample, yintercept = rep(ext$yintercept, length(vars))) + ext <- data.frame(values = values, variable = variable, + dr = dr, sample = sample, yintercept = rep(ext$yintercept, length(vars))) # Maximum scale for y y_max <- max(1,ext$dr) y_min <- min(-1, ext$dr) - plot <- ggplot2::ggplot(data) + - ggplot2::geom_point(ggplot2::aes(x = values, y = ext$dr, col = ext$sample), + plot <- ggplot2::ggplot(ext) + + ggplot2::geom_point(ggplot2::aes(x = values, y = dr, col = sample), alpha = .6) + ggplot2::theme_bw() + - ggplot2::geom_hline(ggplot2::aes(yintercept = ext$yintercept), linetype = "dashed") + ggplot2::labs(title = "Scatter plot of individual values and density ratio", color = "Sample", y = "Density ratio") + @@ -287,16 +287,17 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR if(sample.facet){ plot <- plot + - ggplot2::facet_grid(cols = vars(sample), - rows = vars(variable), + ggplot2::facet_grid(rows = ggplot2::vars(sample), + cols = ggplot2::vars(variable), scales = "free_x") } else { plot <- plot + - ggplot2::facet_wrap(~variable, scales = "free_x", nrow = nrow) + ggplot2::facet_wrap(ggplot2::vars(variable), scales = "free_x", nrow = nrow) } - + plot <- plot + + ggplot2::geom_hline(yintercept = ext$yintercept, linetype = "dashed") } return(plot) @@ -305,12 +306,13 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR #' Bivariate plot #' -#' @inheritParams individual_uni_plot +#' @inheritParams create_univariate_plot #' @param show.sample Logical indicating whether to give different shapes to observations, depending on the sample they come from (numerator or denominator). Defaults to FALSE. +#' @param vars Character vector of variable names to be plotted. +#' @param logscale Logical indicating whether the density ratio should be plotted in log scale. Defaults to TRUE. #' #' @return Bivariate plot #' -#' @examples #' #' @importFrom ggplot2 ggplot #' @importFrom ggplot2 aes @@ -348,10 +350,12 @@ create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ #' #' Plots a scatterplot of two variables, with densityratio mapped to the colour scale. #' -#' @inheritParams plot.univariate +#' @inheritParams plot_univariate #' @param logscale Logical indicating whether to plot the density ratio #' estimates on a log scale. Default is TRUE. -#' +#' @param show.sample Logical indicating whether to give different shapes to observations, depending on the sample they come from (numerator or denominator). Defaults to FALSE. +#' @param vars1 Character vector of variable names to be plotted on X axis +#' @param vars2 Character vector of variable names to be plotted on Y axis #' #' @return Scatter plot of two variables, with density ratio mapped to the colour scale. #' @export @@ -372,7 +376,6 @@ create_bivariate_plot <- function(data, ext, vars, logscale, show.sample){ #' @importFrom grid grid.newpage #' @importFrom grid nullGrob #' -#' @examples plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", grid = FALSE, logscale = TRUE, show.sample = NULL, tol = 10e-3, ...) { @@ -394,22 +397,14 @@ plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", sample = c(rep("numerator", nrow(object$df_numerator)), rep("denominator", nrow(object$df_denominator)))) - # Determine if DR is shown in logscale (default) or not - # Check if logscale is TRUE - if (logscale) { - negdr <- ext$dr < tol - ext$dr[negdr] <- tol - if(any(negdr)){ - warning( - paste("Negative estimated density ratios for", sum(negdr), "observation(s) converted to", tol, "before applying logarithmic transformation"), - call. = FALSE) - } - - # Apply log transformation - ext$dr <- log(ext$dr) - colour_name <- "Log (Density Ratio)" + # Check if logscale is TRUE, then change ext + ext <- check.logscale(ext, logscale, tol) + + # Assign correct labels depending on logscale + if(logscale){ + colour_label <- "Log (Density Ratio)" } else { - colour_name <- "Density ratio" + colour_label <- "Density Ratio" } @@ -420,50 +415,15 @@ plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", ext <- subset(ext, ext$sample == samples) } - - if (is.null(vars2)) { - # Check variable names - check.var.names(vars1, data) - var_combinations <- expand.grid(vars1, vars1) - } else { - if(length(vars1) != length(vars2)){ - stop("The number of variables in vars1 and vars2 must be the same") - } - # Check variable names - check.var.names(vars1, data) - check.var.names(vars2, data) - - var_combinations <- expand.grid(vars1, vars2) - } - - - ## Remove duplicate combinations - ## Start by sorting elements within each row - ## This makes duplicate rows with different order of variables identical - var_combinations <- t(apply(var_combinations, 1, sort)) - var_combinations <- unique(var_combinations) # retain unique rows only - - if(length(vars1) != 1){ - var_combinations <- as.data.frame(apply(var_combinations, 2, as.character)) - } else { - var_combinations <- as.data.frame(var_combinations) - } - - names(var_combinations) <- c("Var1", "Var2") - - # Remove rows where both variables are the same - var_combinations <- var_combinations |> subset(Var1 != Var2) - var_combinations <- as.list(as.data.frame(t(var_combinations))) + # Create variable combinations + var_combinations <- check.var.combinations(data, vars1, vars2) if(!grid){ - plot <- lapply(var_combinations, function(vars) create_bivariate_plot(data, ext, vars, logscale, show.sample)) - return(plot) - } - if (grid) { + } else { ext <- data.frame(data, dr = ext$dr, sample = ext$sample) datlist <- lapply(var_combinations, \(x) data.frame(values.x = ext[,x[1]], @@ -481,9 +441,9 @@ plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", plot <- ggplot2::ggplot(plot_data, mapping = ggplot2::aes(x = values.x, y = values.y, shape = show.sample)) + - ggplot2::geom_point(aes(colour = dr), + ggplot2::geom_point(ggplot2::aes(colour = dr), size = 2.0) + - ggplot2::facet_grid(rows = vars(yvar), cols = vars(xvar), scales = "free", + ggplot2::facet_grid(rows = ggplot2::vars(yvar), cols = ggplot2::vars(xvar), scales = "free", switch = "both") + ggplot2::scale_colour_gradient2(low = "#00204DFF", high = "#7D0000", @@ -497,7 +457,7 @@ plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", ggplot2::labs(title = "Scatter plots, with density ratio mapped to colour", x = NULL, y = NULL, - colour = colour_name, + colour = colour_label, shape = show.sample) + ggplot2::scale_shape_manual(values = c(21, 24)) @@ -516,10 +476,8 @@ plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", out <- grob - grid::grid.newpage() grid::grid.draw(out) - } } From 1b56697ae473c9b66f23ffcfb525c94dbfb9a38f Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Mon, 20 May 2024 17:17:11 +0200 Subject: [PATCH 41/42] update documentandion and gitignore --- .gitignore | 2 +- DESCRIPTION | 3 +- NAMESPACE | 26 ++++++++- ...l_biv_plot.Rd => create_bivariate_plot.Rd} | 12 +++- ..._uni_plot.Rd => create_univariate_plot.Rd} | 17 ++++-- man/dr.histogram.Rd | 55 +++++++++++++++++-- man/plot_bivariate.Rd | 25 +++++++-- ...{plot.univariate.Rd => plot_univariate.Rd} | 23 +++++--- man/print.Rd | 7 --- 9 files changed, 132 insertions(+), 38 deletions(-) rename man/{individual_biv_plot.Rd => create_bivariate_plot.Rd} (54%) rename man/{individual_uni_plot.Rd => create_univariate_plot.Rd} (50%) rename man/{plot.univariate.Rd => plot_univariate.Rd} (59%) diff --git a/.gitignore b/.gitignore index 0d206b9..0fc4bfa 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ .Rhistory .RData .Ruserdata -\playground + diff --git a/DESCRIPTION b/DESCRIPTION index 3df3f93..e51964d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,7 +28,8 @@ LazyData: true Imports: quadprog, Rcpp, - pbapply + pbapply, + ggplot2 LinkingTo: Rcpp, RcppArmadillo, diff --git a/NAMESPACE b/NAMESPACE index ec4837b..ad4f597 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,14 @@ # Generated by roxygen2: do not edit by hand S3method(plot,kliep) +S3method(plot,lhss) +S3method(plot,spectral) S3method(plot,ulsif) -S3method(plot,univariate) S3method(predict,kliep) S3method(predict,lhss) S3method(predict,naivedensityratio) S3method(predict,naivesubspacedensityratio) S3method(predict,ulsif) -S3method(print,bivariate.plot) S3method(print,kliep) S3method(print,lhss) S3method(print,naivedensityratio) @@ -31,8 +31,30 @@ export(lhss) export(naive) export(naivesubspace) export(plot_bivariate) +export(plot_univariate) export(ulsif) importFrom(Rcpp,sourceCpp) +importFrom(ggplot2,aes) +importFrom(ggplot2,facet_grid) +importFrom(ggplot2,facet_wrap) +importFrom(ggplot2,geom_histogram) +importFrom(ggplot2,geom_hline) +importFrom(ggplot2,geom_point) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,ggplotGrob) +importFrom(ggplot2,labs) +importFrom(ggplot2,position_dodge2) +importFrom(ggplot2,scale_color_viridis_d) +importFrom(ggplot2,scale_colour_gradient2) +importFrom(ggplot2,scale_fill_viridis_d) +importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_x_continuous) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,theme) +importFrom(ggplot2,theme_bw) +importFrom(grid,grid.draw) +importFrom(grid,grid.newpage) +importFrom(grid,nullGrob) importFrom(parallel,detectCores) importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) diff --git a/man/individual_biv_plot.Rd b/man/create_bivariate_plot.Rd similarity index 54% rename from man/individual_biv_plot.Rd rename to man/create_bivariate_plot.Rd index 6ef3e42..9e1bf6b 100644 --- a/man/individual_biv_plot.Rd +++ b/man/create_bivariate_plot.Rd @@ -1,14 +1,20 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R -\name{individual_biv_plot} -\alias{individual_biv_plot} +\name{create_bivariate_plot} +\alias{create_bivariate_plot} \title{Bivariate plot} \usage{ -individual_biv_plot(data, vars, logscale, show.sample) +create_bivariate_plot(data, ext, vars, logscale, show.sample) } \arguments{ \item{data}{Data frame with the individual values and density ratio estimates} +\item{ext}{Data frame with the density ratio estimates and sample indicator} + +\item{vars}{Character vector of variable names to be plotted.} + +\item{logscale}{Logical indicating whether the density ratio should be plotted in log scale. Defaults to TRUE.} + \item{show.sample}{Logical indicating whether to give different shapes to observations, depending on the sample they come from (numerator or denominator). Defaults to FALSE.} } \value{ diff --git a/man/individual_uni_plot.Rd b/man/create_univariate_plot.Rd similarity index 50% rename from man/individual_uni_plot.Rd rename to man/create_univariate_plot.Rd index a6bcb18..5c080b6 100644 --- a/man/individual_uni_plot.Rd +++ b/man/create_univariate_plot.Rd @@ -1,20 +1,25 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R -\name{individual_uni_plot} -\alias{individual_uni_plot} +\name{create_univariate_plot} +\alias{create_univariate_plot} \title{Indivual univariate plot} \usage{ -individual_uni_plot(data, var, y_lab, sample.facet = TRUE) +create_univariate_plot(data, ext, var, y_lab, sample.facet = TRUE) } \arguments{ \item{data}{Data frame with the individual values and density ratio estimates} -\item{var}{Name of the variable to be plotted} +\item{ext}{Data frame with the density ratio estimates and sample indicator} -\item{y_lab}{Name of the y-axis label ("Density Ratio" or "Log Density Ratio")} +\item{var}{Name of the variable to be plotted on the x-xis} + +\item{y_lab}{Name of the y-axis label, typically ("Density Ratio" or "Log Density Ratio")} \item{sample.facet}{Logical indicating whether to facet the plot by sample. Default is TRUE.} } +\value{ +A scatterplot of variable values and density ratio estimates. +} \description{ -Scatterplot of individual values and density ratio estimates. Used internally in \code{\link[=plot.univariate]{plot.univariate()}} +Scatterplot of individual values and density ratio estimates. Used internally in \code{\link[=create_univariate_plot]{create_univariate_plot()}} } diff --git a/man/dr.histogram.Rd b/man/dr.histogram.Rd index ae662d3..d5a926e 100644 --- a/man/dr.histogram.Rd +++ b/man/dr.histogram.Rd @@ -4,6 +4,8 @@ \alias{dr.histogram} \alias{plot.ulsif} \alias{plot.kliep} +\alias{plot.spectral} +\alias{plot.lhss} \title{A histogram of density ratio estimates} \usage{ dr.histogram( @@ -12,17 +14,53 @@ dr.histogram( logscale = TRUE, binwidth = NULL, bins = NULL, + tol = 0.01, ... ) -\method{plot}{ulsif}(object, samples = "both", logscale = FALSE, binwidth = NULL, bins = NULL) +\method{plot}{ulsif}( + object, + samples = "both", + logscale = TRUE, + binwidth = NULL, + bins = NULL, + tol = 0.01 +) + +\method{plot}{kliep}( + object, + samples = "both", + logscale = TRUE, + binwidth = NULL, + bins = NULL, + tol = 0.01 +) + +\method{plot}{spectral}( + object, + samples = "both", + logscale = TRUE, + binwidth = NULL, + bins = NULL, + tol = 0.01 +) -\method{plot}{kliep}(object, samples = "both", logscale = FALSE, binwidth = NULL, bins = NULL) +\method{plot}{lhss}( + object, + samples = "both", + logscale = TRUE, + binwidth = NULL, + bins = NULL, + tol = 0.01 +) } \arguments{ \item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, or \code{\link[=naive]{naive()}}} +\item{samples}{Character string indicating whether to plot the 'numerator', +'denominator', or 'both' samples. Default is 'both'.} + \item{logscale}{} \item{binwidth}{Numeric indicating the width of the bins, passed on to @@ -31,12 +69,19 @@ or \code{\link[=naive]{naive()}}} \item{bins}{Numeric indicating the number of bins. Overriden by binwidth, and passed on to \code{ggplot2}.} -\item{...}{Additional arguments passed on to \code{predict()}.} +\item{tol}{Numeric indicating the tolerance: values below this value will be set to the tolerance value, for legibility of the plots} -\item{sample}{Character string indicating whether to plot the 'numerator', -'denominator', or 'both' samples. Default is 'both'.} +\item{...}{Additional arguments passed on to \code{predict()}.} } \value{ +A histogram of density ratio estimates. + +A histogram of density ratio estimates. + +A histogram of density ratio estimates. + +A histogram of density ratio estimates. + A histogram of density ratio estimates. } \description{ diff --git a/man/plot_bivariate.Rd b/man/plot_bivariate.Rd index 0a45ca8..758802c 100644 --- a/man/plot_bivariate.Rd +++ b/man/plot_bivariate.Rd @@ -6,24 +6,37 @@ \usage{ plot_bivariate( object, - vars, + vars1, + vars2 = NULL, samples = "both", - output = "assembled", + grid = FALSE, logscale = TRUE, - show.sample = FALSE + show.sample = NULL, + tol = 0.01, + ... ) } \arguments{ \item{object}{Density ratio object created with e.g., \code{\link[=kliep]{kliep()}}, \code{\link[=ulsif]{ulsif()}}, or \code{\link[=naive]{naive()}}} -\item{vars}{Character vector of variable names to be plotted.} +\item{vars1}{Character vector of variable names to be plotted on X axis} -\item{output}{Character indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual".} +\item{vars2}{Character vector of variable names to be plotted on Y axis} -\item{logscale}{} +\item{samples}{Character string indicating whether to plot the 'numerator', +'denominator', or 'both' samples. Default is 'both'.} + +\item{grid}{Logical indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual".} + +\item{logscale}{Logical indicating whether to plot the density ratio +estimates on a log scale. Default is TRUE.} \item{show.sample}{Logical indicating whether to give different shapes to observations, depending on the sample they come from (numerator or denominator). Defaults to FALSE.} + +\item{tol}{Numeric indicating the tolerance: values below this value will be set to the tolerance value, for legibility of the plots} + +\item{...}{Additional arguments passed to the predict() function.} } \value{ Scatter plot of two variables, with density ratio mapped to the colour scale. diff --git a/man/plot.univariate.Rd b/man/plot_univariate.Rd similarity index 59% rename from man/plot.univariate.Rd rename to man/plot_univariate.Rd index a0be7e7..fde76b4 100644 --- a/man/plot.univariate.Rd +++ b/man/plot_univariate.Rd @@ -1,17 +1,19 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R -\name{plot.univariate} -\alias{plot.univariate} +\name{plot_univariate} +\alias{plot_univariate} \title{Scatter plot of density ratios and individual variables} \usage{ -\method{plot}{univariate}( +plot_univariate( object, - vars, + vars = NULL, samples = "both", logscale = TRUE, - output = "individual", + grid = FALSE, sample.facet = FALSE, - nrow = NULL + nrow = NULL, + tol = 0.01, + ... ) } \arguments{ @@ -20,13 +22,20 @@ or \code{\link[=naive]{naive()}}} \item{vars}{Character vector of variable names to be plotted.} +\item{samples}{Character string indicating whether to plot the 'numerator', +'denominator', or 'both' samples. Default is 'both'.} + \item{logscale}{} -\item{output}{Character indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual".} +\item{grid}{Logical indicating whether output should be a list of individual plots ("individual"), or one facetted plot with all variables ("assembled"). Defaults to "individual".} \item{sample.facet}{Logical indicating whether to facet the plot by sample, i.e, showing plots separate for each sample, and side to side. Defaults to FALSE.} \item{nrow}{Integer indicating the number of rows in the assembled plot. If NULL, the number of rows is automatically calculated.} + +\item{tol}{Numeric indicating the tolerance: values below this value will be set to the tolerance value, for legibility of the plots} + +\item{...}{Additional arguments passed to the predict() function.} } \value{ Scatter plot of density ratios and individual variables. diff --git a/man/print.Rd b/man/print.Rd index 47de6dd..0ae7ec5 100644 --- a/man/print.Rd +++ b/man/print.Rd @@ -11,7 +11,6 @@ \alias{print.naivesubspacedensityratio} \alias{print.summary.naivedensityratio} \alias{print.summary.naivesubspacedensityratio} -\alias{print.bivariateplot} \title{Print a \code{ulsif} object} \usage{ \method{print}{ulsif}(x, digits = max(3L, getOption("digits") - 3L), ...) @@ -33,8 +32,6 @@ \method{print}{summary.naivedensityratio}(x, digits = max(3L, getOption("digits") - 3L), ...) \method{print}{summary.naivesubspacedensityratio}(x, digits = max(3L, getOption("digits") - 3L), ...) - -\method{print}{bivariate.plot}(x, ...) } \arguments{ \item{x}{Object of class \code{ulsif}, \code{summary.ulsif}, \code{kliep} @@ -63,8 +60,6 @@ or \code{summary.kliep}.} \code{NULL} -\code{NULL} - \code{NULL} } \description{ @@ -87,6 +82,4 @@ Print a \code{naivesubspacedensityratio} object Print a \code{summary.naivedensityratio} object Print a \code{summary.naivesubspacedensityratio} object - -Print a \code{bivariate.plot} object } From 731382f7867caa7a7ca9cd2c4b652042720360e6 Mon Sep 17 00:00:00 2001 From: CarlosPoses Date: Tue, 28 May 2024 15:55:21 +0200 Subject: [PATCH 42/42] solve bug solve bug in predict function (newdata != data) --- R/plot.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/plot.R b/R/plot.R index c735193..f33b672 100644 --- a/R/plot.R +++ b/R/plot.R @@ -41,7 +41,7 @@ dr.histogram <- function(object, # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) - ext <- data.frame(dr = predict(object, data = data, ...), + ext <- data.frame(dr = predict(object, newdata = data, ...), sample = c(rep("numerator", nrow(object$df_numerator)), rep("denominator", nrow(object$df_denominator)))) @@ -230,7 +230,7 @@ plot_univariate <- function(object, vars = NULL, samples = "both", logscale = TR # Create data object, and external object with density ratio and sample indicators data <- rbind(object$df_numerator, object$df_denominator) - ext <- data.frame(dr = predict(object, data = data, ...), + ext <- data.frame(dr = predict(object, newdata = data, ...), sample = c(rep("numerator", nrow(object$df_numerator)), rep("denominator", nrow(object$df_denominator)))) # Check variable names @@ -393,7 +393,7 @@ plot_bivariate <- function(object, vars1, vars2 = NULL, samples = "both", # Create data object and estimate density ratio data <- rbind(object$df_numerator, object$df_denominator) - ext <- data.frame(dr = predict(object, data = data, ...), + ext <- data.frame(dr = predict(object, newdata = data, ...), sample = c(rep("numerator", nrow(object$df_numerator)), rep("denominator", nrow(object$df_denominator))))