diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf..931662b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,3 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^README\.Rmd$ diff --git a/.gitignore b/.gitignore index 5b6a065..b7a7bc7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .Rhistory .RData .Ruserdata +tests/testthat/Rplots.pdf diff --git a/DESCRIPTION b/DESCRIPTION index 9e50a62..ba595a7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,15 @@ LazyData: true Depends: R (>= 3.2) Imports: - ggplot2 + cli, + ggplot2, + grid, + rlang Suggests: viridis, - dplyr + dplyr, + roxygen2 (>= 7.2.3), + testthat (>= 3.0.0) +RoxygenNote: 7.2.3 +Roxygen: list(markdown = TRUE) +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index e28d319..007cdc8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,10 @@ -export(geom_pointdensity, stat_pointdensity) +# Generated by roxygen2: do not edit by hand + +S3method(makeContext,check_aspect_grob) +export(StatPointdensity) +export(geom_pointdensity) +export(stat_pointdensity) import(ggplot2) -useDynLib(ggpointdensity, count_neighbors_, .registration = TRUE) +import(rlang) +importFrom(grid,makeContext) +useDynLib(ggpointdensity, count_neighbors_, .registration=TRUE) diff --git a/R/geom_pointdensity.R b/R/geom_pointdensity.R index 4a0ae65..4b59f1f 100644 --- a/R/geom_pointdensity.R +++ b/R/geom_pointdensity.R @@ -13,27 +13,67 @@ count_neighbors_r <- function(x, y, r2, xy) { }) } + +#' Wraps the user supplied Geom (typically GeomPoint) to add class "check_aspect_grob" and information about the aspect ratio assumed under which the densities were calculated to the grobs it draws +#' The check is injected by providing an S3 instance to the makeContext generic that is called by grid for each grob at render time (where the actual plot aspect ratio is finally known) +#' @import rlang +addCheckToGeom <- function(orig_geom, expected_aspect_ratio = 1) { + if (is.null(orig_geom)) + cli::cli_abort("Can't create layer without a geom.", call = rlang::caller_env()) + OrigGeom <- ggplot2:::check_subclass(orig_geom, "Geom", env = parent.frame(n = 2), call = parent.frame(n = 2)) + expected_aspect_ratio <- expected_aspect_ratio %||% 1 + GeomWithCheck <- + ggproto( + paste0(class(OrigGeom)[1], "_with_ggpointdensity_checks"), + OrigGeom, + draw_layer = function(self, data, params, layout, coord) { + grobs <- OrigGeom$draw_layer(data, params, layout, coord) + is_using_coord_fixed <- inherits(layout$coord, "CoordFixed") + if(is_using_coord_fixed) return(grobs) # don't add check if actual aspect ratio is known ahead of time + check_added_once <- FALSE + grobs <- lapply(grobs, function(grob) { + if (inherits(grob, "zeroGrob")) { # don't check if nothing is drawn + return(grob) + } + if(!check_added_once) { # add check to at most one grob + class(grob) <- c("check_aspect_grob", class(grob)) + grob["expected_aspect_ratio"] <- list(expected_aspect_ratio) + check_added_once <- TRUE + } + return(grob) + }) + grobs + } + ) + return(GeomWithCheck) +} + +#' @rdname geom_pointdensity +#' @export stat_pointdensity <- function(mapping = NULL, data = NULL, geom = "point", position = "identity", ..., adjust = 1, + aspect.ratio = ggplot2::theme_get()$aspect.ratio, na.rm = FALSE, method = "auto", method.args = list(), show.legend = NA, inherit.aes = TRUE) { + GeomWithCheck <- addCheckToGeom(geom, expected_aspect_ratio = aspect.ratio) layer( data = data, mapping = mapping, stat = StatPointdensity, - geom = geom, + geom = GeomWithCheck, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list( adjust = adjust, + aspect.ratio = aspect.ratio, na.rm = na.rm, method = method, method.args = method.args, @@ -42,140 +82,319 @@ stat_pointdensity <- function(mapping = NULL, ) } +#' @export +#' @noRd +#' @importFrom grid makeContext +#' @method makeContext check_aspect_grob +makeContext.check_aspect_grob <- function(x) { + # Grab viewport information + vp <- if (is.null(x$vp)){ + grid::viewport() + } else{ + x$vp + } + width <- grid::convertWidth(unit(1, "npc"), "inch", valueOnly = TRUE) + height <- grid::convertHeight(unit(1, "npc"), "inch", valueOnly = TRUE) + actual_aspect_ratio <- height/width + expected_aspect_ratio <- x$expected_aspect_ratio + + if (getOption("ggpointdensity.verbose", default = FALSE)) cli::cli_inform(c( + "Actual aspect ratio: {actual_aspect_ratio}.", + "Expected aspect ratio: {expected_aspect_ratio}." + )) + + if(!isTRUE(all.equal(expected_aspect_ratio, actual_aspect_ratio)) ) cli::cli_warn(c( + "Actual plot aspect ratio does not match {.arg aspect.ratio} of {.code StatPointdensity}", + "!" = "The shown densities are be a bit off.", + "*" = "Actual plot aspect ratio is {actual_aspect_ratio}.", + "*" = "{.arg aspect.ratio} used by {.code StatPointdensity} is {expected_aspect_ratio}.", + ">" = "Consider using one of the three options below to fix this:", + "*" = "Add {.code theme(aspect.ratio = {expected_aspect_ratio}) +}. (This will resize your plot independently of the data).", + "*" = "Add {.code coord_fixed(ratio = {expected_aspect_ratio}) +} and set {.code aspect.ratio=NULL}. (This will resize your plot depending on the data)", + "*" = "Add {.code aspect.ratio={actual_aspect_ratio}} to {.fun stat_pointdensity}/{.fun geom_pointdensity}. (This will correct the density calculation to match your plot's current size)" + ), class = "actual_aspect_ratio_does_not_match_expectation") + NextMethod() +} + + +#' ggproto class, see [ggplot2::Stat()] +#' @format NULL +#' @usage NULL +#' @export StatPointdensity <- ggproto("StatPointdensity", Stat, - default_aes = aes(color = stat(density)), - required_aes = c("x", "y"), - - compute_layer = function(self, data, params, layout) { - # This function mostly copied from ggplot2's Stat - ggplot2:::check_required_aesthetics( - self$required_aes, - c(names(data), names(params)), - ggplot2:::snake_class(self) - ) - - # Make sure required_aes consists of the used set of aesthetics in case of - # "|" notation in self$required_aes - required_aes <- intersect( - names(data), - unlist(strsplit(self$required_aes, "|", fixed = TRUE)) - ) - - data <- ggplot2:::remove_missing(data, params$na.rm, - c(required_aes, self$non_missing_aes), - ggplot2:::snake_class(self), - finite = FALSE # Note that in ggplot2's Stat this is TRUE - ) - - # Trim off extra parameters - params <- params[intersect(names(params), self$parameters())] - - args <- c(list(data = quote(data), scales = quote(scales)), params) - ggplot2:::dapply(data, "PANEL", function(data) { - scales <- layout$get_scales(data$PANEL[1]) - tryCatch(do.call(self$compute_panel, args), error = function(e) { - warning(glue::glue("Computation failed in `{ggplot2:::snake_class(self)}()`:\n{e$message}")) - ggplot2:::new_data_frame() - }) - }) - }, - - setup_params = function(data, params) { - if (identical(params$method, "auto")) { - # Use default nn correction for small datasets, kde2d for - # larger. Based on size of the _largest_ group. - max_group <- max(table(interaction(data$group, data$PANEL, drop = TRUE))) - if (max_group > 20000) { - message(paste0("geom_pointdensity using method='kde2d' ", - "due to large number of points (>20k)")) - params$method <- "kde2d" - } else { - params$method <- "default" - } - } - - params - }, - - compute_group = function(data, scales, adjust = 1, method = "auto", - method.args = list()) { - - if (identical(method, "default")) { - - # find an appropriate bandwidth (radius), pretty ad-hoc: - xrange <- diff(scales$x$get_limits()) * adjust - yrange <- diff(scales$y$get_limits()) * adjust - r2 <- (xrange + yrange) / 70 - - # since x and y may be on different scales, we need a - # factor to weight x and y distances accordingly: - xy <- xrange / yrange - - # counting the number of neighbors around each point, - # this will be used to color the points - data$density <- count_neighbors( - data$x, data$y, r2 = r2, xy = xy) - - - } else if (identical(method, "kde2d")) { - - finites <- is.finite(data$x) & is.finite(data$y) - ddata <- data[finites,] - base.args <- list( - x = ddata$x, - y = ddata$y, - lims = c(scales$x$dimension(), scales$y$dimension())) - if (!is.element("n", names(method.args))) { - method.args["n"] <- 100 - } - if (!is.element("h", names(method.args))) { - h <- c(MASS::bandwidth.nrd(ddata$x), MASS::bandwidth.nrd(ddata$y)) - method.args$h <- h * adjust - } - - dens <- do.call(MASS::kde2d, c(base.args, method.args)) - # credits to Kamil Slowikowski: - ix <- findInterval(data$x, dens$x) - iy <- findInterval(data$y, dens$y) - ii <- cbind(ix, iy) - data$density[finites] <- dens$z[ii] - data$density[!finites] <- min(dens$z) - } else { - - if (is.character(method)) { - method <- match.fun(method) - } - data$density <- do.call(method, c(method.args)) - - } - - - data$ndensity <- data$density/max(data$density) - - data - } + default_aes = aes(color = after_stat(density)), + required_aes = c("x", "y"), + + extra_params = c("aspect.ratio", Stat$extra_params), + + compute_layer = function(self, data, params, layout) { + # This function mostly copied from ggplot2's Stat + ggplot2:::check_required_aesthetics( + self$required_aes, + c(names(data), names(params)), + ggplot2:::snake_class(self) + ) + + # Make sure required_aes consists of the used set of aesthetics in case of + # "|" notation in self$required_aes + required_aes <- intersect( + names(data), + unlist(strsplit(self$required_aes, "|", fixed = TRUE)) + ) + + data <- ggplot2:::remove_missing(data, params$na.rm, + c(required_aes, self$non_missing_aes), + ggplot2:::snake_class(self), + finite = FALSE # Note that in ggplot2's Stat this is TRUE + ) + + # Trim off extra parameters + params <- params[intersect(names(params), self$parameters())] + + args <- c(list(data = quote(data), scales = quote(scales)), params) + ggplot2:::dapply(data, "PANEL", function(data) { + scales <- layout$get_scales(data$PANEL[1]) + rlang::try_fetch( + rlang::inject(self$compute_panel(data = data, scales = scales, !!!params, coord = layout$coord)), + error = function(cnd) { + cli::cli_warn("Computation failed in {.fn {ggplot2:::snake_class(self)}}", parent = cnd) + ggplot2:::data_frame0() + } + ) + }) + }, + + setup_params = function(data, params) { + if (identical(params$method, "auto")) { + params$method <- "kde2d" + } + + params + }, + + compute_group = function(data, scales, adjust = 1, method = "auto", + method.args = list(), ..., aspect.ratio = NULL, coord) { + scale_views <- coord$setup_panel_params(scales$x, scales$y) + dx <- diff(xrange <- scale_views$x.range) + dy <- diff(yrange <- scale_views$y.range) + + ratio <- dy/dx + + is_using_aspect.ratio <- !is.null(aspect.ratio) + is_using_coord_fixed <- inherits(coord, "CoordFixed") + + if (is_using_aspect.ratio && is_using_coord_fixed) cli::cli_abort(c( + "Only one of {.arg aspect.ratio} and {.fn coord_fixed} must be used", + ">" = "either set {.code aspect.ratio=NULL} (the default) or remove {.fn coord_fixed}/{.fn coord_equal}." + )) + if (is_using_aspect.ratio) { + ratio <- dy/dx/aspect.ratio + } + if (is_using_coord_fixed) { + ratio <- 1/coord$ratio + } + + if (identical(method, "default")) { + # find an appropriate bandwidth (radius), pretty ad-hoc: + r2 <- (dx*sqrt(ratio) + dy/sqrt(ratio)) / 70 * adjust + + # find an appropriate bandwidth (radius), pretty ad-hoc: xrange <- diff(scales$x$get_limits()) * adjust yrange <- diff(scales$y$get_limits()) * adjust r2 <- (xrange + yrange) / 70 # since x and y may be on different scales, we need a # factor to weight x and y distances accordingly: xy <- xrange / yrange + data$density <- count_neighbors(data$x, data$y, r2 = r2, xy = 1/ratio) + + } else if (identical(method, "kde2d")) { + + finites <- is.finite(data$x) & is.finite(data$y) + ddata <- data[finites,] + base.args <- list( + x = ddata$x, + y = ddata$y, + lims = c(scales$x$dimension(), scales$y$dimension())) + if (!is.element("n", names(method.args))) { + method.args["n"] <- 100 + } + if (!is.element("h", names(method.args))) { + bandwidth_std <- c(x=MASS::bandwidth.nrd(ddata$x), y=MASS::bandwidth.nrd(ddata$y)) + + #alternative bandwidth calculation based on plot size that tries to match above + n <- nrow(ddata)+1 # workaround for single-row data set + bandwidth_limits <- 4 * 1.06 * c(dx, dy) / (2*qnorm(1/n/2, lower.tail = FALSE)) * n^(-1/5) + + bandwidth <- pmax(bandwidth_limits, bandwidth_std, na.rm = TRUE) + bandwidth <- mean(bandwidth) + + if (getOption("ggpointdensity.verbose", default = FALSE)) cli::cli_inform(c( + "selected joint bandwidth is {round(bandwidth, 6)}.", + "i" = "bandwidths derived from effective scale limits are {round(bandwidth_limits, 6)}.", + "i" = "bandwidths derived from data variance are {round(bandwidth_std, 6)}." + )) + + bandwidth <- sqrt(c(x=1/ratio , y=ratio)) * bandwidth + method.args$h <- bandwidth * adjust + } + + dens <- do.call(MASS::kde2d, c(base.args, method.args)) + # credits to Kamil Slowikowski: + ix <- findInterval(data$x, dens$x) + iy <- findInterval(data$y, dens$y) + ii <- cbind(ix, iy) + data$density[finites] <- dens$z[ii] + data$density[!finites] <- min(dens$z) + } else { + + if (is.character(method)) { + method <- match.fun(method) + } + data$density <- do.call(method, c(method.args)) + + } + + + data$ndensity <- data$density/max(data$density) + + data + } ) + +#' A cross between a scatter plot and a 2D density plot +#' +#' The pointdensity geom is used to create scatterplots where each point is +#' colored by the number of neighboring points. This is useful to visualize the +#' 2D-distribution of points in case of overplotting. +#' +#' +#' @aliases geom_pointdensity stat_pointdensity StatPointdensity +#' @param mapping Set of aesthetic mappings created by +#' [`aes()`][aes] or [`aes_()`][aes_]. If specified and +#' `inherit.aes = TRUE` (the default), it is combined with the default +#' mapping at the top level of the plot. You must supply `mapping` if +#' there is no plot mapping. +#' @param data The data to be displayed in this layer. There are three options: +#' +#' If `NULL`, the default, the data is inherited from the plot data as +#' specified in the call to [`ggplot()`][ggplot]. +#' +#' A `data.frame`, or other object, will override the plot data. All +#' objects will be fortified to produce a data frame. See +#' [`fortify()`][fortify] for which variables will be created. +#' +#' A `function` will be called with a single argument, the plot data. The +#' return value must be a `data.frame`, and will be used as the layer +#' data. A `function` can be created from a `formula` (e.g. `~ +#' head(.x, 10)`). +#' @param stat The statistical transformation to use on the data for this +#' layer, as a string. +#' @param position Position adjustment, either as a string, or the result of a +#' call to a position adjustment function. +#' @param \dots Other arguments passed on to [`layer()`][layer]. +#' This includes `adjust`, a multiplicate bandwidth adjustment used to +#' adjust the distance threshold to consider two points as neighbors, i.e. the +#' radius around points in which neighbors are counted. For example, +#' `adjust = 0.5` means use half of the default. Other arguments may be +#' aesthetics, used to set an aesthetic to a fixed value, like `shape = +#' 17` or `size = 3`. They may also be parameters to the paired geom/stat. +#' @param na.rm If `FALSE`, the default, missing values are removed with a +#' warning. If `TRUE`, missing values are silently removed. +#' @param show.legend logical. Should this layer be included in the legends? +#' `NA`, the default, includes if any aesthetics are mapped. `FALSE` +#' never includes, and `TRUE` always includes. It can also be a named +#' logical vector to finely select the aesthetics to display. +#' @param inherit.aes If `FALSE`, overrides the default aesthetics, rather +#' than combining with them. This is most useful for helper functions that +#' define both data and aesthetics and shouldn't inherit behaviour from the +#' default plot specification, e.g. [`borders()`][borders]. +#' @author Lukas P.M. Kremer +#' @references https://GitHub.com/LKremer/ggpointdensity +#' @examples +#' +#' library(ggplot2) +#' library(dplyr) +#' library(ggpointdensity) +#' +#' # generate some toy data +#' dat <- bind_rows( +#' tibble(x = rnorm(7000, sd = 1), +#' y = rnorm(7000, sd = 10), +#' group = "foo"), +#' tibble(x = rnorm(3000, mean = 1, sd = .5), +#' y = rnorm(3000, mean = 7, sd = 5), +#' group = "bar")) +#' +#' # plot it with geom_pointdensity() +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity() +#' +#' # adjust the smoothing bandwidth, +#' # i.e. the radius around the points +#' # in which neighbors are counted +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity(adjust = .1) +#' +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity(adjust = 4) +#' +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity(adjust = 4) + +#' scale_colour_continuous(low = "red", high = "black") +#' +#' # I recommend the viridis package +#' # for a more useful color scale +#' library(viridis) +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity() + +#' scale_color_viridis() +#' +#' # Of course you can combine the geom with standard +#' # ggplot2 features such as facets... +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity() + +#' scale_color_viridis() + +#' facet_wrap( ~ group) +#' +#' # ... or point shape and size: +#' dat_subset <- sample_frac(dat, .1) # smaller data set +#' ggplot(data = dat_subset, mapping = aes(x = x, y = y)) + +#' geom_pointdensity(size = 3, shape = 17) + +#' scale_color_viridis() +#' +#' # Zooming into the axis works as well, keep in mind +#' # that xlim() and ylim() change the density since they +#' # remove data points. +#' # It may be better to use `coord_cartesian()` instead. +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity() + +#' scale_color_viridis() + +#' xlim(c(-1, 3)) + ylim(c(-5, 15)) +#' +#' ggplot(data = dat, mapping = aes(x = x, y = y)) + +#' geom_pointdensity() + +#' scale_color_viridis() + +#' coord_cartesian(xlim = c(-1, 3), ylim = c(-5, 15)) +#' +#' @export geom_pointdensity geom_pointdensity <- function(mapping = NULL, data = NULL, stat = "pointdensity", position = "identity", ..., method = "auto", + aspect.ratio = ggplot2::theme_get()$aspect.ratio, na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) { + GeomPointWithCheck <- addCheckToGeom(ggplot2::GeomPoint, expected_aspect_ratio = aspect.ratio) ggplot2::layer( data = data, mapping = mapping, stat = stat, - geom = ggplot2::GeomPoint, + geom = GeomPointWithCheck, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list( method = method, + aspect.ratio = aspect.ratio, na.rm = na.rm, ... ) diff --git a/R/ggpointdensity-package.R b/R/ggpointdensity-package.R new file mode 100644 index 0000000..5936220 --- /dev/null +++ b/R/ggpointdensity-package.R @@ -0,0 +1,7 @@ +#' @keywords internal +#' @useDynLib ggpointdensity, count_neighbors_, .registration=TRUE +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end +NULL diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..989250c --- /dev/null +++ b/README.Rmd @@ -0,0 +1,201 @@ +--- +title: "ggpointdensity" +output: + github_document +editor_options: + chunk_output_type: inline +--- +```{r, setup, include=FALSE} +knitr::opts_chunk$set( + comment = '', fig.width = 8, fig.height = 4, out.width = "100%", dpi=300 +) +``` + +[![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/ggpointdensity)](https://cran.r-project.org/package=ggpointdensity) +[![Downloads](https://cranlogs.r-pkg.org/badges/last-month/ggpointdensity?color=brightgreen)](https://cran.r-project.org/package=ggpointdensity) + +Introduces `geom_pointdensity()`: A cross between a scatter plot and a 2D density plot. +```{r, include = FALSE} +library(tidyverse) +library(viridis) +library(ggpointdensity) +library(patchwork) +theme_set(theme_minimal()) +``` + +```{r generate-toy-data, include = FALSE} +dat <- bind_rows( + tibble(x = rnorm(7000, sd = 1), + y = rnorm(7000, sd = 10), + group = "foo"), + tibble(x = rnorm(3000, mean = 1, sd = .5), + y = rnorm(3000, mean = 7, sd = 5), + group = "bar")) +``` + +```{r logo, echo =FALSE, fig.width=2.5, fig.height=2.5 , out.width="60%"} +dat %>% + ggplot(aes(x = x, y = y)) + + geom_pointdensity(size = .3) + + scale_color_viridis() + + labs(title="geom_pointdensity()") + + theme_void() + + theme(plot.title = element_text(hjust = 0.5), + legend.position = "none", aspect.ratio=1) +``` +```{r, eval=FALSE, echo=FALSE} +dat %>% + ggplot( aes( x = x, y = y, color = group)) + + geom_point( size = .5) +``` + +## Installation +To install the package, type this command in R: +```{r, eval = FALSE} +install.packages("ggpointdensity") + +# Alternatively, you can install the latest +# development version from GitHub: +if (!requireNamespace("remotes", quietly = TRUE)) + install.packages("remotes") +remotes::install_github("LKremer/ggpointdensity") +``` + +## Motivation +There are several ways to visualize data points on a 2D coordinate system: +If you have lots of data points on top of each other, `geom_point()` fails to +give you an estimate of how many points are overlapping. +`geom_density2d()` and `geom_bin2d()` solve this issue, but they make it impossible +to investigate individual outlier points, which may be of interest. + +```{r, echo=FALSE, fig.width=10, fig.height=4} +dat %>% + ggplot( aes( x = x, y = y)) + + geom_point( size = .3) + + labs(title="geom_point()") + + +dat %>% + ggplot( aes( x = x, y = y, fill = after_stat(level))) + + stat_density_2d(geom = "polygon") + + scale_fill_viridis() + + labs(title="stat_density2d(geom='polygon')") + + +dat %>% + ggplot( aes( x = x, y = y)) + + geom_bin2d() + + scale_fill_viridis() + + labs(title="geom_bin2d()") & + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio = 1) +``` + +`geom_pointdensity()` aims to solve this problem by combining the best of both +worlds: individual points are colored by the number of neighboring points. +This allows you to see the overall distribution, as well as individual points. + +```{r, echo = FALSE} +dat %>% + ggplot(aes(x = x, y = y)) + + geom_pointdensity(size = .3) + + scale_color_viridis() + + labs(title="geom_pointdensity()") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1) +``` + +## Changelog +Added `method` argument and renamed the `n_neighbor` stat to `density`. The available options +are `method="auto"`, +`method="default"` and `method="kde2d"`. `default` is the regular n_neighbor calculation +as in the CRAN package. `kde2d` uses 2D kernel density estimation to estimate the point density +(credits to @slowkow). +This method is slower for few points, but faster for many (ca. >20k) points. By default, +`method="auto"` picks either `kde2d` or `default` depending on the number of points. + +## Demo +Generate some toy data and visualize it with `geom_pointdensity()`: +```{r simple, include=TRUE, eval=TRUE, echo=TRUE} +<> + +ggplot(data = dat, mapping = aes( x = x, y = y)) + + geom_pointdensity() + + scale_color_viridis() + + theme(aspect.ratio=1) +``` + + +Each point is colored according to the number of neighboring points. +(Note: this here is the dev branch, where I decided to plot the density estimate +instead of n_neighbors now.) +The distance threshold to consider two points as neighbors (smoothing +bandwidth) can be adjusted with the `adjust` argument, where `adjust = 0.5` +means use half of the default bandwidth. +```{r adjusting the bandwidth, fig.width=4, fig.height=3, out.width="45%", fig.show="hold"} +ggplot(data = dat, mapping = aes(x = x, y = y)) + + geom_pointdensity(size = .3, adjust = .1) + + scale_color_viridis() + + labs(title="adjust = 0.1") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1) + +ggplot(data = dat, mapping = aes(x = x, y = y)) + + geom_pointdensity(size = .3, adjust = 4, aspect.ratio=1) + + scale_color_viridis() + + labs(title="adjust = 4") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1) +``` + +Of course you can combine the geom with standard `ggplot2` features +such as facets... + +```{r facets} +dat %>% + ggplot( aes( x = x, y = y)) + + geom_pointdensity(aes(color=after_stat(ndensity)), size = .25) + + scale_color_viridis() + + facet_wrap( ~ group) + + labs(title="facet_wrap( ~ group)") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1) +``` + +... or point shape and size: +```{r different shapes} +dat_subset <- sample_frac(dat, .1) # smaller data set +ggplot(data = dat_subset, mapping = aes(x = x, y = y)) + + geom_pointdensity(size = 3, shape = 17) + + scale_color_viridis() + + labs(title="changing shape") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio = 1) +``` + +Zooming into the axis works as well, keep in mind that `xlim()` and +`ylim()` change the density since they remove data points. +It may be better to use `coord_cartesian()` instead. + +```{r zooming} +dat %>% + ggplot(aes(x = x, y = y)) + + geom_pointdensity(size = .5) + + scale_color_viridis() + + scale_x_continuous(limits = c(-1, 3)) + + scale_y_continuous(limits = c(-5, 15)) + + labs(title="using x- and ylim()") + + +dat %>% + ggplot(aes(x = x, y = y)) + + geom_pointdensity(size = .5) + + scale_color_viridis() + + coord_cartesian(xlim = c(-1, 3), ylim = c(-5, 15)) + + labs(title="using coord_cartesian()") & + theme(aspect.ratio = 1, plot.title = element_text(hjust = 0.5)) +``` + +```{r propotional ink, eval=FALSE, echo = FALSE} +dat %>% + ggplot(aes(x = x, y = y, size = after_stat(1/density), color = after_stat(density))) + + geom_pointdensity(adjust = .2) + + scale_color_viridis(option = "inferno", end = .9, direction = -1) + + scale_size_area(max_size = 3) + + theme(aspect.ratio = 1) +``` + +## Authors +Lukas PM Kremer ([@LPMKremer](https://twitter.com/LPMKremer/)) and Simon Anders ([@s_anders_m](https://twitter.com/s_anders_m/)), 2019 diff --git a/README.md b/README.md index d670a4f..5864c79 100644 --- a/README.md +++ b/README.md @@ -1,56 +1,62 @@ -# ggpointdensity -[![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/ggpointdensity)](https://cran.r-project.org/package=ggpointdensity) -[![Downloads](https://cranlogs.r-pkg.org/badges/last-month/ggpointdensity?color=brightgreen)](https://cran.r-project.org/package=ggpointdensity) +ggpointdensity +================ -Introduces `geom_pointdensity()`: A cross between a scatter plot and a 2D density plot. +[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/ggpointdensity)](https://cran.r-project.org/package=ggpointdensity) +[![Downloads](https://cranlogs.r-pkg.org/badges/last-month/ggpointdensity?color=brightgreen)](https://cran.r-project.org/package=ggpointdensity) - +Introduces `geom_pointdensity()`: A cross between a scatter plot and a +2D density plot. + ## Installation + To install the package, type this command in R: + ``` r install.packages("ggpointdensity") # Alternatively, you can install the latest # development version from GitHub: -if (!requireNamespace("devtools", quietly = TRUE)) - install.packages("devtools") -devtools::install_github("LKremer/ggpointdensity") +if (!requireNamespace("remotes", quietly = TRUE)) + install.packages("remotes") +remotes::install_github("LKremer/ggpointdensity") ``` ## Motivation -There are several ways to visualize data points on a 2D coordinate system: -If you have lots of data points on top of each other, `geom_point()` fails to -give you an estimate of how many points are overlapping. -`geom_density2d()` and `geom_bin2d()` solve this issue, but they make it impossible -to investigate individual outlier points, which may be of interest. - +There are several ways to visualize data points on a 2D coordinate +system: If you have lots of data points on top of each other, +`geom_point()` fails to give you an estimate of how many points are +overlapping. `geom_density2d()` and `geom_bin2d()` solve this issue, but +they make it impossible to investigate individual outlier points, which +may be of interest. -`geom_pointdensity()` aims to solve this problem by combining the best of both -worlds: individual points are colored by the number of neighboring points. -This allows you to see the overall distribution, as well as individual points. + - +`geom_pointdensity()` aims to solve this problem by combining the best +of both worlds: individual points are colored by the number of +neighboring points. This allows you to see the overall distribution, as +well as individual points. + + ## Changelog -Added `method` argument and renamed the `n_neighbor` stat to `density`. The available options -are `method="auto"`, -`method="default"` and `method="kde2d"`. `default` is the regular n_neighbor calculation -as in the CRAN package. `kde2d` uses 2D kernel density estimation to estimate the point density -(credits to @slowkow). -This method is slower for few points, but faster for many (ca. >20k) points. By default, -`method="auto"` picks either `kde2d` or `default` depending on the number of points. + +Added `method` argument and renamed the `n_neighbor` stat to `density`. +The available options are `method="auto"`, `method="default"` and +`method="kde2d"`. `default` is the regular n_neighbor calculation as in +the CRAN package. `kde2d` uses 2D kernel density estimation to estimate +the point density (credits to @slowkow). This method is slower for few +points, but faster for many (ca. \>20k) points. By default, +`method="auto"` picks either `kde2d` or `default` depending on the +number of points. ## Demo + Generate some toy data and visualize it with `geom_pointdensity()`: -``` r -library(ggplot2) -library(dplyr) -library(viridis) -library(ggpointdensity) +``` r dat <- bind_rows( tibble(x = rnorm(7000, sd = 1), y = rnorm(7000, sd = 10), @@ -59,70 +65,93 @@ dat <- bind_rows( y = rnorm(3000, mean = 7, sd = 5), group = "bar")) -ggplot(data = dat, mapping = aes(x = x, y = y)) + +ggplot(data = dat, mapping = aes( x = x, y = y)) + geom_pointdensity() + - scale_color_viridis() + scale_color_viridis() + + theme(aspect.ratio=1) ``` - + Each point is colored according to the number of neighboring points. -(Note: this here is the dev branch, where I decided to plot the density estimate -instead of n_neighbors now.) -The distance threshold to consider two points as neighbors (smoothing -bandwidth) can be adjusted with the `adjust` argument, where `adjust = 0.5` -means use half of the default bandwidth. +(Note: this here is the dev branch, where I decided to plot the density +estimate instead of n_neighbors now.) The distance threshold to consider +two points as neighbors (smoothing bandwidth) can be adjusted with the +`adjust` argument, where `adjust = 0.5` means use half of the default +bandwidth. + ``` r ggplot(data = dat, mapping = aes(x = x, y = y)) + - geom_pointdensity(adjust = .1) + - scale_color_viridis() - + geom_pointdensity(size = .3, adjust = .1) + + scale_color_viridis() + + labs(title="adjust = 0.1") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1) + ggplot(data = dat, mapping = aes(x = x, y = y)) + - geom_pointdensity(adjust = 4) + - scale_color_viridis() + geom_pointdensity(size = .3, adjust = 4, aspect.ratio=1) + + scale_color_viridis() + + labs(title="adjust = 4") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1) ``` - -Of course you can combine the geom with standard `ggplot2` features -such as facets... + + +Of course you can combine the geom with standard `ggplot2` features such +as facets… ``` r -# Facetting by group -ggplot(data = dat, mapping = aes(x = x, y = y)) + - geom_pointdensity() + +dat %>% + ggplot( aes( x = x, y = y)) + + geom_pointdensity(aes(color=after_stat(ndensity)), size = .25) + scale_color_viridis() + - facet_wrap( ~ group) + facet_wrap( ~ group) + + labs(title="facet_wrap( ~ group)") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1) ``` - + + +… or point shape and size: -... or point shape and size: ``` r dat_subset <- sample_frac(dat, .1) # smaller data set ggplot(data = dat_subset, mapping = aes(x = x, y = y)) + geom_pointdensity(size = 3, shape = 17) + - scale_color_viridis() + scale_color_viridis() + + labs(title="changing shape") + + theme(plot.title = element_text(hjust = 0.5), aspect.ratio = 1) ``` - + Zooming into the axis works as well, keep in mind that `xlim()` and -`ylim()` change the density since they remove data points. -It may be better to use `coord_cartesian()` instead. +`ylim()` change the density since they remove data points. It may be +better to use `coord_cartesian()` instead. ``` r -ggplot(data = dat, mapping = aes(x = x, y = y)) + - geom_pointdensity() + +dat %>% + ggplot(aes(x = x, y = y)) + + geom_pointdensity(size = .5) + scale_color_viridis() + - xlim(c(-1, 3)) + ylim(c(-5, 15)) + scale_x_continuous(limits = c(-1, 3)) + + scale_y_continuous(limits = c(-5, 15)) + + labs(title="using x- and ylim()") + -ggplot(data = dat, mapping = aes(x = x, y = y)) + - geom_pointdensity() + +dat %>% + ggplot(aes(x = x, y = y)) + + geom_pointdensity(size = .5) + scale_color_viridis() + - coord_cartesian(xlim = c(-1, 3), ylim = c(-5, 15)) + coord_cartesian(xlim = c(-1, 3), ylim = c(-5, 15)) + + labs(title="using coord_cartesian()") & + theme(aspect.ratio = 1, plot.title = element_text(hjust = 0.5)) ``` - + Warning: Removed 3528 rows containing missing values or values outside the scale range + (`stat_pointdensity()`). + + ## Authors -Lukas PM Kremer ([@LPMKremer](https://twitter.com/LPMKremer/)) and Simon Anders ([@s_anders_m](https://twitter.com/s_anders_m/)), 2019 + +Lukas PM Kremer ([@LPMKremer](https://twitter.com/LPMKremer/)) and Simon +Anders ([@s_anders_m](https://twitter.com/s_anders_m/)), 2019 diff --git a/README_files/figure-gfm/adjusting the bandwidth-1.png b/README_files/figure-gfm/adjusting the bandwidth-1.png new file mode 100644 index 0000000..c43f0af Binary files /dev/null and b/README_files/figure-gfm/adjusting the bandwidth-1.png differ diff --git a/README_files/figure-gfm/adjusting the bandwidth-2.png b/README_files/figure-gfm/adjusting the bandwidth-2.png new file mode 100644 index 0000000..77bf714 Binary files /dev/null and b/README_files/figure-gfm/adjusting the bandwidth-2.png differ diff --git a/README_files/figure-gfm/different shapes-1.png b/README_files/figure-gfm/different shapes-1.png new file mode 100644 index 0000000..9f54cb2 Binary files /dev/null and b/README_files/figure-gfm/different shapes-1.png differ diff --git a/README_files/figure-gfm/facets-1.png b/README_files/figure-gfm/facets-1.png new file mode 100644 index 0000000..dc729f9 Binary files /dev/null and b/README_files/figure-gfm/facets-1.png differ diff --git a/README_files/figure-gfm/logo-1.png b/README_files/figure-gfm/logo-1.png new file mode 100644 index 0000000..b952740 Binary files /dev/null and b/README_files/figure-gfm/logo-1.png differ diff --git a/README_files/figure-gfm/simple-1.png b/README_files/figure-gfm/simple-1.png new file mode 100644 index 0000000..ee320e2 Binary files /dev/null and b/README_files/figure-gfm/simple-1.png differ diff --git a/README_files/figure-gfm/unnamed-chunk-4-1.png b/README_files/figure-gfm/unnamed-chunk-4-1.png new file mode 100644 index 0000000..c2d828a Binary files /dev/null and b/README_files/figure-gfm/unnamed-chunk-4-1.png differ diff --git a/README_files/figure-gfm/unnamed-chunk-5-1.png b/README_files/figure-gfm/unnamed-chunk-5-1.png new file mode 100644 index 0000000..aa48ebf Binary files /dev/null and b/README_files/figure-gfm/unnamed-chunk-5-1.png differ diff --git a/README_files/figure-gfm/zooming-1.png b/README_files/figure-gfm/zooming-1.png new file mode 100644 index 0000000..6f9e4fa Binary files /dev/null and b/README_files/figure-gfm/zooming-1.png differ diff --git a/ggpointdensity.Rproj b/ggpointdensity.Rproj index 497f8bf..270314b 100644 --- a/ggpointdensity.Rproj +++ b/ggpointdensity.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/img/make_plots.Rmd b/img/make_plots.Rmd deleted file mode 100644 index 9683f02..0000000 --- a/img/make_plots.Rmd +++ /dev/null @@ -1,193 +0,0 @@ ---- -title: "R Notebook" -output: - html_document: - df_print: paged -editor_options: - chunk_output_type: inline ---- - -```{r} -# devtools::install_github("LKremer/ggpointdensity") -``` - - -```{r} -library(tidyverse) -library(viridis) -library(ggpointdensity) -library(patchwork) -theme_set(theme_minimal()) -``` - -Generate some toy data: -```{r} -dat <- bind_rows( - tibble(x = rnorm(7000, sd = 1), - y = rnorm(7000, sd = 10), - group = "foo"), - tibble(x = rnorm(3000, mean = 1, sd = .5), - y = rnorm(3000, mean = 7, sd = 5), - group = "bar")) - -dat %>% - ggplot( aes( x = x, y = y, color = group)) + - geom_point( size = .5) -``` - -```{r} -pp <- dat %>% - ggplot( aes( x = x, y = y)) + - geom_point( size = .3) + - labs(title="geom_point()") + theme(plot.title = element_text(hjust = 0.5)) - -pd <- dat %>% - ggplot( aes( x = x, y = y, fill = stat(level))) + - stat_density_2d(geom = "polygon") + - scale_fill_viridis() + - labs(title="stat_density2d(geom='polygon')") + theme(plot.title = element_text(hjust = 0.5)) - -pb <- dat %>% - ggplot( aes( x = x, y = y)) + - geom_bin2d() + - scale_fill_viridis() + - labs(title="geom_bin2d()") + theme(plot.title = element_text(hjust = 0.5)) - -pp + pd + pb - -ggsave("/home/lukas/code/ggpointdensity/img/scatter_dens_bin2d.png", - width = 22, height = 6, units = "cm", dpi = 300) -``` - - -Use the geom: -```{r} -dat %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = .3) + - scale_color_viridis() + - labs(title="geom_pointdensity()") + theme(plot.title = element_text(hjust = 0.5)) - -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity.png", - width = 10, height = 7, units = "cm", dpi = 300) -``` - -```{r} -dat %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = .3) + - scale_color_viridis() + - labs(title="geom_pointdensity()") + - theme_void() + - theme(plot.title = element_text(hjust = 0.5), - legend.position = "none") - -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity_logo.png", - width = 7, height = 8, units = "cm", dpi = 300) -``` - - -The "adjust" argument can be used to adjust the smoothing bandwidth (just as in ggplot2::stat_density): -```{r} -adj1 <- dat %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = .3, adjust = .1) + - scale_color_viridis() + - labs(title="adjust = 0.1") + theme(plot.title = element_text(hjust = 0.5)) - -adj2 <- dat %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = .3, adjust = 4) + - scale_color_viridis() + - labs(title="adjust = 4") + theme(plot.title = element_text(hjust = 0.5)) - - -adj1 + adj2 -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity_adj.png", - width = 18, height = 6, units = "cm", dpi = 300) -``` - -Not sure why anyone would do this, but you can use separate bandwidths for x and y: -```{r} -dat %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = .3, adjust = c(0.1, 2)) + - scale_color_viridis() + - labs(title="adjust = c(0.1, 2)") + theme(plot.title = element_text(hjust = 0.5)) - -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity_xyadj.png", - width = 10, height = 7, units = "cm", dpi = 300) -``` - -Facetting works fine: -```{r} -dat %>% - ggplot( aes( x = x, y = y)) + - geom_pointdensity( size = .25) + - scale_color_viridis() + - facet_wrap( ~ group) + - labs(title="facet_wrap( ~ group)") + theme(plot.title = element_text(hjust = 0.5)) - -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity_facet.png", - width = 14, height = 7, units = "cm", dpi = 300) -``` - -x- and ylim work as expected: -```{r} -plim <- dat %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = .5) + - scale_color_viridis() + - scale_x_continuous(limits = c(-1, 3)) + - scale_y_continuous(limits = c(-5, 15)) + - labs(title="using x- and ylim()") + theme(plot.title = element_text(hjust = 0.5)) -``` - -coord_cartesian also works: -```{r} -pcc <- dat %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = .5) + - scale_color_viridis() + - coord_cartesian(xlim = c(-1, 3), ylim = c(-5, 15)) + - labs(title="using coord_cartesian()") + theme(plot.title = element_text(hjust = 0.5)) -``` - -```{r} -plim + pcc -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity_zoom.png", - width = 18, height = 6, units = "cm", dpi = 300) -``` - - -All functions of geom_point should work too: -```{r} -dat %>% sample_frac(.1) %>% - ggplot(aes(x = x, y = y)) + - geom_pointdensity(size = 3, shape = 17) + - scale_color_viridis() + - labs(title="changing shape") + theme(plot.title = element_text(hjust = 0.5)) - -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity_shape.png", - width = 10, height = 7, units = "cm", dpi = 300) -``` - - - -```{r} -dat %>% - ggplot(aes(x = x, y = y, size = 1/stat(n_neighbors)^1.8)) + - geom_pointdensity(adjust = .2) + - scale_color_viridis(option = "inferno", end = .9, direction = -1) + - scale_size_continuous(range = c(.001, 3)) - -ggsave("/home/lukas/code/ggpointdensity/img/pointdensity_custom.png", - width = 14, height = 10, units = "cm", dpi = 300) -``` - - - -```{r} -date() -``` - diff --git a/img/pointdensity.png b/img/pointdensity.png deleted file mode 100644 index cb5d578..0000000 Binary files a/img/pointdensity.png and /dev/null differ diff --git a/img/pointdensity_adj.png b/img/pointdensity_adj.png deleted file mode 100644 index a3b79b7..0000000 Binary files a/img/pointdensity_adj.png and /dev/null differ diff --git a/img/pointdensity_facet.png b/img/pointdensity_facet.png deleted file mode 100644 index 692783f..0000000 Binary files a/img/pointdensity_facet.png and /dev/null differ diff --git a/img/pointdensity_logo.png b/img/pointdensity_logo.png deleted file mode 100644 index bdfbbf2..0000000 Binary files a/img/pointdensity_logo.png and /dev/null differ diff --git a/img/pointdensity_shape.png b/img/pointdensity_shape.png deleted file mode 100644 index f7af078..0000000 Binary files a/img/pointdensity_shape.png and /dev/null differ diff --git a/img/pointdensity_xyadj.png b/img/pointdensity_xyadj.png deleted file mode 100644 index 9d1eaac..0000000 Binary files a/img/pointdensity_xyadj.png and /dev/null differ diff --git a/img/pointdensity_zoom.png b/img/pointdensity_zoom.png deleted file mode 100644 index c2e9140..0000000 Binary files a/img/pointdensity_zoom.png and /dev/null differ diff --git a/img/scatter_dens_bin2d.png b/img/scatter_dens_bin2d.png deleted file mode 100644 index c63145e..0000000 Binary files a/img/scatter_dens_bin2d.png and /dev/null differ diff --git a/man/StatPointdensity.Rd b/man/StatPointdensity.Rd new file mode 100644 index 0000000..4afb87b --- /dev/null +++ b/man/StatPointdensity.Rd @@ -0,0 +1,10 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geom_pointdensity.R +\docType{data} +\name{StatPointdensity} +\alias{StatPointdensity} +\title{ggproto class, see \code{\link[ggplot2:ggplot2-ggproto]{ggplot2::Stat()}}} +\description{ +ggproto class, see \code{\link[ggplot2:ggplot2-ggproto]{ggplot2::Stat()}} +} +\keyword{datasets} diff --git a/man/addCheckToGeom.Rd b/man/addCheckToGeom.Rd new file mode 100644 index 0000000..543bbf3 --- /dev/null +++ b/man/addCheckToGeom.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geom_pointdensity.R +\name{addCheckToGeom} +\alias{addCheckToGeom} +\title{Wraps the user supplied Geom (typically GeomPoint) to add class "check_aspect_grob" and information about the aspect ratio assumed under which the densities were calculated to the grobs it draws +The check is injected by providing an S3 instance to the makeContext generic that is called by grid for each grob at render time (where the actual plot aspect ratio is finally known)} +\usage{ +addCheckToGeom(orig_geom, expected_aspect_ratio = 1) +} +\description{ +Wraps the user supplied Geom (typically GeomPoint) to add class "check_aspect_grob" and information about the aspect ratio assumed under which the densities were calculated to the grobs it draws +The check is injected by providing an S3 instance to the makeContext generic that is called by grid for each grob at render time (where the actual plot aspect ratio is finally known) +} diff --git a/man/count_neighbors_r.Rd b/man/count_neighbors_r.Rd new file mode 100644 index 0000000..a444669 --- /dev/null +++ b/man/count_neighbors_r.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geom_pointdensity.R +\name{count_neighbors_r} +\alias{count_neighbors_r} +\title{Implementation of count_neighbors in R. Not actually used, just for clarity} +\usage{ +count_neighbors_r(x, y, r2, xy) +} +\description{ +Implementation of count_neighbors in R. Not actually used, just for clarity +} diff --git a/man/geom_pointdensity.Rd b/man/geom_pointdensity.Rd index 2e6774c..ae5d393 100644 --- a/man/geom_pointdensity.Rd +++ b/man/geom_pointdensity.Rd @@ -1,78 +1,92 @@ -\name{geom_pointdensity} -\alias{geom_pointdensity} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/geom_pointdensity.R +\name{stat_pointdensity} \alias{stat_pointdensity} +\alias{geom_pointdensity} \alias{StatPointdensity} - -\title{ -A cross between a scatter plot and a 2D density plot -} -\description{ -The pointdensity geom is used to create scatterplots where each point is colored by the number of neighboring points. This is useful to visualize the 2D-distribution of points in case of overplotting. -} +\title{A cross between a scatter plot and a 2D density plot} \usage{ -geom_pointdensity(mapping = NULL, data = NULL, - stat = "pointdensity", position = "identity", - ..., na.rm = FALSE, show.legend = NA, - inherit.aes = TRUE) +stat_pointdensity( + mapping = NULL, + data = NULL, + geom = "point", + position = "identity", + ..., + adjust = 1, + aspect.ratio = ggplot2::theme_get()$aspect.ratio, + na.rm = FALSE, + method = "auto", + method.args = list(), + show.legend = NA, + inherit.aes = TRUE +) + +geom_pointdensity( + mapping = NULL, + data = NULL, + stat = "pointdensity", + position = "identity", + ..., + method = "auto", + aspect.ratio = ggplot2::theme_get()$aspect.ratio, + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) } - \arguments{ -\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}} or -\code{\link[=aes_]{aes_()}}. If specified and \code{inherit.aes = TRUE} (the -default), it is combined with the default mapping at the top level of the -plot. You must supply \code{mapping} if there is no plot mapping.} -\item{data}{The data to be displayed in this layer. There are three -options: - -If \code{NULL}, the default, the data is inherited from the plot -data as specified in the call to \code{\link[=ggplot]{ggplot()}}. - -A \code{data.frame}, or other object, will override the plot -data. All objects will be fortified to produce a data frame. See -\code{\link[=fortify]{fortify()}} for which variables will be created. +\item{mapping}{Set of aesthetic mappings created by +\code{\link[=aes]{aes()}} or \code{\link[=aes_]{aes_()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default +mapping at the top level of the plot. You must supply \code{mapping} if +there is no plot mapping.} -A \code{function} will be called with a single argument, -the plot data. The return value must be a \code{data.frame}, and -will be used as the layer data. A \code{function} can be created -from a \code{formula} (e.g. \code{~ head(.x, 10)}).} +\item{data}{The data to be displayed in this layer. There are three options: -\item{stat}{The statistical transformation to use on the data for this -layer, as a string.} +If \code{NULL}, the default, the data is inherited from the plot data as +specified in the call to \code{\link[=ggplot]{ggplot()}}. -\item{position}{Position adjustment, either as a string, or the result of -a call to a position adjustment function.} +A \code{data.frame}, or other object, will override the plot data. All +objects will be fortified to produce a data frame. See +\code{\link[=fortify]{fortify()}} for which variables will be created. + +A \code{function} will be called with a single argument, the plot data. The +return value must be a \code{data.frame}, and will be used as the layer +data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} + +\item{position}{Position adjustment, either as a string, or the result of a +call to a position adjustment function.} \item{\dots}{Other arguments passed on to \code{\link[=layer]{layer()}}. This includes \code{adjust}, a multiplicate bandwidth adjustment used to -adjust the distance threshold to consider two points as neighbors, i.e. -the radius around points in which neighbors are counted. For example, +adjust the distance threshold to consider two points as neighbors, i.e. the +radius around points in which neighbors are counted. For example, \code{adjust = 0.5} means use half of the default. Other arguments may be -aesthetics, used to set an aesthetic to a fixed value, like \code{shape = 17} -or \code{size = 3}. They may also be parameters to the paired geom/stat.} +aesthetics, used to set an aesthetic to a fixed value, like \code{shape = 17} or \code{size = 3}. They may also be parameters to the paired geom/stat.} -\item{na.rm}{If \code{FALSE}, the default, missing values are removed with -a warning. If \code{TRUE}, missing values are silently removed.} +\item{na.rm}{If \code{FALSE}, the default, missing values are removed with a +warning. If \code{TRUE}, missing values are silently removed.} \item{show.legend}{logical. Should this layer be included in the legends? -\code{NA}, the default, includes if any aesthetics are mapped. -\code{FALSE} never includes, and \code{TRUE} always includes. -It can also be a named logical vector to finely select the aesthetics to -display.} - -\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, -rather than combining with them. This is most useful for helper functions -that define both data and aesthetics and shouldn't inherit behaviour from -the default plot specification, e.g. \code{\link[=borders]{borders()}}.} -} +\code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE} +never includes, and \code{TRUE} always includes. It can also be a named +logical vector to finely select the aesthetics to display.} -\references{ - https://GitHub.com/LKremer/ggpointdensity +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather +than combining with them. This is most useful for helper functions that +define both data and aesthetics and shouldn't inherit behaviour from the +default plot specification, e.g. \code{\link[=borders]{borders()}}.} + +\item{stat}{The statistical transformation to use on the data for this +layer, as a string.} } -\author{ - Lukas P.M. Kremer +\description{ +The pointdensity geom is used to create scatterplots where each point is +colored by the number of neighboring points. This is useful to visualize the +2D-distribution of points in case of overplotting. } - \examples{ + library(ggplot2) library(dplyr) library(ggpointdensity) @@ -136,4 +150,11 @@ ggplot(data = dat, mapping = aes(x = x, y = y)) + geom_pointdensity() + scale_color_viridis() + coord_cartesian(xlim = c(-1, 3), ylim = c(-5, 15)) + +} +\references{ +https://GitHub.com/LKremer/ggpointdensity +} +\author{ +Lukas P.M. Kremer } diff --git a/man/ggpointdensity-package.Rd b/man/ggpointdensity-package.Rd new file mode 100644 index 0000000..26f3ecc --- /dev/null +++ b/man/ggpointdensity-package.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ggpointdensity-package.R +\docType{package} +\name{ggpointdensity-package} +\alias{ggpointdensity} +\alias{ggpointdensity-package} +\title{ggpointdensity: A Cross Between a 2D Density Plot and a Scatter Plot} +\description{ +A cross between a 2D density plot and a scatter plot, implemented as a 'ggplot2' geom. Points in the scatter plot are colored by the number of neighboring points. This is useful to visualize the 2D-distribution of points in case of overplotting. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/LKremer/ggpointdensity} + \item Report bugs at \url{https://github.com/LKremer/ggpointdensity/issues} +} + +} +\author{ +\strong{Maintainer}: Lukas P. M. Kremer \email{L-Kremer@web.de} (\href{https://orcid.org/0000-0003-3170-6295}{ORCID}) + +Other contributors: +\itemize{ + \item Simon Anders (\href{https://orcid.org/0000-0003-4868-1805}{ORCID}) [contributor] +} + +} +\keyword{internal} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..cc15869 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(ggpointdensity) + +test_check("ggpointdensity") diff --git a/tests/testthat/test-geom_pointdensity.R b/tests/testthat/test-geom_pointdensity.R new file mode 100644 index 0000000..2d22860 --- /dev/null +++ b/tests/testthat/test-geom_pointdensity.R @@ -0,0 +1,202 @@ +set.seed(1) +n <- 100 +df_isotropic_normal <- data.frame(idx = seq_len(n), x = rnorm(n), y = rnorm(n)) + +n <- 1000 +df_three_isotropic_normals <- data.frame(idx = seq_len(n*3), grp = rep(1:3, n), x = rnorm(n) + rep(c(0,0,1), n)*5, y = rnorm(n)+rep(c(0,1,1),n)*5) + +for( method in c("default", "kde2d") ) { + test_that(cli::format_inline("coord_fixed(method=\"{method}\") runs without warning ({.code method=\"{method}\"})"), { + p1 <- ggplot(df_isotropic_normal, aes(x, y)) + geom_pointdensity(method= method) + coord_fixed() + expect_no_warning(print(p1)) + }) + + test_that(cli::format_inline("aspect.ratio runs without warning ({.code method=\"{method}\"})"), { + a.r <- 3 + p1 <- ggplot(df_isotropic_normal, aes(x, y)) + geom_pointdensity(method= method, aspect.ratio = a.r) + theme(aspect.ratio = a.r) + expect_no_warning(print(p1)) + }) + + + test_that(cli::format_inline("wrong aspect.ratio gives warning ({.code method=\"{method}\"})"), { + a.r <- 3 + p1 <- ggplot(df_isotropic_normal, aes(x, y)) + geom_pointdensity(method= method, aspect.ratio = 2*a.r) + theme(aspect.ratio = a.r) + expect_warning(print(p1)) + }) + + test_that(cli::format_inline("No aspect.ratio / coord_fixed gives warning ({.code method=\"{method}\"})"), { + p1 <- ggplot(df_isotropic_normal, aes(x, y)) + geom_pointdensity(method= method, aspect.ratio = NULL) + theme(aspect.ratio = NULL) + expect_warning(print(p1), class = "actual_aspect_ratio_does_not_match_expectation") + }) + + test_that(cli::format_inline("No aspect.ratio / coord_fixed gives no warning if actual aspect ratio is 1 ({.code method=\"{method}\"})"), { + p1 <- ggplot(df_isotropic_normal, aes(x, y)) + geom_pointdensity(method= method, aspect.ratio = NULL) + theme(aspect.ratio = 1) + expect_no_warning(print(p1)) + }) + + test_that(cli::format_inline("aspect.ratio adjusts density for three equidistant isotropic normals ({.code method=\"{method}\"})"), { + + a.r <- 10 + p1 <- + ggplot(df_three_isotropic_normals, aes(x, y, label = grp, group =1)) + stat_pointdensity(method= method, size=5, geom="text", aspect.ratio = a.r, adjust=70) + theme(aspect.ratio = a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$density[plot_data$label==idx]) + expect_lt(get_mean_label_idx(3), get_mean_label_idx(2)) + expect_lt(get_mean_label_idx(1), get_mean_label_idx(3)) + + a.r <- 1 + p1 <- + ggplot(df_three_isotropic_normals, aes(x, y, label = grp)) + stat_pointdensity(method= method, size=5, geom="text", aspect.ratio = a.r, adjust=70) + theme(aspect.ratio = a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$ndensity[plot_data$label==idx]) + expect_gt(get_mean_label_idx(2), get_mean_label_idx(3)) + expect_equal(get_mean_label_idx(1), get_mean_label_idx(3), tolerance = 0.2) + }) + + test_that(cli::format_inline("aspect.ratio adjusts density for three equidistant isotropic normals independent of data scaling ({.code method=\"{method}\"})"), { + + a.r <- 10 + p1 <- + ggplot(df_three_isotropic_normals, aes(x, y*100, label = grp, group =1)) + stat_pointdensity(method= method, size=5, geom="text", aspect.ratio = a.r, adjust=70) + theme(aspect.ratio = a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$density[plot_data$label==idx]) + expect_lt(get_mean_label_idx(1), get_mean_label_idx(2)) + expect_gt(get_mean_label_idx(2), get_mean_label_idx(3)) + expect_lt(get_mean_label_idx(1), get_mean_label_idx(3)) + + a.r <- 1 + p1 <- + ggplot(df_three_isotropic_normals, aes(x, y*100, label = grp)) + stat_pointdensity(method= method, size=5, geom="text", aspect.ratio = a.r, adjust=70) + theme(aspect.ratio = a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$ndensity[plot_data$label==idx]) + expect_gt(get_mean_label_idx(2), get_mean_label_idx(3)) + expect_equal(get_mean_label_idx(1), get_mean_label_idx(3), tolerance = 0.2) + }) + + test_that(cli::format_inline("coord.fixed adjusts density for three isotropic normals ({.code method=\"{method}\"})"), { + a.r <- 10 + p1 <- + ggplot(df_three_isotropic_normals, aes(x, y, label = grp, group =1)) + stat_pointdensity(method= method, size=5, geom="text", adjust=70) + theme(aspect.ratio =NULL) +coord_fixed(a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$density[plot_data$label==idx]) + expect_lt(get_mean_label_idx(3), get_mean_label_idx(2)) + expect_lt(get_mean_label_idx(1), get_mean_label_idx(2)) + + a.r <- 1 + p1 <- + ggplot(df_three_isotropic_normals, aes(x, y, label = grp)) + stat_pointdensity(method= method, size=5, geom="text", adjust=70) + theme(aspect.ratio = NULL) +coord_fixed(a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$ndensity[plot_data$label==idx]) + expect_gt(get_mean_label_idx(2), get_mean_label_idx(3)) + expect_equal(get_mean_label_idx(1), get_mean_label_idx(3), tolerance = 0.2) + }) + + test_that(cli::format_inline("coord.fixed adjusts density for three isotropic normals independent of data scaling ({.code method=\"{method}\"})"), { + + p1 <- + ggplot(df_three_isotropic_normals, aes(x*2, y, label = grp, group =1)) + stat_pointdensity(method= method, size=5, geom="text", adjust=70) + theme(aspect.ratio =NULL) + coord_fixed() + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$density[plot_data$label==idx]) + expect_lt(get_mean_label_idx(3), get_mean_label_idx(1)) + expect_lt(get_mean_label_idx(1), get_mean_label_idx(2)) + + f <- 2 + p1 <- + ggplot(df_three_isotropic_normals, aes(x*f, y, label = grp)) + stat_pointdensity(method= method, size=5, geom="text", adjust=70) + coord_fixed(ratio=f) + theme(aspect.ratio = NULL) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + get_mean_label_idx <- function(idx) mean(plot_data$ndensity[plot_data$label==idx]) + expect_gt(get_mean_label_idx(2), get_mean_label_idx(3)) + expect_equal(get_mean_label_idx(1), get_mean_label_idx(3), tolerance = 0.1) + }) + + + + + + if(method != "default") { + test_that(cli::format_inline("aspect.ratio adjusts density ({.code method=\"{method}\"})"), { + n <- 1 + df <- data.frame(idx = 1:3, x = rep(c(0,0,0.1), n), y = rep(c(0,1,1), n)) + + a.r <- 10 + p1 <- + ggplot(df, aes(x, y, label = idx)) + stat_pointdensity(method= method, size=20, geom="text", aspect.ratio = a.r) + theme(aspect.ratio = a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + expect_lt(plot_data$ndensity[1], plot_data$ndensity[2]) + expect_gt(plot_data$ndensity[2], plot_data$ndensity[3]) + expect_lt(plot_data$ndensity[1], plot_data$ndensity[3]) + + a.r <- 1 + p1 <- + ggplot(df, aes(x, y)) + geom_pointdensity(method= method, size=20, aspect.ratio = a.r) + theme(aspect.ratio = a.r) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + expect_gt(plot_data$ndensity[2], plot_data$ndensity[3]) + expect_equal(plot_data$ndensity[1], plot_data$ndensity[3]) + }) + + test_that(cli::format_inline("coord_fixed adjusts density corretly ({.code method=\"{method}\"})"), { + n<-1 + df <- data.frame(idx = 1:3, x = rep(c(1,1,1.1), n), y = rep(c(1,2,2), n)) + + p1 <- + ggplot(df, aes(x, y, label =idx)) + stat_pointdensity(method= method, size=20, geom="text") + coord_fixed() + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + expect_lt(plot_data$ndensity[1], plot_data$ndensity[2]) + expect_gt(plot_data$ndensity[2], plot_data$ndensity[3]) + + p1 <- + ggplot(df, aes(x, y)) + geom_pointdensity(method= method, size=20) + coord_fixed(ratio = 1/10) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + expect_equal(plot_data$ndensity[1], plot_data$ndensity[3]) + + + p1 <- + ggplot(df, aes(x*3, y)) + geom_pointdensity(method= method, size=20) + coord_fixed(ratio = 3/10) + p1 + plot_data <- ggplot2::ggplot_build(p1)$data[[1]] + expect_lt(plot_data$ndensity[1], plot_data$ndensity[2]) + expect_equal(plot_data$ndensity[1], plot_data$ndensity[3]) + + }) + } + + test_that(cli::format_inline("isotropic normal ({.code method=\"{method}\"}) "), { + p1 <- ggplot(df_isotropic_normal, aes(x, y)) + geom_pointdensity(method= method) + coord_fixed() + p1 + expect_no_warning(print(p1)) + }) + + test_that(cli::format_inline("no variation in one axis ({.code method=\"{method}\"})"), { + p1 <- ggplot(df_isotropic_normal, aes(0*x, y)) + geom_pointdensity(method= method) + coord_fixed() + p1 + expect_no_warning(print(p1)) + }) + + test_that(cli::format_inline("no variation in both axis ({.code method=\"{method}\"})"), { + p1 <- ggplot(df_isotropic_normal, aes(0*x, 0*y)) + geom_pointdensity(method= method) + coord_fixed() + p1 + expect_no_warning(print(p1)) + }) + + test_that(cli::format_inline("single row data set ({.code method=\"{method}\"})"), { + p1 <- ggplot(df_isotropic_normal[1, ], aes(x, y)) + geom_pointdensity(method= method) + coord_fixed() + expect_no_error(print(p1)) + }) + + test_that(cli::format_inline("zero row data set ({.code method=\"{method}\"})"), { + df <- data.frame(x = numeric(0), y = numeric(0)) + p1 <- ggplot(df, aes(x, y)) + geom_pointdensity(method= method) + coord_fixed() + expect_no_error(print(p1)) + }) +}