diff --git a/DESCRIPTION b/DESCRIPTION index d0a0a6fd..75db521f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Description: A drop-in replacement for 'dplyr', powered by 'DuckDB' for performa Also defines a set of generics that provide a low-level implementer's interface for the high-level user interface of 'dplyr'. License: MIT + file LICENSE -URL: https://tidyverse.github.io/duckplyr, https://github.com/tidyverse/duckplyr +URL: https://duckplyr.tidyverse.org, https://github.com/tidyverse/duckplyr BugReports: https://github.com/tidyverse/duckplyr/issues Depends: R (>= 4.1.0) diff --git a/README.Rmd b/README.Rmd index 5bee8243..91a07085 100644 --- a/README.Rmd +++ b/README.Rmd @@ -31,7 +31,7 @@ local({ Sys.setenv(DUCKPLYR_OUTPUT_ORDER = TRUE) ``` -# duckplyr +# duckplyr [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) diff --git a/README.md b/README.md index 8b163b69..25233f1f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# duckplyr +# duckplyr @@ -41,8 +41,8 @@ Or from [GitHub](https://github.com/) with: There are two ways to use duckplyr. -1. To enable duckplyr for individual data frames, use [`duckplyr::as_duckplyr_tibble()`](https://tidyverse.github.io/duckplyr/reference/as_duckplyr_tibble.html) as the first step in your pipe, without attaching the package. -2. By calling [`library(duckplyr)`](https://tidyverse.github.io/duckplyr/), it overwrites dplyr methods and is automatically enabled for the entire session without having to call `as_duckplyr_tibble()`. To turn this off, call `methods_restore()`. +1. To enable duckplyr for individual data frames, use [`duckplyr::as_duckplyr_tibble()`](https://tidyverse.github.io/duckplyr/reference/as_duckplyr_df.html) as the first step in your pipe, without attaching the package. +2. 
By calling [`library(duckplyr)`](https://duckplyr.tidyverse.org), it overwrites dplyr methods and is automatically enabled for the entire session without having to call `as_duckplyr_tibble()`. To turn this off, call `methods_restore()`. The examples below illustrate both methods. See also the companion [demo repository](https://github.com/Tmonster/duckplyr_demo) for a use case with a large dataset. @@ -50,14 +50,14 @@ The examples below illustrate both methods. See also the companion [demo reposit This example illustrates usage of duckplyr for individual data frames. -Use [`duckplyr::as_duckplyr_tibble()`](https://tidyverse.github.io/duckplyr/reference/as_duckplyr_tibble.html) to enable processing with duckdb: +Use [`duckplyr::as_duckplyr_tibble()`](https://duckplyr.tidyverse.org/reference/as_duckplyr_df.html) to enable processing with duckdb:
out <- palmerpenguins::penguins %>% # CAVEAT: factor columns are not supported yet mutate(across(where(is.factor), as.character)) %>% - duckplyr::as_duckplyr_tibble() %>% + duckplyr::as_duckplyr_tibble() %>% mutate(bill_area = bill_length_mm * bill_depth_mm) %>% summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>% filter(species != "Gentoo")@@ -77,86 +77,100 @@ duckdb is responsible for eventually carrying out the operations. Despite the la explain() #> ┌───────────────────────────┐ #> │ ORDER_BY │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ ORDERS: │ +#> │ ──────────────────── │ #> │ dataframe_42_42 │ #> │ 42.___row_number ASC │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ FILTER │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │r_base::!=(species, 'Gentoo│ -#> │ ') │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ EC: 34 │ -#> └─────────────┬─────────────┘ +#> │ ──────────────────── │ +#> │ "r_base::!="(species, │ +#> │ 'Gentoo') │ +#> │ │ +#> │ ~34 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ #0 │ #> │ #1 │ #> │ #2 │ #> │ #3 │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~172 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ STREAMING_WINDOW │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ +#> │ Projections: │ #> │ ROW_NUMBER() OVER () │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ ORDER_BY │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ ORDERS: │ +#> │ ──────────────────── │ #> │ dataframe_42_42 │ #> │ 42.___row_number ASC │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ HASH_GROUP_BY │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ +#> │ Groups: │ #> │ #0 │ #> │ #1 │ +#> │ │ +#> │ Aggregates: │ #> │ min(#2) │ #> │ mean(#3) │ -#> 
└─────────────┬─────────────┘ +#> │ │ +#> │ ~172 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ species │ #> │ sex │ #> │ ___row_number │ #> │ bill_area │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~344 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ #0 │ #> │ #1 │ #> │ #2 │ #> │ #3 │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~344 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ STREAMING_WINDOW │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ +#> │ Projections: │ #> │ ROW_NUMBER() OVER () │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ species │ #> │ sex │ #> │ bill_area │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~344 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ R_DATAFRAME_SCAN │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ data.frame │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ │ +#> │ Projections: │ #> │ species │ #> │ bill_length_mm │ #> │ bill_depth_mm │ #> │ sex │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ EC: 344 │ +#> │ │ +#> │ ~344 Rows │ #> └───────────────────────────┘ All data frame operations are supported. Computation happens upon the first request. @@ -169,13 +183,13 @@ All data frame operations are supported. 
Computation happens upon the first requ #> --------------------- #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] #> Order [___row_number ASC] -#> Filter [!=(species, 'Gentoo')] +#> Filter ["!="(species, 'Gentoo')] #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area, row_number() OVER () as ___row_number] #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] #> Order [___row_number ASC] #> Aggregate [species, sex, min(___row_number), mean(bill_area)] #> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, bill_area as bill_area, row_number() OVER () as ___row_number] -#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area] +#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, "*"(bill_length_mm, bill_depth_mm) as bill_area] #> r_dataframe_scan(0xdeadbeef) #> #> --------------------- @@ -204,7 +218,7 @@ After the computation has been carried out, the results are available immediatel This example illustrates usage of duckplyr for all data frames in the R session. 
-Use [`library(duckplyr)`](https://tidyverse.github.io/duckplyr/) or [`duckplyr::methods_overwrite()`](https://tidyverse.github.io/duckplyr/reference/methods_overwrite.html) to overwrite dplyr methods and enable processing with duckdb for all data frames: +Use [`library(duckplyr)`](https://duckplyr.tidyverse.org) or [`duckplyr::methods_overwrite()`](https://duckplyr.tidyverse.org/reference/methods_overwrite.html) to overwrite dplyr methods and enable processing with duckdb for all data frames:
duckplyr::methods_overwrite() @@ -238,13 +252,13 @@ Querying the number of rows also starts the computation: #> --------------------- #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] #> Order [___row_number ASC] -#> Filter [!=(species, 'Gentoo')] +#> Filter ["!="(species, 'Gentoo')] #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area, row_number() OVER () as ___row_number] #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] #> Order [___row_number ASC] #> Aggregate [species, sex, min(___row_number), mean(bill_area)] #> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, bill_area as bill_area, row_number() OVER () as ___row_number] -#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area] +#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, "*"(bill_length_mm, bill_depth_mm) as bill_area] #> r_dataframe_scan(0xdeadbeef) #> #> --------------------- @@ -298,7 +312,7 @@ The first time the package encounters an unsupported function, data type, or opepalmerpenguins::penguins %>% - duckplyr::as_duckplyr_tibble() %>% + duckplyr::as_duckplyr_tibble() %>% transmute(bill_area = bill_length_mm * bill_depth_mm) %>% head(3) #> The duckplyr package is configured to fall back to dplyr when it encounters an @@ -353,7 +367,7 @@ The dbplyr package is a dplyr backend that connects to SQL databases, and is des This package also provides generics, for which other packages may then 
implement methods.-library(duckplyr)
+library(duckplyr)#> ✔ Overwriting dplyr methods with duckplyr methods. #> ℹ Turn off with `duckplyr::methods_restore()`.diff --git a/man/figures/logo.png b/man/figures/logo.png new file mode 100644 index 00000000..fa3750d6 Binary files /dev/null and b/man/figures/logo.png differ