diff --git a/README.Rmd b/README.Rmd index 4db6da8d..dc65efdb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,6 +15,24 @@ knitr::opts_chunk$set( pkgload::load_all() set.seed(20230702) + +# Replace values that vary between runs (hex addresses, generated +# data frame names, row-number column prefixes) with fixed placeholders. +clean_output <- function(x, options) { + x <- gsub("0x[0-9a-f]+", "0xdeadbeef", x) + x <- gsub("dataframe_[0-9]*_[0-9]*", " dataframe_42_42 ", x) + x <- gsub("[0-9]*\\.___row_number ASC", " 42.___row_number ASC ", x) + x +} + +# Run clean_output() after the document hook that was already registered, +# rather than replacing that hook outright. +local({ + hook_document <- knitr::knit_hooks$get("document") + knitr::knit_hooks$set(document = function(x) clean_output(hook_document(x))) +}) + +Sys.setenv(DUCKPLYR_OUTPUT_ORDER = TRUE) ``` # duckplyr diff --git a/README.md b/README.md index b965034f..836a619f 100644 --- a/README.md +++ b/README.md @@ -76,24 +76,26 @@ duckdb is responsible for eventually carrying out the operations. Despite the la out %>% explain() #> ┌───────────────────────────┐ -#> │ HASH_GROUP_BY │ +#> │ ORDER_BY │ #> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ #0 │ -#> │ #1 │ -#> │ mean(#2) │ +#> │ ORDERS: │ +#> │ dataframe_42_42 │ +#> │ 42.___row_number ASC │ #> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ -#> │ PROJECTION │ +#> │ HASH_GROUP_BY │ #> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ species │ -#> │ sex │ -#> │ bill_area │ +#> │ #0 │ +#> │ #1 │ +#> │ min(#2) │ +#> │ mean(#3) │ #> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ #> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ #> │ species │ #> │ sex │ +#> │ ___row_number │ #> │ bill_area │ #> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ @@ -104,6 +106,18 @@ duckdb is responsible for eventually carrying out the operations. 
Despite the la #> │ EC: 344 │ #> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ +#> │ STREAMING_WINDOW │ +#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ROW_NUMBER() OVER () │ +#> └─────────────┬─────────────┘ +#> ┌─────────────┴─────────────┐ +#> │ PROJECTION │ +#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ species │ +#> │ sex │ +#> │ bill_area │ +#> └─────────────┬─────────────┘ +#> ┌─────────────┴─────────────┐ #> │ R_DATAFRAME_SCAN │ #> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ #> │ data.frame │ @@ -125,9 +139,12 @@ All data frame operations are supported. Computation happens upon the first requ #> --- Relation Tree --- #> --------------------- #> Filter [!=(species, 'Gentoo')] -#> Aggregate [species, sex, mean(bill_area)] -#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area] -#> r_dataframe_scan(0x10c4c7628) +#> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] +#> Order [___row_number ASC] +#> Aggregate [species, sex, min(___row_number), mean(bill_area)] +#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, bill_area as bill_area, row_number() OVER () as ___row_number] +#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area] +#> r_dataframe_scan(0xdeadbeef) #> #> --------------------- #> -- Result Columns -- @@ -136,7 +153,7 @@ All data frame operations are supported. 
Computation happens upon the first requ #> - sex (VARCHAR) #> - mean_bill_area (DOUBLE) #> -#> [1] 770.2627 656.8523 819.7503 694.9360 984.2279 +#> [1] 770.2627 656.8523 694.9360 819.7503 984.2279 After the computation has been carried out, the results are available immediately: @@ -147,8 +164,8 @@ After the computation has been carried out, the results are available immediatel #> <chr> <chr> <dbl> #> 1 Adelie male 770. #> 2 Adelie female 657. -#> 3 Chinstrap female 820. -#> 4 Adelie NA 695. +#> 3 Adelie NA 695. +#> 4 Chinstrap female 820. #> 5 Chinstrap male 984. ### Session-wide usage @@ -169,10 +186,7 @@ This is the same query as above, without [`as_duckplyr_df()`](https://duckdblabs mutate(across(where(is.factor), as.character)) %>% mutate(bill_area = bill_length_mm * bill_depth_mm) %>% summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>% - filter(species != "Gentoo") -#> Error processing with relational. -#> Caused by error in `duckdb_rel_from_df()`: -#> ! Can't convert factor columns to relational. Affected column: `species`. 
+ filter(species != "Gentoo") The result is a plain tibble now: @@ -189,9 +203,12 @@ Querying the number of rows also starts the computation: #> --- Relation Tree --- #> --------------------- #> Filter [!=(species, 'Gentoo')] -#> Aggregate [species, sex, mean(bill_area)] -#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area] -#> r_dataframe_scan(0x10a81d568) +#> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] +#> Order [___row_number ASC] +#> Aggregate [species, sex, min(___row_number), mean(bill_area)] +#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, bill_area as bill_area, row_number() OVER () as ___row_number] +#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area] +#> r_dataframe_scan(0xdeadbeef) #> #> --------------------- #> -- Result Columns --