diff --git a/README.Rmd b/README.Rmd index b47825d..075196c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -11,7 +11,8 @@ knitr::opts_chunk$set( fig.path = "man/figures/README-", out.width = "100%" ) -library("magrittr") + +library("purrr") # prevent `purrr` load message by `furrr` devtools::load_all() ``` @@ -24,45 +25,24 @@ devtools::load_all() ## Example +**Create a new column with average temp over last seven days in the same month**. + ```{r} set.seed(101) -air_df = airquality %>% +airquality |> # create date column - dplyr::mutate(date_col = as.Date(paste("1973", - stringr::str_pad(Month, - width = 2, - side = "left", - pad = "0" - ), - stringr::str_pad(Day, - width = 2, - side = "left", - pad = "0" - ), - sep = "-" - ) - ) - ) %>% + dplyr::mutate(date_col = lubridate::make_date(1973, Month, Day)) |> # create gaps by removing some days - dplyr::slice_sample(prop = 0.8) %>% - tibble::as_tibble() - -air_df -``` - -**Create a new column with average temp over last seven days in the same month**. - -```{r} -air_df %>% + dplyr::slice_sample(prop = 0.8) |> # compute mean temperature over last seven days in the same month - mutate(avg_temp_over_last_week = mean(Temp, na.rm = TRUE), - .order_by = Day, - .by = Month, - .frame = c(lubridate::days(7), # 7 days before current row - lubridate::days(-1) # do not include current row - ), - .index = date_col - ) + tidier::mutate(avg_temp_over_last_week = mean(Temp, na.rm = TRUE), + .order_by = Day, + .by = Month, + .frame = c(lubridate::days(7), # 7 days before current row + lubridate::days(-1) # do not include current row + ), + .index = date_col + ) ``` ## Motivation @@ -75,16 +55,24 @@ This implementation is inspired by Apache Spark's [`windowSpec`](https://spark.a 2. [`tidypyspark`](https://talegari.github.io/tidypyspark/_build/html/index.html) python package implements `mutate` style window computation API for pyspark. -## Acknowledgements - -`tidier` package is deeply indebted to two amazing packages and people behind it. 
- -1. [`dplyr`](https://cran.r-project.org/package=dplyr): Hadley wickham -2. [`slider`](https://cran.r-project.org/package=slider): Davis Vaughan - ## Installation - dev: `remotes::install_github("talegari/tidier")` - cran: `install.packages("tidier")` ----- +## Acknowledgements + +`tidier` package is deeply indebted to two amazing packages and people behind it. + +1. [`dplyr`](https://cran.r-project.org/package=dplyr): +``` +Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A +Grammar of Data Manipulation_. R package version 1.1.0, +. +``` + +2. [`slider`](https://cran.r-project.org/package=slider): +``` +Vaughan D (2021). _slider: Sliding Window Functions_. R package +version 0.2.2, . +``` diff --git a/README.md b/README.md index 6bf5cfa..06d6ea7 100644 --- a/README.md +++ b/README.md @@ -13,68 +13,25 @@ style window aggregation for R dataframes via ## Example -``` r -set.seed(101) -air_df = airquality %>% - # create date column - dplyr::mutate(date_col = as.Date(paste("1973", - stringr::str_pad(Month, - width = 2, - side = "left", - pad = "0" - ), - stringr::str_pad(Day, - width = 2, - side = "left", - pad = "0" - ), - sep = "-" - ) - ) - ) %>% - # create gaps by removing some days - dplyr::slice_sample(prop = 0.8) %>% - tibble::as_tibble() - -air_df -#> # A tibble: 122 × 7 -#> Ozone Solar.R Wind Temp Month Day date_col -#> -#> 1 10 264 14.3 73 7 12 1973-07-12 -#> 2 NA 127 8 78 6 26 1973-06-26 -#> 3 16 77 7.4 82 8 3 1973-08-03 -#> 4 14 191 14.3 75 9 28 1973-09-28 -#> 5 NA 138 8 83 6 30 1973-06-30 -#> 6 NA 98 11.5 80 6 28 1973-06-28 -#> 7 122 255 4 89 8 7 1973-08-07 -#> 8 47 95 7.4 87 9 5 1973-09-05 -#> 9 23 220 10.3 78 9 8 1973-09-08 -#> 10 NA 286 8.6 78 6 1 1973-06-01 -#> # … with 112 more rows -``` - **Create a new column with average temp over last seven days in the same month**. 
``` r -air_df %>% +set.seed(101) +airquality |> + # create date column + dplyr::mutate(date_col = lubridate::make_date(1973, Month, Day)) |> + # create gaps by removing some days + dplyr::slice_sample(prop = 0.8) |> # compute mean temperature over last seven days in the same month - mutate(avg_temp_over_last_week = mean(Temp, na.rm = TRUE), - .order_by = Day, - .by = Month, - .frame = c(lubridate::days(7), # 7 days before current row - lubridate::days(-1) # do not include current row - ), - .index = date_col - ) -#> -#> Attaching package: 'purrr' -#> The following object is masked from 'package:testthat': -#> -#> is_null -#> The following object is masked from 'package:magrittr': -#> -#> set_names + tidier::mutate(avg_temp_over_last_week = mean(Temp, na.rm = TRUE), + .order_by = Day, + .by = Month, + .frame = c(lubridate::days(7), # 7 days before current row + lubridate::days(-1) # do not include current row + ), + .index = date_col + ) #> # A tibble: 122 × 8 #> Month Ozone Solar.R Wind Temp Day date_col avg_temp_over_last_week #> @@ -112,17 +69,27 @@ and python package implements `mutate` style window computation API for pyspark. +## Installation + +- dev: `remotes::install_github("talegari/tidier")` +- cran: `install.packages("tidier")` + ## Acknowledgements `tidier` package is deeply indebted to two amazing packages and people behind it. -1. [`dplyr`](https://cran.r-project.org/package=dplyr): Hadley wickham -2. [`slider`](https://cran.r-project.org/package=slider): Davis Vaughan +1. [`dplyr`](https://cran.r-project.org/package=dplyr): -## Installation + -- dev: `remotes::install_github("talegari/tidier")` -- cran: `install.packages("tidier")` + Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A + Grammar of Data Manipulation_. R package version 1.1.0, + . + +2. [`slider`](https://cran.r-project.org/package=slider): + + ------------------------------------------------------------------------- + Vaughan D (2021). 
_slider: Sliding Window Functions_. R package + version 0.2.2, <https://CRAN.R-project.org/package=slider>. diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000..8c254c5 --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,17 @@ +citHeader("To cite tidier in publications use:") + +citEntry( + entry = "Manual", + title = "tidier: Enhanced 'mutate'", + author = "Srikanth Komala Sheshachala", + year = "2023", + url = "https://CRAN.R-project.org/package=tidier", + textVersion = paste( + "Srikanth Komala Sheshachala", + "(2023).", + "tidier: Enhanced 'mutate'.", + "Provides ‘Apache Spark’ style window aggregation for R dataframes via ‘mutate’ in ‘dplyr’ flavour.", + "R package version 4.2.0.", + "https://CRAN.R-project.org/package=tidier" + ) +)