From 72787a7684f6c16bdc70eea6c72f14102f9c5e7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= <kirill@cynkra.com>
Date: Fri, 16 Aug 2024 22:04:54 +0200
Subject: [PATCH 1/2] feat!: Rename `tbl_query()` to `tbl_function()`, remove
 `translate_duckdb()`

---
 NAMESPACE                                    |  2 +-
 R/backend-dbplyr__duckdb_connection.R        | 74 ++++++++++++--------
 man/backend-duckdb.Rd                        | 46 ++++++++----
 tests/testthat/test-tbl__duckdb_connection.R | 20 +++---
 4 files changed, 87 insertions(+), 55 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 52648629b..8eaec1966 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,8 +24,8 @@ export(duckdb_unregister_arrow)
 export(read_csv_duckdb)
 export(simulate_duckdb)
 export(tbl_file)
+export(tbl_function)
 export(tbl_query)
-export(translate_duckdb)
 exportClasses(duckdb_connection)
 exportClasses(duckdb_driver)
 exportClasses(duckdb_explain)
diff --git a/R/backend-dbplyr__duckdb_connection.R b/R/backend-dbplyr__duckdb_connection.R
index b946dbfe1..9512db8f0 100644
--- a/R/backend-dbplyr__duckdb_connection.R
+++ b/R/backend-dbplyr__duckdb_connection.R
@@ -11,35 +11,26 @@
 #' library(dplyr, warn.conflicts = FALSE)
 #' con <- DBI::dbConnect(duckdb(), path = ":memory:")
 #'
-#' dbiris <- copy_to(con, iris, overwrite = TRUE)
+#' db <- copy_to(con, data.frame(a = 1:3, b = letters[2:4]))
 #'
-#' dbiris %>%
-#'   select(Petal.Length, Petal.Width) %>%
-#'   filter(Petal.Length > 1.5) %>%
-#'   head(5)
+#' db %>%
+#'   filter(a > 1) %>%
+#'   select(b)
+#'
+#' path <- tempfile(fileext = ".csv")
+#' write.csv(data.frame(a = 1:3, b = letters[2:4]), path, row.names = FALSE)
+#'
+#' db_csv <- tbl_file(con, path)
+#' db_csv %>%
+#'   summarize(sum_a = sum(a))
+#'
+#' db_csv_fun <- tbl_function(con, paste0("read_csv_auto('", path, "')"))
+#' db_csv_fun %>%
+#'   count()
 #'
 #' DBI::dbDisconnect(con, shutdown = TRUE)
 NULL
 
-#' Connection object for simulation of the SQL generation without actual database.
-#' dbplyr overrides database specific identifier and string quotes
-#' @param ... Any parameters to be forwarded
-#' @export
-#' @rdname backend-duckdb
-simulate_duckdb <- function(...) {
-  structure(list(), ..., class = c("duckdb_connection", "TestConnection", "DBIConnection"))
-}
-
-#' Connection object for simulation of the SQL generation without actual database.
-#' This version keeps the database specific identifier and string quotes, i.e.
-#' allows to translate to DuckDB SQL dialect.
-#' @param ... Any parameters to be forwarded
-#' @export
-#' @rdname backend-duckdb
-translate_duckdb <- function(...) {
-  structure(list(), ..., class = c("duckdb_connection", "DBIConnection"))
-}
-
 # Declare which version of dbplyr API is being called.
 # @param con A \code{\link{dbConnect}} object, as returned by \code{dbConnect()}
 # @name dbplyr_edition
@@ -437,7 +428,7 @@ tbl.duckdb_connection <- function(src, from, ..., cache = FALSE) {
   NextMethod("tbl")
 }
 
-#' Create a lazy table from a Parquet or SQL file
+#' Create a lazy table from a Parquet, CSV or JSON file
 #'
 #' `tbl_file()` is an experimental variant of [dplyr::tbl()] to directly access files on disk.
 #' It is safer than `dplyr::tbl()` because there is no risk of misinterpreting the request,
@@ -458,26 +449,51 @@ tbl_file <- function(src, path, ..., cache = FALSE) {
   if (grepl("'", path)) {
     stop("File '", path, "' contains a single quote, this is not supported", call. = FALSE)
   }
-  tbl_query(src, paste0("'", path, "'"), cache = cache)
+  tbl_function(src, paste0("'", path, "'"), cache = cache)
 }
 
 #' Create a lazy table from a query
 #'
-#' `tbl_query()` is an experimental variant of [dplyr::tbl()]
+#' @description
+#' `tbl_function()` is an experimental variant of [dplyr::tbl()]
 #' to create a lazy table from a table-generating function,
 #' useful for reading nonstandard CSV files or other data sources.
 #' It is safer than `dplyr::tbl()` because there is no risk of misinterpreting the query.
-#' Use `dplyr::tbl(src, dplyr::sql("SELECT ... FROM ..."))` for custom SQL queries.
 #' See <https://duckdb.org/docs/data/overview> for details on data importing functions.
 #'
+#' As an alternative, use `dplyr::tbl(src, dplyr::sql("SELECT ... FROM ..."))` for custom SQL queries.
+#'
 #' @param query SQL code, omitting the `FROM` clause
 #' @export
 #' @rdname backend-duckdb
-tbl_query <- function(src, query, ..., cache = FALSE) {
+tbl_function <- function(src, query, ..., cache = FALSE) {
   if (cache) DBI::dbExecute(src, "PRAGMA enable_object_cache")
   table <- dplyr::sql(paste0("FROM ", query))
   dplyr::tbl(src, table)
 }
 
+#' Deprecated
+#'
+#' `tbl_query()` is deprecated in favor of `tbl_function()`.
+#' @export
+#' @rdname backend-duckdb
+tbl_query <- function(src, query, ...) {
+  .Deprecated("tbl_function") # emit a deprecation warning pointing at the new name
+  tbl_function(src, query, ...) # `cache = ` (if given) is forwarded through `...`
+}
+
+#' Connection object for simulating SQL generation without an actual database.
+#' dbplyr overrides database-specific identifier and string quotes.
+#'
+#' Use `simulate_duckdb()` with `lazy_frame()`
+#' to see simulated SQL without opening a DuckDB connection.
+#' @param ... Any parameters to be forwarded
+#' @export
+#' @rdname backend-duckdb
+simulate_duckdb <- function(...) {
+  # No real connection: an empty list with `...` attached as attributes.
+  structure(list(), ..., class = c("duckdb_connection", "TestConnection", "DBIConnection"))
+}
+
 # Needed to suppress the R CHECK notes (due to the use of sql_expr)
 utils::globalVariables(c("REGEXP_MATCHES", "CAST", "%AS%", "INTEGER", "XOR", "%<<%", "%>>%", "LN", "LOG", "ROUND", "ROUND_EVEN", "EXTRACT", "%FROM%", "MONTH", "STRFTIME", "QUARTER", "YEAR", "DATE_TRUNC", "DATE", "DOY", "TO_SECONDS", "BIGINT", "TO_MINUTES", "TO_HOURS", "TO_DAYS", "TO_WEEKS", "TO_MONTHS", "TO_YEARS", "STRPOS", "NOT", "REGEXP_REPLACE", "TRIM", "LPAD", "RPAD", "%||%", "REPEAT", "LENGTH", "STRING_AGG", "GREATEST", "LIST_EXTRACT", "LOG10", "LOG2", "STRING_SPLIT_REGEX", "FLOOR", "FMOD", "FDIV"))
diff --git a/man/backend-duckdb.Rd b/man/backend-duckdb.Rd
index 904b85b95..8fdd06fe1 100644
--- a/man/backend-duckdb.Rd
+++ b/man/backend-duckdb.Rd
@@ -1,27 +1,27 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/backend-dbplyr__duckdb_connection.R
 \name{backend-duckdb}
-\alias{simulate_duckdb}
-\alias{translate_duckdb}
 \alias{tbl_file}
+\alias{tbl_function}
 \alias{tbl_query}
+\alias{simulate_duckdb}
 \title{DuckDB SQL backend for dbplyr}
 \usage{
-simulate_duckdb(...)
+tbl_file(src, path, ..., cache = FALSE)
 
-translate_duckdb(...)
+tbl_function(src, query, ..., cache = FALSE)
 
-tbl_file(src, path, ..., cache = FALSE)
+tbl_query(src, query, ...)
 
-tbl_query(src, query, ..., cache = FALSE)
+simulate_duckdb(...)
 }
 \arguments{
-\item{...}{Any parameters to be forwarded}
-
 \item{src}{A duckdb connection object}
 
 \item{path}{Path to existing Parquet, CSV or JSON file}
 
+\item{...}{Any parameters to be forwarded}
+
 \item{cache}{Enable object cache for Parquet files}
 
 \item{query}{SQL code, omitting the \code{FROM} clause}
@@ -35,24 +35,40 @@ contains more mapped functions.
 It is safer than \code{dplyr::tbl()} because there is no risk of misinterpreting the request,
 and paths with special characters are supported.
 
-\code{tbl_query()} is an experimental variant of \code{\link[dplyr:tbl]{dplyr::tbl()}}
+\code{tbl_function()} is an experimental variant of \code{\link[dplyr:tbl]{dplyr::tbl()}}
 to create a lazy table from a table-generating function,
 useful for reading nonstandard CSV files or other data sources.
 It is safer than \code{dplyr::tbl()} because there is no risk of misinterpreting the query.
-Use \code{dplyr::tbl(src, dplyr::sql("SELECT ... FROM ..."))} for custom SQL queries.
 See \url{https://duckdb.org/docs/data/overview} for details on data importing functions.
+
+As an alternative, use \code{dplyr::tbl(src, dplyr::sql("SELECT ... FROM ..."))} for custom SQL queries.
+
+\code{tbl_query()} is deprecated in favor of \code{tbl_function()}.
+
+Use \code{simulate_duckdb()} with \code{lazy_frame()}
+to see simulated SQL without opening a DuckDB connection.
 }
 \examples{
 \dontshow{if (duckdb:::TEST_RE2 && rlang::is_installed("dbplyr")) withAutoprint(\{ # examplesIf}
 library(dplyr, warn.conflicts = FALSE)
 con <- DBI::dbConnect(duckdb(), path = ":memory:")
 
-dbiris <- copy_to(con, iris, overwrite = TRUE)
+db <- copy_to(con, data.frame(a = 1:3, b = letters[2:4]))
+
+db \%>\%
+  filter(a > 1) \%>\%
+  select(b)
+
+path <- tempfile(fileext = ".csv")
+write.csv(data.frame(a = 1:3, b = letters[2:4]), path, row.names = FALSE)
+
+db_csv <- tbl_file(con, path)
+db_csv \%>\%
+  summarize(sum_a = sum(a))
 
-dbiris \%>\%
-  select(Petal.Length, Petal.Width) \%>\%
-  filter(Petal.Length > 1.5) \%>\%
-  head(5)
+db_csv_fun <- tbl_function(con, paste0("read_csv_auto('", path, "')"))
+db_csv_fun \%>\%
+  count()
 
 DBI::dbDisconnect(con, shutdown = TRUE)
 \dontshow{\}) # examplesIf}
diff --git a/tests/testthat/test-tbl__duckdb_connection.R b/tests/testthat/test-tbl__duckdb_connection.R
index 522d172ab..09c56ecb1 100644
--- a/tests/testthat/test-tbl__duckdb_connection.R
+++ b/tests/testthat/test-tbl__duckdb_connection.R
@@ -28,7 +28,7 @@ test_that("Parquet files can be registered with dplyr::tbl()", {
   expect_true(tab3 %>% dplyr::count() %>% dplyr::collect() == 1000)
 })
 
-test_that("Parquet files can be registered with tbl_file() and tbl_query()", {
+test_that("Parquet files can be registered with tbl_file() and tbl_function()", {
   skip_if_not_installed("dbplyr")
 
   con <- DBI::dbConnect(duckdb())
@@ -38,15 +38,15 @@ test_that("Parquet files can be registered with tbl_file() and tbl_query()", {
   expect_true(inherits(tab0, "tbl_duckdb_connection"))
   expect_true(tab0 %>% dplyr::count() %>% dplyr::collect() == 1000)
 
-  tab1 <- tbl_query(con, "read_parquet(['data/userdata1.parquet'])")
+  tab1 <- tbl_function(con, "read_parquet(['data/userdata1.parquet'])")
   expect_true(inherits(tab1, "tbl_duckdb_connection"))
   expect_true(tab1 %>% dplyr::count() %>% dplyr::collect() == 1000)
 
-  tab2 <- tbl_query(con, "'data/userdata1.parquet'")
+  tab2 <- tbl_function(con, "'data/userdata1.parquet'")
   expect_true(inherits(tab2, "tbl_duckdb_connection"))
   expect_true(tab2 %>% dplyr::count() %>% dplyr::collect() == 1000)
 
-  tab3 <- tbl_query(con, "parquet_scan(['data/userdata1.parquet'])")
+  tab3 <- tbl_function(con, "parquet_scan(['data/userdata1.parquet'])")
   expect_true(inherits(tab3, "tbl_duckdb_connection"))
   expect_true(tab3 %>% dplyr::count() %>% dplyr::collect() == 1000)
 })
@@ -69,7 +69,7 @@ test_that("Object cache can be enabled for parquet files with dplyr::tbl()", {
   expect_true(DBI::dbGetQuery(con, "SELECT value FROM duckdb_settings() WHERE name='enable_object_cache';") == "false")
 })
 
-test_that("Object cache can be enabled for parquet files with tbl_file() and tbl_query()", {
+test_that("Object cache can be enabled for parquet files with tbl_file() and tbl_function()", {
   skip_if_not_installed("dbplyr")
   # https://github.com/tidyverse/dbplyr/issues/1384
   skip_if(packageVersion("dbplyr") >= "2.4.0")
@@ -82,7 +82,7 @@ test_that("Object cache can be enabled for parquet files with tbl_file() and tbl
   expect_true(DBI::dbGetQuery(con, "SELECT value FROM duckdb_settings() WHERE name='enable_object_cache';") == "true")
 
   DBI::dbExecute(con, "SET enable_object_cache=False;")
-  tab2 <- tbl_query(con, "'data/userdata1.parquet'", cache = FALSE)
+  tab2 <- tbl_function(con, "'data/userdata1.parquet'", cache = FALSE)
   expect_true(DBI::dbGetQuery(con, "SELECT value FROM duckdb_settings() WHERE name='enable_object_cache';") == "false")
 })
 
@@ -108,7 +108,7 @@ test_that("CSV files can be registered with dplyr::tbl()", {
   expect_true(tab2 %>% dplyr::count() %>% dplyr::collect() == 150)
 })
 
-test_that("CSV files can be registered with tbl_file() and tbl_query()", {
+test_that("CSV files can be registered with tbl_file() and tbl_function()", {
   skip_if_not_installed("dbplyr")
 
   path <- file.path(tempdir(), "duckdbtest.csv")
@@ -122,7 +122,7 @@ test_that("CSV files can be registered with tbl_file() and tbl_query()", {
   expect_true(inherits(tab1, "tbl_duckdb_connection"))
   expect_true(tab1 %>% dplyr::count() %>% dplyr::collect() == 150)
 
-  tab2 <- tbl_query(con, paste0("read_csv_auto('", path, "')"))
+  tab2 <- tbl_function(con, paste0("read_csv_auto('", path, "')"))
   expect_true(inherits(tab2, "tbl_duckdb_connection"))
   expect_true(tab2 %>% dplyr::count() %>% dplyr::collect() == 150)
 })
@@ -141,13 +141,13 @@ test_that("Other replacement scans or functions can be registered with dplyr::tb
   expect_true(obj %>% dplyr::filter(keyword_name == "all") %>% dplyr::count() %>% dplyr::collect() == 1)
 })
 
-test_that("Other replacement scans or functions can be registered with tbl_query()", {
+test_that("Other replacement scans or functions can be registered with tbl_function()", {
   skip_if_not_installed("dbplyr")
 
   con <- DBI::dbConnect(duckdb())
   on.exit(DBI::dbDisconnect(con, shutdown = TRUE))
 
-  obj <- tbl_query(con, "duckdb_keywords()")
+  obj <- tbl_function(con, "duckdb_keywords()")
   expect_true(inherits(obj, "tbl_duckdb_connection"))
   expect_true(obj %>% dplyr::filter(keyword_name == "all") %>% dplyr::count() %>% dplyr::collect() == 1)
 })

From 79b64625d26edbde36267a7ec85ec72d66044aec Mon Sep 17 00:00:00 2001
From: krlmlr <krlmlr@users.noreply.github.com>
Date: Fri, 16 Aug 2024 20:23:28 +0000
Subject: [PATCH 2/2] chore: Auto-update from GitHub Actions

Run: https://github.com/duckdb/duckdb-r/actions/runs/10425747078
---
 .github/dep-suggests-matrix.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/dep-suggests-matrix.json b/.github/dep-suggests-matrix.json
index fc09c64f5..e69de29bb 100644
--- a/.github/dep-suggests-matrix.json
+++ b/.github/dep-suggests-matrix.json
@@ -1 +0,0 @@
-{"package":["adbcdrivermanager","arrow","bit64","clock","DBItest","dbplyr","dplyr"]}