Skip to content

Commit

Permalink
Added Redshift support (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
mermelstein authored Jul 24, 2023
1 parent c28f527 commit c47e361
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 36 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: snowquery
Title: Query 'Snowflake' Databases with 'SQL'
Version: 1.0.0
Version: 1.1.0
Authors@R:
person("Dani", "Mermelstein", , "dmermelstein@hey.com", role = c("aut", "cre", "cph"))
Maintainer: Dani Mermelstein <dmermelstein@hey.com>
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# snowquery 1.1.0

* Added Redshift support

# snowquery 1.0.0

* Formalized structure for database credential file
Expand Down
48 changes: 28 additions & 20 deletions R/queryDB.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' @title Query a Snowflake or Postgres database
#' @description Run a SQL query on a Snowflake or Postgres database (requires a `~/snowquery_creds.yaml` file)
#' @title Query a database
#' @description Run a SQL query on a Snowflake, Redshift or Postgres database and return the results as a data frame. See the [snowquery README](https://github.com/mermelstein/snowquery#credentials) for more information on how to pass in your credentials.
#'
#' @param query A string of the SQL query to execute
#' @param conn_name The name of the connection to use in snowquery_creds.yaml (e.g. "my_snowflake_dwh")
#' @param db_type The type of database to connect to (e.g. "snowflake" or "postgres")
#' @param db_type The type of database to connect to (e.g. "snowflake", "redshift" or "postgres")
#' @param username The username to use for authentication
#' @param password The password to use for authentication
#' @param host The hostname or IP address of the database server
Expand Down Expand Up @@ -53,6 +53,7 @@ queryDB <- function(
warehouse = NULL,
account = NULL,
role = NULL,
sslmode = NULL,
timeout = 15)
{

Expand All @@ -64,6 +65,9 @@ queryDB <- function(
}
}

db_type_error_message <- paste0("Invalid db_type '", db_type, "'. \n",
"Snowquery currently only supports the following database types: 'snowflake', 'redshift' and 'postgres'")

# pull in the credential file
snowquery_creds_filepath <- '~/snowquery_creds.yaml'
snowquery_creds <- yaml::read_yaml(snowquery_creds_filepath, fileEncoding = "UTF-8")
Expand All @@ -76,10 +80,10 @@ queryDB <- function(

# Check if db_type is provided
if (missing(db_type) || is.null(db_type)) {
stop(paste0("db_type is missing.\n",
"Please provide a database type to queryDB(). Expected values are 'snowflake' or 'postgres'.\n",
stop(paste0("db_type is missing for the '", conn_name, "' connection.\n",
"Please provide a database type to queryDB(). Expected values are 'snowflake', 'redshift' or 'postgres'.\n",
"You can add a db_type variable to the '", conn_name, "' connection in the snowquery_creds.yaml file or pass it in manually:\n",
"For example: queryDB('SELECT * FROM my_table', conn_name = 'snowflake', db_type = 'snowflake')"))
"For example: queryDB('SELECT * FROM my_table', conn_name = '", conn_name, "', db_type = 'snowflake')"))
}

if (tolower(db_type) == "snowflake") {
Expand All @@ -98,13 +102,9 @@ queryDB <- function(
# Import the snowflake.connector module from the snowflake-connector-python package
snowflake <- import("snowflake.connector")
}, error = function(e) {
# stop(paste0("Failed to import the snowflake.connector module. Please make sure it is installed and accessible from your environment. \n",
# "Try running the following command from your terminal or command line:\n\n",
# "pip install 'snowflake-connector-python[pandas]'\n\n",
# "Error message: ", e$message))
stop(paste0("Failed to find the python executable. Please make sure python 3 is installed and accessible from your environment.\n",
"You can download Python 3 from https://www.python.org/downloads/ or via Homebrew if on MacOS. \n",
"After installing Python 3, make sure it is added to your system PATH. \n",
stop(paste0("Failed to import the snowflake.connector module. Please make sure it is installed and accessible from your environment. \n",
"Try running the following command from your terminal or command line:\n\n",
"pip install 'snowflake-connector-python[pandas]'\n\n",
"Error message: ", e$message))
})
username_ <- check_null(username, check_null(conn_details$user, NULL))
Expand All @@ -129,7 +129,7 @@ queryDB <- function(
"The following credential variable(s) are missing: ", paste(missing_vars, collapse = ", "), ".\n",
"Please pass in credentials to queryDB() or add them to the snowquery_creds.yaml file."))
} else {
# Use available credentials to build connection string
# Use credentials to build connection string
con <- snowflake$connect(
user = username_,
password = password_,
Expand All @@ -151,13 +151,14 @@ queryDB <- function(
# Return the query results
return(df)

} else if (tolower(db_type) == "postgres") {
} else if (tolower(db_type) %in% c("postgres", "redshift")) {
# Check if credentials are provided manually by user
database_ <- check_null(database, check_null(conn_details$database, NULL))
username_ <- check_null(username, check_null(conn_details$username, NULL))
password_ <- check_null(password, check_null(conn_details$password, NULL))
port_ <- check_null(port, check_null(conn_details$port, NULL))
host_ <- check_null(host, check_null(conn_details$host, NULL))
sslmode_ <- check_null(sslmode, check_null(conn_details$sslmode, NULL))
# Check if any credentials are missing
if (is.null(username_) || is.null(password_) || is.null(host_) || is.null(database_) || is.null(port_)) {
# Get the names of the missing credential variables
Expand All @@ -168,17 +169,25 @@ queryDB <- function(
if (is.null(port_)) missing_vars <- c(missing_vars, "port")
if (is.null(host_)) missing_vars <- c(missing_vars, "host")
# Error message if credentials are missing
stop(paste0("Missing credentials for the postgres connection. \n",
stop(paste0("Missing credentials for the ", db_type, " connection. \n",
"The following credential variable(s) are missing: ", paste(missing_vars, collapse = ", "), ".\n",
"Please pass in credentials to queryDB() or add them to the snowquery_creds.yaml file."))
} else {
# Use available credentials to build connection string
con <- DBI::dbConnect(RPostgres::Postgres(),
if (tolower(db_type) == "postgres") {
driver_type <- RPostgres::Postgres()
} else if (tolower(db_type) == "redshift") {
driver_type <- RPostgres::Redshift()
} else {
stop(db_type_error_message)
}
# Use credentials to build connection string
con <- DBI::dbConnect(driver_type,
dbname = database_,
host = host_,
port = port_,
user = username_,
password = password_,
sslmode = sslmode_,
connect_timeout = timeout # seconds
)
}
Expand All @@ -193,7 +202,6 @@ queryDB <- function(
return(df)

} else {
stop(paste0("Invalid db_type '", db_type, "'. \n",
"Snowquery currently only supports 'snowflake' and 'postgres' database types."))
stop(db_type_error_message)
}
}
10 changes: 5 additions & 5 deletions R/snowquery-package.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#' @keywords internal
"_PACKAGE"

#' This package provides functions for querying Snowflake databases using R.
#' To use this package, you will need to provide your Snowflake credentials in a
#' This package provides functions for querying Snowflake, Redshift and Postgres databases using R.
#' To use this package, you will need to provide your database credentials in a
#' YAML file called `snowquery_creds.yaml`. The file should be located in
#' your home directory (i.e. at `~/snowquery_creds.yaml`) and should have the following format:
#'
#' ```yaml
#' ---
#' my_snowflake_dwh:
#' db_type: 'snowflake'
#' db_type: 'snowflake' # or 'redshift' or 'postgres'
#' account: 'your_account_name'
#' warehouse: 'your_warehouse_name'
#' database: 'your_database_name'
Expand All @@ -22,6 +22,6 @@
#' This follows a named connection format, where you can have multiple named connections in the same file.
#' For example you might have a `my_snowflake_dwh` connection and a `my_snowflake_admin` connection, each with their own credentials.
#'
#' Replace the values in the YAML file with your own Snowflake credentials.
#' Replace the values in the YAML file with your own credentials.
#' Once you have created the `snowquery_creds.yaml` file, you can use the
#' `queryDB()` function to query your Snowflake database.
#' `queryDB()` function to query your database.
34 changes: 24 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,9 @@

## Overview

Run SQL queries on a Snowflake instance from an R script. This will be similar to how you might be using DBI or odbc to query a postgres or Redshift database, but because Snowflake's driver requires a ton of fiddling in order to make it work for R this is an alternate solution.
Run SQL queries on Snowflake, Redshift, or a postgres database from an R script.

This sums up the current experience of running SQL against Snowflake from:

- python: good &#x2705;
- R: bad &#x274C;

That's why the `snowquery` package takes the [Snowflake python connector](https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-pandas) and leverages it in the background to run queries from R.
This package is designed to make it easy to run SQL queries from R against Snowflake, Redshift, or Postgres databases. Other databases are not currently supported, but the package could be extended to support them.

### Installation

Expand All @@ -26,6 +21,23 @@ install.packages("snowquery")
devtools::install_github("mermelstein/snowquery")
```

### Redshift notes

Redshift support is currently only available in the development version of this package. See the [installation instructions](#installation) above.

When connecting to a Redshift DWH you might need to specify an SSL connection. You can do this by adding an `sslmode: 'require'` variable to the connection in your `snowquery_creds.yaml` file or by passing `sslmode = 'require'` to the `queryDB()` function directly.

### Snowflake notes

Snowflake's driver requires a ton of fiddling in order to make it work with R. It sucks. A lot.

To sum up the current experience of running SQL against Snowflake from:

- python: good &#x2705;
- R: bad &#x274C;

That's why the `snowquery` package takes the [Snowflake python connector](https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-pandas) and leverages it in the background to run queries from R.

### Documentation

For more information on using `snowquery`, please see the [package website](https://snowquery.org).
Expand All @@ -44,7 +56,9 @@ If you need to install the Snowflake python connector, you can do that with the
pip install "snowflake-connector-python[pandas]"
```

You will also need to have your Snowflake credentials in a YAML file called `snowquery_creds.yaml`. The file should be located in the
### Credentials

You will need to have your database credentials in a YAML file called `snowquery_creds.yaml`. The file should be located in
your home directory (i.e. at `~/snowquery_creds.yaml`) and should have the following format:

```yaml
Expand All @@ -62,15 +76,15 @@ my_snowflake_dwh:

This follows a named connection format, where you can have multiple named connections in the same file. For example you might have a `my_snowflake_dwh` connection and a `my_snowflake_admin` connection, each with their own credentials.

The main function of this package looks for that file at this location: `~/snowquery_creds.yaml`. **If it is in any other location it will not work.** If the package cannot locate the file you will receive an error like: `cannot open file '/expected/path/to/file/snowquery_creds.yaml': No such file or directory`.
This package looks for the credential file at this location: `~/snowquery_creds.yaml`. **If it is in any other location it will not work.** If the package cannot locate the file you will receive an error like: `cannot open file '/expected/path/to/file/snowquery_creds.yaml': No such file or directory`. You can manually pass credentials to the `queryDB()` function but it is recommended to use the YAML file.

You are now ready to query away!

### Usage

Load this library in your R environment with `library(snowquery)`.

There is one function you need: `queryDB()`. It will take a string parameter and run that as a SQL query.
There is one function you need: `queryDB()`. It takes a SQL query as a string parameter and runs it on the database.

For example:

Expand Down

0 comments on commit c47e361

Please sign in to comment.