Skip to content

Commit

Permalink
fix problem with reading parquet files using gh api
Browse files Browse the repository at this point in the history
  • Loading branch information
cole-brokamp committed Aug 6, 2024
1 parent ebf5876 commit 0a03afe
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 51 deletions.
23 changes: 14 additions & 9 deletions R/dpkg_release_gh.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dpkg_gh_release <- function(x, draft = TRUE) {
httr2::req_body_file(written_path) |>
httr2::req_perform()

message("created draft release at: ", draft_release_details$html_url)
message("created (draft) release at: ", draft_release_details$html_url)
return(invisible(draft_release_details$html_url))
}

Expand Down Expand Up @@ -79,6 +79,7 @@ get_gh_token <- function() {

# stow a github release asset created with dpkg_gh_release()
# @examples
# dpkg_gh_release(as_dpkg(mtcars))
# stow_gh_release("cole-brokamp", "dpkg", "mtcars-v0.0.0.9000")
# stow_gh_release("cole-brokamp", "dpkg", "mtcars-v0.0.0.9000", overwrite = TRUE)
stow_gh_release <- function(owner, repo, dpkg, overwrite = FALSE) {
Expand Down Expand Up @@ -110,13 +111,17 @@ stow_gh_release <- function(owner, repo, dpkg, overwrite = FALSE) {

the_asset <- the_assets[[which(vapply(the_assets, \(.) .$name == paste0(dpkg, ".parquet"), logical(1)))]]

httr2::request(glue::glue("https://api.github.com/repos/{owner}/{repo}/releases/assets/{the_asset$id}")) |>
httr2::req_headers(
Accept = "application/vnd.github+json",
Authorization = glue::glue("Bearer {get_gh_token()}"),
`X-GitHub-Api-Version` = "2022-11-28",
.redact = "Authorization"
) |>
httr2::req_perform(path = stow_path(dpkg_filename))
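## # why does getting file this way break the header of the parquet file??
## # NOTE(review): possibly because `Accept: application/vnd.github+json` asks for the
## # asset's JSON metadata rather than its bytes; the GitHub REST docs say to send
## # `Accept: application/octet-stream` to download the binary content -- TODO confirm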
## httr2::request(glue::glue("https://api.github.com/repos/{owner}/{repo}/releases/assets/{the_asset$id}")) |>
## httr2::req_headers(
## Accept = "application/vnd.github+json",
## Authorization = glue::glue("Bearer {get_gh_token()}"),
## `X-GitHub-Api-Version` = "2022-11-28",
## .redact = "Authorization"
## ) |>
## httr2::req_perform(path = stow_path(dpkg_filename))

stow_url(the_asset$browser_download_url)

return(stow_path(dpkg_filename))
}
1 change: 1 addition & 0 deletions R/helpers.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Package load hook: runs when the package namespace is loaded.
# Arguments passed by R to the hook are accepted via `...` and not used here.
.onLoad <- function(...) {
# register S7 methods at load time
S7::methods_register()
# create the directory returned by stow_path() up front, so that functions
# writing into it (e.g. stow_url()) do not each have to create it themselves
fs::dir_create(stow_path())
}

# enable usage of <S7_object>@name in package code
Expand Down
8 changes: 5 additions & 3 deletions R/stow.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,19 @@
#' with `http://`, `https://`, or `gh://`
#' @param overwrite logical; re-download the remote file even though
#' a local file with the same name exists?
#' @returns path to the stowed file
#' @export
#' @examples
#' # get by using URL
#' stow("https://github.com/geomarker-io/appc/releases/download/v0.1.0/nei_2020.rds")
#'
#' # will be faster (even in later R sessions) next time
#' stow("https://github.com/geomarker-io/appc/releases/download/v0.1.0/nei_2020.rds")
#' stow("https://github.com/geomarker-io/appc/releases/download/v0.1.0/nei_2020.rds") |>
#' readRDS()
#'
#' # get a data package parquet file created with dpkg_gh_release()
#' stow("gh://cole-brokamp/dpkg/mtcars-v0.0.0.9000")
#' stow("gh://cole-brokamp/dpkg/cagis_parcels-v0.1.0") |>
#' read_dpkg()
stow <- function(uri, overwrite = FALSE) {
if (grepl("^https?://", uri)) {
out <- stow_url(url = uri, overwrite = overwrite)
Expand Down Expand Up @@ -63,7 +66,6 @@ stow <- function(uri, overwrite = FALSE) {
#' stow_url("https://github.com/geomarker-io/appc/releases/download/v0.1.0/nei_2020.rds")
stow_url <- function(url, overwrite = FALSE) {
if (!grepl("^https?://", url)) rlang::abort("x must start with `http://` or `https://`")
fs::dir_create(stow_path())
dest_path <- stow_path(fs::path_file(url))
httr2::req_perform(httr2::request(url), path = dest_path)
return(dest_path)
Expand Down
17 changes: 17 additions & 0 deletions inst/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# CAGIS Open Data Parcels

## About

A tabular data resource derived from the Hamilton County, OH Auditor data distributed through [CAGIS Open Data](https://cagismaps.hamilton-co.org/cagisportal/mapdata/download) of parcel-level characteristics collected by Hamilton County.

## Sources

- `parcel_example.R`
- <https://cagismaps.hamilton-co.org/cagisportal/mapdata/download>

## Details

Parcels were excluded if they were missing an identifier, missing an address number, missing an address street name, or were not considered to be residential.

Because 'second line' address components (e.g., 'Unit 2B') are not captured, a single address can refer to multiple parcels in the case of condos or otherwise shared building ownership.
Large apartment complexes often use multiple mailing addresses that are not the same as the parcel address(es).
46 changes: 9 additions & 37 deletions inst/parcel_example.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,7 @@
library(lbl)
library(dpkg)
library(dplyr, warn.conflicts = FALSE)
library(sf)

md <-
list(
name = "cagis_parcels",
title = "CAGIS Open Data Parcels",
version = "0.1.0",
description = paste(
"A tabular data resource derived from the Hamilton County, OH Auditor data distributed through [CAGIS Open Data](https://cagismaps.hamilton-co.org/cagisportal/mapdata/download) of parcel-level characteristics collected by Hamilton County.",
"Briefly, parcels were excluded if they were missing an identifier, missing an address number, missing an address street name, or were not considered to be residential.",
"Because 'second line' address components (e.g., 'Unit 2B') are not captured, a single address can refer to multiple parcels in the case of condos or otherwise shared building ownership.",
"Large apartment complexes often use multiple mailing addresses that are not the same as the parcel address(es).",
.sep = "\n"
)
)

# download CAGIS OpenData Auditor geodatabase
the_gdb <- fs::path(tools::R_user_dir("lbl_example", "cache"), "Parcels2024.gdb")
if (!fs::file_exists(the_gdb)) {
Expand Down Expand Up @@ -85,27 +71,13 @@ d <-
sep = " ", na.rm = TRUE, remove = TRUE
)

# add metadata and updated labels
out <-
d |>
as_lbl_tbl(!!!md) |>
update_labels(
parcel_address = "Derived by pasting `parcel_addr_{number, street, suffix}` together",
parcel_id = "Uniquely identifies parcels of land (i.e., auditor parcel number)",
land_use = "Parcel land usage code; distinct from city land use codes"
)

## login using profile sso account
system2("aws", c("sso", "login", "--profile", "geomarker-io"))
Sys.setenv("AWS_PROFILE" = "geomarker-io")

codec_board <-
pins::board_s3(
bucket = "io.geomarker.codec",
versioned = FALSE,
prefix = "data/",
profile = "geomarker-io",
cache = tools::R_user_dir("io.geomarker.codec.data", "cache")
d_dpkg <- d |>
as_dpkg(
name = "cagis_parcels",
title = "CAGIS Open Data Parcels",
version = "0.1.0",
homepage = "https://github.com/geomarker-io/parcel",
description = paste(readLines(fs::path_package("dpkg", "README.md")), collapse = "\n")
)

pins::pin_write(codec_board, out, name = glue::glue("{attr(out, 'name')}_v{attr(out, 'version')}"), type = "rds")
## dpkg_gh_release(d_dpkg, draft = FALSE)
9 changes: 7 additions & 2 deletions man/stow.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0a03afe

Please sign in to comment.