diff --git a/DESCRIPTION b/DESCRIPTION
index 0faf4c66..d2b5740d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,7 +43,7 @@ Imports:
     methods,
     R.utils,
     stats
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Suggests:
     testthat,
     covr,
diff --git a/NAMESPACE b/NAMESPACE
index 09754811..d81a3a5d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -116,6 +116,7 @@ importFrom(httr,modify_url)
 importFrom(httr,timeout)
 importFrom(janitor,clean_names)
 importFrom(janitor,make_clean_names)
+importFrom(jsonlite,fromJSON)
 importFrom(magrittr,"%>%")
 importFrom(methods,is)
 importFrom(plotly,ggplotly)
@@ -134,6 +135,7 @@ importFrom(purrr,pluck)
 importFrom(purrr,pmap)
 importFrom(purrr,reduce)
 importFrom(purrr,set_names)
+importFrom(readr,read_csv)
 importFrom(readr,read_tsv)
 importFrom(readr,write_csv)
 importFrom(readr,write_tsv)
@@ -191,3 +193,5 @@ importFrom(utils,data)
 importFrom(utils,download.file)
 importFrom(utils,head)
 importFrom(utils,untar)
+importFrom(xml2,read_html)
+importFrom(xml2,read_xml)
diff --git a/R/try_query.R b/R/try_query.R
index 8016a00b..15825a75 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -13,7 +13,6 @@
 #' @param type a character value that specifies the type of data at the target URL. Options are
 #' all options that can be supplied to httr::content, these include e.g.
 #' "text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values".
-#' Default is "tab-separated-values".
 #' @param timeout a numeric value that specifies the maximum request time. Default is 60 seconds.
 #' @param accept a character value that specifies the type of data that should be sent by the API if
 #' it uses content negotiation. The default is NULL and it should only be set for APIs that use
@@ -22,6 +21,9 @@
 #'
 #' @importFrom curl has_internet
 #' @importFrom httr GET timeout http_error message_for_status http_status content accept
+#' @importFrom readr read_tsv read_csv
+#' @importFrom jsonlite fromJSON
+#' @importFrom xml2 read_html read_xml
 #'
 #' @return A data frame that contains the table from the url.
 try_query <-
@@ -88,7 +90,37 @@ try_query <-
     # Change variable to not show progress if readr is used
     options(readr.show_progress = FALSE)
 
-    result <- suppressMessages(httr::content(query_result, type = type, encoding = "UTF-8", ...))
+    # Check if the content is gzip compressed. identical() keeps the test NULL-safe:
+    # a response without a content-encoding header would otherwise make `if` fail on logical(0).
+    if (identical(query_result$headers[["content-encoding"]], "gzip")) {
+      # Retrieve the content as raw bytes using httr::content
+      raw_content <- httr::content(query_result, type = "raw")
+
+      # Decompress the raw content using base R's `memDecompress`
+      decompressed_content <- memDecompress(raw_content, type = "gzip")
+
+      # Convert the raw bytes to a character string
+      text_content <- rawToChar(decompressed_content)
+
+      # Read the decompressed content based on the specified type
+      if (type == "text/tab-separated-values") {
+        result <- suppressMessages(readr::read_tsv(text_content, ...))
+      } else if (type == "text/html") {
+        result <- xml2::read_html(text_content, ...)
+      } else if (type == "text/xml") {
+        result <- xml2::read_xml(text_content, ...)
+      } else if (type == "text/csv" || type == "txt/csv") {
+        result <- suppressMessages(readr::read_csv(text_content, ...))
+      } else if (type == "application/json") {
+        result <- jsonlite::fromJSON(text_content, ...) # Using jsonlite for JSON parsing
+      } else if (type == "text") {
+        result <- text_content # Return raw text as-is
+      } else {
+        stop("Unsupported content type: ", type)
+      }
+    } else {
+      result <- suppressMessages(httr::content(query_result, type = type, encoding = "UTF-8", ...))
+    }
 
     return(result)
   }
diff --git a/man/try_query.Rd b/man/try_query.Rd
index 90d61467..cb9a64aa 100644
--- a/man/try_query.Rd
+++ b/man/try_query.Rd
@@ -26,8 +26,7 @@ that failed.}
 
 \item{type}{a character value that specifies the type of data at the target URL. Options are
 all options that can be supplied to httr::content, these include e.g.
-"text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values".
-Default is "tab-separated-values".}
+"text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values".}
 
 \item{timeout}{a numeric value that specifies the maximum request time. Default is 60 seconds.}
 