Skip to content

Commit

Permalink
Adjust query of uniprot info
Browse files Browse the repository at this point in the history
  • Loading branch information
jpquast committed May 22, 2024
1 parent 01bca28 commit e539bfc
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 7 deletions.
16 changes: 14 additions & 2 deletions R/fetch_uniprot.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ fetch_uniprot <-
"xref_pdb"
),
batchsize = 200,
max_tries = 10,
timeout = 10,
show_progress = TRUE) {
if (!curl::has_internet()) {
message("No internet connection.")
Expand Down Expand Up @@ -125,7 +127,12 @@ They were fetched and the original input ID can be found in the "input_id" colum
collapsed_columns
))

query <- try_query(query_url, progress = FALSE, show_col_types = FALSE)
query <- try_query(query_url,
max_tries = max_tries,
try_if_timeout = TRUE,
timeout = timeout,
progress = FALSE,
show_col_types = FALSE)

if (show_progress == TRUE) {
pb$tick()
Expand Down Expand Up @@ -200,7 +207,12 @@ They were fetched and the original input ID can be found in the "input_id" colum
collapsed_columns
))

new_result <- try_query(new_query_url, progress = FALSE, show_col_types = FALSE)
new_result <- try_query(new_query_url,
max_tries = max_tries,
try_if_timeout = TRUE,
timeout = timeout,
progress = FALSE,
show_col_types = FALSE)
# If a problem occurs at this step NULL is returned.
if (!methods::is(new_result, "data.frame")) {
message(new_result)
Expand Down
18 changes: 14 additions & 4 deletions R/try_query.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
#' @param url a character value of an URL to the website that contains the table that should be
#' downloaded.
#' @param max_tries a numeric value that specifies the number of times the function tries to download
#' the data in case an error occurs.
#' the data in case an error occurs. By default, it does not try again if the database times out; see `try_if_timeout`.
#' @param try_if_timeout a logical value specifying if the function should try again in case of a timeout.
#' The default is `FALSE`, which prevents queries from taking very long when the database is not
#' responding at the time. In some cases it makes sense to set this to `TRUE`, increase the number
#' of tries and reduce the timeout time.
#' @param silent a logical value that specifies if individual messages are printed after each try
#' that failed.
#' @param type a character value that specifies the type of data at the target URL. Options are
Expand All @@ -25,7 +29,7 @@
#'
#' @return A data frame that contains the table from the url.
try_query <-
function(url, max_tries = 5, silent = TRUE, type = "text/tab-separated-values", timeout = 60, accept = NULL, ...) {
function(url, max_tries = 5, try_if_timeout = FALSE, silent = TRUE, type = "text/tab-separated-values", timeout = 60, accept = NULL, ...) {
# Check if there is an internet connection first
if (!curl::has_internet()) {
if (!silent) message("No internet connection.")
Expand All @@ -34,12 +38,18 @@ try_query <-

query_result <- "empty"
try_n <- 0
# Note: The handling of retries in case of a timeout could be adjusted in the future.
# For now I introduced try_if_timeout, but potentially it makes sense to always retry even
# in case of a timeout. Then however, the number of retries needs to be adjusted for some
# functions that retrieve a lot of data.
while (!is(query_result, "response") &
try_n < max_tries &
# this ifelse stops re-querying after a timeout, unless try_if_timeout is TRUE
!ifelse(is(query_result, "character"),
stringr::str_detect(query_result, pattern = "Timeout was reached"),
stringr::str_detect(query_result, pattern = "Timeout was reached") & !try_if_timeout,
FALSE
)) { # this ifelse stops requery if the timeout is too low.
)
) {
if (!missing(accept)) {
# with accept set
query_result <- tryCatch(httr::GET(url, httr::accept(accept), httr::timeout(timeout)),
Expand Down
2 changes: 2 additions & 0 deletions man/fetch_uniprot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/try_query.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e539bfc

Please sign in to comment.