046 cran fix

michalovadek · Sep 6, 2023 · 225684d · 225684d
1 parent 3a23f7f
commit 225684d
Show file tree

Hide file tree

Showing 33 changed files with 983 additions and 419 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^Meta$
 ^CRAN-SUBMISSION$
 ^cran-comments\.md$
+^vignettes/articles$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: eurlex
 Type: Package
 Title: Retrieve Data on European Union Law
-Version: 0.4.5
+Version: 0.4.6
 Authors@R: c(person(given = "Michal",
              family = "Ovadek",
              role = c("aut", "cre", "cph"),

diff --git a/NEWS.md b/NEWS.md
@@ -3,6 +3,10 @@
 ## Minor changes
 
 - minor changes to documentation
+- cleaned up the code that makes HTTP calls
+- calls to `elx_council_votes()` and `elx_curia_list()` now fail gracefully
+- `.data$var` replaced by quoted variable names in tidyselect functions
+- Internet-using vignettes moved to site-only articles
 
 # eurlex 0.4.5
 

diff --git a/R/elx_council_votes.R b/R/elx_council_votes.R
@@ -89,13 +89,30 @@ elx_council_votes <- function(){
           }
               ORDER BY DESC(?decisionDate), ?votingInstCode
 "
-
-    votes <- httr::POST(url = "https://data.consilium.europa.eu/sparql",
-                        body = list(query = query),
-                        httr::add_headers('Accept' = 'text/csv')) %>%
+
+    # run query
+    votes_resp <- graceful_http(
+      remote_file = "https://data.consilium.europa.eu/sparql",
+      body = list(query = query),
+      httr::content_type("multipart"),
+      headers = httr::add_headers('Accept' = 'text/csv'),
+      encode = "multipart",
+      verb = "POST"
+    )
+
+    # if no response was returned (NULL), exit early with an invisible NULL
+    if (is.null(votes_resp)){
+
+      return(invisible(NULL))
+
+    } 
+
+    # process response
+    votes <- votes_resp %>%
       httr::content("text") %>%
       readr::read_csv(col_types = readr::cols(.default = "c"))
 
+    # return
     return(votes)
 
 }
diff --git a/R/elx_curia_list.R b/R/elx_curia_list.R
@@ -101,7 +101,16 @@ elx_curia_list <- function(data = c("all","ecj_old","ecj_new","gc_all","cst_all"
 
 elx_curia_scraper <- function(url, ...){
 
-  page <- xml2::read_html(url(url, open = "rb"))
+  response <- graceful_http(url, verb = "GET")
+
+  # if no response was returned (NULL), exit early with an invisible NULL
+  if (is.null(response)){
+
+    return(invisible(NULL))
+
+  }
+
+  page <- xml2::read_html(response)
 
   tab <- page %>%
     rvest::html_node("table") %>%
@@ -131,7 +140,7 @@ elx_curia_scraper <- function(url, ...){
     dplyr::ungroup()
 
   out <- dplyr::left_join(tab, linked, by = c("case_id"="linked_id","n_id"="n_id")) %>%
-    dplyr::select(.data$case_id, .data$linked_celex, .data$case_info) %>%
+    dplyr::select("case_id", "linked_celex", "case_info") %>%
     dplyr::rename(case_id_celex = linked_celex)
 
   return(out)
@@ -152,16 +161,13 @@ elx_curia_parse <- function(x, ...){
                   see_case = stringr::str_extract(.data$case_info, "see Case .+") %>%
                     stringr::str_remove("see Case ") %>%
                     stringr::str_remove("APPEAL.*") %>%
-                    stringr::str_squish() %>%
-                    stringr::str_trim(),
+                    stringr::str_squish(),
                   appeal = stringr::str_extract(.data$case_info, "APPEAL.*") %>%
                     stringr::str_remove("APPEAL.? :") %>%
                     stringr::str_remove_all("\\;|\\,|\\.") %>%
-                    stringr::str_squish() %>%
-                    stringr::str_trim()
+                    stringr::str_squish()
     )
 
   return(out)
 
 }
-
diff --git a/R/elx_fetch_data.R b/R/elx_fetch_data.R
@@ -1,6 +1,6 @@
 #' Retrieve additional data on EU documents
 #'
-#' Wraps httr::GET with pre-specified headers and parses retrieved data.
+#' Get titles, texts, identifiers and XML notices for EU resources.
 #'
 #' @param url A valid url as character vector of length one based on a resource identifier such as CELEX or Cellar URI.
 #' @param type The type of data to be retrieved. When type = "text", the returned list contains named elements reflecting the source of each text. When type = "notice", the results return an XML notice associated with the url.
@@ -38,8 +38,10 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
 
   if (type == "notice" & missing(notice)){stop("notice type must be given")}
 
+  # format language query
   language <- paste(language_1,", ",language_2,";q=0.8, ",language_3,";q=0.7", sep = "")
 
+  # process URL
   if (stringr::str_detect(url,"celex.*[\\(|\\)|\\/]")){
 
     clx <- stringr::str_extract(url, "(?<=celex\\/).*") %>% 
@@ -53,12 +55,20 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
 
   }
 
+  # titles
   if (type == "title"){
 
     response <- graceful_http(url,
                               headers = httr::add_headers('Accept-Language' = language,
                                                           'Accept' = 'application/xml; notice=object'),
                               verb = "GET")
+
+    # if no response was returned (NULL), exit early with an invisible NULL
+    if (is.null(response)){
+
+      return(invisible(NULL))
+
+    }
 
     if (httr::status_code(response)==200){
 
@@ -71,6 +81,7 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
 
   }
 
+  # full text
   if (type == "text"){
 
     response <- graceful_http(url,
@@ -79,6 +90,13 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
                                                           'Accept' = 'text/html, text/html;type=simplified, text/plain, application/xhtml+xml, application/xhtml+xml;type=simplified, application/pdf, application/pdf;type=pdf1x, application/pdf;type=pdfa1a, application/pdf;type=pdfx, application/pdf;type=pdfa1b, application/msword'),
                               verb = "GET")
 
+    # if no response was returned (NULL), exit early with an invisible NULL
+    if (is.null(response)){
+
+      return(invisible(NULL))
+
+    }
+
     if (httr::status_code(response)==200){
 
       out <- elx_read_text(response, html_text = html_text)
@@ -142,12 +160,20 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
 
   }
 
+  # identifiers
   if (type == "ids"){
 
     response <- graceful_http(url,
                               headers = httr::add_headers('Accept-Language' = language,
                                                           'Accept' = 'application/xml; notice=identifiers'),
                               verb = "GET")
+
+    # if no response was returned (NULL), exit early with an invisible NULL
+    if (is.null(response)){
+
+      return(invisible(NULL))
+
+    }
 
     if (httr::status_code(response)==200){
 
@@ -160,6 +186,7 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
 
   }
 
+  # notices
   if (type == "notice"){
 
     accept_header <- paste('application/xml; notice=',
@@ -184,6 +211,13 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
 
     }
 
+    # if no response was returned (NULL), exit early with an invisible NULL
+    if (is.null(response)){
+
+      return(invisible(NULL))
+
+    }
+
     if (httr::status_code(response)==200){
 
       out <- httr::content(response)
@@ -192,6 +226,7 @@ elx_fetch_data <- function(url, type = c("title","text","ids","notice"),
 
   }
 
+  # end
   return(out)
 
 }
@@ -225,40 +260,40 @@ elx_read_text <- function(http_response, html_text = "text2"){
 
   }
 
-    if (stringr::str_detect(http_response$headers$`content-type`,"html")){
-
-      out <- http_response %>%
-        xml2::read_html() %>%
-        rvest::html_node("body") %>%
-        html_text_engine() %>%
-        paste0(collapse = " ---pagebreak--- ")
-
-      names(out) <- "html"
-
-    }
-
-    else if (stringr::str_detect(http_response$headers$`content-type`,"pdf")){
-
-      out <- http_response$url %>%
-        pdftools::pdf_text() %>%
-        paste0(collapse = " ---pagebreak--- ")
-
-      names(out) <- "pdf"
-
-    }
-
-    else if (stringr::str_detect(http_response$headers$`content-type`,"msword")){
-
-      out <- http_response$url %>%
-        antiword::antiword() %>%
-        paste0(collapse = " ---pagebreak--- ")
+  if (stringr::str_detect(http_response$headers$`content-type`,"html")){
+
+    out <- http_response %>%
+      xml2::read_html() %>%
+      rvest::html_node("body") %>%
+      html_text_engine() %>%
+      paste0(collapse = " ---pagebreak--- ")
+
+    names(out) <- "html"
+
+  }
 
-      names(out) <- "word"
+  else if (stringr::str_detect(http_response$headers$`content-type`,"pdf")){
+
+    out <- http_response$url %>%
+      pdftools::pdf_text() %>%
+      paste0(collapse = " ---pagebreak--- ")
+
+    names(out) <- "pdf"
+
+  }
 
-    } else {
-      out <- "unsupported format"
-      names(out) <- "unsupported"
-    }
+  else if (stringr::str_detect(http_response$headers$`content-type`,"msword")){
+
+    out <- http_response$url %>%
+      antiword::antiword() %>%
+      paste0(collapse = " ---pagebreak--- ")
+
+    names(out) <- "word"
+
+  } else {
+    out <- "unsupported format"
+    names(out) <- "unsupported"
+  }
 
   return(out)