Commit 01bacd8: fix some check errors
JBGruber committed Oct 5, 2023
1 parent 92c8ce8
Showing 5 changed files with 33 additions and 21 deletions.
6 changes: 5 additions & 1 deletion DESCRIPTION
@@ -12,10 +12,13 @@ Imports:
adaR,
callr,
cli,
cookiemonster,
curl,
dplyr,
jsonlite,
lubridate,
magrittr,
methods,
praise,
purrr,
rlang,
@@ -32,7 +35,8 @@ Suggests:
rmarkdown,
rstudioapi,
spelling,
-testthat
+testthat,
+withr
URL: https://github.com/JBGruber/paperboy
Encoding: UTF-8
BugReports: https://github.com/JBGruber/paperboy/issues
2 changes: 1 addition & 1 deletion R/collect.R
@@ -61,7 +61,7 @@ pb_collect <- function(urls,

res <- purrr::map(url_batches, function(b) {
domain <- adaR::ada_get_domain(b[1])
-cookies_str <- cookiemonster::get_cookies(paste0("\\b", domain, "\\b"), as = "string")
+cookies_str <- cookiemonster::get_cookies(paste0(domain, "\\b"), as = "string")
rp <- callr::r_bg(async_requests,
args = list(
urls = b,
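The pattern handed to `cookiemonster::get_cookies()` above is a regular expression matched against stored cookie domains, with a trailing `\b` word boundary so the domain only matches where it ends cleanly. A minimal Python sketch of that matching idea (illustrative only — the function name and data are invented, and unlike the R pattern it escapes the dots so they match literally):

```python
import re

def match_cookie_domains(domain, stored_domains):
    # Mirror of paste0(domain, "\\b"): the domain followed by a word boundary.
    # The R code uses the domain verbatim (so "." is a regex wildcard there);
    # here we escape it to match literally.
    pattern = re.compile(re.escape(domain) + r"\b")
    return [d for d in stored_domains if pattern.search(d)]

stored = ["www.theguardian.com", ".theguardian.com", "theguardian.community"]
print(match_cookie_domains("theguardian.com", stored))
```

Because the pattern is unanchored at the start, subdomain-prefixed entries such as `www.theguardian.com` still match, while the boundary check rejects longer domains that merely start with the same characters.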
12 changes: 6 additions & 6 deletions R/utils_dev.R
@@ -42,7 +42,7 @@ use_new_parser <- function(x,
rss = NULL,
test_data = NULL) {

-x <- head(adaR::ada_get_domain(x), 1)
+x <- utils::head(adaR::ada_get_domain(x), 1)

cli::cli_progress_step(
"Creating R file",
@@ -73,7 +73,7 @@
)

if (file.exists("inst/status.csv")) {
-status <- read.csv("inst/status.csv")
+status <- utils::read.csv("inst/status.csv")
if (!gsub("^www.", "", x) %in% status$domain) {
status <- status %>%
rbind(list(domain = sub("^www.", "", x),
@@ -82,7 +82,7 @@
issues = issue,
rss = rss)) %>%
dplyr::arrange(domain)
-write.csv(status, "inst/status.csv", row.names = FALSE)
+utils::write.csv(status, "inst/status.csv", row.names = FALSE)
} else if (rss == "") {
# if entry already present, get rss value
rss <- status[grepl(gsub("^www.", "", x), status$domain), "rss"]
@@ -154,13 +154,13 @@ use_new_parser <- function(x,
msg_done = "status.csv updated."
)
x <- utils::head(adaR::ada_get_domain(x), 1)
-status <- read.csv("inst/status.csv")
+status <- utils::read.csv("inst/status.csv")
status[status$domain == gsub("^www.", "", x), "status"] <-
"![](https://img.shields.io/badge/status-gold-%23ffd700.svg)"
cli::cli_alert_info("Check the entry manually. Press quit when you're happy.")
status[status$domain == gsub("^www.", "", x), ] <-
-edit(status[status$domain == gsub("^www.", "", x), ])
-write.csv(status, "inst/status.csv", row.names = FALSE)
+utils::edit(status[status$domain == gsub("^www.", "", x), ])
+utils::write.csv(status, "inst/status.csv", row.names = FALSE)

}

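The hunks above follow a read-modify-write pattern on `inst/status.csv`: read the table, append a row if the domain is not yet listed, keep the table sorted by domain, and write it back. A rough Python sketch of that upsert step (illustrative only — the helper name is invented and the column set is reduced from the one in the diff):

```python
import csv
import io

def upsert_status(csv_text, domain, author, status_badge):
    # Read the status table (mirrors read.csv in the diff).
    rows = list(csv.DictReader(io.StringIO(csv_text)))
    # The R code strips a leading "www." before comparing domains.
    domain = domain.removeprefix("www.")
    if all(r["domain"] != domain for r in rows):
        # Append the new entry and re-sort by domain
        # (mirrors the rbind + dplyr::arrange sequence).
        rows.append({"domain": domain, "author": author, "status": status_badge})
        rows.sort(key=lambda r: r["domain"])
    out = io.StringIO()
    writer = csv.DictWriter(out, fieldnames=["domain", "author", "status"])
    writer.writeheader()
    writer.writerows(rows)
    return out.getvalue()
```

The insert-if-absent check is what keeps re-running the dev helper idempotent: an existing domain entry is left untouched rather than duplicated.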
23 changes: 12 additions & 11 deletions README.md
@@ -44,9 +44,9 @@ df <- pb_deliver("https://tinyurl.com/386e98k5")
df
```

-| url | expanded_url | domain | status | datetime | author | headline | text | misc |
-|:-------------------------------|:----------------------------------------------------------------------------------|:--------------------|-------:|:--------------------|:------------------------------------------------------|:------------------------|:-------------------------------|:-----|
-| <https://tinyurl.com/386e98k5> | <https://www.theguardian.com/tv-and-radio/2021/jul/12/should-marge-divorce-homer> | www.theguardian.com | 200 | 2021-07-12 12:00:13 | <https://www.theguardian.com/profile/stuart-heritage> | ’A woman trapped in an… | In the Guide’s weekly Solved!… | NULL |
+| url | expanded_url | domain | status | datetime | author | headline | text | misc |
+|:-------------------------------|:----------------------------------------------------------------------------------|:----------------|-------:|:--------------------|:------------------------------------------------------|:------------------------|:-------------------------------|:-----|
+| <https://tinyurl.com/386e98k5> | <https://www.theguardian.com/tv-and-radio/2021/jul/12/should-marge-divorce-homer> | theguardian.com | 200 | 2021-07-12 12:00:13 | <https://www.theguardian.com/profile/stuart-heritage> | ’A woman trapped in an… | In the Guide’s weekly Solved!… | NULL |

The returned `data.frame` contains important meta information about the
news items and their full text. Notice that the function had no problem
@@ -56,13 +56,12 @@ therefore often encounter this warning:

``` r
pb_deliver("google.com")
-#> Warning: ℹ No parser for domain www.google.com yet, attempting generic
-#> approach.
+#> Warning: ℹ No parser for domain google.com yet, attempting generic approach.
```

-| url | expanded_url | domain | status | datetime | author | headline | text | misc |
-|:-----------|:-------------------------|:---------------|-------:|:---------|:-------|:---------|:---------------------------------------------|:-----|
-| google.com | <http://www.google.com/> | www.google.com | 200 | NA | NA | Google | © 2023 - Ochrana soukromí - Smluvní podmínky | NULL |
+| url | expanded_url | domain | status | datetime | author | headline | text | misc |
+|:-----------|:-------------------------|:-----------|-------:|:---------|:-------|:---------|:----------------------------------------------------|:-----|
+| google.com | <http://www.google.com/> | google.com | 200 | NA | NA | Google | © 2023 - Datenschutzerklärung - Nutzungsbedingungen | NULL |

The function still returns a data.frame, but important information is
missing — in this case because it isn’t there. The other URLs will be
@@ -77,9 +76,9 @@ later parse it yourself:
pb_collect("google.com")
```

-| url | expanded_url | domain | status | content_raw |
-|:-----------|:-------------------------|:---------------|-------:|:-----------------------------------|
-| google.com | <http://www.google.com/> | www.google.com | 200 | \<!doctype html\>\<html itemscope… |
+| url | expanded_url | domain | status | content_raw |
+|:-----------|:-------------------------|:-----------|-------:|:-----------------------------------|
+| google.com | <http://www.google.com/> | google.com | 200 | \<!doctype html\>\<html itemscope… |

`pb_collect` uses concurrent requests to download many pages at the same
time, making the function very quick to collect large amounts of data.
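Concurrency here means the requests in a batch are in flight at the same time rather than issued one after another. A toy Python sketch of that idea using a thread pool (illustrative only — `fetch` is a stand-in stub, not how paperboy downloads pages, which goes through `callr` and `curl` in R):

```python
from concurrent.futures import ThreadPoolExecutor

def fetch(url):
    # Stand-in for an HTTP GET; a real version would use urllib.request.
    return {"url": url, "status": 200, "content_raw": "<!doctype html>…"}

def collect(urls, max_workers=8):
    # Issue the requests concurrently; map() preserves the input order,
    # so results line up with the URLs that produced them.
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        return list(pool.map(fetch, urls))

pages = collect(["https://example.org/a", "https://example.org/b"])
```

Since each request spends most of its time waiting on the network, even a modest worker pool gives a near-linear speed-up over sequential downloads.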
@@ -113,6 +112,7 @@ column was included so these can be retained.

| domain | status | author | issues |
|:-------------------------------|:--------------------------------------------------------------|:------------------------------------------|:-----------------------------------------------------|
+| ad.nl | ad.nl | [@JBGruber](https://github.com/JBGruber/) | |
| anotherangryvoice.blogspot.com | ![](https://img.shields.io/badge/status-gold-%23ffd700.svg) | [@JBGruber](https://github.com/JBGruber/) | |
| boston.com | ![](https://img.shields.io/badge/status-requested-lightgrey) | | [\#1](https://github.com/JBGruber/paperboy/issues/1) |
| bostonglobe.com | ![](https://img.shields.io/badge/status-requested-lightgrey) | | [\#1](https://github.com/JBGruber/paperboy/issues/1) |
@@ -157,6 +157,7 @@
| tribpub.com | ![](https://img.shields.io/badge/status-requested-lightgrey) | | [\#1](https://github.com/JBGruber/paperboy/issues/1) |
| us.cnn.com | ![](https://img.shields.io/badge/status-gold-%23ffd700.svg) | [@JBGruber](https://github.com/JBGruber/) | |
| usatoday.com | ![](https://img.shields.io/badge/status-gold-%23ffd700.svg) | [@JBGruber](https://github.com/JBGruber/) | |
+| volkskrant.nl | ![](https://img.shields.io/badge/status-gold-%23ffd700.svg) | [@JBGruber](https://github.com/JBGruber/) | |
| washingtonpost.com | ![](https://img.shields.io/badge/status-gold-%23ffd700.svg) | [@JBGruber](https://github.com/JBGruber/) | |
| wsj.com | ![](https://img.shields.io/badge/status-gold-%23ffd700.svg) | [@JBGruber](https://github.com/JBGruber/) | |

11 changes: 9 additions & 2 deletions inst/WORDLIST
@@ -1,6 +1,7 @@
CMD
Codecov
-Datenschutzerklrung
+Datenschutzerklärung
+Guide’s
Lifecycle
Nutzungsbedingungen
POSIXct
@@ -16,6 +17,8 @@ cbsnews
cnet
cnn
com
+csv
+cz
dailymail
datetime
doctype
@@ -28,13 +31,17 @@ foxnews
ftw
huffingtonpost
huffpost
+idnes
itemscope
latimes
lnk
marketwatch
mediacloud
msnbc
newsweek
+nl
+nos
+nrc
nypost
nytimes
org
@@ -55,9 +62,9 @@ uk
un
urls
usatoday
+volkskrant
washingtonpost
webscraper
webscraping
wsj
www
’A
