diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
index f2d06f3..211dc2e 100644
--- a/CRAN-SUBMISSION
+++ b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
 Version: 0.7.15
-Date: 2024-08-24 13:03:06 UTC
-SHA: d2ab4788e97bfad0a6e7b7a7c3b70938be954a5e
+Date: 2024-08-25 07:16:38 UTC
+SHA: 3c96e9f6872735f123da4dea2404c8f8df94810f
diff --git a/DESCRIPTION b/DESCRIPTION
index 1b42c46..7281c28 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: robotstxt
-Date: 2024-08-24
+Date: 2024-08-25
 Type: Package
 Title: A 'robots.txt' Parser and 'Webbot'/'Spider'/'Crawler' Permissions Checker
 Version: 0.7.15
@@ -24,7 +24,6 @@ Description: Provides functions to download and parse 'robots.txt' files.
     (spiders, crawler, scrapers, ...) are allowed to access specific
     resources on a domain.
 License: MIT + file LICENSE
-LazyData: TRUE
 BugReports: https://github.com/ropensci/robotstxt/issues
 URL: https://docs.ropensci.org/robotstxt/, https://github.com/ropensci/robotstxt
 Imports:
diff --git a/R/get_robotstxt.R b/R/get_robotstxt.R
index 417bb56..42e7947 100644
--- a/R/get_robotstxt.R
+++ b/R/get_robotstxt.R
@@ -8,9 +8,8 @@
 #' @param user_agent HTTP user-agent string to be used to retrieve robots.txt
 #' file from domain
 #'
-#' @param ssl_verifypeer analog to CURL option
-#' \url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html} -- and
-#' might help with robots.txt file retrieval in some cases
+#' @param ssl_verifypeer either 1 (default) or 0, if 0 it disables SSL peer verification, which
+#' might help with robots.txt file retrieval
 #' @param rt_robotstxt_http_getter function that executes HTTP request
 #' @param rt_request_handler handler function that handles request according to
 #' the event handlers specified
diff --git a/R/get_robotstxt_http_get.R b/R/get_robotstxt_http_get.R
index 2500928..369176e 100644
--- a/R/get_robotstxt_http_get.R
+++ b/R/get_robotstxt_http_get.R
@@ -9,11 +9,8 @@ rt_last_http$request <- list()
 
 #' get_robotstxt() worker function to execute HTTP request
 #'
-#'
-#' @param ssl_verifypeer analog to CURL option
-#' \url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html}
-#' -- and might help with robots.txt file retrieval in some cases
-#'
+#' @param ssl_verifypeer either 1 (default) or 0, if 0 it disables SSL peer verification, which
+#' might help with robots.txt file retrieval
 #' @param domain the domain to get tobots.txt. file for
 #' @param user_agent the user agent to use for HTTP request header
 #'
diff --git a/R/get_robotstxts.R b/R/get_robotstxts.R
index 14b3c57..fbf755c 100644
--- a/R/get_robotstxts.R
+++ b/R/get_robotstxts.R
@@ -7,10 +7,8 @@
 #' pages and vignettes of package future on how to set up
 #' plans for future execution because the robotstxt package
 #' does not do it on its own.
-#' @param ssl_verifypeer analog to CURL option
-#' \url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html}
-#' -- and might help with robots.txt file retrieval in some cases
-#'
+#' @param ssl_verifypeer either 1 (default) or 0, if 0 it disables SSL peer verification, which
+#' might help with robots.txt file retrieval
 #' @param rt_request_handler handler function that handles request according to
 #' the event handlers specified
 #'
diff --git a/cran-comments.md b/cran-comments.md
index 1941ccb..fe3caa1 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,5 +1,6 @@
 ## R CMD check results
-0 errors | 0 warnings | 0 note
+0 errors | 0 warnings | 1 note
 
-* fixing all checks problems
+* fixing "incoming feasibility" URL checks problems
+* changing maintainer to Pedro Baltazar
diff --git a/man/get_robotstxt.Rd b/man/get_robotstxt.Rd
index 14a1bf5..d4384bf 100644
--- a/man/get_robotstxt.Rd
+++ b/man/get_robotstxt.Rd
@@ -35,9 +35,8 @@ happens,}
 \item{user_agent}{HTTP user-agent string to be used to retrieve robots.txt
 file from domain}
 
-\item{ssl_verifypeer}{analog to CURL option
-\url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html} -- and
-might help with robots.txt file retrieval in some cases}
+\item{ssl_verifypeer}{either 1 (default) or 0, if 0 it disables SSL peer verification, which
+might help with robots.txt file retrieval}
 
 \item{encoding}{Encoding of the robots.txt file.}
diff --git a/man/get_robotstxt_http_get.Rd b/man/get_robotstxt_http_get.Rd
index 6fbab38..7394273 100644
--- a/man/get_robotstxt_http_get.Rd
+++ b/man/get_robotstxt_http_get.Rd
@@ -22,9 +22,8 @@ get_robotstxt_http_get(
 \item{user_agent}{the user agent to use for HTTP request header}
 
-\item{ssl_verifypeer}{analog to CURL option
-\url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html}
--- and might help with robots.txt file retrieval in some cases}
+\item{ssl_verifypeer}{either 1 (default) or 0, if 0 it disables SSL peer verification, which
+might help with robots.txt file retrieval}
 }
 \description{
 storage for http request response objects
 }
diff --git a/man/get_robotstxts.Rd b/man/get_robotstxts.Rd
index bc4f769..2df0a1b 100644
--- a/man/get_robotstxts.Rd
+++ b/man/get_robotstxts.Rd
@@ -35,9 +35,8 @@ happens,}
 \item{user_agent}{HTTP user-agent string to be used to retrieve robots.txt
 file from domain}
 
-\item{ssl_verifypeer}{analog to CURL option
-\url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html}
--- and might help with robots.txt file retrieval in some cases}
+\item{ssl_verifypeer}{either 1 (default) or 0, if 0 it disables SSL peer verification, which
+might help with robots.txt file retrieval}
 
 \item{use_futures}{Should future::future_lapply be used for possible
 parallel/async retrieval or not. Note: check out help
diff --git a/man/paths_allowed.Rd b/man/paths_allowed.Rd
index 428656d..1cd178f 100644
--- a/man/paths_allowed.Rd
+++ b/man/paths_allowed.Rd
@@ -48,9 +48,8 @@ file from domain}
 will re-download the robotstxt file HTTP response status 404. If this
 happens,}
 
-\item{ssl_verifypeer}{analog to CURL option
-\url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html} -- and
-might help with robots.txt file retrieval in some cases}
+\item{ssl_verifypeer}{either 1 (default) or 0, if 0 it disables SSL peer verification, which
+might help with robots.txt file retrieval}
 
 \item{use_futures}{Should future::future_lapply be used for possible
 parallel/async retrieval or not. Note: check out help
diff --git a/man/robotstxt.Rd b/man/robotstxt.Rd
index 8a6b304..e6af5ab 100644
--- a/man/robotstxt.Rd
+++ b/man/robotstxt.Rd
@@ -38,9 +38,8 @@ file from domain}
 will re-download the robotstxt file HTTP response status 404. If this
 happens,}
 
-\item{ssl_verifypeer}{analog to CURL option
-\url{https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html} -- and
-might help with robots.txt file retrieval in some cases}
+\item{ssl_verifypeer}{either 1 (default) or 0, if 0 it disables SSL peer verification, which
+might help with robots.txt file retrieval}
 
 \item{encoding}{Encoding of the robots.txt file.}
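
The substantive documentation change above is the new wording for the ssl_verifypeer argument. As a quick orientation only (not part of the patch itself), the following minimal R sketch shows how that switch would be used with the exported functions whose man pages are touched above; "example.com" is a placeholder domain.

    library(robotstxt)

    # default: SSL peer verification stays enabled (ssl_verifypeer = 1)
    rt <- get_robotstxt(domain = "example.com")

    # if retrieval fails because of certificate problems, disabling peer
    # verification may help, at the cost of weaker transport security
    rt <- get_robotstxt(domain = "example.com", ssl_verifypeer = 0)

    # the same argument is accepted by the higher-level permission check
    paths_allowed(
      paths          = "/images/",
      domain         = "example.com",
      ssl_verifypeer = 0
    )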