Skip to content

Commit

Permalink
ctan_mirrors() returns more detailed information about CTAN mirrors; …
Browse files Browse the repository at this point in the history
…the link https://ctan.org/mirrors/mirmon appears to be broken at the moment, so scrape from https://ctan.org/mirrors/ instead

this was written for #450
  • Loading branch information
yihui committed Oct 9, 2024
1 parent 239eb91 commit 6743ffc
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 4 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: tinytex
Type: Package
Title: Helper Functions to Install and Maintain TeX Live, and Compile LaTeX Documents
Version: 0.53.2
Version: 0.53.3
Authors@R: c(
person("Yihui", "Xie", role = c("aut", "cre", "cph"), email = "xie@yihui.name", comment = c(ORCID = "0000-0003-0645-5666")),
person(given = "Posit Software, PBC", role = c("cph", "fnd")),
Expand Down
36 changes: 33 additions & 3 deletions R/install.R
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,39 @@ auto_repo = function() {

# retrieve all CTAN (https) mirrors by scraping https://ctan.org/mirrors/
#
# The page is read as one string and cut at its heading tags: each <h2>
# section is treated as a continent, each <h3> section inside it as a
# country/region, and each <h4> section inside that as a single mirror. Only
# mirrors that offer a link whose text is exactly "https" are kept.
#
# Returns a data frame with one row per mirror and the columns 'Continent',
# 'Country/Region', 'City', and 'URL'. 'City' is the text found in parentheses
# in the mirror's <h4> heading, or '' if the heading has no parentheses.
# Signals an error if no mirror can be extracted from the page.
ctan_mirrors = function() {
  html = xfun::file_string('https://ctan.org/mirrors/')
  # regex for a chunk that starts with the text of a level-i heading: group 2
  # is the heading text, group 3 is everything after the closing </hi> tag
  r = function(i) sprintf('^(.*>)?\\s*([^<]+)</h%d>\\s*(.*)$', i)
  res = unlist(lapply(unlist(strsplit(html, '<h2[^>]*>')), function(x) {
    x = unlist(strsplit(x, '<h3[^>]*>'))
    # skip chunks that are not an <h2> section containing <h3> subsections
    if (length(x) < 2 || !grepl('</h2>', x[1])) return()
    r2 = r(2)
    continent = gsub(r2, '\\2', x[1])
    # keep only what follows the <h2> heading; it is dropped below if blank
    x[1] = gsub(r2, '\\3', x[1])
    x = x[!grepl('^\\s*$', x)]
    r3 = r(3)
    # the first remaining chunk must start with an <h3> heading
    if (!grepl(r3, x[1])) return()
    country = gsub(r3, '\\2', x)
    x = gsub(r3, '\\3', x)
    r4 = r(4)
    # for each country, build a character vector of https URLs named by the
    # <h4> heading text of the mirror they belong to
    x = lapply(x, function(z) {
      z = unlist(strsplit(z, '<h4[^>]*>'))
      m = regexec('<a href="(https://[^"]+)"[^>]*>https</a>', z)
      # element 2 of each match is the captured URL (NA when there is no match)
      link = unlist(lapply(regmatches(z, m), `[`, 2))
      names(link) = gsub(r4, '\\2', z)
      link[!is.na(link)]
    })
    structure(list(structure(x, names = country)), names = continent)
  }))
  # without this check, strsplit(names(NULL), ...) below would fail with an
  # uninformative "non-character argument" error
  if (length(res) == 0) stop(
    'Failed to find any https mirrors on https://ctan.org/mirrors/',
    call. = FALSE
  )
  # unlist() above flattened the nested lists into a character vector of URLs
  # whose names have the form 'continent.country.mirror'; split them back up
  nm = lapply(strsplit(names(res), '.', fixed = TRUE), function(x) {
    # the mirror heading may itself contain dots, so rejoin the remainder
    x3 = paste(x[-(1:2)], collapse = '.')
    # keep only the text inside the (last) pair of parentheses as the city
    r5 = '.*\\(|\\).*'
    x3 = if (grepl(r5, x3)) gsub(r5, '', x3) else ''
    c(x[1], x[2], x3)
  })
  nm = do.call(rbind, nm)
  res = cbind(nm, unname(res))
  colnames(res) = c('Continent', 'Country/Region', 'City', 'URL')
  as.data.frame(res)
}

# use %APPDATA%/TinyTeX if it exists or doesn't contain spaces or non-ASCII
Expand Down

0 comments on commit 6743ffc

Please sign in to comment.