Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate functions to ari #7

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@ Type: Package
VignetteBuilder: knitr
URL: https://github.com/jhudsl/ariExtra
BugReports: https://github.com/jhudsl/ariExtra/issues
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Roxygen: list(markdown = TRUE)
16 changes: 1 addition & 15 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,23 @@

export("%>%")
export(ari_document)
export(download_gs_file)
export(get_folder_id)
export(get_slide_id)
export(gs_pptx_notes)
export(gs_to_ari)
export(html_to_ari)
export(images_to_ari)
export(make_ari_document)
export(make_slide_url)
export(pdf_to_ari)
export(pdf_to_pngs)
export(pngs_to_ari)
export(pptx_notes)
export(pptx_slide_note_df)
export(pptx_slide_text_df)
export(pptx_to_ari)
export(pptx_to_pdf)
export(pptx_to_pngs)
export(rmd_to_ari)
export(to_ari)
export(unzip_pptx)
export(xml_notes)
importFrom(ari,ari_spin)
importFrom(ari,download_gs_file)
importFrom(docxtractr,convert_to_pdf)
importFrom(httr,GET)
importFrom(httr,write_disk)
importFrom(jsonlite,fromJSON)
importFrom(magrittr,"%>%")
importFrom(pdftools,pdf_convert)
importFrom(pdftools,pdf_info)
Expand All @@ -41,9 +31,5 @@ importFrom(stats,na.omit)
importFrom(tools,file_ext)
importFrom(tools,file_path_sans_ext)
importFrom(utils,file.edit)
importFrom(utils,unzip)
importFrom(xml2,read_html)
importFrom(xml2,read_xml)
importFrom(xml2,xml_find_all)
importFrom(xml2,xml_text)
importFrom(yaml,as.yaml)
140 changes: 0 additions & 140 deletions R/aaa_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -218,101 +218,6 @@ sys_type <- function() {
}
}

# Build the export URL for a Google Slides deck, optionally scoped to a
# single page via `page_id`. `type` selects the export format (png/pdf/pptx).
type_url <- function(id, page_id = NULL, type = "png") {
  base <- sprintf(
    "https://docs.google.com/presentation/d/%s/export/%s?id=%s",
    id, type, id
  )
  if (is.null(page_id)) {
    base
  } else {
    # pageid vectorizes, so one call can yield a URL per slide
    paste0(base, "&pageid=", page_id)
  }
}

# Alias of type_url: default type = "png" exports a slide image
png_url = type_url

# Build a URL that exports the whole deck as a PPTX file
pptx_url <- function(id) {
  type_url(id, type = "pptx")
}

# Build a URL that exports the whole deck as a PDF file
pdf_url <- function(id) {
  type_url(id, type = "pdf")
}

# Extract page IDs of slides in a Google Slides presentation.
#
# Fetches the public HTML of the presentation, scrapes the inline
# <script> tags, and parses the embedded "DOCS_modelChunk" JSON payloads
# that Google uses to bootstrap the editor.  Each chunk that mentions a
# slide-layout type contains the page ID of one slide.  Returns only the
# page IDs whose export URLs respond with a valid status (see
# check_png_urls).  NOTE(review): this relies on an undocumented internal
# page format that Google may change at any time.
#' @importFrom jsonlite fromJSON
get_page_ids = function(id) {
  # Accept a full URL or a bare ID; normalize to the bare presentation ID
  id = get_slide_id(id)
  url = paste0("https://docs.google.com/presentation/d/", id)
  tfile = tempfile(fileext = ".html")
  # Download the presentation page to disk; errors on non-2xx status
  res = httr::GET(url, httr::write_disk(tfile))
  httr::stop_for_status(res)
  cr = httr::content(res)
  # Pull the text of every inline <script> tag
  script = rvest::html_nodes(cr, xpath ="//script")
  script = rvest::html_text(script)
  script = unique(script)
  # Keep only scripts that assign a DOCS_modelChunk array literal
  script = gsub("DOCS_modelChunk = undefined;", "", script)
  script = script[ grepl("DOCS_modelChunk\\s=\\s\\[", x = script)]

  # Every slide-layout name Google may emit; a chunk containing one of
  # these corresponds to an actual slide
  all_types = c("PREDEFINED_LAYOUT_UNSPECIFIED",
                "BLANK",
                "CAPTION_ONLY",
                "TITLE",
                "TITLE_AND_BODY",
                "TITLE_AND_TWO_COLUMNS",
                "TITLE_ONLY",
                "SECTION_HEADER",
                "SECTION_TITLE_AND_DESCRIPTION",
                "ONE_COLUMN_TEXT",
                "MAIN_POINT",
                "BIG_NUMBER",
                paste0("CUSTOM_", 1:100))
  types = paste0(all_types, collapse = "|")
  # script = script[grepl(types, script)]
  # Split concatenated assignments apart; "; DOC" separates chunks
  ss = strsplit(script, "; DOC")
  ss = lapply(ss, trimws)
  # Re-prefix pieces whose leading "DOC" was consumed by the split above
  ss = lapply(ss, function(x) {
    x[!grepl("^DOC", x)] = paste0(" DOC", x[!grepl("^DOC", x)])
    x
  })
  # Strip the assignment prefix so each piece is a bare JSON array
  ss = lapply(ss, function(x) {
    x = x[grepl("^DOCS_modelChunk\\s=\\s\\[", x)]
    x = x[ !x %in% "DOCS_modelChunk = undefined"]
    x = sub("^DOCS_modelChunk\\s=\\s\\[", "[", x)
    x
  })
  ss = unlist(ss)
  pages = lapply(ss, jsonlite::fromJSON)
  # Keep the last element of each chunk that references a layout type;
  # that element holds the page's metadata
  pages = sapply(pages, function(x) {
    x = x[sapply(x, function(r) any(unlist(r) %in% all_types))]
    x = x[length(x)]
    x
  })
  # The page ID sits in the second slot of that element; NA when absent
  pages = sapply(pages, function(x) {
    if (length(x) < 2) {
      if (length(x) == 0) {
        return(NA)
      }
      x = x[[1]]
      if (length(x) < 2) {
        return(NA)
      }
    }
    x[[2]]
  })
  pages = pages[ !is.na(pages) ]
  # After the first page, real slide IDs start with "g"
  if (length(pages) >= 2) {
    pages = c(pages[1], grep("^g", pages[2:length(pages)], value = TRUE))
  }
  # The first slide is always addressable as "p"; make sure it is present
  if (pages[1] != "p") {
    pages = unique(c("p", pages))
  }
  # Drop any page whose export URL does not actually resolve
  urls = type_url(id = id, page_id = pages)
  pages = pages[check_png_urls(urls)]
  pages
}

# Check if vector of URLs is valid (Status Code = 200)
check_png_urls <- function(urls) {
res = vapply(urls, function(url) {
Expand All @@ -335,49 +240,4 @@ download_png_urls = function(urls) {



#' Get Slide ID from URL
#'
#' Extracts the bare presentation ID from a Google Slides URL (edit,
#' published, or export links), or passes a bare ID through unchanged.
#'
#' @param x URL of slide
#'
#' @return A character vector
#' @export
#'
#' @examples
#' x = paste0("https://docs.google.com/presentation/d/",
#'        "1Tg-GTGnUPduOtZKYuMoelqUNZnUp3vvg_7TtpUPL7e8",
#'        "/edit#slide=id.g154aa4fae2_0_58")
#' get_slide_id(x)
get_slide_id <- function(x) {
  # Drop everything up to and including "presentation/"
  id <- sub(".*presentation/", "", x)
  # Accidentally-published decks carry "/d/e"; normalize to "/d"
  id <- sub("/d/e", "/d", id)
  id <- sub("^(d|e)/", "", id)
  # Split the remaining path and discard view-mode segments
  pieces <- strsplit(id, "/")[[1]]
  pieces <- pieces[!grepl("^(edit|pub|export|png)", pieces)]
  # Real IDs are long; this filters out any short leftover fragments
  pieces[nchar(pieces) > 5]
}

#' @export
#' @rdname get_slide_id
make_slide_url <- function(x) {
  # Canonical presentation URL built from the extracted ID
  paste0("https://docs.google.com/presentation/d/", get_slide_id(x))
}

#' @rdname get_slide_id
#' @export
#' @examples
#' x = "https://drive.google.com/drive/folders/1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC?usp=sharing"
#' get_folder_id(x)
#' x = "1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC"
#' get_folder_id(x)
get_folder_id = function(x) {
  # Work from the URL's path component so scheme/host/query are ignored;
  # a bare ID parses to a path equal to itself and passes straight through
  path <- httr::parse_url(x)$path
  path <- sub(".*folders/", "", path)
  # Defensive: strip any query string that survived in the path
  path <- sub("[?].*", "", path)
  # Drop short fragments, then trim surrounding whitespace
  trimws(path[nchar(path) > 5])
}
89 changes: 8 additions & 81 deletions R/gs_ari.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,55 +5,6 @@ quick_arg_check = function(args) {
}
}

#' Download Google Slides File
#'
#' @param id Identifier of Google slides presentation, passed to
#' \code{\link{get_slide_id}}
#' @param out_type output type of file to download. Usually
#' `pdf` or `pptx`
#'
#' @note This downloads presentations if they are public and also try to make
#' sure it does not fail on large files
#' @return Downloaded file (in temporary directory).  The path is returned
#' even when the download looks unusable (a warning is raised instead);
#' callers should be prepared for the file to be missing or empty.
#' @export
download_gs_file = function(id, out_type = "pptx") {
  # Accept URLs or bare IDs; normalize to the bare presentation ID
  id = as.character(id)
  id = get_slide_id(id)
  url = type_url(id = id, page_id = NULL, type = out_type)
  tfile = tempfile(fileext = paste0(".", out_type))

  # Stream the export directly to disk (avoids large in-memory bodies)
  result = httr::GET(url, httr::write_disk(tfile))
  warn_them = FALSE
  # X-Frame-Options: DENY is what the login page sends — a sign the deck
  # is not shared publicly
  fr_header = result$headers$`x-frame-options`
  if (!is.null(fr_header)) {
    if (all(fr_header == "DENY")) {
      warn_them = TRUE
    }
  }
  if (httr::status_code(result) >= 300) {
    warn_them = TRUE
  }
  # Don't write something if not really a pptx
  # (an HTML error page saved under a .pptx name would confuse callers)
  ctype = result$headers$`content-type`
  if (httr::status_code(result) >= 400 &&
      !is.null(ctype) && grepl("html", ctype)) {
    file.remove(tfile)
  }
  # Being bounced to Google's sign-in page also means the deck is private
  if (grepl("ServiceLogin", result$url)) {
    warn_them = TRUE
  }
  # if (result$times["redirect"] > 0) {
  #   warn_them = TRUE
  # }
  if (warn_them) {
    warning(
      paste0(
        "This presentation may not be available, ",
        "did you turn link sharing on?")
    )
  }
  # Always return the temp path, even after a warning / file removal
  tfile
}

# Extract Script from PPTX and write to a text file
get_pptx_script = function(path,
Expand All @@ -64,7 +15,7 @@ get_pptx_script = function(path,
if (verbose) {
message("Getting Notes from PPTX")
}
res = pptx_notes(path, ...)
res = ari::pptx_notes(path, ...)
script = tempfile(fileext = ".txt")
if (verbose > 1) {
message(paste0("script is at: ", script))
Expand All @@ -82,6 +33,7 @@ get_pptx_script = function(path,
#'
#' @return The output from [make_ari_document]
#' @importFrom httr GET write_disk
#' @importFrom ari download_gs_file
#' @export
#' @examples
#' \donttest{
Expand All @@ -105,7 +57,7 @@ gs_to_ari = function (path,
if (verbose) {
message("Downloading PPTX")
}
pptx_file = download_gs_file(id = path, out_type = "pptx")
pptx_file = ari::download_gs_file(id = path, out_type = "pptx")
if (verbose > 1) {
message(paste0("pptx is at: ", pptx_file))
}
Expand All @@ -117,7 +69,7 @@ gs_to_ari = function (path,
if (verbose) {
message("Downloading PDF")
}
pdf_file = download_gs_file(id = path, out_type = "pdf")
pdf_file = ari::download_gs_file(id = path, out_type = "pdf")
if (verbose > 1) {
message(paste0("PDF is at: ", pdf_file))
}
Expand All @@ -129,6 +81,7 @@ gs_to_ari = function (path,

#' @export
#' @param ... additional arguments to \code{\link{pptx_notes}}
#' @importFrom ari download_gs_file
#' @rdname gs_to_ari
gs_pptx_notes = function(
path,
Expand All @@ -138,7 +91,7 @@ gs_pptx_notes = function(
if (verbose) {
message("Downloading PPTX")
}
pptx_file = download_gs_file(id = path, out_type = "pptx")
pptx_file = ari::download_gs_file(id = path, out_type = "pptx")
if (verbose > 1) {
message(paste0("pptx is at: ", pptx_file))
}
Expand Down Expand Up @@ -183,7 +136,7 @@ pptx_to_pngs = function(path, verbose = TRUE, dpi = 600) {
", see pdftools::pdf_convert for options")
)
}
pdf_to_pngs(
ari::pdf_to_pngs(
path = pdf_file,
verbose = verbose,
dpi = dpi)
Expand Down Expand Up @@ -260,7 +213,7 @@ pdf_to_ari = function(
stopifnot(!is.null(script))
args = list(...)
quick_arg_check(args)
pngs = pdf_to_pngs(path = path, dpi = dpi, verbose = verbose)
pngs = ari::pdf_to_pngs(path = path, dpi = dpi, verbose = verbose)
make_ari_document(pngs, script = script, ..., verbose = verbose)
}

Expand Down Expand Up @@ -293,32 +246,6 @@ html_to_ari = function(
..., verbose = verbose)
}


#' @rdname gs_to_ari
#' @export
pdf_to_pngs = function(
    path, verbose = TRUE,
    dpi = 600) {
  # Prefer PNG output; otherwise fall back to whatever poppler supports
  supported <- pdftools::poppler_config()$supported_image_formats
  fmt <- if ("png" %in% supported) "png" else supported[1]
  # One temp file per page of the PDF
  n_pages <- pdftools::pdf_info(pdf = path)$pages
  out_files <- vapply(
    seq.int(n_pages),
    function(i) tempfile(fileext = paste0(".", fmt)),
    FUN.VALUE = character(1)
  )
  if (verbose) {
    message("Converting PDF to PNGs")
  }
  # Returns the vector of written image paths
  pdftools::pdf_convert(
    pdf = path, dpi = dpi,
    format = fmt, filenames = out_files,
    verbose = as.logical(verbose)
  )
}

#' @rdname gs_to_ari
#' @export
images_to_ari = function(
Expand Down
Loading