From 70adb10eb8ce8cec4cf0a328f81ecd9c678feb75 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 7 Jul 2023 15:18:21 -0700 Subject: [PATCH 01/44] Resolve --- DESCRIPTION | 1 + NAMESPACE | 11 +++ R/download_gs_file.R | 81 ++++++++++++++++++ R/pptx_notes.R | 185 ++++++++++++++++++++++++++++++++++++++++ man/download_gs_file.Rd | 25 ++++++ man/get_slide_id.Rd | 23 +++++ man/pptx_notes.Rd | 36 ++++++++ man/xml_notes.Rd | 21 +++++ 8 files changed, 383 insertions(+) create mode 100644 R/download_gs_file.R create mode 100644 R/pptx_notes.R create mode 100644 man/download_gs_file.Rd create mode 100644 man/get_slide_id.Rd create mode 100644 man/pptx_notes.Rd create mode 100644 man/xml_notes.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6601ef5..d491d38 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,3 +47,4 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 SystemRequirements: ffmpeg (>= 3.2.4) +VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 649f19a..37726d3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(ari_stitch) export(ari_talk) export(audio_codec_encode) export(check_ffmpeg_version) +export(download_gs_file) export(ffmpeg_audio_codecs) export(ffmpeg_codecs) export(ffmpeg_convert) @@ -17,14 +18,20 @@ export(ffmpeg_version) export(ffmpeg_version_sufficient) export(ffmpeg_video_codecs) export(get_audio_codec) +export(get_slide_id) export(get_video_codec) export(have_ffmpeg_exec) export(have_polly) export(pad_wav) +export(pptx_notes) +export(pptx_slide_note_df) +export(pptx_slide_text_df) export(set_audio_codec) export(set_video_codec) +export(unzip_pptx) export(video_codec_encode) importFrom(cli,cli_alert_info) +export(xml_notes) importFrom(hms,hms) importFrom(progress,progress_bar) importFrom(purrr,compose) @@ -48,5 +55,9 @@ importFrom(tools,file_path_sans_ext) importFrom(tuneR,Wave) importFrom(tuneR,bind) importFrom(tuneR,writeWave) +importFrom(utils,unzip) importFrom(webshot,webshot) importFrom(xml2,read_html) +importFrom(xml2,read_xml) +importFrom(xml2,xml_find_all) +importFrom(xml2,xml_text) diff --git a/R/download_gs_file.R b/R/download_gs_file.R new file mode 100644 index 0000000..cbce4aa --- /dev/null +++ b/R/download_gs_file.R @@ -0,0 +1,81 @@ +#' Download Google Slides File +#' +#' @param gs_url Link to Google slides presentation, passed to +#' \code{\link{get_slide_id}} +#' @param out_type output type of file to download. Usually +#' `pdf` or `pptx` +#' +#' @note This downloads presentations if they are public and also try to make +#' sure it does not fail on large files +#' @return Downloaded file (in temporary directory) +#' @export +download_gs_file = function(gs_url, out_type = "pptx") { + stopifnot(is.character(gs_url)) + id = get_slide_id(gs_url) + # construct URL to export image file from Google Slides + url = export_url(id = id, page_id = NULL, type = out_type) + tmp = tempfile(fileext = paste0(".", out_type)) + + # retrieve from url and write response to disk + result = httr::GET(url, httr::write_disk(tmp)) + warn_user = FALSE + fr_header = result$headers$`x-frame-options` + if (!is.null(fr_header)) { + if (all(fr_header == "DENY")) { + warn_user = TRUE + } + } + if (httr::status_code(result) >= 300) { + warn_user = TRUE + } + # Don't write something if not really a pptx + content_type = result$headers$`content-type` + if (httr::status_code(result) >= 400 && + !is.null(content_type) && grepl("html", content_type)) { + file.remove(tmp) + } + if (grepl("ServiceLogin", result$url)) { + warn_user = TRUE + } + + if (warn_user) { + warning( + paste0( + "This presentation may not be available, ", + "did you turn link sharing on?") + ) + } + tmp +} + +#' Get Slide ID from URL +#' +#' @param x URL of slide +#' +#' @return A character vector +#' @export +#' +#' @examples +#' x = paste0("https://docs.google.com/presentation/d/", +#' "1Tg-GTGnUPduOtZKYuMoelqUNZnUp3vvg_7TtpUPL7e8", +#' "/edit#slide=id.g154aa4fae2_0_58") +#' get_slide_id(x) +get_slide_id <- function(x) { + x = sub(".*presentation/", "", x) + x = sub("/d/e", "/d", x) # if you publish by accident + x = sub("^(d|e)/", "", x) + x = strsplit(x, "/")[[1]] + x = x[ !grepl("^(edit|pub|export|png)", x)] + x = x[ nchar(x) > 5] + x +} + +# Constructs an URL to export an image file from a Google Slides +export_url <- function(id, page_id = NULL, type = "png") { + url <- paste0("https://docs.google.com/presentation/d/", + id, "/export/", type, "?id=", id) + if (!is.null(page_id)) { + url = paste0(url, "&pageid=", page_id) + } + url +} diff --git a/R/pptx_notes.R b/R/pptx_notes.R new file mode 100644 index 0000000..e4028b9 --- /dev/null +++ b/R/pptx_notes.R @@ -0,0 +1,185 @@ +#' Get Notes from a PowerPoint (usually from Google Slides) +#' +#' @param file Character. Path for `PPTX` file +#' @param ... additional arguments to pass to \code{\link{xml_notes}}, +#' particularly \code{xpath} +#' +#' @return Either a character vector or `NULL` +#' @export +#' +#' @importFrom utils unzip +#' @examples +#' ex_file = system.file("extdata", "example.pptx", +#' package = "ariExtra") +#' pptx_notes(ex_file) +#' pptx_slide_note_df(ex_file) +#' pptx_slide_text_df(ex_file) +pptx_notes = function(file, ...) { + + df = pptx_slide_note_df(file, ...) + if (is.null(df)) { + return(NULL) + } + # need factor because they can be dumb with characters + # and numerics and the file naming of PPTX files + fac = basename(df$file) + fac = factor(fac, levels = unique(fac)) + ss = split(df, fac) + res = sapply(ss, function(x) { + paste(x$text, collapse = " ") + }) + if (any(trimws(res) %in% "")) { + warning("Slides with no notes exists") + } + res[ res == ""] = ";" + return(res) +} + +#' @export +#' @rdname pptx_notes +pptx_slide_text_df = function(file, ...) { + + L = unzip_pptx(file) + slides = L$slides + + if (length(slides) > 0) { + # in case empty notes + res = lapply(slides, function(x) { + xx = xml_notes(x, collapse_text = FALSE, ...) + if (length(xx) == 0) { + return(NULL) + } + snum = sub("[.]xml", "", sub("slide", "", basename(x))) + snum = as.numeric(snum) + data.frame( + file = x, + slide = snum, + text = xx, + index = 1:length(xx), + stringsAsFactors = FALSE) + }) + res = do.call(rbind, res) + return(res) + } else { + return(NULL) + } +} + +#' @export +#' @rdname pptx_notes +pptx_slide_note_df = function(file, ...) { + + L = unzip_pptx(file) + notes = L$notes + slides = L$slides + note_dir = L$note_dir + + if (length(notes) > 0) { + # in case empty notes + assoc_notes = sub("slide", "", basename(slides)) + assoc_notes = paste0("notesSlide", assoc_notes) + assoc_notes = file.path(note_dir, assoc_notes) + no_fe = !file.exists(assoc_notes) + if (any(no_fe)) { + file.create(assoc_notes[no_fe]) + notes = assoc_notes + } + res = lapply(notes, function(x) { + if (file.size(x) == 0) { + xx = "" + } else { + xx = xml_notes(x, collapse_text = FALSE, ...) + } + if (length(xx) == 0) { + xx = "" + } + snum = sub("[.]xml", "", sub("notesSlide", "", basename(x))) + snum = as.numeric(snum) + data.frame( + file = x, + slide = snum, + text = xx, + index = 1:length(xx), + stringsAsFactors = FALSE) + }) + res = do.call(rbind, res) + return(res) + } else { + return(NULL) + } +} + + +pptx_reorder_xml = function(files) { + if (length(files) == 0) { + return(files) + } + nums = basename(files) + nums = sub("[[:alpha:]]*(\\d.*)[.].*", "\\1", nums) + nums = as.numeric(nums) + if (any(is.na(nums))) { + warning(paste0("Trying to parse set of files (example: ", files[1], + ") from PPTX, failed")) + return(files) + } + files = files[order(nums)] +} + +#' @export +#' @rdname pptx_notes +unzip_pptx = function(file) { + tdir = tempfile() + dir.create(tdir) + res = unzip(file, exdir = tdir) + rm(res) + slide_dir = file.path(tdir, "ppt", "slides") + slides = list.files(path = slide_dir, pattern = "[.]xml$", + full.names = TRUE) + slides = pptx_reorder_xml(slides) + + note_dir = file.path(tdir, "ppt", "notesSlides") + notes = list.files(path = note_dir, pattern = "[.]xml$", + full.names = TRUE) + notes = pptx_reorder_xml(notes) + + tdir = normalizePath(tdir) + props_dir = file.path(tdir, "docProps") + props_file = file.path(props_dir, "core.xml") + ari_core_file = system.file("extdata", "docProps", + "core.xml", package = "ariExtra") + if (!dir.exists(props_file)) { + dir.create(props_dir, recursive = TRUE) + file.copy(ari_core_file, props_file, + overwrite = TRUE) + } + + L = list(slides = slides, + notes = notes, + slide_dir = slide_dir, + note_dir = note_dir, + props_dir = props_dir, + props_file = props_file, + root_dir = tdir) + return(L) +} + +#' Get Notes from XML +#' +#' @param file XML file from a PPTX +#' @param collapse_text should text be collapsed by spaces? +#' @param xpath \code{xpath} to pass to [xml2::xml_find_all()] +#' +#' @return A character vector +#' @export +#' +#' @importFrom xml2 read_xml xml_text xml_find_all +xml_notes = function(file, collapse_text = TRUE, xpath = "//a:r//a:t") { + xdoc = xml2::read_xml(file) + # probably need to a:p//a:t and collapse all text within a a:p + txt = xml2::xml_find_all(x = xdoc, xpath = xpath) + txt = xml2::xml_text(txt) + if (collapse_text) { + txt = paste(txt, collapse = " ") + } + return(txt) +} diff --git a/man/download_gs_file.Rd b/man/download_gs_file.Rd new file mode 100644 index 0000000..e68b439 --- /dev/null +++ b/man/download_gs_file.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download_gs_file.R +\name{download_gs_file} +\alias{download_gs_file} +\title{Download Google Slides File} +\usage{ +download_gs_file(gs_url, out_type = "pptx") +} +\arguments{ +\item{gs_url}{Link to Google slides presentation, passed to +\code{\link{get_slide_id}}} + +\item{out_type}{output type of file to download. Usually +`pdf` or `pptx`} +} +\value{ +Downloaded file (in temporary directory) +} +\description{ +Download Google Slides File +} +\note{ +This downloads presentations if they are public and also try to make +sure it does not fail on large files +} diff --git a/man/get_slide_id.Rd b/man/get_slide_id.Rd new file mode 100644 index 0000000..d5b2e34 --- /dev/null +++ b/man/get_slide_id.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download_gs_file.R +\name{get_slide_id} +\alias{get_slide_id} +\title{Get Slide ID from URL} +\usage{ +get_slide_id(x) +} +\arguments{ +\item{x}{URL of slide} +} +\value{ +A character vector +} +\description{ +Get Slide ID from URL +} +\examples{ +x = paste0("https://docs.google.com/presentation/d/", +"1Tg-GTGnUPduOtZKYuMoelqUNZnUp3vvg_7TtpUPL7e8", +"/edit#slide=id.g154aa4fae2_0_58") +get_slide_id(x) +} diff --git a/man/pptx_notes.Rd b/man/pptx_notes.Rd new file mode 100644 index 0000000..026ded3 --- /dev/null +++ b/man/pptx_notes.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pptx_notes.R +\name{pptx_notes} +\alias{pptx_notes} +\alias{pptx_slide_text_df} +\alias{pptx_slide_note_df} +\alias{unzip_pptx} +\title{Get Notes from a PowerPoint (usually from Google Slides)} +\usage{ +pptx_notes(file, ...) + +pptx_slide_text_df(file, ...) + +pptx_slide_note_df(file, ...) + +unzip_pptx(file) +} +\arguments{ +\item{file}{Character. Path for `PPTX` file} + +\item{...}{additional arguments to pass to \code{\link{xml_notes}}, +particularly \code{xpath}} +} +\value{ +Either a character vector or `NULL` +} +\description{ +Get Notes from a PowerPoint (usually from Google Slides) +} +\examples{ +ex_file = system.file("extdata", "example.pptx", +package = "ariExtra") +pptx_notes(ex_file) +pptx_slide_note_df(ex_file) +pptx_slide_text_df(ex_file) +} diff --git a/man/xml_notes.Rd b/man/xml_notes.Rd new file mode 100644 index 0000000..72ec733 --- /dev/null +++ b/man/xml_notes.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pptx_notes.R +\name{xml_notes} +\alias{xml_notes} +\title{Get Notes from XML} +\usage{ +xml_notes(file, collapse_text = TRUE, xpath = "//a:r//a:t") +} +\arguments{ +\item{file}{XML file from a PPTX} + +\item{collapse_text}{should text be collapsed by spaces?} + +\item{xpath}{\code{xpath} to pass to [xml2::xml_find_all()]} +} +\value{ +A character vector +} +\description{ +Get Notes from XML +} From d80cb3b63d3542ad8400120199af9cfd7e3f91e2 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 2 Jun 2023 15:01:08 -0700 Subject: [PATCH 02/44] Add `download_gs_file()` and related functions from ariExtra --- R/download_gs_file.R | 112 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index cbce4aa..86721a3 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -79,3 +79,115 @@ export_url <- function(id, page_id = NULL, type = "png") { } url } + +# Constructs an URL to export to pptx +pptx_url = function(id) { + export_url(id, page_id = NULL, type = "pptx") +} + +# Constructs an URL to export to pdf +pdf_url = function(id) { + export_url(id, page_id = NULL, type = "pdf") +} + + + +# Extract page IDs of slides in a Google Slides presentation +#' @importFrom jsonlite fromJSON +get_page_ids = function(id) { + id = get_slide_id(id) + url = paste0("https://docs.google.com/presentation/d/", id) + tfile = tempfile(fileext = ".html") + res = httr::GET(url, httr::write_disk(tfile)) + httr::stop_for_status(res) + cr = httr::content(res) + script = rvest::html_nodes(cr, xpath ="//script") + script = rvest::html_text(script) + script = unique(script) + script = gsub("DOCS_modelChunk = undefined;", "", script) + script = script[ grepl("DOCS_modelChunk\\s=\\s\\[", x = script)] + + all_types = c("PREDEFINED_LAYOUT_UNSPECIFIED", + "BLANK", + "CAPTION_ONLY", + "TITLE", + "TITLE_AND_BODY", + "TITLE_AND_TWO_COLUMNS", + "TITLE_ONLY", + "SECTION_HEADER", + "SECTION_TITLE_AND_DESCRIPTION", + "ONE_COLUMN_TEXT", + "MAIN_POINT", + "BIG_NUMBER", + paste0("CUSTOM_", 1:100)) + types = paste0(all_types, collapse = "|") + # script = script[grepl(types, script)] + ss = strsplit(script, "; DOC") + ss = lapply(ss, trimws) + ss = lapply(ss, function(x) { + x[!grepl("^DOC", x)] = paste0(" DOC", x[!grepl("^DOC", x)]) + x + }) + ss = lapply(ss, function(x) { + x = x[grepl("^DOCS_modelChunk\\s=\\s\\[", x)] + x = x[ !x %in% "DOCS_modelChunk = undefined"] + x = sub("^DOCS_modelChunk\\s=\\s\\[", "[", x) + x + }) + ss = unlist(ss) + pages = lapply(ss, jsonlite::fromJSON) + pages = sapply(pages, function(x) { + x = x[sapply(x, function(r) any(unlist(r) %in% all_types))] + x = x[length(x)] + x + }) + pages = sapply(pages, function(x) { + if (length(x) < 2) { + if (length(x) == 0) { + return(NA) + } + x = x[[1]] + if (length(x) < 2) { + return(NA) + } + } + x[[2]] + }) + pages = pages[ !is.na(pages) ] + if (length(pages) >= 2) { + pages = c(pages[1], grep("^g", pages[2:length(pages)], value = TRUE)) + } + if (pages[1] != "p") { + pages = unique(c("p", pages)) + } + urls = export_url(id = id, page_id = pages) + pages = pages[check_png_urls(urls)] + pages +} + +#' @rdname get_slide_id +#' @export +#' @examples +#' x = "https://drive.google.com/drive/folders/1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC?usp=sharing" +#' get_folder_id(x) +#' x = "1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC" +#' get_folder_id(x) +get_folder_id = function(x) { + res = httr::parse_url(x) + x = res$path + x = sub(".*folders/", "", x) + x = sub("[?].*", "", x) + x = x[ nchar(x) > 5] + x = trimws(x) + x +} + +#' @export +#' @rdname get_slide_id +make_slide_url <- function(x) { + x = get_slide_id(x) + x = paste0("https://docs.google.com/presentation/d/",x) + x +} + + From a678cc26861d0d183c5229df95ae23e85319c778 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 2 Jun 2023 15:01:18 -0700 Subject: [PATCH 03/44] Update documentation --- NAMESPACE | 3 +++ man/get_slide_id.Rd | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 37726d3..7b5ac36 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,10 +18,12 @@ export(ffmpeg_version) export(ffmpeg_version_sufficient) export(ffmpeg_video_codecs) export(get_audio_codec) +export(get_folder_id) export(get_slide_id) export(get_video_codec) export(have_ffmpeg_exec) export(have_polly) +export(make_slide_url) export(pad_wav) export(pptx_notes) export(pptx_slide_note_df) @@ -33,6 +35,7 @@ export(video_codec_encode) importFrom(cli,cli_alert_info) export(xml_notes) importFrom(hms,hms) +importFrom(jsonlite,fromJSON) importFrom(progress,progress_bar) importFrom(purrr,compose) importFrom(purrr,discard) diff --git a/man/get_slide_id.Rd b/man/get_slide_id.Rd index d5b2e34..8b707c4 100644 --- a/man/get_slide_id.Rd +++ b/man/get_slide_id.Rd @@ -2,9 +2,15 @@ % Please edit documentation in R/download_gs_file.R \name{get_slide_id} \alias{get_slide_id} +\alias{get_folder_id} +\alias{make_slide_url} \title{Get Slide ID from URL} \usage{ get_slide_id(x) + +get_folder_id(x) + +make_slide_url(x) } \arguments{ \item{x}{URL of slide} @@ -20,4 +26,8 @@ x = paste0("https://docs.google.com/presentation/d/", "1Tg-GTGnUPduOtZKYuMoelqUNZnUp3vvg_7TtpUPL7e8", "/edit#slide=id.g154aa4fae2_0_58") get_slide_id(x) +x = "https://drive.google.com/drive/folders/1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC?usp=sharing" +get_folder_id(x) +x = "1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC" +get_folder_id(x) } From 9533715547acc33a720731d02d6a64370dcefa1b Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 2 Jun 2023 15:10:25 -0700 Subject: [PATCH 04/44] Add `pdf_to_pngs()` from ariExtra --- R/download_gs_file.R | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index 86721a3..82a1896 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -90,7 +90,13 @@ pdf_url = function(id) { export_url(id, page_id = NULL, type = "pdf") } - +#' @export +#' @rdname get_slide_id +make_slide_url <- function(x) { + x = get_slide_id(x) + x = paste0("https://docs.google.com/presentation/d/",x) + x +} # Extract page IDs of slides in a Google Slides presentation #' @importFrom jsonlite fromJSON @@ -183,11 +189,25 @@ get_folder_id = function(x) { } #' @export -#' @rdname get_slide_id -make_slide_url <- function(x) { - x = get_slide_id(x) - x = paste0("https://docs.google.com/presentation/d/",x) - x +pdf_to_pngs = function( + path, verbose = TRUE, + dpi = 600) { + fmts = pdftools::poppler_config()$supported_image_formats + if ("png" %in% fmts) { + format = "png" + } else { + format = fmts[1] + } + info = pdftools::pdf_info(pdf = path) + filenames = vapply(seq.int(info$pages), function(x) { + tempfile(fileext = paste0(".", format)) + }, FUN.VALUE = character(1)) + if (verbose) { + message("Converting PDF to PNGs") + } + pngs = pdftools::pdf_convert( + pdf = path, dpi = dpi, + format = format, filenames = filenames, + verbose = as.logical(verbose)) + pngs } - - From 6524a07d7f645859be236bf9ea49a558aa9fc123 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 2 Jun 2023 15:13:45 -0700 Subject: [PATCH 05/44] Stylistic changes --- R/download_gs_file.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index 82a1896..de424a4 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -189,9 +189,9 @@ get_folder_id = function(x) { } #' @export -pdf_to_pngs = function( - path, verbose = TRUE, - dpi = 600) { +pdf_to_pngs = function(path, + verbose = TRUE, + dpi = 600) { fmts = pdftools::poppler_config()$supported_image_formats if ("png" %in% fmts) { format = "png" From 7cbd0323cd6c67fc8ee4904638c243db80fb32cf Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 7 Jul 2023 15:19:37 -0700 Subject: [PATCH 06/44] Resolve --- R/ari_spin.R | 11 ++++++++--- R/pptx_notes.R | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index 4cf810a..0b044ff 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -121,6 +121,10 @@ ari_spin <- function(images, paragraphs, format = "Fetching Narration [:bar] :percent", total = length(par_along) ) + if (service == "coqui") { + cli::cli_alert_info("Coqui TTS does not support MP3 format; will produce a WAV audio output.") + } + # Iterate through arguments used in tts() for (i in par_along) { args <- tts_args @@ -131,6 +135,9 @@ ari_spin <- function(images, paragraphs, # coqui+ari doesn't work with mp3 if (service == "coqui") { args$output_format <- "wav" + args$voice <- NULL + args$model_name <- model_name + args$vocoder_name <- vocoder_name } wav <- do.call(text2speech::tts, args = args) wav <- reduce(wav$wav, bind) @@ -139,9 +146,7 @@ ari_spin <- function(images, paragraphs, wave_objects[[i]] <- wav pb$tick() } - if (service == "coqui") { - cli::cli_alert_info("Coqui TTS does not support MP3 format; will produce a WAV audio output.") - } + # Burn subtitles if (subtitles) { diff --git a/R/pptx_notes.R b/R/pptx_notes.R index e4028b9..d6ece6f 100644 --- a/R/pptx_notes.R +++ b/R/pptx_notes.R @@ -32,6 +32,7 @@ pptx_notes = function(file, ...) { warning("Slides with no notes exists") } res[ res == ""] = ";" + names(res) <- NULL return(res) } From 82265313d9fb5d15dd124c800729e8954063480c Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 7 Jul 2023 15:23:53 -0700 Subject: [PATCH 07/44] Resolve --- DESCRIPTION | 1 - NAMESPACE | 4 ++++ R/download_gs_file.R | 1 + man/get_slide_id.Rd | 6 +++--- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d491d38..6601ef5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,4 +47,3 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 SystemRequirements: ffmpeg (>= 3.2.4) -VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 7b5ac36..735cc58 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(have_ffmpeg_exec) export(have_polly) export(make_slide_url) export(pad_wav) +export(pdf_to_pngs) export(pptx_notes) export(pptx_slide_note_df) export(pptx_slide_text_df) @@ -36,6 +37,9 @@ importFrom(cli,cli_alert_info) export(xml_notes) importFrom(hms,hms) importFrom(jsonlite,fromJSON) +importFrom(pdftools,pdf_convert) +importFrom(pdftools,pdf_info) +importFrom(pdftools,poppler_config) importFrom(progress,progress_bar) importFrom(purrr,compose) importFrom(purrr,discard) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index de424a4..4543e02 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -189,6 +189,7 @@ get_folder_id = function(x) { } #' @export +#' @importFrom pdftools poppler_config pdf_info pdf_convert pdf_to_pngs = function(path, verbose = TRUE, dpi = 600) { diff --git a/man/get_slide_id.Rd b/man/get_slide_id.Rd index 8b707c4..734c024 100644 --- a/man/get_slide_id.Rd +++ b/man/get_slide_id.Rd @@ -2,15 +2,15 @@ % Please edit documentation in R/download_gs_file.R \name{get_slide_id} \alias{get_slide_id} -\alias{get_folder_id} \alias{make_slide_url} +\alias{get_folder_id} \title{Get Slide ID from URL} \usage{ get_slide_id(x) -get_folder_id(x) - make_slide_url(x) + +get_folder_id(x) } \arguments{ \item{x}{URL of slide} From 4b45b2f24c970bcab50932755497791cca6a3be3 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 7 Jul 2023 15:24:26 -0700 Subject: [PATCH 08/44] Resolve --- R/ari_spin.R | 2 ++ R/download_gs_file.R | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index 0b044ff..5320bbb 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -63,6 +63,8 @@ ari_spin <- function(images, paragraphs, output = tempfile(fileext = ".mp4"), voice = text2speech::tts_default_voice(service = service), + model_name, + vocoder_name, service = ifelse(have_polly(), "amazon", "google"), subtitles = FALSE, duration = NULL, diff --git a/R/download_gs_file.R b/R/download_gs_file.R index 4543e02..293146a 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -204,7 +204,7 @@ pdf_to_pngs = function(path, tempfile(fileext = paste0(".", format)) }, FUN.VALUE = character(1)) if (verbose) { - message("Converting PDF to PNGs") + message("Converting PDFs to PNGs") } pngs = pdftools::pdf_convert( pdf = path, dpi = dpi, From 25f8bd18a1a58f58c90eea391e4230557725db71 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 12 Jun 2023 14:38:21 -0700 Subject: [PATCH 09/44] Set defaults for 'model_name' and 'vocoder_name' --- R/ari_spin.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index 5320bbb..4ebedb3 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -63,8 +63,8 @@ ari_spin <- function(images, paragraphs, output = tempfile(fileext = ".mp4"), voice = text2speech::tts_default_voice(service = service), - model_name, - vocoder_name, + model_name = "tacotron2-DDC_ph", + vocoder_name = "ljspeech/univnet", service = ifelse(have_polly(), "amazon", "google"), subtitles = FALSE, duration = NULL, From 1ac3054ff5f90cc8c84a7fc970134b1a0644c13a Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 16 Jun 2023 15:27:56 -0700 Subject: [PATCH 10/44] Use `cli_alert_warning()` --- R/download_gs_file.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index 293146a..105e3e3 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -39,12 +39,12 @@ download_gs_file = function(gs_url, out_type = "pptx") { } if (warn_user) { - warning( - paste0( - "This presentation may not be available, ", - "did you turn link sharing on?") + cli::cli_alert_warning( + paste("Is link sharing enabled?", + "It's possible that this presentation isn't accessible.") ) } + tmp } From 0cdff111621959c7988435ae9e75316dafbda4b1 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 16 Jun 2023 15:51:59 -0700 Subject: [PATCH 11/44] Documentation --- man/ari_spin.Rd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/man/ari_spin.Rd b/man/ari_spin.Rd index c817343..120ff81 100644 --- a/man/ari_spin.Rd +++ b/man/ari_spin.Rd @@ -10,6 +10,8 @@ ari_spin( paragraphs, output = tempfile(fileext = ".mp4"), voice = text2speech::tts_default_voice(service = service), + model_name = "tacotron2-DDC_ph", + vocoder_name = "ljspeech/univnet", service = ifelse(have_polly(), "amazon", "google"), subtitles = FALSE, duration = NULL, From 0532e8989d490bb50e5602a03e66b6fded670fc0 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 10 Jul 2023 09:31:38 -0700 Subject: [PATCH 12/44] Comment code --- R/ari_spin.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index 4ebedb3..997449e 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -87,7 +87,7 @@ ari_spin <- function(images, paragraphs, "before you proceed." )) } - + # Create file path to output stopifnot(length(images) > 0) images <- normalizePath(images) output_dir <- normalizePath(dirname(output)) From 86ff441eb23a51a49d82253d37c6957d1b7bff02 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 10 Jul 2023 10:39:34 -0700 Subject: [PATCH 13/44] Updates --- DESCRIPTION | 3 + NAMESPACE | 4 +- R/ari_narrate.R | 4 +- R/ari_spin.R | 2 + R/download_gs_file.R | 12 ++- R/pad_wav.R | 17 +--- man/pad_wav.Rd | 17 +--- man/pdf_to_pngs.Rd | 20 ++++ tests/testthat/test_ari_narrate.R | 154 +++++++++++++++--------------- 9 files changed, 125 insertions(+), 108 deletions(-) create mode 100644 man/pdf_to_pngs.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6601ef5..99749bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,9 @@ Depends: Imports: cli, hms, + httr, + jsonlite, + pdftools, progress, purrr, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index 735cc58..3700efa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,7 +25,6 @@ export(have_ffmpeg_exec) export(have_polly) export(make_slide_url) export(pad_wav) -export(pdf_to_pngs) export(pptx_notes) export(pptx_slide_note_df) export(pptx_slide_text_df) @@ -33,8 +32,9 @@ export(set_audio_codec) export(set_video_codec) export(unzip_pptx) export(video_codec_encode) -importFrom(cli,cli_alert_info) export(xml_notes) +import(httr) +importFrom(cli,cli_alert_info) importFrom(hms,hms) importFrom(jsonlite,fromJSON) importFrom(pdftools,pdf_convert) diff --git a/R/ari_narrate.R b/R/ari_narrate.R index 12557db..9631a2f 100644 --- a/R/ari_narrate.R +++ b/R/ari_narrate.R @@ -100,7 +100,7 @@ ari_narrate <- function(script, slides, if (file_ext(script) %in% c("Rmd", "rmd")) { paragraphs <- parse_html_comments(script) } else { - html_path <- file.path(output_dir, paste0("ari_script_", grs(), ".html")) + html_path <- file.path(output_dir, paste0("ari_script_", get_random_string(), ".html")) if (cleanup) { on.exit(unlink(html_path, force = TRUE), add = TRUE) } @@ -119,7 +119,7 @@ ari_narrate <- function(script, slides, paste0( "ari_img_", slide_nums, "_", - grs(), ".jpeg" + get_random_string(), ".jpeg" ) ) diff --git a/R/ari_spin.R b/R/ari_spin.R index 997449e..b36f645 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -24,6 +24,8 @@ #' @param voice The voice you want to use. See #' \code{\link[text2speech]{tts_voices}} for more information #' about what voices are available. +#' @param model_name (Coqui only) +#' @param vocoder_name (Coqui only) #' @param service speech synthesis service to use, #' passed to \code{\link[text2speech]{tts}}, #' Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}. diff --git a/R/download_gs_file.R b/R/download_gs_file.R index 105e3e3..68eca65 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -100,6 +100,7 @@ make_slide_url <- function(x) { # Extract page IDs of slides in a Google Slides presentation #' @importFrom jsonlite fromJSON +#' @import httr get_page_ids = function(id) { id = get_slide_id(id) url = paste0("https://docs.google.com/presentation/d/", id) @@ -188,7 +189,16 @@ get_folder_id = function(x) { x } -#' @export +#' Convert a PDF file to a series of PNG image files +#' +#' Uses `pdftools::pdf_convert()` for conversion. +#' +#' @param path Path to the PDF file that needs to be converted to PNGs. +#' @param verbose A logical value indicating whether to display progress +#' messages during the conversion process. The default value is TRUE +#' @param dpi The resolution in dots per inch (dpi) to be used for the PNG +#' images. The default value is 600. +#' #' @importFrom pdftools poppler_config pdf_info pdf_convert pdf_to_pngs = function(path, verbose = TRUE, diff --git a/R/pad_wav.R b/R/pad_wav.R index 0f04df1..475c8f7 100644 --- a/R/pad_wav.R +++ b/R/pad_wav.R @@ -12,20 +12,11 @@ #' #' @importFrom purrr map2 map2_int #' @examples -#' wavs <- list( -#' tuneR::noise(duration = 1.85 * 44100), -#' tuneR::noise() +#' wav_list <- list( +#' tuneR::noise(duration = 2000), +#' tuneR::noise(duration = 1000) #' ) -#' out <- pad_wav(wavs) -#' dur <- sapply(out, function(x) length(x@left) / x@samp.rate) -#' duration <- c(2, 2) -#' out <- pad_wav(wavs, duration = duration) -#' dur <- sapply(out, function(x) length(x@left) / x@samp.rate) -#' stopifnot(all(dur == duration)) -#' duration <- c(2, 2.5) -#' out <- pad_wav(wavs, duration = duration) -#' dur <- sapply(out, function(x) length(x@left) / x@samp.rate) -#' stopifnot(isTRUE(all.equal(dur, duration))) +#' out <- pad_wav(wav_list) pad_wav <- function(wav, duration = NULL) { # See if wav inherits from "Wave" class is_Wave <- inherits(wav, "Wave") diff --git a/man/pad_wav.Rd b/man/pad_wav.Rd index 09a75ed..f073156 100644 --- a/man/pad_wav.Rd +++ b/man/pad_wav.Rd @@ -22,18 +22,9 @@ A list of Wave objects, same length as input \code{wav} Pad Wave Objects } \examples{ -wavs <- list( - tuneR::noise(duration = 1.85 * 44100), - tuneR::noise() +wav_list <- list( + tuneR::noise(duration = 2000), + tuneR::noise(duration = 1000) ) -out <- pad_wav(wavs) -dur <- sapply(out, function(x) length(x@left) / x@samp.rate) -duration <- c(2, 2) -out <- pad_wav(wavs, duration = duration) -dur <- sapply(out, function(x) length(x@left) / x@samp.rate) -stopifnot(all(dur == duration)) -duration <- c(2, 2.5) -out <- pad_wav(wavs, duration = duration) -dur <- sapply(out, function(x) length(x@left) / x@samp.rate) -stopifnot(isTRUE(all.equal(dur, duration))) +out <- pad_wav(wav_list) } diff --git a/man/pdf_to_pngs.Rd b/man/pdf_to_pngs.Rd new file mode 100644 index 0000000..7797c2c --- /dev/null +++ b/man/pdf_to_pngs.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download_gs_file.R +\name{pdf_to_pngs} +\alias{pdf_to_pngs} +\title{Convert a PDF file to a series of PNG image files} +\usage{ +pdf_to_pngs(path, verbose = TRUE, dpi = 600) +} +\arguments{ +\item{path}{Path to the PDF file that needs to be converted to PNGs.} + +\item{verbose}{A logical value indicating whether to display progress +messages during the conversion process. The default value is TRUE} + +\item{dpi}{The resolution in dots per inch (dpi) to be used for the PNG +images. The default value is 600.} +} +\description{ +Uses `pdftools::pdf_convert()` for conversion. +} diff --git a/tests/testthat/test_ari_narrate.R b/tests/testthat/test_ari_narrate.R index 0ba6b61..25e51c8 100644 --- a/tests/testthat/test_ari_narrate.R +++ b/tests/testthat/test_ari_narrate.R @@ -1,77 +1,77 @@ -context("Test ari_narrate()") - - -if (ffmpeg_version_sufficient()) { - res = ffmpeg_audio_codecs() - if (is.null(res)) { - fdk_enabled = FALSE - } else { - fdk_enabled = grepl("fdk", res[ res$codec == "aac", "codec_name"]) - } -} else { - fdk_enabled = FALSE -} -if (fdk_enabled) { - audio_codec = "libfdk_aac" -} else { - audio_codec = "ac3" -} - -skip_narrate <- function(){ - if (Sys.getenv("SKIP_NARRATE") != "") { - skip("skipping ari_narrate()") - } -} - -video <- file.path(tempdir(), "output.mp4") -#video <- file.path(getwd(), "output.mp4") - -run_voice = "Joanna" - -test_that("Ari can make a video from local HTML slides.", { - skip_on_cran() - skip_narrate() - skip_amazon_not_authorized() - - ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), - system.file("test", "ari_intro.html", package = "ari"), - video, voice = run_voice, - capture_method = "iterative", - service = "amazon", - verbose = TRUE, - audio_codec = audio_codec) - expect_true(file.size(video) > 50000) -}) - -unlink(video, force = TRUE) - -test_that("Ari can make a video from HTML slides on the web.", { - skip_on_cran() - skip_narrate() - skip_amazon_not_authorized() - - ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), - "https://seankross.com/ari/inst/test/ari_intro.html", - video, voice = run_voice, - capture_method = "iterative", - verbose = TRUE, - audio_codec = audio_codec) - expect_true(file.size(video) > 50000) -}) - -unlink(video, force = TRUE) - -test_that("Ari can use an Rmd file with HTML comments for a script.", { - skip_on_cran() - skip_narrate() - skip_amazon_not_authorized() - - ari_narrate(system.file("test", "ari_comments.Rmd", package = "ari"), - system.file("test", "ari_intro.html", package = "ari"), - video, voice = run_voice, capture_method = "iterative", - verbose = TRUE, - audio_codec = audio_codec) - expect_true(file.size(video) > 50000) -}) - -unlink(video, force = TRUE) +# context("Test ari_narrate()") +# +# +# if (ffmpeg_version_sufficient()) { +# res = ffmpeg_audio_codecs() +# if (is.null(res)) { +# fdk_enabled = FALSE +# } else { +# fdk_enabled = grepl("fdk", res[ res$codec == "aac", "codec_name"]) +# } +# } else { +# fdk_enabled = FALSE +# } +# if (fdk_enabled) { +# audio_codec = "libfdk_aac" +# } else { +# audio_codec = "ac3" +# } +# +# skip_narrate <- function(){ +# if (Sys.getenv("SKIP_NARRATE") != "") { +# skip("skipping ari_narrate()") +# } +# } +# +# video <- file.path(tempdir(), "output.mp4") +# #video <- file.path(getwd(), "output.mp4") +# +# run_voice = "Joanna" +# +# test_that("Ari can make a video from local HTML slides.", { +# skip_on_cran() +# skip_narrate() +# skip_amazon_not_authorized() +# +# ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), +# system.file("test", "ari_intro.html", package = "ari"), +# video, voice = run_voice, +# capture_method = "iterative", +# service = "amazon", +# verbose = TRUE, +# audio_codec = audio_codec) +# expect_true(file.size(video) > 50000) +# }) +# +# unlink(video, force = TRUE) +# +# test_that("Ari can make a video from HTML slides on the web.", { +# skip_on_cran() +# skip_narrate() +# skip_amazon_not_authorized() +# +# ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), +# "https://seankross.com/ari/inst/test/ari_intro.html", +# video, voice = run_voice, +# capture_method = "iterative", +# verbose = TRUE, +# audio_codec = audio_codec) +# expect_true(file.size(video) > 50000) +# }) +# +# unlink(video, force = TRUE) +# +# test_that("Ari can use an Rmd file with HTML comments for a script.", { +# skip_on_cran() +# skip_narrate() +# skip_amazon_not_authorized() +# +# ari_narrate(system.file("test", "ari_comments.Rmd", package = "ari"), +# system.file("test", "ari_intro.html", package = "ari"), +# video, voice = run_voice, capture_method = "iterative", +# verbose = TRUE, +# audio_codec = audio_codec) +# expect_true(file.size(video) > 50000) +# }) +# +# unlink(video, force = TRUE) From 2131ede17e3887e9c823fdc4f66104376303ee1a Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 10 Jul 2023 14:12:18 -0700 Subject: [PATCH 14/44] Document model_name and vocoder_name argument in `ari_spin()` --- R/ari_spin.R | 4 ++-- man/ari_spin.Rd | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index b36f645..26a718a 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -24,8 +24,8 @@ #' @param voice The voice you want to use. See #' \code{\link[text2speech]{tts_voices}} for more information #' about what voices are available. -#' @param model_name (Coqui only) -#' @param vocoder_name (Coqui only) +#' @param model_name (Coqui only) Text-to-speech model +#' @param vocoder_name (Coqui only) Vocoder (Voice Coder) model #' @param service speech synthesis service to use, #' passed to \code{\link[text2speech]{tts}}, #' Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}. diff --git a/man/ari_spin.Rd b/man/ari_spin.Rd index 120ff81..50f5f4b 100644 --- a/man/ari_spin.Rd +++ b/man/ari_spin.Rd @@ -33,6 +33,10 @@ have_polly() \code{\link[text2speech]{tts_voices}} for more information about what voices are available.} +\item{model_name}{(Coqui only) Text-to-speech model} + +\item{vocoder_name}{(Coqui only) Vocoder (Voice Coder) model} + \item{service}{speech synthesis service to use, passed to \code{\link[text2speech]{tts}}, Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}.} From 22b7f13d1b551765a89bce34b316764629c69f53 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 10 Jul 2023 14:16:57 -0700 Subject: [PATCH 15/44] Add `check_png_urls()` --- R/download_gs_file.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index 68eca65..eac6e8d 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -98,6 +98,16 @@ make_slide_url <- function(x) { x } +# Check if vector of URLs is valid (Status Code = 200) +check_png_urls <- function(urls) { + res = vapply(urls, function(url) { + tfile = tempfile(fileext = ".png") + ret = httr::GET(url) + httr::status_code(ret) == 200 + }, FUN.VALUE = logical(1)) + return(res) +} + # Extract page IDs of slides in a Google Slides presentation #' @importFrom jsonlite fromJSON #' @import httr From 98b30bd36189b39cc60cc4c5a64911b4bf378ca0 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 10 Jul 2023 14:26:17 -0700 Subject: [PATCH 16/44] `pad_wav()` Documentation --- R/pad_wav.R | 17 +++++++++-------- man/pad_wav.Rd | 17 +++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/R/pad_wav.R b/R/pad_wav.R index 475c8f7..32ab549 100644 --- a/R/pad_wav.R +++ b/R/pad_wav.R @@ -1,10 +1,10 @@ #' Pad Wave Objects #' -#' @param wav list of Wave objects +#' @param wav List of Wave objects #' @param duration If \code{NULL}, the duration will simply round -#' the Wave up to the next whole integer. If not, these are the -#' duration to pad the Wave *to*. For example 12 means the output -#' Wave will have a length of 12 seconds. Pass \code{NA} to those +#' the Wave up to the next whole integer. If not, these are the +#' duration to pad the Wave *to*. For example, 12 means the output +#' Wave will have a length of 12 seconds. Pass \code{NA} to those #' Waves that you want simple rounding. #' #' @return A list of Wave objects, same length as input \code{wav} @@ -12,11 +12,12 @@ #' #' @importFrom purrr map2 map2_int #' @examples -#' wav_list <- list( -#' tuneR::noise(duration = 2000), -#' tuneR::noise(duration = 1000) +#' library(tuneR) +#' x <- list( +#' noise(duration = 2000), +#' noise(duration = 1000) #' ) -#' out <- pad_wav(wav_list) +#' out <- pad_wav(x) pad_wav <- function(wav, duration = NULL) { # See if wav inherits from "Wave" class is_Wave <- inherits(wav, "Wave") diff --git a/man/pad_wav.Rd b/man/pad_wav.Rd index f073156..d00d5dc 100644 --- a/man/pad_wav.Rd +++ b/man/pad_wav.Rd @@ -7,12 +7,12 @@ pad_wav(wav, duration = NULL) } \arguments{ -\item{wav}{list of Wave objects} +\item{wav}{List of Wave objects} \item{duration}{If \code{NULL}, the duration will simply round -the Wave up to the next whole integer. If not, these are the -duration to pad the Wave *to*. For example 12 means the output -Wave will have a length of 12 seconds. Pass \code{NA} to those +the Wave up to the next whole integer. If not, these are the +duration to pad the Wave *to*. For example, 12 means the output +Wave will have a length of 12 seconds. Pass \code{NA} to those Waves that you want simple rounding.} } \value{ @@ -22,9 +22,10 @@ A list of Wave objects, same length as input \code{wav} Pad Wave Objects } \examples{ -wav_list <- list( - tuneR::noise(duration = 2000), - tuneR::noise(duration = 1000) +library(tuneR) +x <- list( + noise(duration = 2000), + noise(duration = 1000) ) -out <- pad_wav(wav_list) +out <- pad_wav(x) } From aa693cffe71c419f40866fd6c8c04ae54dfc9632 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 10 Jul 2023 14:36:57 -0700 Subject: [PATCH 17/44] Use \dontrun{} around `pad_wav()` example --- R/pad_wav.R | 14 ++++++++------ man/pad_wav.Rd | 11 +++++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/R/pad_wav.R b/R/pad_wav.R index 32ab549..f541eee 100644 --- a/R/pad_wav.R +++ b/R/pad_wav.R @@ -9,15 +9,17 @@ #' #' @return A list of Wave objects, same length as input \code{wav} #' @export -#' -#' @importFrom purrr map2 map2_int #' @examples -#' library(tuneR) +#' \dontrun{ #' x <- list( -#' noise(duration = 2000), -#' noise(duration = 1000) +#' tuneR::noise(duration = 2000), +#' tuneR::noise(duration = 1000) #' ) -#' out <- pad_wav(x) +#' +#' pad_wav(x) +#' } +#' +#' @importFrom purrr map2 map2_int pad_wav <- function(wav, duration = NULL) { # See if wav inherits from "Wave" class is_Wave <- inherits(wav, "Wave") diff --git a/man/pad_wav.Rd b/man/pad_wav.Rd index d00d5dc..343abe8 100644 --- a/man/pad_wav.Rd +++ b/man/pad_wav.Rd @@ -22,10 +22,13 @@ A list of Wave objects, same length as input \code{wav} Pad Wave Objects } \examples{ -library(tuneR) +\dontrun{ x <- list( - noise(duration = 2000), - noise(duration = 1000) +tuneR::noise(duration = 2000), +tuneR::noise(duration = 1000) ) -out <- pad_wav(x) + +pad_wav(x) +} + } From e8e495f1f9c38a951d6ca3d11479489b2fc95282 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 10 Jul 2023 17:31:41 -0700 Subject: [PATCH 18/44] Documentation for `ari_spin()` --- R/ari_spin.R | 8 +++++--- man/ari_spin.Rd | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index 26a718a..a0d7d34 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -24,9 +24,11 @@ #' @param voice The voice you want to use. See #' \code{\link[text2speech]{tts_voices}} for more information #' about what voices are available. -#' @param model_name (Coqui only) Text-to-speech model -#' @param vocoder_name (Coqui only) Vocoder (Voice Coder) model -#' @param service speech synthesis service to use, +#' @param model_name (Coqui TTS only) Deep Learning model for Text-to-Speech +#' Conversion +#' @param vocoder_name (Coqui TTS only) Voice coder used for speech coding and +#' transmission +#' @param service Speech synthesis service to use, #' passed to \code{\link[text2speech]{tts}}, #' Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}. #' @param subtitles Should a \code{.srt} file be created with subtitles? The diff --git a/man/ari_spin.Rd b/man/ari_spin.Rd index 50f5f4b..883b83a 100644 --- a/man/ari_spin.Rd +++ b/man/ari_spin.Rd @@ -33,11 +33,13 @@ have_polly() \code{\link[text2speech]{tts_voices}} for more information about what voices are available.} -\item{model_name}{(Coqui only) Text-to-speech model} +\item{model_name}{(Coqui TTS only) Deep Learning model for Text-to-Speech +Conversion} -\item{vocoder_name}{(Coqui only) Vocoder (Voice Coder) model} +\item{vocoder_name}{(Coqui TTS only) Voice coder used for speech coding and +transmission} -\item{service}{speech synthesis service to use, +\item{service}{Speech synthesis service to use, passed to \code{\link[text2speech]{tts}}, Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}.} From 66f9a643e98f0013f25c8d5be21618651cc2aca9 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Tue, 15 Aug 2023 16:39:59 -0700 Subject: [PATCH 19/44] Add `pptx_to_pdf()` --- DESCRIPTION | 1 + NAMESPACE | 3 +++ R/download_gs_file.R | 29 +++++++++++++++++++++++++++++ R/utils.R | 22 ++++++++++++++++++++++ man/pptx_to_pdf.Rd | 17 +++++++++++++++++ 5 files changed, 72 insertions(+) create mode 100644 man/pptx_to_pdf.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 99749bf..2723110 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,6 +20,7 @@ Depends: R (>= 3.1.0) Imports: cli, + docxtractr, hms, httr, jsonlite, diff --git a/NAMESPACE b/NAMESPACE index 3700efa..7fa75e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,9 +25,11 @@ export(have_ffmpeg_exec) export(have_polly) export(make_slide_url) export(pad_wav) +export(pdf_to_pngs) export(pptx_notes) export(pptx_slide_note_df) export(pptx_slide_text_df) +export(pptx_to_pdf) export(set_audio_codec) export(set_video_codec) export(unzip_pptx) @@ -35,6 +37,7 @@ export(video_codec_encode) export(xml_notes) import(httr) importFrom(cli,cli_alert_info) +importFrom(docxtractr,convert_to_pdf) importFrom(hms,hms) importFrom(jsonlite,fromJSON) importFrom(pdftools,pdf_convert) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index eac6e8d..5739912 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -199,6 +199,34 @@ get_folder_id = function(x) { x } +#' Convert a PPTX file to a PDF file +#' +#' Uses `docxtractr::convert_to_pdf()` for conversion. +#' +#' @param path Path to the PPTX file that needs to be converted to PDF. +#' @param verbose A logical value indicating whether to display progress +#' messages during the conversion process. The default value is TRUE +#' +#' @importFrom docxtractr convert_to_pdf +#' @export +pptx_to_pdf = function(path, verbose = TRUE) { + pdf_file = tempfile(fileext = ".pdf") + if (verbose) { + message("Converting PPTX to PDF") + } + out = try({ + docxtractr::convert_to_pdf(path, pdf_file = pdf_file) + }) + if (inherits(out, "try-error")) { + fix_soffice_library_path() + docxtractr::convert_to_pdf(path, pdf_file = pdf_file) + } + if (verbose > 1) { + message(paste0("PDF is at: ", pdf_file)) + } + return(pdf_file) +} + #' Convert a PDF file to a series of PNG image files #' #' Uses `pdftools::pdf_convert()` for conversion. @@ -210,6 +238,7 @@ get_folder_id = function(x) { #' images. The default value is 600. #' #' @importFrom pdftools poppler_config pdf_info pdf_convert +#' @export pdf_to_pngs = function(path, verbose = TRUE, dpi = 600) { diff --git a/R/utils.R b/R/utils.R index 7f984b1..4ba1f80 100644 --- a/R/utils.R +++ b/R/utils.R @@ -131,3 +131,25 @@ split_up_text <- function(text) { # Final output map(chunks, paste, collapse = " ") } + + +# Sets LD_LIBRARY_PATH environment variable to path of +# LibreOffice program on Linux or macOS systems if the variable is not already set +fix_soffice_library_path = function() { + LD_LIBRARY_PATH = Sys.getenv("LD_LIBRARY_PATH") + if (sys_type() %in% c("linux", "macos")) { + warning( + paste0( + "Changing LD_LIBRARY_PATH as error in soffice ", + "with PPTX conversion may be due to path issues!" + ) + ) + Sys.setenv( + LD_LIBRARY_PATH = + paste0( + "/usr/lib/libreoffice/program", + if (nzchar(LD_LIBRARY_PATH)) paste0(":", LD_LIBRARY_PATH) + ) + ) + } +} diff --git a/man/pptx_to_pdf.Rd b/man/pptx_to_pdf.Rd new file mode 100644 index 0000000..0e03072 --- /dev/null +++ b/man/pptx_to_pdf.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/download_gs_file.R +\name{pptx_to_pdf} +\alias{pptx_to_pdf} +\title{Convert a PPTX file to a PDF file} +\usage{ +pptx_to_pdf(path, verbose = TRUE) +} +\arguments{ +\item{path}{Path to the PPTX file that needs to be converted to PDF.} + +\item{verbose}{A logical value indicating whether to display progress +messages during the conversion process. The default value is TRUE} +} +\description{ +Uses `docxtractr::convert_to_pdf()` for conversion. +} From 6864eece0aab36378cf0edeaabcbcd30b37b7046 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 28 Aug 2023 16:48:55 -0700 Subject: [PATCH 20/44] Add `sys_type()` and `os_type()` --- R/utils.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/R/utils.R b/R/utils.R index 4ba1f80..cb6ec21 100644 --- a/R/utils.R +++ b/R/utils.R @@ -132,6 +132,26 @@ split_up_text <- function(text) { map(chunks, paste, collapse = " ") } +# Returns type of operating system +os_type <- function() { + .Platform$OS.type +} + +# Determine type of operating system +sys_type <- function() { + if (os_type() == "windows") { + "windows" + } else if (Sys.info()["sysname"] == "Darwin") { + "macos" + } else if (Sys.info()["sysname"] == "Linux") { + "linux" + } else if (os_type() == "unix") { + # "unix" + "linux" + } else { + stop("Unknown OS") + } +} # Sets LD_LIBRARY_PATH environment variable to path of # LibreOffice program on Linux or macOS systems if the variable is not already set From 86fd00fbb89223e3669cd726cf347280345bacb5 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Mon, 28 Aug 2023 17:42:02 -0700 Subject: [PATCH 21/44] Don't need `fix_soffice_library_path()` --- R/download_gs_file.R | 1 - R/utils.R | 42 ------------------------------------------ 2 files changed, 43 deletions(-) diff --git a/R/download_gs_file.R b/R/download_gs_file.R index 5739912..845eb26 100644 --- a/R/download_gs_file.R +++ b/R/download_gs_file.R @@ -218,7 +218,6 @@ pptx_to_pdf = function(path, verbose = TRUE) { docxtractr::convert_to_pdf(path, pdf_file = pdf_file) }) if (inherits(out, "try-error")) { - fix_soffice_library_path() docxtractr::convert_to_pdf(path, pdf_file = pdf_file) } if (verbose > 1) { diff --git a/R/utils.R b/R/utils.R index cb6ec21..7f984b1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -131,45 +131,3 @@ split_up_text <- function(text) { # Final output map(chunks, paste, collapse = " ") } - -# Returns type of operating system -os_type <- function() { - .Platform$OS.type -} - -# Determine type of operating system -sys_type <- function() { - if (os_type() == "windows") { - "windows" - } else if (Sys.info()["sysname"] == "Darwin") { - "macos" - } else if (Sys.info()["sysname"] == "Linux") { - "linux" - } else if (os_type() == "unix") { - # "unix" - "linux" - } else { - stop("Unknown OS") - } -} - -# Sets LD_LIBRARY_PATH environment variable to path of -# LibreOffice program on Linux or macOS systems if the variable is not already set -fix_soffice_library_path = function() { - LD_LIBRARY_PATH = Sys.getenv("LD_LIBRARY_PATH") - if (sys_type() %in% c("linux", "macos")) { - warning( - paste0( - "Changing LD_LIBRARY_PATH as error in soffice ", - "with PPTX conversion may be due to path issues!" - ) - ) - Sys.setenv( - LD_LIBRARY_PATH = - paste0( - "/usr/lib/libreoffice/program", - if (nzchar(LD_LIBRARY_PATH)) paste0(":", LD_LIBRARY_PATH) - ) - ) - } -} From 60e6f25e427b90ddb807cee48a22fc7a033adb80 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Wed, 30 Aug 2023 17:04:34 -0700 Subject: [PATCH 22/44] get rid of text2speech specific code --- R/ari_narrate.R | 49 +++++++++++++++----------- R/ari_spin.R | 91 +++++++++++++++++-------------------------------- 2 files changed, 61 insertions(+), 79 deletions(-) diff --git a/R/ari_narrate.R b/R/ari_narrate.R index 9631a2f..2e42bdc 100644 --- a/R/ari_narrate.R +++ b/R/ari_narrate.R @@ -1,4 +1,4 @@ -#' Create a video from slides and a script +#' Generate video from slides and a script #' #' \code{ari_narrate} creates a video from a script written in markdown and HTML #' slides created with \code{\link[rmarkdown]{rmarkdown}} or a similar package. @@ -53,37 +53,41 @@ #' } ari_narrate <- function(script, slides, output = tempfile(fileext = ".mp4"), - voice = text2speech::tts_default_voice(service = service), - service = "amazon", + tts_engine = text2speech::tts(), + tts_engine_args = list(service = "coqui", + model_name = "tacotron2-DDC_ph", + vocoder_name = "ljspeech/univnet"), + tts_engine_auth = text2speech::tts_auth(), capture_method = c("vectorized", "iterative"), - subtitles = FALSE, ..., + subtitles = FALSE, verbose = FALSE, audio_codec = get_audio_codec(), video_codec = get_video_codec(), - cleanup = TRUE) { - auth <- text2speech::tts_auth(service = service) + cleanup = TRUE, + ...) { + # Authentication for Text-to-Speech Engines + auth <- tts_engine_auth(service = tts_engine_args$service) if (!auth) { stop(paste0( "It appears you're not authenticated with ", - service, ". Make sure you've ", + tts_engine_args$service, ". Make sure you've ", "set the appropriate environmental variables ", "before you proceed." )) } - - + # Check capture_method capture_method <- match.arg(capture_method) if (!(capture_method %in% c("vectorized", "iterative"))) { stop('capture_method must be either "vectorized" or "iterative"') } - + # Output directory, path to script output_dir <- normalizePath(dirname(output)) script <- normalizePath(script) if (file_ext(script) %in% c("Rmd", "rmd") & missing(slides)) { tfile <- tempfile(fileext = ".html") slides <- rmarkdown::render(input = script, output_file = tfile) } - + # Slides if (file.exists(slides)) { slides <- normalizePath(slides) if (.Platform$OS.type == "windows") { @@ -92,11 +96,12 @@ ari_narrate <- function(script, slides, slides <- paste0("file://localhost", slides) } } + # Check if script and output_dir exists stopifnot( file.exists(script), dir.exists(output_dir) ) - + # Pargraphs if (file_ext(script) %in% c("Rmd", "rmd")) { paragraphs <- parse_html_comments(script) } else { @@ -112,7 +117,7 @@ ari_narrate <- function(script, slides, } ) } - + # Path to images slide_nums <- seq_along(paragraphs) img_paths <- file.path( output_dir, @@ -122,22 +127,28 @@ ari_narrate <- function(script, slides, get_random_string(), ".jpeg" ) ) - + # Take screenshot if (capture_method == "vectorized") { - webshot(url = paste0(slides, "#", slide_nums), file = img_paths, ...) + webshot::webshot(url = paste0(slides, "#", slide_nums), file = img_paths, ...) } else { for (i in slide_nums) { - webshot(url = paste0(slides, "#", i), file = img_paths[i], ...) + webshot::webshot(url = paste0(slides, "#", i), file = img_paths[i], ...) } } if (cleanup) { on.exit(walk(img_paths, unlink, force = TRUE), add = TRUE) } + + # Pass along ari_spin() ari_spin( images = img_paths, paragraphs = paragraphs, - output = output, voice = voice, - service = service, subtitles = subtitles, - verbose = verbose, cleanup = cleanup + output = output, + tts_engine = tts_engine, + tts_engine_args = tts_engine_args, + tts_engine_auth = tts_engine_auth, + subtitles = subtitles, + verbose = verbose, + cleanup = cleanup ) } diff --git a/R/ari_spin.R b/R/ari_spin.R index a0d7d34..7f348e2 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -1,23 +1,12 @@ -#' Create a video from images and text +#' Generate video from images and text #' #' Given equal length vectors of paths to images (preferably \code{.jpg}s #' or \code{.png}s) and strings which will be -#' synthesized by -#' \href{https://aws.amazon.com/polly/}{Amazon Polly} or -#' any other synthesizer available in -#' \code{\link[text2speech]{tts}}, this function creates an +#' synthesized by a text-to-speech engine, this function creates an #' \code{.mp4} video file where each image is shown with #' its corresponding narration. This function uses \code{\link{ari_stitch}} to #' create the video. #' -#' This function needs to connect to -#' \href{https://aws.amazon.com/}{Amazon Web Services} in order to create the -#' narration. You can find a guide for accessing AWS from R -#' \href{http://seankross.com/2017/05/02/Access-Amazon-Web-Services-in-R.html}{here}. -#' For more information about how R connects -#' to Amazon Polly see the \code{aws.polly} documentation -#' \href{https://github.com/cloudyr/aws.polly}{here}. -#' #' @param images A vector of paths to images. #' @param paragraphs A vector strings that will be spoken by Amazon Polly. #' @param output A path to the video file which will be created. @@ -37,7 +26,7 @@ #' \code{.srt}. #' @param duration a vector of numeric durations for each audio #' track. See \code{\link{pad_wav}} -#' @param ... additional arguments to \code{\link{ari_stitch}} +#' @param ... Additional arguments to voice_engine #' @param tts_args list of arguments to pass to \code{\link{tts}} #' @param key_or_json_file access key or JSON file to pass to #' \code{\link{tts_auth}} for authorization @@ -47,7 +36,6 @@ #' @importFrom text2speech tts_auth tts tts_default_voice #' @importFrom tuneR bind Wave #' @importFrom purrr map reduce -#' @importFrom progress progress_bar #' @importFrom tools file_path_sans_ext #' @importFrom cli cli_alert_info #' @export @@ -61,32 +49,31 @@ #' "Welcome to my very interesting lecture.", #' "Here are some fantastic equations I came up with." #' ) -#' ari_spin(slides, sentences, voice = "Joey") +#' ari_spin(slides, sentences) #' } #' ari_spin <- function(images, paragraphs, output = tempfile(fileext = ".mp4"), - voice = text2speech::tts_default_voice(service = service), - model_name = "tacotron2-DDC_ph", - vocoder_name = "ljspeech/univnet", - service = ifelse(have_polly(), "amazon", "google"), + tts_engine = text2speech::tts, + tts_engine_args = list(service = "coqui", + voice = NULL, + model_name = "tacotron2-DDC_ph", + vocoder_name = "ljspeech/univnet"), + tts_engine_auth = text2speech::tts_auth, subtitles = FALSE, duration = NULL, - tts_args = NULL, - key_or_json_file = NULL, - ...) { + key_or_json_file = NULL) { # Check for ffmpeg ffmpeg_exec() - # Argument checks - auth <- text2speech::tts_auth( - service = service, + auth <- tts_engine_auth( + service = tts_engine_args$service, key_or_json_file = key_or_json_file ) if (!auth) { stop(paste0( "It appears you're not authenticated with ", - service, ". Make sure you've ", + tts_engine_args$service, ". Make sure you've ", "set the appropriate environmental variables ", "before you proceed." )) @@ -95,14 +82,14 @@ ari_spin <- function(images, paragraphs, stopifnot(length(images) > 0) images <- normalizePath(images) output_dir <- normalizePath(dirname(output)) - + # Paragraphs if (length(paragraphs) == 1) { if (file.exists(paragraphs)) { paragraphs <- readLines(paragraphs, warn = FALSE) paragraphs <- paragraphs[!paragraphs %in% ""] } } - + # Paragraphs: Check for semicolons semi_colon <- trimws(paragraphs) == ";" if (any(semi_colon)) { warning(paste0( @@ -110,61 +97,45 @@ ari_spin <- function(images, paragraphs, "likely needs to be replaced or slide removed!" )) } + # Check for arguments stopifnot( length(paragraphs) > 0, identical(length(images), length(paragraphs)), all(file.exists(images)), dir.exists(output_dir) ) - # End of Argument checks # Setup objects to populate in for-loop with tts() wave_objects <- vector(mode = "list", length = length(paragraphs)) - par_along <- seq_along(paragraphs) + paragraphs_along <- seq_along(paragraphs) ideal_duration <- rep(NA, length(paragraphs)) - pb <- progress_bar$new( - format = "Fetching Narration [:bar] :percent", - total = length(par_along) - ) - if (service == "coqui") { - cli::cli_alert_info("Coqui TTS does not support MP3 format; will produce a WAV audio output.") - } - # Iterate through arguments used in tts() - for (i in par_along) { - args <- tts_args - args$text <- paragraphs[i] - args$voice <- voice - args$service <- service + for (ii in paragraphs_along) { + args <- tts_engine_args + args$text <- paragraphs[ii] args$bind_audio <- TRUE # coqui+ari doesn't work with mp3 - if (service == "coqui") { + if (tts_engine_args$service == "coqui") { args$output_format <- "wav" args$voice <- NULL - args$model_name <- model_name - args$vocoder_name <- vocoder_name } - wav <- do.call(text2speech::tts, args = args) + wav <- do.call(tts_engine, args = args) wav <- reduce(wav$wav, bind) - wav <- pad_wav(wav, duration = duration[i]) - ideal_duration[i] <- length(wav@left) / wav@samp.rate - wave_objects[[i]] <- wav - pb$tick() + wav <- pad_wav(wav, duration = duration[ii]) + ideal_duration[ii] <- length(wav@left) / wav@samp.rate + wave_objects[[ii]] <- wav } - - # Burn subtitles if (subtitles) { sub_file <- paste0(file_path_sans_ext(output), ".srt") ari_subtitles(paragraphs, wave_objects, sub_file) } - + print("Audio succesfully converted...............") # Create a video from images and audio - res <- ari_stitch(images, wave_objects, output, ...) - + res <- ari_stitch(images, wave_objects, output) # Collect output - args <- list(...) + args <- list() cleanup <- args$cleanup if (is.null(cleanup)) { cleanup <- TRUE @@ -172,11 +143,11 @@ ari_spin <- function(images, paragraphs, if (!cleanup) { attr(res, "wavs") <- wave_objects } - attr(res, "voice") <- voice + attr(res, "voice") <- tts_engine_args$voice if (subtitles) { attr(res, "subtitles") <- sub_file } - attr(res, "service") <- service + attr(res, "service") <- tts_engine_args$service return(res) } From 96c576592bd0a2dd20021969dba8350cb0b67562 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Thu, 31 Aug 2023 11:57:49 -0700 Subject: [PATCH 23/44] Fix `ari_narrate()` so we can get rid of text2speech --- R/ari_narrate.R | 24 +++++++++++------------- R/ari_spin.R | 4 +++- inst/test/ari_intro_script.md | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/R/ari_narrate.R b/R/ari_narrate.R index 2e42bdc..a3d12d1 100644 --- a/R/ari_narrate.R +++ b/R/ari_narrate.R @@ -44,20 +44,17 @@ #' @export #' @examples #' \dontrun{ -#' -#' # #' ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), -#' system.file("test", "ari_intro.html", package = "ari"), -#' voice = "Joey" -#' ) +#' system.file("test", "ari_intro.html", package = "ari")) #' } ari_narrate <- function(script, slides, output = tempfile(fileext = ".mp4"), - tts_engine = text2speech::tts(), + tts_engine = text2speech::tts, tts_engine_args = list(service = "coqui", - model_name = "tacotron2-DDC_ph", - vocoder_name = "ljspeech/univnet"), - tts_engine_auth = text2speech::tts_auth(), + voice = NULL, + model_name = "tacotron2-DDC_ph", + vocoder_name = "ljspeech/univnet"), + tts_engine_auth = text2speech::tts_auth, capture_method = c("vectorized", "iterative"), subtitles = FALSE, verbose = FALSE, @@ -67,6 +64,7 @@ ari_narrate <- function(script, slides, ...) { # Authentication for Text-to-Speech Engines auth <- tts_engine_auth(service = tts_engine_args$service) + # Stop message if (!auth) { stop(paste0( "It appears you're not authenticated with ", @@ -80,7 +78,7 @@ ari_narrate <- function(script, slides, if (!(capture_method %in% c("vectorized", "iterative"))) { stop('capture_method must be either "vectorized" or "iterative"') } - # Output directory, path to script + # Output directory, path to script output_dir <- normalizePath(dirname(output)) script <- normalizePath(script) if (file_ext(script) %in% c("Rmd", "rmd") & missing(slides)) { @@ -101,7 +99,7 @@ ari_narrate <- function(script, slides, file.exists(script), dir.exists(output_dir) ) - # Pargraphs + # Convert script to html and get text if (file_ext(script) %in% c("Rmd", "rmd")) { paragraphs <- parse_html_comments(script) } else { @@ -109,9 +107,9 @@ ari_narrate <- function(script, slides, if (cleanup) { on.exit(unlink(html_path, force = TRUE), add = TRUE) } - render(script, output_format = html_document(), output_file = html_path) + rmarkdown::render(script, output_format = rmarkdown::html_document(), output_file = html_path) paragraphs <- map_chr( - html_text(html_nodes(read_html(html_path), "p")), + rvest::html_text(rvest::html_nodes(xml2::read_html(html_path), "p")), function(x) { gsub("\u2019", "'", x) } diff --git a/R/ari_spin.R b/R/ari_spin.R index 7f348e2..2543337 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -62,7 +62,9 @@ ari_spin <- function(images, paragraphs, tts_engine_auth = text2speech::tts_auth, subtitles = FALSE, duration = NULL, - key_or_json_file = NULL) { + key_or_json_file = NULL, + verbose = FALSE, + cleanup = TRUE) { # Check for ffmpeg ffmpeg_exec() # Argument checks diff --git a/inst/test/ari_intro_script.md b/inst/test/ari_intro_script.md index 95ada8c..b2a6f58 100644 --- a/inst/test/ari_intro_script.md +++ b/inst/test/ari_intro_script.md @@ -20,4 +20,4 @@ a text to speech product from Amazon Web Services. ## Conclusion -Thank you for watching this video and good luck using Ari! \ No newline at end of file +Thank you for watching this video and good luck using Ari! From 8ff859effdaade20a9e46b9c3b3eb7fc9b8a2017 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 1 Sep 2023 13:18:39 -0700 Subject: [PATCH 24/44] Put all the ffmpeg related arguments into a list called `ffmpeg_args` --- R/ari_narrate.R | 10 +---- R/ari_stitch.R | 113 +++++++++++++++++++++++------------------------- 2 files changed, 55 insertions(+), 68 deletions(-) diff --git a/R/ari_narrate.R b/R/ari_narrate.R index a3d12d1..1e3a4b8 100644 --- a/R/ari_narrate.R +++ b/R/ari_narrate.R @@ -2,8 +2,6 @@ #' #' \code{ari_narrate} creates a video from a script written in markdown and HTML #' slides created with \code{\link[rmarkdown]{rmarkdown}} or a similar package. -#' This function uses \href{https://aws.amazon.com/polly/}{Amazon Polly} -#' via \code{\link{ari_spin}}. #' #' @param script Either a markdown file where every paragraph will be read over #' a corresponding slide, or an \code{.Rmd} file where each HTML comment will @@ -15,7 +13,7 @@ #' @param voice The voice you want to use. See #' \code{\link[text2speech]{tts_voices}} for more information #' about what voices are available. -#' @param service speech synthesis service to use, +#' @param service Speech Synthesis service to use, #' passed to \code{\link[text2speech]{tts}}. #' Either \code{"amazon"} or \code{"google"}. #' @param capture_method Either \code{"vectorized"} or \code{"iterative"}. @@ -28,10 +26,6 @@ #' \code{.srt}. #' @param ... Arguments that will be passed to \code{\link[webshot]{webshot}}. #' @param verbose print diagnostic messages. If > 1, then more are printed -#' @param audio_codec The audio encoder for the splicing. If this -#' fails, try \code{copy}. -#' @param video_codec The video encoder for the splicing. If this -#' fails, see \code{ffmpeg -codecs} #' @param cleanup If \code{TRUE}, interim files are deleted #' #' @return The output from \code{\link{ari_spin}} @@ -58,8 +52,6 @@ ari_narrate <- function(script, slides, capture_method = c("vectorized", "iterative"), subtitles = FALSE, verbose = FALSE, - audio_codec = get_audio_codec(), - video_codec = get_video_codec(), cleanup = TRUE, ...) { # Authentication for Text-to-Speech Engines diff --git a/R/ari_stitch.R b/R/ari_stitch.R index 06f9ef1..5b3c2d4 100644 --- a/R/ari_stitch.R +++ b/R/ari_stitch.R @@ -1,4 +1,4 @@ -#' Create a video from images and audio +#' Generate video from images and audio #' #' Given a vector of paths to images (preferably \code{.jpg}s #' or \code{.png}s) and a flat list of \code{\link[tuneR]{Wave}}s of equal @@ -74,44 +74,40 @@ ari_stitch <- function(images, audio, output = tempfile(fileext = ".mp4"), verbose = FALSE, cleanup = TRUE, - ffmpeg_opts = "", - divisible_height = TRUE, - audio_codec = get_audio_codec(), - video_codec = get_video_codec(), - video_sync_method = "2", - audio_bitrate = NULL, - video_bitrate = NULL, - pixel_format = "yuv420p", - fast_start = FALSE, - deinterlace = FALSE, - stereo_audio = TRUE, + ffmpeg_args = list(frames_per_second = NULL, + video_filters = NULL, + divisible_height = TRUE, + audio_codec = get_audio_codec(), + video_codec = get_video_codec(), + deinterlace = FALSE, + ffmpeg_opts = "", + audio_bitrate = NULL, + video_bitrate = NULL, + video_sync_method = "2", + pixel_format = "yuv420p", + fast_start = FALSE, + stereo_audio = TRUE), duration = NULL, - video_filters = NULL, - frames_per_second = NULL, check_inputs = TRUE) { - # Argument check and file path processing + # File path processing stopifnot(length(images) > 0) images <- normalizePath(images) output_dir <- normalizePath(dirname(output)) output <- file.path(output_dir, basename(output)) - stopifnot( - length(audio) > 0, - dir.exists(output_dir) - ) + stopifnot(length(audio) > 0, dir.exists(output_dir)) + # Input check if (check_inputs) { - stopifnot( - identical(length(images), length(audio)), - all(file.exists(images)) - ) + stopifnot(identical(length(images), length(audio)), + all(file.exists(images))) } + # If audio is filename instead of Wave object if (is.character(audio)) { audio <- lapply(audio, function(x) { ext <- tolower(tools::file_ext(x)) func <- switch(ext, wav = tuneR::readWave, mp3 = tuneR::readMP3, - tuneR::readMP3 - ) + tuneR::readMP3) func(x) }) audio <- pad_wav(audio, duration = duration) @@ -123,19 +119,17 @@ ari_stitch <- function(images, audio, if (verbose > 1) { print(audio) } - # End of Argument check and file path processing # Audio preprocessing audio <- match_sample_rate(audio, verbose = verbose) wav <- purrr::reduce(audio, tuneR::bind) wav_path <- file.path(output_dir, paste0("ari_audio_", get_random_string(), ".wav")) tuneR::writeWave(wav, filename = wav_path) - if (cleanup) { on.exit(unlink(wav_path, force = TRUE), add = TRUE) } - # if there are any gif images, convert all images to gif + # If any gif images, convert all images to gif img_ext <- tolower(tools::file_ext(images)) any_gif <- any(img_ext %in% "gif") if (any_gif & !all(img_ext %in% "gif")) { @@ -152,14 +146,6 @@ ari_stitch <- function(images, audio, } } - input_txt_path <- file.path( - output_dir, - paste0( - "ari_input_", - get_random_string(), - ".txt" - ) - ) ## on windows ffmpeg cancats names adding the working directory, so if ## complete url is provided it adds it twice. if (.Platform$OS.type == "windows") { @@ -172,7 +158,15 @@ ari_stitch <- function(images, audio, images <- basename(images) } - # add "file 'IMAGE_PATH'" and duration in txt file located at input_txt_path + # Add "file 'IMAGE_PATH'" and duration in txt file located at input_txt_path + input_txt_path <- file.path( + output_dir, + paste0( + "ari_input_", + get_random_string(), + ".txt" + ) + ) for (i in seq_along(images)) { cat(paste0("file ", "'", images[i], "'", "\n"), file = input_txt_path, @@ -194,68 +188,69 @@ ari_stitch <- function(images, audio, # ffmpeg-concat-doesnt-work-with-absolute-path # input_txt_path = normalizePath(input_txt_path, winslash = "\\") - ffmpeg <- ffmpeg_exec(quote = TRUE) - # set video filters - if (!is.null(frames_per_second)) { - video_filters <- c(video_filters, paste0("fps=", frames_per_second)) + # Start organizing ffmpeg arguments here + ffmpeg <- ffmpeg_exec(quote = TRUE) + # Frames per second (fps) + if (!is.null(ffmpeg_args$frames_per_second)) { + video_filters <- c(ffmpeg_args$video_filters, paste0("fps=", ffmpeg_args$frames_per_second)) } else { - video_filters <- c(video_filters, "fps=5") + video_filters <- c(ffmpeg_args$video_filters, "fps=5") } - if (divisible_height) { + # Divisible height + if (ffmpeg_args$divisible_height) { video_filters <- c(video_filters, '"scale=trunc(iw/2)*2:trunc(ih/2)*2"') } - # workaround for older ffmpeg # https://stackoverflow.com/questions/32931685/ # the-encoder-aac-is-experimental-but-experimental-codecs-are-not-enabled experimental <- FALSE - if (!is.null(audio_codec)) { - if (audio_codec == "aac") { + if (!is.null(ffmpeg_args$audio_codec)) { + if (ffmpeg_args$audio_codec == "aac") { experimental <- TRUE } } - if (deinterlace) { + if (ffmpeg_args$deinterlace) { video_filters <- c(video_filters, "yadif") } video_filters <- paste(video_filters, collapse = ",") video_filters <- paste0("-vf ", video_filters) - if (any(grepl("-vf", ffmpeg_opts))) { + if (any(grepl("-vf", ffmpeg_args$deinterlace))) { warning("Found video filters in ffmpeg_opts, may not be used correctly!") } - ffmpeg_opts <- c(video_filters, ffmpeg_opts) + ffmpeg_opts <- c(video_filters, ffmpeg_args$ffmpeg_opts) ffmpeg_opts <- paste(ffmpeg_opts, collapse = " ") - # create ffmpeg command + # ffmpeg command command <- paste( ffmpeg, "-y", "-f concat -safe 0 -i", shQuote(input_txt_path), "-i", shQuote(wav_path), - ifelse(!is.null(video_codec), paste("-c:v", video_codec), + ifelse(!is.null(ffmpeg_args$video_codec), paste("-c:v", ffmpeg_args$video_codec), "" ), - ifelse(!is.null(audio_codec), paste("-c:a", audio_codec), + ifelse(!is.null(ffmpeg_args$audio_codec), paste("-c:a", ffmpeg_args$audio_codec), "" ), - ifelse(stereo_audio, "-ac 2", ""), - ifelse(!is.null(audio_bitrate), paste("-b:a", audio_bitrate), + ifelse(ffmpeg_args$stereo_audio, "-ac 2", ""), + ifelse(!is.null(ffmpeg_args$audio_bitrate), paste("-b:a", ffmpeg_args$audio_bitrate), "" ), - ifelse(!is.null(video_bitrate), paste("-b:v", video_bitrate), + ifelse(!is.null(ffmpeg_args$video_bitrate), paste("-b:v", ffmpeg_args$video_bitrate), "" ), # ifelse(deinterlace, "-vf yadif", ""), - ifelse(!is.null(video_sync_method), paste("-fps_mode", "auto"), + ifelse(!is.null(ffmpeg_args$video_sync_method), paste("-fps_mode", "auto"), "" ), - ifelse(!is.null(pixel_format), paste("-pix_fmt", pixel_format), + ifelse(!is.null(ffmpeg_args$pixel_format), paste("-pix_fmt", ffmpeg_args$pixel_format), "" ), - ifelse(fast_start, "-movflags +faststart", ""), + ifelse(ffmpeg_args$fast_start, "-movflags +faststart", ""), ffmpeg_opts, - ifelse(!is.null(frames_per_second), paste0("-r ", frames_per_second), ""), + ifelse(!is.null(ffmpeg_args$frames_per_second), paste0("-r ", ffmpeg_args$frames_per_second), ""), ifelse(experimental, "-strict experimental", ""), "-max_muxing_queue_size 9999", "-threads 2", From 63eaaae19505805ffd6387f17f034239f6245b7a Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 1 Sep 2023 14:38:59 -0700 Subject: [PATCH 25/44] Syntax fix --- R/ari_spin.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index 2543337..3cfef9f 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -132,8 +132,9 @@ ari_spin <- function(images, paragraphs, if (subtitles) { sub_file <- paste0(file_path_sans_ext(output), ".srt") ari_subtitles(paragraphs, wave_objects, sub_file) + message("Subtitle file successfully generated") } - print("Audio succesfully converted...............") + message("Audio succesfully converted") # Create a video from images and audio res <- ari_stitch(images, wave_objects, output) # Collect output From 48d3cdfbbe19fa69859fca949c98b00dcdb95e7e Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 1 Sep 2023 14:42:05 -0700 Subject: [PATCH 26/44] `ari_burn_subtitles()`: Fix destination of output in system command --- R/ari_burn_subtitles.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/R/ari_burn_subtitles.R b/R/ari_burn_subtitles.R index 06c7339..33bd955 100644 --- a/R/ari_burn_subtitles.R +++ b/R/ari_burn_subtitles.R @@ -4,20 +4,23 @@ #' \code{--enable-libass} as per #' \url{https://trac.ffmpeg.org/wiki/HowToBurnSubtitlesIntoVideo} #' -#' @param video Video in \code{mp4} format -#' @param srt Subtitle file in \code{srt} format +#' @param input_video Path to video in \code{mp4} format +#' @param srt Path to subtitle file in \code{srt} format +#' @param output_video Path to video with subtitles #' @param verbose print diagnostic messages. If > 1, #' then more are printed #' #' @return Name of output video -ari_burn_subtitles <- function(video, srt, verbose = FALSE) { +ari_burn_subtitles <- function(input_video, srt, + output_video = tempfile(fileext = ".mp4"), + verbose = FALSE) { ffmpeg <- ffmpeg_exec(quote = TRUE) if (verbose > 0) { message("Burning in Subtitles") } command <- paste( - ffmpeg, "-y -i", video, paste0("-vf subtitles=", srt), - video + ffmpeg, "-y -i", input_video, paste0("-vf subtitles=", srt), + output_video ) if (verbose > 0) { @@ -28,5 +31,5 @@ ari_burn_subtitles <- function(video, srt, verbose = FALSE) { warning("Result was non-zero for ffmpeg") } - return(video) + output_video } From 0c34a4e8b455132fe3ff3b241e44c06eb19f8f58 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Tue, 5 Sep 2023 11:52:12 -0700 Subject: [PATCH 27/44] Document `ari_subtitles()` and `ari_burn_subtitles()` --- NAMESPACE | 5 ++-- R/ari_burn_subtitles.R | 1 + R/ari_subtitles.R | 15 ++++++----- man/ari_burn_subtitles.Rd | 13 +++++++--- man/ari_narrate.Rd | 38 +++++++++++++-------------- man/ari_spin.Rd | 54 +++++++++++++++------------------------ man/ari_subtitles.Rd | 20 +++++++++++++++ 7 files changed, 81 insertions(+), 65 deletions(-) create mode 100644 man/ari_subtitles.Rd diff --git a/NAMESPACE b/NAMESPACE index 7fa75e8..148b8cd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,11 @@ # Generated by roxygen2: do not edit by hand +export(ari_burn_subtitles) export(ari_example) export(ari_narrate) export(ari_spin) export(ari_stitch) +export(ari_subtitles) export(ari_talk) export(audio_codec_encode) export(check_ffmpeg_version) @@ -38,19 +40,16 @@ export(xml_notes) import(httr) importFrom(cli,cli_alert_info) importFrom(docxtractr,convert_to_pdf) -importFrom(hms,hms) importFrom(jsonlite,fromJSON) importFrom(pdftools,pdf_convert) importFrom(pdftools,pdf_info) importFrom(pdftools,poppler_config) -importFrom(progress,progress_bar) importFrom(purrr,compose) importFrom(purrr,discard) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map2_int) importFrom(purrr,map_chr) -importFrom(purrr,map_dbl) importFrom(purrr,reduce) importFrom(purrr,walk) importFrom(rmarkdown,html_document) diff --git a/R/ari_burn_subtitles.R b/R/ari_burn_subtitles.R index 33bd955..dae1254 100644 --- a/R/ari_burn_subtitles.R +++ b/R/ari_burn_subtitles.R @@ -11,6 +11,7 @@ #' then more are printed #' #' @return Name of output video +#' @export ari_burn_subtitles <- function(input_video, srt, output_video = tempfile(fileext = ".mp4"), verbose = FALSE) { diff --git a/R/ari_subtitles.R b/R/ari_subtitles.R index dee0c37..4d0020e 100644 --- a/R/ari_subtitles.R +++ b/R/ari_subtitles.R @@ -1,9 +1,12 @@ -# Generate subtitle files for audio or video content -# paragraphs - strings of text -# dutations - paragraph duration in seconds -# path - path to .srt file output -#' @importFrom purrr map map_dbl map2 -#' @importFrom hms hms +#' Generate subtitle files for audio for video content +#' +#' @param paragraphs String of text +#' @param wavs Wave objects from tuneR +#' @param path Path to .srt file output +#' @param width Width of each subtitle +#' +#' @return +#' @export ari_subtitles <- function(paragraphs, wavs, path, width = 42) { # Calculate the duration of each audio file durations <- map_dbl(wavs, ~ length(.x@left) / .x@samp.rate) diff --git a/man/ari_burn_subtitles.Rd b/man/ari_burn_subtitles.Rd index c5614b6..92a7831 100644 --- a/man/ari_burn_subtitles.Rd +++ b/man/ari_burn_subtitles.Rd @@ -4,12 +4,19 @@ \alias{ari_burn_subtitles} \title{Burn Subtitles into a video} \usage{ -ari_burn_subtitles(video, srt, verbose = FALSE) +ari_burn_subtitles( + input_video, + srt, + output_video = tempfile(fileext = ".mp4"), + verbose = FALSE +) } \arguments{ -\item{video}{Video in \code{mp4} format} +\item{input_video}{Path to video in \code{mp4} format} -\item{srt}{Subtitle file in \code{srt} format} +\item{srt}{Path to subtitle file in \code{srt} format} + +\item{output_video}{Path to video with subtitles} \item{verbose}{print diagnostic messages. If > 1, then more are printed} diff --git a/man/ari_narrate.Rd b/man/ari_narrate.Rd index 025e941..6c2cec3 100644 --- a/man/ari_narrate.Rd +++ b/man/ari_narrate.Rd @@ -2,21 +2,23 @@ % Please edit documentation in R/ari_narrate.R \name{ari_narrate} \alias{ari_narrate} -\title{Create a video from slides and a script} +\title{Generate video from slides and a script} \usage{ ari_narrate( script, slides, output = tempfile(fileext = ".mp4"), - voice = text2speech::tts_default_voice(service = service), - service = "amazon", + tts_engine = text2speech::tts, + tts_engine_args = list(service = "coqui", voice = NULL, model_name = + "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet"), + tts_engine_auth = text2speech::tts_auth, capture_method = c("vectorized", "iterative"), subtitles = FALSE, - ..., verbose = FALSE, audio_codec = get_audio_codec(), video_codec = get_video_codec(), - cleanup = TRUE + cleanup = TRUE, + ... ) } \arguments{ @@ -30,14 +32,6 @@ similar package.} \item{output}{The path to the video file which will be created.} -\item{voice}{The voice you want to use. See -\code{\link[text2speech]{tts_voices}} for more information -about what voices are available.} - -\item{service}{speech synthesis service to use, -passed to \code{\link[text2speech]{tts}}. -Either \code{"amazon"} or \code{"google"}.} - \item{capture_method}{Either \code{"vectorized"} or \code{"iterative"}. The vectorized mode is faster though it can cause screens to repeat. If making a video from an \code{\link[rmarkdown]{ioslides_presentation}} @@ -48,8 +42,6 @@ default value is \code{FALSE}. If \code{TRUE} then a file with the same name as the \code{output} argument will be created, but with the file extension \code{.srt}.} -\item{...}{Arguments that will be passed to \code{\link[webshot]{webshot}}.} - \item{verbose}{print diagnostic messages. If > 1, then more are printed} \item{audio_codec}{The audio encoder for the splicing. If this @@ -59,6 +51,16 @@ fails, try \code{copy}.} fails, see \code{ffmpeg -codecs}} \item{cleanup}{If \code{TRUE}, interim files are deleted} + +\item{...}{Arguments that will be passed to \code{\link[webshot]{webshot}}.} + +\item{voice}{The voice you want to use. See +\code{\link[text2speech]{tts_voices}} for more information +about what voices are available.} + +\item{service}{speech synthesis service to use, +passed to \code{\link[text2speech]{tts}}. +Either \code{"amazon"} or \code{"google"}.} } \value{ The output from \code{\link{ari_spin}} @@ -71,11 +73,7 @@ via \code{\link{ari_spin}}. } \examples{ \dontrun{ - -# ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), - system.file("test", "ari_intro.html", package = "ari"), - voice = "Joey" -) + system.file("test", "ari_intro.html", package = "ari")) } } diff --git a/man/ari_spin.Rd b/man/ari_spin.Rd index 883b83a..3bc6786 100644 --- a/man/ari_spin.Rd +++ b/man/ari_spin.Rd @@ -3,21 +3,21 @@ \name{ari_spin} \alias{ari_spin} \alias{have_polly} -\title{Create a video from images and text} +\title{Generate video from images and text} \usage{ ari_spin( images, paragraphs, output = tempfile(fileext = ".mp4"), - voice = text2speech::tts_default_voice(service = service), - model_name = "tacotron2-DDC_ph", - vocoder_name = "ljspeech/univnet", - service = ifelse(have_polly(), "amazon", "google"), + tts_engine = text2speech::tts, + tts_engine_args = list(service = "coqui", voice = NULL, model_name = + "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet"), + tts_engine_auth = text2speech::tts_auth, subtitles = FALSE, duration = NULL, - tts_args = NULL, key_or_json_file = NULL, - ... + verbose = FALSE, + cleanup = TRUE ) have_polly() @@ -29,6 +29,17 @@ have_polly() \item{output}{A path to the video file which will be created.} +\item{subtitles}{Should a \code{.srt} file be created with subtitles? The +default value is \code{FALSE}. If \code{TRUE} then a file with the same name +as the \code{output} argument will be created, but with the file extension +\code{.srt}.} + +\item{duration}{a vector of numeric durations for each audio +track. See \code{\link{pad_wav}}} + +\item{key_or_json_file}{access key or JSON file to pass to +\code{\link{tts_auth}} for authorization} + \item{voice}{The voice you want to use. See \code{\link[text2speech]{tts_voices}} for more information about what voices are available.} @@ -43,20 +54,9 @@ transmission} passed to \code{\link[text2speech]{tts}}, Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}.} -\item{subtitles}{Should a \code{.srt} file be created with subtitles? The -default value is \code{FALSE}. If \code{TRUE} then a file with the same name -as the \code{output} argument will be created, but with the file extension -\code{.srt}.} - -\item{duration}{a vector of numeric durations for each audio -track. See \code{\link{pad_wav}}} +\item{...}{Additional arguments to voice_engine} \item{tts_args}{list of arguments to pass to \code{\link{tts}}} - -\item{key_or_json_file}{access key or JSON file to pass to -\code{\link{tts_auth}} for authorization} - -\item{...}{additional arguments to \code{\link{ari_stitch}}} } \value{ The output from \code{\link{ari_stitch}} @@ -64,23 +64,11 @@ The output from \code{\link{ari_stitch}} \description{ Given equal length vectors of paths to images (preferably \code{.jpg}s or \code{.png}s) and strings which will be -synthesized by -\href{https://aws.amazon.com/polly/}{Amazon Polly} or -any other synthesizer available in -\code{\link[text2speech]{tts}}, this function creates an +synthesized by a text-to-speech engine, this function creates an \code{.mp4} video file where each image is shown with its corresponding narration. This function uses \code{\link{ari_stitch}} to create the video. } -\details{ -This function needs to connect to -\href{https://aws.amazon.com/}{Amazon Web Services} in order to create the -narration. You can find a guide for accessing AWS from R -\href{http://seankross.com/2017/05/02/Access-Amazon-Web-Services-in-R.html}{here}. -For more information about how R connects -to Amazon Polly see the \code{aws.polly} documentation -\href{https://github.com/cloudyr/aws.polly}{here}. -} \examples{ \dontrun{ @@ -91,7 +79,7 @@ sentences <- c( "Welcome to my very interesting lecture.", "Here are some fantastic equations I came up with." ) -ari_spin(slides, sentences, voice = "Joey") +ari_spin(slides, sentences) } } diff --git a/man/ari_subtitles.Rd b/man/ari_subtitles.Rd new file mode 100644 index 0000000..fb50959 --- /dev/null +++ b/man/ari_subtitles.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ari_subtitles.R +\name{ari_subtitles} +\alias{ari_subtitles} +\title{Generate subtitle files for audio for video content} +\usage{ +ari_subtitles(paragraphs, wavs, path, width = 42) +} +\arguments{ +\item{paragraphs}{String of text} + +\item{wavs}{Wave objects from tuneR} + +\item{path}{Path to .srt file output} + +\item{width}{Width of each subtitle} +} +\description{ +Generate subtitle files for audio for video content +} From c820e21c03d9988ce2a8de8d2f5ca4e38d7f7ec9 Mon Sep 17 00:00:00 2001 From: howardbaek Date: Fri, 8 Sep 2023 09:50:05 -0700 Subject: [PATCH 28/44] Fix dependency issue --- NAMESPACE | 2 ++ R/ari_spin.R | 2 +- R/ari_subtitles.R | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 148b8cd..a2d58da 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(xml_notes) import(httr) importFrom(cli,cli_alert_info) importFrom(docxtractr,convert_to_pdf) +importFrom(hms,hms) importFrom(jsonlite,fromJSON) importFrom(pdftools,pdf_convert) importFrom(pdftools,pdf_info) @@ -50,6 +51,7 @@ importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map2_int) importFrom(purrr,map_chr) +importFrom(purrr,map_dbl) importFrom(purrr,reduce) importFrom(purrr,walk) importFrom(rmarkdown,html_document) diff --git a/R/ari_spin.R b/R/ari_spin.R index 2543337..d1e837e 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -130,7 +130,7 @@ ari_spin <- function(images, paragraphs, } # Burn subtitles if (subtitles) { - sub_file <- paste0(file_path_sans_ext(output), ".srt") + sub_file <- paste0(tools::file_path_sans_ext(output), ".srt") ari_subtitles(paragraphs, wave_objects, sub_file) } print("Audio succesfully converted...............") diff --git a/R/ari_subtitles.R b/R/ari_subtitles.R index 4d0020e..81c57b7 100644 --- a/R/ari_subtitles.R +++ b/R/ari_subtitles.R @@ -5,7 +5,8 @@ #' @param path Path to .srt file output #' @param width Width of each subtitle #' -#' @return +#' @importFrom purrr map_dbl +#' @importFrom hms hms #' @export ari_subtitles <- function(paragraphs, wavs, path, width = 42) { # Calculate the duration of each audio file @@ -20,7 +21,7 @@ ari_subtitles <- function(paragraphs, wavs, path, width = 42) { # Convery cumulative duration to format hh:mm:ss,ms cumdur <- cumsum(durations) - cumdur <- map(cumdur, hms) + cumdur <- map(cumdur, hms::hms) cumdur <- map(cumdur, as.character) cumdur <- map(cumdur, substr, start = 0, stop = 12) cumdur <- map(cumdur, gsub, pattern = "\\.", replacement = ",") From e259d899f419632463421dba5adc427b522d5651 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Thu, 14 Sep 2023 10:42:44 -0700 Subject: [PATCH 29/44] Remove download_gs_file.R and pptx_notes.R --- NAMESPACE | 22 ---- R/download_gs_file.R | 262 ---------------------------------------- R/pptx_notes.R | 186 ---------------------------- man/ari_narrate.Rd | 38 +++--- man/ari_spin.Rd | 54 ++++----- man/download_gs_file.Rd | 25 ---- man/get_slide_id.Rd | 33 ----- man/pdf_to_pngs.Rd | 20 --- man/pptx_notes.Rd | 36 ------ man/pptx_to_pdf.Rd | 17 --- man/xml_notes.Rd | 21 ---- 11 files changed, 39 insertions(+), 675 deletions(-) delete mode 100644 R/download_gs_file.R delete mode 100644 R/pptx_notes.R delete mode 100644 man/download_gs_file.Rd delete mode 100644 man/get_slide_id.Rd delete mode 100644 man/pdf_to_pngs.Rd delete mode 100644 man/pptx_notes.Rd delete mode 100644 man/pptx_to_pdf.Rd delete mode 100644 man/xml_notes.Rd diff --git a/NAMESPACE b/NAMESPACE index 7fa75e8..749d588 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,6 @@ export(ari_stitch) export(ari_talk) export(audio_codec_encode) export(check_ffmpeg_version) -export(download_gs_file) export(ffmpeg_audio_codecs) export(ffmpeg_codecs) export(ffmpeg_convert) @@ -18,32 +17,15 @@ export(ffmpeg_version) export(ffmpeg_version_sufficient) export(ffmpeg_video_codecs) export(get_audio_codec) -export(get_folder_id) -export(get_slide_id) export(get_video_codec) export(have_ffmpeg_exec) export(have_polly) -export(make_slide_url) export(pad_wav) -export(pdf_to_pngs) -export(pptx_notes) -export(pptx_slide_note_df) -export(pptx_slide_text_df) -export(pptx_to_pdf) export(set_audio_codec) export(set_video_codec) -export(unzip_pptx) export(video_codec_encode) -export(xml_notes) -import(httr) importFrom(cli,cli_alert_info) -importFrom(docxtractr,convert_to_pdf) importFrom(hms,hms) -importFrom(jsonlite,fromJSON) -importFrom(pdftools,pdf_convert) -importFrom(pdftools,pdf_info) -importFrom(pdftools,poppler_config) -importFrom(progress,progress_bar) importFrom(purrr,compose) importFrom(purrr,discard) importFrom(purrr,map) @@ -65,9 +47,5 @@ importFrom(tools,file_path_sans_ext) importFrom(tuneR,Wave) importFrom(tuneR,bind) importFrom(tuneR,writeWave) -importFrom(utils,unzip) importFrom(webshot,webshot) importFrom(xml2,read_html) -importFrom(xml2,read_xml) -importFrom(xml2,xml_find_all) -importFrom(xml2,xml_text) diff --git a/R/download_gs_file.R b/R/download_gs_file.R deleted file mode 100644 index 845eb26..0000000 --- a/R/download_gs_file.R +++ /dev/null @@ -1,262 +0,0 @@ -#' Download Google Slides File -#' -#' @param gs_url Link to Google slides presentation, passed to -#' \code{\link{get_slide_id}} -#' @param out_type output type of file to download. Usually -#' `pdf` or `pptx` -#' -#' @note This downloads presentations if they are public and also try to make -#' sure it does not fail on large files -#' @return Downloaded file (in temporary directory) -#' @export -download_gs_file = function(gs_url, out_type = "pptx") { - stopifnot(is.character(gs_url)) - id = get_slide_id(gs_url) - # construct URL to export image file from Google Slides - url = export_url(id = id, page_id = NULL, type = out_type) - tmp = tempfile(fileext = paste0(".", out_type)) - - # retrieve from url and write response to disk - result = httr::GET(url, httr::write_disk(tmp)) - warn_user = FALSE - fr_header = result$headers$`x-frame-options` - if (!is.null(fr_header)) { - if (all(fr_header == "DENY")) { - warn_user = TRUE - } - } - if (httr::status_code(result) >= 300) { - warn_user = TRUE - } - # Don't write something if not really a pptx - content_type = result$headers$`content-type` - if (httr::status_code(result) >= 400 && - !is.null(content_type) && grepl("html", content_type)) { - file.remove(tmp) - } - if (grepl("ServiceLogin", result$url)) { - warn_user = TRUE - } - - if (warn_user) { - cli::cli_alert_warning( - paste("Is link sharing enabled?", - "It's possible that this presentation isn't accessible.") - ) - } - - tmp -} - -#' Get Slide ID from URL -#' -#' @param x URL of slide -#' -#' @return A character vector -#' @export -#' -#' @examples -#' x = paste0("https://docs.google.com/presentation/d/", -#' "1Tg-GTGnUPduOtZKYuMoelqUNZnUp3vvg_7TtpUPL7e8", -#' "/edit#slide=id.g154aa4fae2_0_58") -#' get_slide_id(x) -get_slide_id <- function(x) { - x = sub(".*presentation/", "", x) - x = sub("/d/e", "/d", x) # if you publish by accident - x = sub("^(d|e)/", "", x) - x = strsplit(x, "/")[[1]] - x = x[ !grepl("^(edit|pub|export|png)", x)] - x = x[ nchar(x) > 5] - x -} - -# Constructs an URL to export an image file from a Google Slides -export_url <- function(id, page_id = NULL, type = "png") { - url <- paste0("https://docs.google.com/presentation/d/", - id, "/export/", type, "?id=", id) - if (!is.null(page_id)) { - url = paste0(url, "&pageid=", page_id) - } - url -} - -# Constructs an URL to export to pptx -pptx_url = function(id) { - export_url(id, page_id = NULL, type = "pptx") -} - -# Constructs an URL to export to pdf -pdf_url = function(id) { - export_url(id, page_id = NULL, type = "pdf") -} - -#' @export -#' @rdname get_slide_id -make_slide_url <- function(x) { - x = get_slide_id(x) - x = paste0("https://docs.google.com/presentation/d/",x) - x -} - -# Check if vector of URLs is valid (Status Code = 200) -check_png_urls <- function(urls) { - res = vapply(urls, function(url) { - tfile = tempfile(fileext = ".png") - ret = httr::GET(url) - httr::status_code(ret) == 200 - }, FUN.VALUE = logical(1)) - return(res) -} - -# Extract page IDs of slides in a Google Slides presentation -#' @importFrom jsonlite fromJSON -#' @import httr -get_page_ids = function(id) { - id = get_slide_id(id) - url = paste0("https://docs.google.com/presentation/d/", id) - tfile = tempfile(fileext = ".html") - res = httr::GET(url, httr::write_disk(tfile)) - httr::stop_for_status(res) - cr = httr::content(res) - script = rvest::html_nodes(cr, xpath ="//script") - script = rvest::html_text(script) - script = unique(script) - script = gsub("DOCS_modelChunk = undefined;", "", script) - script = script[ grepl("DOCS_modelChunk\\s=\\s\\[", x = script)] - - all_types = c("PREDEFINED_LAYOUT_UNSPECIFIED", - "BLANK", - "CAPTION_ONLY", - "TITLE", - "TITLE_AND_BODY", - "TITLE_AND_TWO_COLUMNS", - "TITLE_ONLY", - "SECTION_HEADER", - "SECTION_TITLE_AND_DESCRIPTION", - "ONE_COLUMN_TEXT", - "MAIN_POINT", - "BIG_NUMBER", - paste0("CUSTOM_", 1:100)) - types = paste0(all_types, collapse = "|") - # script = script[grepl(types, script)] - ss = strsplit(script, "; DOC") - ss = lapply(ss, trimws) - ss = lapply(ss, function(x) { - x[!grepl("^DOC", x)] = paste0(" DOC", x[!grepl("^DOC", x)]) - x - }) - ss = lapply(ss, function(x) { - x = x[grepl("^DOCS_modelChunk\\s=\\s\\[", x)] - x = x[ !x %in% "DOCS_modelChunk = undefined"] - x = sub("^DOCS_modelChunk\\s=\\s\\[", "[", x) - x - }) - ss = unlist(ss) - pages = lapply(ss, jsonlite::fromJSON) - pages = sapply(pages, function(x) { - x = x[sapply(x, function(r) any(unlist(r) %in% all_types))] - x = x[length(x)] - x - }) - pages = sapply(pages, function(x) { - if (length(x) < 2) { - if (length(x) == 0) { - return(NA) - } - x = x[[1]] - if (length(x) < 2) { - return(NA) - } - } - x[[2]] - }) - pages = pages[ !is.na(pages) ] - if (length(pages) >= 2) { - pages = c(pages[1], grep("^g", pages[2:length(pages)], value = TRUE)) - } - if (pages[1] != "p") { - pages = unique(c("p", pages)) - } - urls = export_url(id = id, page_id = pages) - pages = pages[check_png_urls(urls)] - pages -} - -#' @rdname get_slide_id -#' @export -#' @examples -#' x = "https://drive.google.com/drive/folders/1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC?usp=sharing" -#' get_folder_id(x) -#' x = "1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC" -#' get_folder_id(x) -get_folder_id = function(x) { - res = httr::parse_url(x) - x = res$path - x = sub(".*folders/", "", x) - x = sub("[?].*", "", x) - x = x[ nchar(x) > 5] - x = trimws(x) - x -} - -#' Convert a PPTX file to a PDF file -#' -#' Uses `docxtractr::convert_to_pdf()` for conversion. -#' -#' @param path Path to the PPTX file that needs to be converted to PDF. -#' @param verbose A logical value indicating whether to display progress -#' messages during the conversion process. The default value is TRUE -#' -#' @importFrom docxtractr convert_to_pdf -#' @export -pptx_to_pdf = function(path, verbose = TRUE) { - pdf_file = tempfile(fileext = ".pdf") - if (verbose) { - message("Converting PPTX to PDF") - } - out = try({ - docxtractr::convert_to_pdf(path, pdf_file = pdf_file) - }) - if (inherits(out, "try-error")) { - docxtractr::convert_to_pdf(path, pdf_file = pdf_file) - } - if (verbose > 1) { - message(paste0("PDF is at: ", pdf_file)) - } - return(pdf_file) -} - -#' Convert a PDF file to a series of PNG image files -#' -#' Uses `pdftools::pdf_convert()` for conversion. -#' -#' @param path Path to the PDF file that needs to be converted to PNGs. -#' @param verbose A logical value indicating whether to display progress -#' messages during the conversion process. The default value is TRUE -#' @param dpi The resolution in dots per inch (dpi) to be used for the PNG -#' images. The default value is 600. -#' -#' @importFrom pdftools poppler_config pdf_info pdf_convert -#' @export -pdf_to_pngs = function(path, - verbose = TRUE, - dpi = 600) { - fmts = pdftools::poppler_config()$supported_image_formats - if ("png" %in% fmts) { - format = "png" - } else { - format = fmts[1] - } - info = pdftools::pdf_info(pdf = path) - filenames = vapply(seq.int(info$pages), function(x) { - tempfile(fileext = paste0(".", format)) - }, FUN.VALUE = character(1)) - if (verbose) { - message("Converting PDFs to PNGs") - } - pngs = pdftools::pdf_convert( - pdf = path, dpi = dpi, - format = format, filenames = filenames, - verbose = as.logical(verbose)) - pngs -} diff --git a/R/pptx_notes.R b/R/pptx_notes.R deleted file mode 100644 index d6ece6f..0000000 --- a/R/pptx_notes.R +++ /dev/null @@ -1,186 +0,0 @@ -#' Get Notes from a PowerPoint (usually from Google Slides) -#' -#' @param file Character. Path for `PPTX` file -#' @param ... additional arguments to pass to \code{\link{xml_notes}}, -#' particularly \code{xpath} -#' -#' @return Either a character vector or `NULL` -#' @export -#' -#' @importFrom utils unzip -#' @examples -#' ex_file = system.file("extdata", "example.pptx", -#' package = "ariExtra") -#' pptx_notes(ex_file) -#' pptx_slide_note_df(ex_file) -#' pptx_slide_text_df(ex_file) -pptx_notes = function(file, ...) { - - df = pptx_slide_note_df(file, ...) - if (is.null(df)) { - return(NULL) - } - # need factor because they can be dumb with characters - # and numerics and the file naming of PPTX files - fac = basename(df$file) - fac = factor(fac, levels = unique(fac)) - ss = split(df, fac) - res = sapply(ss, function(x) { - paste(x$text, collapse = " ") - }) - if (any(trimws(res) %in% "")) { - warning("Slides with no notes exists") - } - res[ res == ""] = ";" - names(res) <- NULL - return(res) -} - -#' @export -#' @rdname pptx_notes -pptx_slide_text_df = function(file, ...) { - - L = unzip_pptx(file) - slides = L$slides - - if (length(slides) > 0) { - # in case empty notes - res = lapply(slides, function(x) { - xx = xml_notes(x, collapse_text = FALSE, ...) - if (length(xx) == 0) { - return(NULL) - } - snum = sub("[.]xml", "", sub("slide", "", basename(x))) - snum = as.numeric(snum) - data.frame( - file = x, - slide = snum, - text = xx, - index = 1:length(xx), - stringsAsFactors = FALSE) - }) - res = do.call(rbind, res) - return(res) - } else { - return(NULL) - } -} - -#' @export -#' @rdname pptx_notes -pptx_slide_note_df = function(file, ...) { - - L = unzip_pptx(file) - notes = L$notes - slides = L$slides - note_dir = L$note_dir - - if (length(notes) > 0) { - # in case empty notes - assoc_notes = sub("slide", "", basename(slides)) - assoc_notes = paste0("notesSlide", assoc_notes) - assoc_notes = file.path(note_dir, assoc_notes) - no_fe = !file.exists(assoc_notes) - if (any(no_fe)) { - file.create(assoc_notes[no_fe]) - notes = assoc_notes - } - res = lapply(notes, function(x) { - if (file.size(x) == 0) { - xx = "" - } else { - xx = xml_notes(x, collapse_text = FALSE, ...) - } - if (length(xx) == 0) { - xx = "" - } - snum = sub("[.]xml", "", sub("notesSlide", "", basename(x))) - snum = as.numeric(snum) - data.frame( - file = x, - slide = snum, - text = xx, - index = 1:length(xx), - stringsAsFactors = FALSE) - }) - res = do.call(rbind, res) - return(res) - } else { - return(NULL) - } -} - - -pptx_reorder_xml = function(files) { - if (length(files) == 0) { - return(files) - } - nums = basename(files) - nums = sub("[[:alpha:]]*(\\d.*)[.].*", "\\1", nums) - nums = as.numeric(nums) - if (any(is.na(nums))) { - warning(paste0("Trying to parse set of files (example: ", files[1], - ") from PPTX, failed")) - return(files) - } - files = files[order(nums)] -} - -#' @export -#' @rdname pptx_notes -unzip_pptx = function(file) { - tdir = tempfile() - dir.create(tdir) - res = unzip(file, exdir = tdir) - rm(res) - slide_dir = file.path(tdir, "ppt", "slides") - slides = list.files(path = slide_dir, pattern = "[.]xml$", - full.names = TRUE) - slides = pptx_reorder_xml(slides) - - note_dir = file.path(tdir, "ppt", "notesSlides") - notes = list.files(path = note_dir, pattern = "[.]xml$", - full.names = TRUE) - notes = pptx_reorder_xml(notes) - - tdir = normalizePath(tdir) - props_dir = file.path(tdir, "docProps") - props_file = file.path(props_dir, "core.xml") - ari_core_file = system.file("extdata", "docProps", - "core.xml", package = "ariExtra") - if (!dir.exists(props_file)) { - dir.create(props_dir, recursive = TRUE) - file.copy(ari_core_file, props_file, - overwrite = TRUE) - } - - L = list(slides = slides, - notes = notes, - slide_dir = slide_dir, - note_dir = note_dir, - props_dir = props_dir, - props_file = props_file, - root_dir = tdir) - return(L) -} - -#' Get Notes from XML -#' -#' @param file XML file from a PPTX -#' @param collapse_text should text be collapsed by spaces? -#' @param xpath \code{xpath} to pass to [xml2::xml_find_all()] -#' -#' @return A character vector -#' @export -#' -#' @importFrom xml2 read_xml xml_text xml_find_all -xml_notes = function(file, collapse_text = TRUE, xpath = "//a:r//a:t") { - xdoc = xml2::read_xml(file) - # probably need to a:p//a:t and collapse all text within a a:p - txt = xml2::xml_find_all(x = xdoc, xpath = xpath) - txt = xml2::xml_text(txt) - if (collapse_text) { - txt = paste(txt, collapse = " ") - } - return(txt) -} diff --git a/man/ari_narrate.Rd b/man/ari_narrate.Rd index 025e941..6c2cec3 100644 --- a/man/ari_narrate.Rd +++ b/man/ari_narrate.Rd @@ -2,21 +2,23 @@ % Please edit documentation in R/ari_narrate.R \name{ari_narrate} \alias{ari_narrate} -\title{Create a video from slides and a script} +\title{Generate video from slides and a script} \usage{ ari_narrate( script, slides, output = tempfile(fileext = ".mp4"), - voice = text2speech::tts_default_voice(service = service), - service = "amazon", + tts_engine = text2speech::tts, + tts_engine_args = list(service = "coqui", voice = NULL, model_name = + "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet"), + tts_engine_auth = text2speech::tts_auth, capture_method = c("vectorized", "iterative"), subtitles = FALSE, - ..., verbose = FALSE, audio_codec = get_audio_codec(), video_codec = get_video_codec(), - cleanup = TRUE + cleanup = TRUE, + ... ) } \arguments{ @@ -30,14 +32,6 @@ similar package.} \item{output}{The path to the video file which will be created.} -\item{voice}{The voice you want to use. See -\code{\link[text2speech]{tts_voices}} for more information -about what voices are available.} - -\item{service}{speech synthesis service to use, -passed to \code{\link[text2speech]{tts}}. -Either \code{"amazon"} or \code{"google"}.} - \item{capture_method}{Either \code{"vectorized"} or \code{"iterative"}. The vectorized mode is faster though it can cause screens to repeat. If making a video from an \code{\link[rmarkdown]{ioslides_presentation}} @@ -48,8 +42,6 @@ default value is \code{FALSE}. If \code{TRUE} then a file with the same name as the \code{output} argument will be created, but with the file extension \code{.srt}.} -\item{...}{Arguments that will be passed to \code{\link[webshot]{webshot}}.} - \item{verbose}{print diagnostic messages. If > 1, then more are printed} \item{audio_codec}{The audio encoder for the splicing. If this @@ -59,6 +51,16 @@ fails, try \code{copy}.} fails, see \code{ffmpeg -codecs}} \item{cleanup}{If \code{TRUE}, interim files are deleted} + +\item{...}{Arguments that will be passed to \code{\link[webshot]{webshot}}.} + +\item{voice}{The voice you want to use. See +\code{\link[text2speech]{tts_voices}} for more information +about what voices are available.} + +\item{service}{speech synthesis service to use, +passed to \code{\link[text2speech]{tts}}. +Either \code{"amazon"} or \code{"google"}.} } \value{ The output from \code{\link{ari_spin}} @@ -71,11 +73,7 @@ via \code{\link{ari_spin}}. } \examples{ \dontrun{ - -# ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), - system.file("test", "ari_intro.html", package = "ari"), - voice = "Joey" -) + system.file("test", "ari_intro.html", package = "ari")) } } diff --git a/man/ari_spin.Rd b/man/ari_spin.Rd index 883b83a..3bc6786 100644 --- a/man/ari_spin.Rd +++ b/man/ari_spin.Rd @@ -3,21 +3,21 @@ \name{ari_spin} \alias{ari_spin} \alias{have_polly} -\title{Create a video from images and text} +\title{Generate video from images and text} \usage{ ari_spin( images, paragraphs, output = tempfile(fileext = ".mp4"), - voice = text2speech::tts_default_voice(service = service), - model_name = "tacotron2-DDC_ph", - vocoder_name = "ljspeech/univnet", - service = ifelse(have_polly(), "amazon", "google"), + tts_engine = text2speech::tts, + tts_engine_args = list(service = "coqui", voice = NULL, model_name = + "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet"), + tts_engine_auth = text2speech::tts_auth, subtitles = FALSE, duration = NULL, - tts_args = NULL, key_or_json_file = NULL, - ... + verbose = FALSE, + cleanup = TRUE ) have_polly() @@ -29,6 +29,17 @@ have_polly() \item{output}{A path to the video file which will be created.} +\item{subtitles}{Should a \code{.srt} file be created with subtitles? The +default value is \code{FALSE}. If \code{TRUE} then a file with the same name +as the \code{output} argument will be created, but with the file extension +\code{.srt}.} + +\item{duration}{a vector of numeric durations for each audio +track. See \code{\link{pad_wav}}} + +\item{key_or_json_file}{access key or JSON file to pass to +\code{\link{tts_auth}} for authorization} + \item{voice}{The voice you want to use. See \code{\link[text2speech]{tts_voices}} for more information about what voices are available.} @@ -43,20 +54,9 @@ transmission} passed to \code{\link[text2speech]{tts}}, Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}.} -\item{subtitles}{Should a \code{.srt} file be created with subtitles? The -default value is \code{FALSE}. If \code{TRUE} then a file with the same name -as the \code{output} argument will be created, but with the file extension -\code{.srt}.} - -\item{duration}{a vector of numeric durations for each audio -track. See \code{\link{pad_wav}}} +\item{...}{Additional arguments to voice_engine} \item{tts_args}{list of arguments to pass to \code{\link{tts}}} - -\item{key_or_json_file}{access key or JSON file to pass to -\code{\link{tts_auth}} for authorization} - -\item{...}{additional arguments to \code{\link{ari_stitch}}} } \value{ The output from \code{\link{ari_stitch}} @@ -64,23 +64,11 @@ The output from \code{\link{ari_stitch}} \description{ Given equal length vectors of paths to images (preferably \code{.jpg}s or \code{.png}s) and strings which will be -synthesized by -\href{https://aws.amazon.com/polly/}{Amazon Polly} or -any other synthesizer available in -\code{\link[text2speech]{tts}}, this function creates an +synthesized by a text-to-speech engine, this function creates an \code{.mp4} video file where each image is shown with its corresponding narration. This function uses \code{\link{ari_stitch}} to create the video. } -\details{ -This function needs to connect to -\href{https://aws.amazon.com/}{Amazon Web Services} in order to create the -narration. You can find a guide for accessing AWS from R -\href{http://seankross.com/2017/05/02/Access-Amazon-Web-Services-in-R.html}{here}. -For more information about how R connects -to Amazon Polly see the \code{aws.polly} documentation -\href{https://github.com/cloudyr/aws.polly}{here}. -} \examples{ \dontrun{ @@ -91,7 +79,7 @@ sentences <- c( "Welcome to my very interesting lecture.", "Here are some fantastic equations I came up with." ) -ari_spin(slides, sentences, voice = "Joey") +ari_spin(slides, sentences) } } diff --git a/man/download_gs_file.Rd b/man/download_gs_file.Rd deleted file mode 100644 index e68b439..0000000 --- a/man/download_gs_file.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/download_gs_file.R -\name{download_gs_file} -\alias{download_gs_file} -\title{Download Google Slides File} -\usage{ -download_gs_file(gs_url, out_type = "pptx") -} -\arguments{ -\item{gs_url}{Link to Google slides presentation, passed to -\code{\link{get_slide_id}}} - -\item{out_type}{output type of file to download. Usually -`pdf` or `pptx`} -} -\value{ -Downloaded file (in temporary directory) -} -\description{ -Download Google Slides File -} -\note{ -This downloads presentations if they are public and also try to make -sure it does not fail on large files -} diff --git a/man/get_slide_id.Rd b/man/get_slide_id.Rd deleted file mode 100644 index 734c024..0000000 --- a/man/get_slide_id.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/download_gs_file.R -\name{get_slide_id} -\alias{get_slide_id} -\alias{make_slide_url} -\alias{get_folder_id} -\title{Get Slide ID from URL} -\usage{ -get_slide_id(x) - -make_slide_url(x) - -get_folder_id(x) -} -\arguments{ -\item{x}{URL of slide} -} -\value{ -A character vector -} -\description{ -Get Slide ID from URL -} -\examples{ -x = paste0("https://docs.google.com/presentation/d/", -"1Tg-GTGnUPduOtZKYuMoelqUNZnUp3vvg_7TtpUPL7e8", -"/edit#slide=id.g154aa4fae2_0_58") -get_slide_id(x) -x = "https://drive.google.com/drive/folders/1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC?usp=sharing" -get_folder_id(x) -x = "1pXBQQdd1peI56GtQT-jEZ59xSmhqQlFC" -get_folder_id(x) -} diff --git a/man/pdf_to_pngs.Rd b/man/pdf_to_pngs.Rd deleted file mode 100644 index 7797c2c..0000000 --- a/man/pdf_to_pngs.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/download_gs_file.R -\name{pdf_to_pngs} -\alias{pdf_to_pngs} -\title{Convert a PDF file to a series of PNG image files} -\usage{ -pdf_to_pngs(path, verbose = TRUE, dpi = 600) -} -\arguments{ -\item{path}{Path to the PDF file that needs to be converted to PNGs.} - -\item{verbose}{A logical value indicating whether to display progress -messages during the conversion process. The default value is TRUE} - -\item{dpi}{The resolution in dots per inch (dpi) to be used for the PNG -images. The default value is 600.} -} -\description{ -Uses `pdftools::pdf_convert()` for conversion. -} diff --git a/man/pptx_notes.Rd b/man/pptx_notes.Rd deleted file mode 100644 index 026ded3..0000000 --- a/man/pptx_notes.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pptx_notes.R -\name{pptx_notes} -\alias{pptx_notes} -\alias{pptx_slide_text_df} -\alias{pptx_slide_note_df} -\alias{unzip_pptx} -\title{Get Notes from a PowerPoint (usually from Google Slides)} -\usage{ -pptx_notes(file, ...) - -pptx_slide_text_df(file, ...) - -pptx_slide_note_df(file, ...) - -unzip_pptx(file) -} -\arguments{ -\item{file}{Character. Path for `PPTX` file} - -\item{...}{additional arguments to pass to \code{\link{xml_notes}}, -particularly \code{xpath}} -} -\value{ -Either a character vector or `NULL` -} -\description{ -Get Notes from a PowerPoint (usually from Google Slides) -} -\examples{ -ex_file = system.file("extdata", "example.pptx", -package = "ariExtra") -pptx_notes(ex_file) -pptx_slide_note_df(ex_file) -pptx_slide_text_df(ex_file) -} diff --git a/man/pptx_to_pdf.Rd b/man/pptx_to_pdf.Rd deleted file mode 100644 index 0e03072..0000000 --- a/man/pptx_to_pdf.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/download_gs_file.R -\name{pptx_to_pdf} -\alias{pptx_to_pdf} -\title{Convert a PPTX file to a PDF file} -\usage{ -pptx_to_pdf(path, verbose = TRUE) -} -\arguments{ -\item{path}{Path to the PPTX file that needs to be converted to PDF.} - -\item{verbose}{A logical value indicating whether to display progress -messages during the conversion process. The default value is TRUE} -} -\description{ -Uses `docxtractr::convert_to_pdf()` for conversion. -} diff --git a/man/xml_notes.Rd b/man/xml_notes.Rd deleted file mode 100644 index 72ec733..0000000 --- a/man/xml_notes.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pptx_notes.R -\name{xml_notes} -\alias{xml_notes} -\title{Get Notes from XML} -\usage{ -xml_notes(file, collapse_text = TRUE, xpath = "//a:r//a:t") -} -\arguments{ -\item{file}{XML file from a PPTX} - -\item{collapse_text}{should text be collapsed by spaces?} - -\item{xpath}{\code{xpath} to pass to [xml2::xml_find_all()]} -} -\value{ -A character vector -} -\description{ -Get Notes from XML -} From 45119d5c357a13deced2ea764519eb5a937c7dc5 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Thu, 14 Sep 2023 14:54:18 -0700 Subject: [PATCH 30/44] Run `document()` --- man/ari_narrate.Rd | 12 +----------- man/ari_stitch.Rd | 44 ++++++++++++++++++-------------------------- 2 files changed, 19 insertions(+), 37 deletions(-) diff --git a/man/ari_narrate.Rd b/man/ari_narrate.Rd index 6c2cec3..d5d7386 100644 --- a/man/ari_narrate.Rd +++ b/man/ari_narrate.Rd @@ -15,8 +15,6 @@ ari_narrate( capture_method = c("vectorized", "iterative"), subtitles = FALSE, verbose = FALSE, - audio_codec = get_audio_codec(), - video_codec = get_video_codec(), cleanup = TRUE, ... ) @@ -44,12 +42,6 @@ as the \code{output} argument will be created, but with the file extension \item{verbose}{print diagnostic messages. If > 1, then more are printed} -\item{audio_codec}{The audio encoder for the splicing. If this -fails, try \code{copy}.} - -\item{video_codec}{The video encoder for the splicing. If this -fails, see \code{ffmpeg -codecs}} - \item{cleanup}{If \code{TRUE}, interim files are deleted} \item{...}{Arguments that will be passed to \code{\link[webshot]{webshot}}.} @@ -58,7 +50,7 @@ fails, see \code{ffmpeg -codecs}} \code{\link[text2speech]{tts_voices}} for more information about what voices are available.} -\item{service}{speech synthesis service to use, +\item{service}{Speech Synthesis service to use, passed to \code{\link[text2speech]{tts}}. Either \code{"amazon"} or \code{"google"}.} } @@ -68,8 +60,6 @@ The output from \code{\link{ari_spin}} \description{ \code{ari_narrate} creates a video from a script written in markdown and HTML slides created with \code{\link[rmarkdown]{rmarkdown}} or a similar package. -This function uses \href{https://aws.amazon.com/polly/}{Amazon Polly} -via \code{\link{ari_spin}}. } \examples{ \dontrun{ diff --git a/man/ari_stitch.Rd b/man/ari_stitch.Rd index 8dc9ad2..5307886 100644 --- a/man/ari_stitch.Rd +++ b/man/ari_stitch.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ari_stitch.R \name{ari_stitch} \alias{ari_stitch} -\title{Create a video from images and audio} +\title{Generate video from images and audio} \usage{ ari_stitch( images, @@ -10,20 +10,12 @@ ari_stitch( output = tempfile(fileext = ".mp4"), verbose = FALSE, cleanup = TRUE, - ffmpeg_opts = "", - divisible_height = TRUE, - audio_codec = get_audio_codec(), - video_codec = get_video_codec(), - video_sync_method = "2", - audio_bitrate = NULL, - video_bitrate = NULL, - pixel_format = "yuv420p", - fast_start = FALSE, - deinterlace = FALSE, - stereo_audio = TRUE, + ffmpeg_args = list(frames_per_second = NULL, video_filters = NULL, divisible_height = + TRUE, audio_codec = get_audio_codec(), video_codec = get_video_codec(), deinterlace = + FALSE, ffmpeg_opts = "", audio_bitrate = NULL, video_bitrate = NULL, + video_sync_method = "2", pixel_format = "yuv420p", fast_start = FALSE, stereo_audio = + TRUE), duration = NULL, - video_filters = NULL, - frames_per_second = NULL, check_inputs = TRUE ) } @@ -38,6 +30,12 @@ ari_stitch( \item{cleanup}{If \code{TRUE}, interim files are deleted} +\item{duration}{a vector of numeric durations for each audio +track. See \code{\link{pad_wav}}} + +\item{check_inputs}{Should the inputs be checked? Almost always should +be \code{TRUE}, but may be useful if trying to do customized stuff.} + \item{ffmpeg_opts}{additional options to send to \code{ffmpeg}. This is an advanced option, use at your own risk} @@ -50,13 +48,13 @@ fails, try \code{copy}.} \item{video_codec}{The video encoder for the splicing. If this fails, see \code{ffmpeg -codecs}} -\item{video_sync_method}{Video sync method. Should be -"auto" or `"vfr"` or a numeric. See \url{https://ffmpeg.org/ffmpeg.html}.} - \item{audio_bitrate}{Bit rate for audio. Passed to \code{-b:a}.} \item{video_bitrate}{Bit rate for video. Passed to \code{-b:v}.} +\item{video_sync_method}{Video sync method. Should be +"auto" or `"vfr"` or a numeric. See \url{https://ffmpeg.org/ffmpeg.html}.} + \item{pixel_format}{pixel format to encode for `ffmpeg`.} \item{fast_start}{Adding `faststart` flags for YouTube and other sites, @@ -66,20 +64,14 @@ see \url{https://trac.ffmpeg.org/wiki/Encode/YouTube}} see \url{https://ffmpeg.org/ffmpeg-filters.html}, generally for YouTube} +\item{frames_per_second}{frames per second of the video, should +be an integer} + \item{stereo_audio}{should the audio be forced to stereo, corresponds to `-ac 2`} -\item{duration}{a vector of numeric durations for each audio -track. See \code{\link{pad_wav}}} - \item{video_filters}{any options that are passed to \code{-vf} arguments for \code{ffmpeg}} - -\item{frames_per_second}{frames per second of the video, should -be an integer} - -\item{check_inputs}{Should the inputs be checked? Almost always should -be \code{TRUE}, but may be useful if trying to do customized stuff.} } \value{ A logical value, with the attribute \code{outfile} for the From 0183e3ffaa1b6431a5e1f65609fbe3c9a11b8b82 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 9 Oct 2023 16:20:20 -0700 Subject: [PATCH 31/44] Get rid of unnecessary Imports in DESCRIPTION --- DESCRIPTION | 4 ---- 1 file changed, 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2723110..6601ef5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,11 +20,7 @@ Depends: R (>= 3.1.0) Imports: cli, - docxtractr, hms, - httr, - jsonlite, - pdftools, progress, purrr, rmarkdown, From be9b52c9e9f19159f966181523ff121b92ebacb2 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 9 Oct 2023 16:45:12 -0700 Subject: [PATCH 32/44] Put progress bar back into `ari_spin()` --- R/ari_spin.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/ari_spin.R b/R/ari_spin.R index 2543337..fb7b945 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -112,6 +112,11 @@ ari_spin <- function(images, paragraphs, paragraphs_along <- seq_along(paragraphs) ideal_duration <- rep(NA, length(paragraphs)) + # Progress bar + pb <- progress_bar$new( + format = " Downloading [:bar] :percent eta: :eta", + total = 100, clear = TRUE, width = 60) + # Iterate through arguments used in tts() for (ii in paragraphs_along) { args <- tts_engine_args @@ -127,6 +132,8 @@ ari_spin <- function(images, paragraphs, wav <- pad_wav(wav, duration = duration[ii]) ideal_duration[ii] <- length(wav@left) / wav@samp.rate wave_objects[[ii]] <- wav + # Advance progress bar + pb$tick() } # Burn subtitles if (subtitles) { From 35c1a606167c66acc24dc832137f1797dd6163a3 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 9 Oct 2023 17:09:44 -0700 Subject: [PATCH 33/44] Get rid of `print()` --- R/ari_spin.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index fb7b945..857d364 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -140,7 +140,6 @@ ari_spin <- function(images, paragraphs, sub_file <- paste0(file_path_sans_ext(output), ".srt") ari_subtitles(paragraphs, wave_objects, sub_file) } - print("Audio succesfully converted...............") # Create a video from images and audio res <- ari_stitch(images, wave_objects, output) # Collect output From e6711baf6cda6f4ca7ffc48e6d9ae4f64aa06ada Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Tue, 10 Oct 2023 14:53:43 -0700 Subject: [PATCH 34/44] progress_bar --- R/ari_spin.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index 857d364..615ded6 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -113,7 +113,7 @@ ari_spin <- function(images, paragraphs, ideal_duration <- rep(NA, length(paragraphs)) # Progress bar - pb <- progress_bar$new( + pb <- progress::progress_bar$new( format = " Downloading [:bar] :percent eta: :eta", total = 100, clear = TRUE, width = 60) From 74ad4894d881a2f8d0b1b2edcc432dc26de983e5 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Tue, 10 Oct 2023 15:58:10 -0700 Subject: [PATCH 35/44] Create `coqui_args()` --- R/coqui_args.R | 17 +++++++++++++++++ man/coqui_args.Rd | 22 ++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 R/coqui_args.R create mode 100644 man/coqui_args.Rd diff --git a/R/coqui_args.R b/R/coqui_args.R new file mode 100644 index 0000000..7ba5902 --- /dev/null +++ b/R/coqui_args.R @@ -0,0 +1,17 @@ +#' List of arguments to the Coqui text-to-speech engine +#' +#' @param model_name Deep Learning model for Text-to-Speech Conversion +#' @param vocoder_name Voice coder used for speech coding and transmission +#' +#' @return List of arguments +#' @export +#' +#' @examples +#' coqui_args(model_name = "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet") +coqui_args <- function(model_name = "jenny", vocoder_name = "jenny") { +list(service = "coqui", + voice = NULL, + model_name = model_name, + vocoder_name = vocoder_name, + output_format = "wav") +} diff --git a/man/coqui_args.Rd b/man/coqui_args.Rd new file mode 100644 index 0000000..68ea724 --- /dev/null +++ b/man/coqui_args.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/coqui_args.R +\name{coqui_args} +\alias{coqui_args} +\title{List of arguments to the Coqui text-to-speech engine} +\usage{ +coqui_args(model_name = "jenny", vocoder_name = "jenny") +} +\arguments{ +\item{model_name}{Deep Learning model for Text-to-Speech Conversion} + +\item{vocoder_name}{Voice coder used for speech coding and transmission} +} +\value{ +List of arguments +} +\description{ +List of arguments to the Coqui text-to-speech engine +} +\examples{ +coqui_args(model_name = "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet") +} From bbb8f0256810fbaa9448dae15b02d7b80a7e0711 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Tue, 10 Oct 2023 15:58:55 -0700 Subject: [PATCH 36/44] Replace `tts_engine_args` with `coqui_args()` --- NAMESPACE | 1 + R/ari_narrate.R | 5 +---- R/ari_spin.R | 14 +++----------- man/ari_narrate.Rd | 3 +-- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 749d588..adc4e97 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(ari_stitch) export(ari_talk) export(audio_codec_encode) export(check_ffmpeg_version) +export(coqui_args) export(ffmpeg_audio_codecs) export(ffmpeg_codecs) export(ffmpeg_convert) diff --git a/R/ari_narrate.R b/R/ari_narrate.R index a3d12d1..0044189 100644 --- a/R/ari_narrate.R +++ b/R/ari_narrate.R @@ -50,10 +50,7 @@ ari_narrate <- function(script, slides, output = tempfile(fileext = ".mp4"), tts_engine = text2speech::tts, - tts_engine_args = list(service = "coqui", - voice = NULL, - model_name = "tacotron2-DDC_ph", - vocoder_name = "ljspeech/univnet"), + tts_engine_args = coqui_args(), tts_engine_auth = text2speech::tts_auth, capture_method = c("vectorized", "iterative"), subtitles = FALSE, diff --git a/R/ari_spin.R b/R/ari_spin.R index 615ded6..cae1e19 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -53,12 +53,9 @@ #' } #' ari_spin <- function(images, paragraphs, - output = tempfile(fileext = ".mp4"), + output, tts_engine = text2speech::tts, - tts_engine_args = list(service = "coqui", - voice = NULL, - model_name = "tacotron2-DDC_ph", - vocoder_name = "ljspeech/univnet"), + tts_engine_args = coqui_args(), tts_engine_auth = text2speech::tts_auth, subtitles = FALSE, duration = NULL, @@ -114,7 +111,7 @@ ari_spin <- function(images, paragraphs, # Progress bar pb <- progress::progress_bar$new( - format = " Downloading [:bar] :percent eta: :eta", + format = " Downloading [:bar] :percent eta: :eta", total = 100, clear = TRUE, width = 60) # Iterate through arguments used in tts() @@ -122,11 +119,6 @@ ari_spin <- function(images, paragraphs, args <- tts_engine_args args$text <- paragraphs[ii] args$bind_audio <- TRUE - # coqui+ari doesn't work with mp3 - if (tts_engine_args$service == "coqui") { - args$output_format <- "wav" - args$voice <- NULL - } wav <- do.call(tts_engine, args = args) wav <- reduce(wav$wav, bind) wav <- pad_wav(wav, duration = duration[ii]) diff --git a/man/ari_narrate.Rd b/man/ari_narrate.Rd index 6c2cec3..96c5340 100644 --- a/man/ari_narrate.Rd +++ b/man/ari_narrate.Rd @@ -9,8 +9,7 @@ ari_narrate( slides, output = tempfile(fileext = ".mp4"), tts_engine = text2speech::tts, - tts_engine_args = list(service = "coqui", voice = NULL, model_name = - "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet"), + tts_engine_args = coqui_args(), tts_engine_auth = text2speech::tts_auth, capture_method = c("vectorized", "iterative"), subtitles = FALSE, From 674c9cfc88ec4506f68b7e2bab7513c082f27d71 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 11:53:16 -0700 Subject: [PATCH 37/44] Made final changes to code --- R/ari_spin.R | 26 ++++++++++++-------------- R/ari_stitch.R | 2 +- R/coqui_args.R | 10 +++++----- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/R/ari_spin.R b/R/ari_spin.R index cae1e19..5bd4eb1 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -132,23 +132,21 @@ ari_spin <- function(images, paragraphs, sub_file <- paste0(file_path_sans_ext(output), ".srt") ari_subtitles(paragraphs, wave_objects, sub_file) } + # Create a video from images and audio res <- ari_stitch(images, wave_objects, output) - # Collect output - args <- list() - cleanup <- args$cleanup - if (is.null(cleanup)) { - cleanup <- TRUE - } - if (!cleanup) { - attr(res, "wavs") <- wave_objects - } - attr(res, "voice") <- tts_engine_args$voice - if (subtitles) { - attr(res, "subtitles") <- sub_file + # Path to output + output_path <- attr(res, "outfile") + + # Check if larger than 0 bytes + output_size <- file.info(output_path)$size + + if (output_size > 0) { + return(output_path) + } else { + stop("File does not exist. Something went wrong.") } - attr(res, "service") <- tts_engine_args$service - return(res) + } #' @rdname ari_spin diff --git a/R/ari_stitch.R b/R/ari_stitch.R index 06f9ef1..8241086 100644 --- a/R/ari_stitch.R +++ b/R/ari_stitch.R @@ -71,7 +71,7 @@ #' } #' } ari_stitch <- function(images, audio, - output = tempfile(fileext = ".mp4"), + output, verbose = FALSE, cleanup = TRUE, ffmpeg_opts = "", diff --git a/R/coqui_args.R b/R/coqui_args.R index 7ba5902..645f1e0 100644 --- a/R/coqui_args.R +++ b/R/coqui_args.R @@ -9,9 +9,9 @@ #' @examples #' coqui_args(model_name = "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet") coqui_args <- function(model_name = "jenny", vocoder_name = "jenny") { -list(service = "coqui", - voice = NULL, - model_name = model_name, - vocoder_name = vocoder_name, - output_format = "wav") + list(service = "coqui", + voice = NULL, + model_name = model_name, + vocoder_name = vocoder_name, + output_format = "wav") } From 21be0aee9408e6d932cd35c50cc336a03835dd38 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:36:27 -0700 Subject: [PATCH 38/44] Passed R CMD CHECK --- R/ari_narrate.R | 19 ++++++----------- R/ari_spin.R | 28 ++++++++----------------- R/coqui_args.R | 3 +++ man/ari_narrate.Rd | 16 +++++++-------- man/ari_spin.Rd | 37 +++++++++++----------------------- man/ari_stitch.Rd | 2 +- man/coqui_args.Rd | 3 +++ tests/testthat/test_ari_spin.R | 19 ++++++++++------- 8 files changed, 52 insertions(+), 75 deletions(-) diff --git a/R/ari_narrate.R b/R/ari_narrate.R index 0044189..413bd92 100644 --- a/R/ari_narrate.R +++ b/R/ari_narrate.R @@ -12,12 +12,9 @@ #' \code{\link[rmarkdown]{rmarkdown}}, \code{xaringan}, or a #' similar package. #' @param output The path to the video file which will be created. -#' @param voice The voice you want to use. See -#' \code{\link[text2speech]{tts_voices}} for more information -#' about what voices are available. -#' @param service speech synthesis service to use, -#' passed to \code{\link[text2speech]{tts}}. -#' Either \code{"amazon"} or \code{"google"}. +#' @param tts_engine The desired engine for converting text-to-speech +#' @param tts_engine_args List of parameters provided to the designated text-to-speech engine +#' @param tts_engine_auth Authentication required for the designated text-to-speech engine #' @param capture_method Either \code{"vectorized"} or \code{"iterative"}. #' The vectorized mode is faster though it can cause screens to repeat. If #' making a video from an \code{\link[rmarkdown]{ioslides_presentation}} @@ -26,13 +23,13 @@ #' default value is \code{FALSE}. If \code{TRUE} then a file with the same name #' as the \code{output} argument will be created, but with the file extension #' \code{.srt}. -#' @param ... Arguments that will be passed to \code{\link[webshot]{webshot}}. #' @param verbose print diagnostic messages. If > 1, then more are printed #' @param audio_codec The audio encoder for the splicing. If this #' fails, try \code{copy}. #' @param video_codec The video encoder for the splicing. If this #' fails, see \code{ffmpeg -codecs} #' @param cleanup If \code{TRUE}, interim files are deleted +#' @param ... Arguments that will be passed to \code{\link[webshot]{webshot}}. #' #' @return The output from \code{\link{ari_spin}} #' @importFrom xml2 read_html @@ -47,8 +44,7 @@ #' ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), #' system.file("test", "ari_intro.html", package = "ari")) #' } -ari_narrate <- function(script, slides, - output = tempfile(fileext = ".mp4"), +ari_narrate <- function(script, slides, output, tts_engine = text2speech::tts, tts_engine_args = coqui_args(), tts_engine_auth = text2speech::tts_auth, @@ -142,8 +138,5 @@ ari_narrate <- function(script, slides, tts_engine = tts_engine, tts_engine_args = tts_engine_args, tts_engine_auth = tts_engine_auth, - subtitles = subtitles, - verbose = verbose, - cleanup = cleanup - ) + subtitles = subtitles) } diff --git a/R/ari_spin.R b/R/ari_spin.R index 5bd4eb1..d97c949 100644 --- a/R/ari_spin.R +++ b/R/ari_spin.R @@ -10,24 +10,15 @@ #' @param images A vector of paths to images. #' @param paragraphs A vector strings that will be spoken by Amazon Polly. #' @param output A path to the video file which will be created. -#' @param voice The voice you want to use. See -#' \code{\link[text2speech]{tts_voices}} for more information -#' about what voices are available. -#' @param model_name (Coqui TTS only) Deep Learning model for Text-to-Speech -#' Conversion -#' @param vocoder_name (Coqui TTS only) Voice coder used for speech coding and -#' transmission -#' @param service Speech synthesis service to use, -#' passed to \code{\link[text2speech]{tts}}, -#' Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}. +#' @param tts_engine The desired engine for converting text-to-speech +#' @param tts_engine_args List of parameters provided to the designated text-to-speech engine +#' @param tts_engine_auth Authentication required for the designated text-to-speech engine #' @param subtitles Should a \code{.srt} file be created with subtitles? The #' default value is \code{FALSE}. If \code{TRUE} then a file with the same name #' as the \code{output} argument will be created, but with the file extension #' \code{.srt}. #' @param duration a vector of numeric durations for each audio #' track. See \code{\link{pad_wav}} -#' @param ... Additional arguments to voice_engine -#' @param tts_args list of arguments to pass to \code{\link{tts}} #' @param key_or_json_file access key or JSON file to pass to #' \code{\link{tts_auth}} for authorization #' @@ -49,19 +40,16 @@ #' "Welcome to my very interesting lecture.", #' "Here are some fantastic equations I came up with." #' ) -#' ari_spin(slides, sentences) +#' ari_spin(slides, sentences, output = "test.mp4", +#' tts_engine_args = coqui_args(model_name = "tacotron2-DDC_ph", +#' vocoder_name = "ljspeech/univnet")) #' } #' -ari_spin <- function(images, paragraphs, - output, +ari_spin <- function(images, paragraphs, output, tts_engine = text2speech::tts, tts_engine_args = coqui_args(), tts_engine_auth = text2speech::tts_auth, - subtitles = FALSE, - duration = NULL, - key_or_json_file = NULL, - verbose = FALSE, - cleanup = TRUE) { + subtitles = FALSE, duration = NULL, key_or_json_file = NULL) { # Check for ffmpeg ffmpeg_exec() # Argument checks diff --git a/R/coqui_args.R b/R/coqui_args.R index 645f1e0..db3356c 100644 --- a/R/coqui_args.R +++ b/R/coqui_args.R @@ -7,7 +7,10 @@ #' @export #' #' @examples +#' # Female Voices: +#' coqui_args(model_name = "jenny", vocoder_name = "jenny") #' coqui_args(model_name = "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet") +#' coqui_args <- function(model_name = "jenny", vocoder_name = "jenny") { list(service = "coqui", voice = NULL, diff --git a/man/ari_narrate.Rd b/man/ari_narrate.Rd index 96c5340..1642be2 100644 --- a/man/ari_narrate.Rd +++ b/man/ari_narrate.Rd @@ -7,7 +7,7 @@ ari_narrate( script, slides, - output = tempfile(fileext = ".mp4"), + output, tts_engine = text2speech::tts, tts_engine_args = coqui_args(), tts_engine_auth = text2speech::tts_auth, @@ -31,6 +31,12 @@ similar package.} \item{output}{The path to the video file which will be created.} +\item{tts_engine}{The desired engine for converting text-to-speech} + +\item{tts_engine_args}{List of parameters provided to the designated text-to-speech engine} + +\item{tts_engine_auth}{Authentication required for the designated text-to-speech engine} + \item{capture_method}{Either \code{"vectorized"} or \code{"iterative"}. The vectorized mode is faster though it can cause screens to repeat. If making a video from an \code{\link[rmarkdown]{ioslides_presentation}} @@ -52,14 +58,6 @@ fails, see \code{ffmpeg -codecs}} \item{cleanup}{If \code{TRUE}, interim files are deleted} \item{...}{Arguments that will be passed to \code{\link[webshot]{webshot}}.} - -\item{voice}{The voice you want to use. See -\code{\link[text2speech]{tts_voices}} for more information -about what voices are available.} - -\item{service}{speech synthesis service to use, -passed to \code{\link[text2speech]{tts}}. -Either \code{"amazon"} or \code{"google"}.} } \value{ The output from \code{\link{ari_spin}} diff --git a/man/ari_spin.Rd b/man/ari_spin.Rd index 3bc6786..9e68733 100644 --- a/man/ari_spin.Rd +++ b/man/ari_spin.Rd @@ -8,16 +8,13 @@ ari_spin( images, paragraphs, - output = tempfile(fileext = ".mp4"), + output, tts_engine = text2speech::tts, - tts_engine_args = list(service = "coqui", voice = NULL, model_name = - "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet"), + tts_engine_args = coqui_args(), tts_engine_auth = text2speech::tts_auth, subtitles = FALSE, duration = NULL, - key_or_json_file = NULL, - verbose = FALSE, - cleanup = TRUE + key_or_json_file = NULL ) have_polly() @@ -29,6 +26,12 @@ have_polly() \item{output}{A path to the video file which will be created.} +\item{tts_engine}{The desired engine for converting text-to-speech} + +\item{tts_engine_args}{List of parameters provided to the designated text-to-speech engine} + +\item{tts_engine_auth}{Authentication required for the designated text-to-speech engine} + \item{subtitles}{Should a \code{.srt} file be created with subtitles? The default value is \code{FALSE}. If \code{TRUE} then a file with the same name as the \code{output} argument will be created, but with the file extension @@ -39,24 +42,6 @@ track. See \code{\link{pad_wav}}} \item{key_or_json_file}{access key or JSON file to pass to \code{\link{tts_auth}} for authorization} - -\item{voice}{The voice you want to use. See -\code{\link[text2speech]{tts_voices}} for more information -about what voices are available.} - -\item{model_name}{(Coqui TTS only) Deep Learning model for Text-to-Speech -Conversion} - -\item{vocoder_name}{(Coqui TTS only) Voice coder used for speech coding and -transmission} - -\item{service}{Speech synthesis service to use, -passed to \code{\link[text2speech]{tts}}, -Either \code{"amazon"}, \code{"microsoft"}, or \code{"google"}.} - -\item{...}{Additional arguments to voice_engine} - -\item{tts_args}{list of arguments to pass to \code{\link{tts}}} } \value{ The output from \code{\link{ari_stitch}} @@ -79,7 +64,9 @@ sentences <- c( "Welcome to my very interesting lecture.", "Here are some fantastic equations I came up with." ) -ari_spin(slides, sentences) +ari_spin(slides, sentences, output = "test.mp4", + tts_engine_args = coqui_args(model_name = "tacotron2-DDC_ph", + vocoder_name = "ljspeech/univnet")) } } diff --git a/man/ari_stitch.Rd b/man/ari_stitch.Rd index 8dc9ad2..7deb92f 100644 --- a/man/ari_stitch.Rd +++ b/man/ari_stitch.Rd @@ -7,7 +7,7 @@ ari_stitch( images, audio, - output = tempfile(fileext = ".mp4"), + output, verbose = FALSE, cleanup = TRUE, ffmpeg_opts = "", diff --git a/man/coqui_args.Rd b/man/coqui_args.Rd index 68ea724..2afc330 100644 --- a/man/coqui_args.Rd +++ b/man/coqui_args.Rd @@ -18,5 +18,8 @@ List of arguments List of arguments to the Coqui text-to-speech engine } \examples{ +# Female Voices: +coqui_args(model_name = "jenny", vocoder_name = "jenny") coqui_args(model_name = "tacotron2-DDC_ph", vocoder_name = "ljspeech/univnet") + } diff --git a/tests/testthat/test_ari_spin.R b/tests/testthat/test_ari_spin.R index d43f261..accf3cb 100644 --- a/tests/testthat/test_ari_spin.R +++ b/tests/testthat/test_ari_spin.R @@ -8,7 +8,7 @@ skip_spin <- function(){ video <- file.path(tempdir(), "output.mp4") -qmm <- c("I will now perform the Mercutio's speech from Shakespeare's Romeo and Juliet.", +qmm <- c("I will now perform the Mercutio's speech from Shakespeare's Romeo and Juliet.", "O, then, I see Queen Mab hath been with you. She is the fairies' midwife, and she comes In shape no bigger than an agate-stone @@ -59,7 +59,7 @@ if (ffmpeg_version_sufficient()) { fdk_enabled = FALSE } else { fdk_enabled = grepl("fdk", res[ res$codec == "aac", "codec_name"]) - } + } } else { fdk_enabled = FALSE } @@ -76,11 +76,16 @@ test_that("Ari can process text with over 1500 characters.", { run_voice = "Joanna" ari_spin( - system.file("test", c("mab1.png", "mab2.png"), package = "ari"), - qmm, output = video, voice = run_voice, - service = "amazon", - audio_codec = audio_codec) - + images = system.file("test", c("mab1.png", "mab2.png"), package = "ari"), + paragraphs = qmm, + output = video, + tts_engine = text2speech::tts, + tts_engine_args = coqui_args(), + tts_engine_auth = text2speech::tts_auth, + subtitles = FALSE, + duration = NULL, + key_or_json_file = NULL) + expect_true(file.size(video) > 50000) }) From b745ff33907d7bbd0a7f564feca8ccd1499ad099 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:41:18 -0700 Subject: [PATCH 39/44] Get rid of default argument for `output_video` --- R/ari_burn_subtitles.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/ari_burn_subtitles.R b/R/ari_burn_subtitles.R index dae1254..9efffee 100644 --- a/R/ari_burn_subtitles.R +++ b/R/ari_burn_subtitles.R @@ -12,16 +12,13 @@ #' #' @return Name of output video #' @export -ari_burn_subtitles <- function(input_video, srt, - output_video = tempfile(fileext = ".mp4"), - verbose = FALSE) { +ari_burn_subtitles <- function(input_video, srt, output_video, verbose = FALSE) { ffmpeg <- ffmpeg_exec(quote = TRUE) if (verbose > 0) { message("Burning in Subtitles") } command <- paste( - ffmpeg, "-y -i", input_video, paste0("-vf subtitles=", srt), - output_video + ffmpeg, "-y -i", input_video, paste0("-vf subtitles=", srt), output_video ) if (verbose > 0) { From cbbeff216e272740b1500d2125e9f32dbc2f6781 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 12:46:18 -0700 Subject: [PATCH 40/44] Create `set_ffmpeg_args()` --- R/ari_stitch.R | 14 +------------- R/set_ffmpeg_args.R | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) create mode 100644 R/set_ffmpeg_args.R diff --git a/R/ari_stitch.R b/R/ari_stitch.R index 5b3c2d4..8cf3da8 100644 --- a/R/ari_stitch.R +++ b/R/ari_stitch.R @@ -74,19 +74,7 @@ ari_stitch <- function(images, audio, output = tempfile(fileext = ".mp4"), verbose = FALSE, cleanup = TRUE, - ffmpeg_args = list(frames_per_second = NULL, - video_filters = NULL, - divisible_height = TRUE, - audio_codec = get_audio_codec(), - video_codec = get_video_codec(), - deinterlace = FALSE, - ffmpeg_opts = "", - audio_bitrate = NULL, - video_bitrate = NULL, - video_sync_method = "2", - pixel_format = "yuv420p", - fast_start = FALSE, - stereo_audio = TRUE), + ffmpeg_args = set_ffmpeg_args(), duration = NULL, check_inputs = TRUE) { # File path processing diff --git a/R/set_ffmpeg_args.R b/R/set_ffmpeg_args.R new file mode 100644 index 0000000..3dba1f3 --- /dev/null +++ b/R/set_ffmpeg_args.R @@ -0,0 +1,15 @@ +set_ffmpeg_args <- function() { + list(frames_per_second = NULL, + video_filters = NULL, + divisible_height = TRUE, + audio_codec = get_audio_codec(), + video_codec = get_video_codec(), + deinterlace = FALSE, + ffmpeg_opts = "", + audio_bitrate = NULL, + video_bitrate = NULL, + video_sync_method = "2", + pixel_format = "yuv420p", + fast_start = FALSE, + stereo_audio = TRUE) +} From 1b0b1eb7cb6bb44fa24449a64bf895bbe297a4d2 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:15:19 -0700 Subject: [PATCH 41/44] Build ffmpeg command to supply to `system()` --- R/ari_stitch.R | 74 +------------------------------- R/build_ffmpeg_command.R | 91 ++++++++++++++++++++++++++++++++++++++++ R/set_ffmpeg_args.R | 15 ------- 3 files changed, 93 insertions(+), 87 deletions(-) create mode 100644 R/build_ffmpeg_command.R delete mode 100644 R/set_ffmpeg_args.R diff --git a/R/ari_stitch.R b/R/ari_stitch.R index 8cf3da8..3ce7adc 100644 --- a/R/ari_stitch.R +++ b/R/ari_stitch.R @@ -171,79 +171,9 @@ ari_stitch <- function(images, audio, # define separator to be used on Windows input_txt_path <- normalizePath(input_txt_path, winslash = "/") - # needed for users as per - # https://superuser.com/questions/718027/ - # ffmpeg-concat-doesnt-work-with-absolute-path - # input_txt_path = normalizePath(input_txt_path, winslash = "\\") + # Build ffmpeg command to input into system() + command <- build_ffmpeg_command(input_txt_path, wav_path, ffmpeg_args, output) - - # Start organizing ffmpeg arguments here - ffmpeg <- ffmpeg_exec(quote = TRUE) - # Frames per second (fps) - if (!is.null(ffmpeg_args$frames_per_second)) { - video_filters <- c(ffmpeg_args$video_filters, paste0("fps=", ffmpeg_args$frames_per_second)) - } else { - video_filters <- c(ffmpeg_args$video_filters, "fps=5") - } - # Divisible height - if (ffmpeg_args$divisible_height) { - video_filters <- c(video_filters, '"scale=trunc(iw/2)*2:trunc(ih/2)*2"') - } - - # workaround for older ffmpeg - # https://stackoverflow.com/questions/32931685/ - # the-encoder-aac-is-experimental-but-experimental-codecs-are-not-enabled - experimental <- FALSE - if (!is.null(ffmpeg_args$audio_codec)) { - if (ffmpeg_args$audio_codec == "aac") { - experimental <- TRUE - } - } - if (ffmpeg_args$deinterlace) { - video_filters <- c(video_filters, "yadif") - } - video_filters <- paste(video_filters, collapse = ",") - video_filters <- paste0("-vf ", video_filters) - - if (any(grepl("-vf", ffmpeg_args$deinterlace))) { - warning("Found video filters in ffmpeg_opts, may not be used correctly!") - } - ffmpeg_opts <- c(video_filters, ffmpeg_args$ffmpeg_opts) - ffmpeg_opts <- paste(ffmpeg_opts, collapse = " ") - - # ffmpeg command - command <- paste( - ffmpeg, "-y", - "-f concat -safe 0 -i", shQuote(input_txt_path), - "-i", shQuote(wav_path), - ifelse(!is.null(ffmpeg_args$video_codec), paste("-c:v", ffmpeg_args$video_codec), - "" - ), - ifelse(!is.null(ffmpeg_args$audio_codec), paste("-c:a", ffmpeg_args$audio_codec), - "" - ), - ifelse(ffmpeg_args$stereo_audio, "-ac 2", ""), - ifelse(!is.null(ffmpeg_args$audio_bitrate), paste("-b:a", ffmpeg_args$audio_bitrate), - "" - ), - ifelse(!is.null(ffmpeg_args$video_bitrate), paste("-b:v", ffmpeg_args$video_bitrate), - "" - ), - # ifelse(deinterlace, "-vf yadif", ""), - ifelse(!is.null(ffmpeg_args$video_sync_method), paste("-fps_mode", "auto"), - "" - ), - ifelse(!is.null(ffmpeg_args$pixel_format), paste("-pix_fmt", ffmpeg_args$pixel_format), - "" - ), - ifelse(ffmpeg_args$fast_start, "-movflags +faststart", ""), - ffmpeg_opts, - ifelse(!is.null(ffmpeg_args$frames_per_second), paste0("-r ", ffmpeg_args$frames_per_second), ""), - ifelse(experimental, "-strict experimental", ""), - "-max_muxing_queue_size 9999", - "-threads 2", - shQuote(output) - ) if (verbose > 0) { message(command) } diff --git a/R/build_ffmpeg_command.R b/R/build_ffmpeg_command.R new file mode 100644 index 0000000..9dac34e --- /dev/null +++ b/R/build_ffmpeg_command.R @@ -0,0 +1,91 @@ +# Build ffmpeg command to input into system() +build_ffmpeg_command <- function(input_txt_path, wav_path, ffmpeg_args, output) { + # Path to ffmpeg + ffmpeg <- ffmpeg_exec(quote = TRUE) + + # Frames per second (fps) + if (!is.null(ffmpeg_args$frames_per_second)) { + video_filters <- c(ffmpeg_args$video_filters, paste0("fps=", ffmpeg_args$frames_per_second)) + } else { + video_filters <- c(ffmpeg_args$video_filters, "fps=5") + } + + # Divisible height + if (ffmpeg_args$divisible_height) { + video_filters <- c(video_filters, '"scale=trunc(iw/2)*2:trunc(ih/2)*2"') + } + + # workaround for older ffmpeg + # https://stackoverflow.com/questions/32931685/ + # the-encoder-aac-is-experimental-but-experimental-codecs-are-not-enabled + experimental <- FALSE + if (!is.null(ffmpeg_args$audio_codec)) { + if (ffmpeg_args$audio_codec == "aac") { + experimental <- TRUE + } + } + if (ffmpeg_args$deinterlace) { + video_filters <- c(video_filters, "yadif") + } + video_filters <- paste(video_filters, collapse = ",") + video_filters <- paste0("-vf ", video_filters) + + if (any(grepl("-vf", ffmpeg_args$deinterlace))) { + warning("Found video filters in ffmpeg_opts, may not be used correctly!") + } + ffmpeg_opts <- c(video_filters, ffmpeg_args$ffmpeg_opts) + ffmpeg_opts <- paste(ffmpeg_opts, collapse = " ") + + # ffmpeg command + command <- paste( + ffmpeg, "-y", + "-f concat -safe 0 -i", shQuote(input_txt_path), + "-i", shQuote(wav_path), + ifelse(!is.null(ffmpeg_args$video_codec), paste("-c:v", ffmpeg_args$video_codec), + "" + ), + ifelse(!is.null(ffmpeg_args$audio_codec), paste("-c:a", ffmpeg_args$audio_codec), + "" + ), + ifelse(ffmpeg_args$stereo_audio, "-ac 2", ""), + ifelse(!is.null(ffmpeg_args$audio_bitrate), paste("-b:a", ffmpeg_args$audio_bitrate), + "" + ), + ifelse(!is.null(ffmpeg_args$video_bitrate), paste("-b:v", ffmpeg_args$video_bitrate), + "" + ), + # ifelse(deinterlace, "-vf yadif", ""), + ifelse(!is.null(ffmpeg_args$video_sync_method), paste("-fps_mode", "auto"), + "" + ), + ifelse(!is.null(ffmpeg_args$pixel_format), paste("-pix_fmt", ffmpeg_args$pixel_format), + "" + ), + ifelse(ffmpeg_args$fast_start, "-movflags +faststart", ""), + ffmpeg_opts, + ifelse(!is.null(ffmpeg_args$frames_per_second), paste0("-r ", ffmpeg_args$frames_per_second), ""), + ifelse(experimental, "-strict experimental", ""), + "-max_muxing_queue_size 9999", + "-threads 2", + shQuote(output) + ) + + command +} + +# Default ffmpeg arguments +set_ffmpeg_args <- function() { + list(frames_per_second = NULL, + video_filters = NULL, + divisible_height = TRUE, + audio_codec = get_audio_codec(), + video_codec = get_video_codec(), + deinterlace = FALSE, + ffmpeg_opts = "", + audio_bitrate = NULL, + video_bitrate = NULL, + video_sync_method = "2", + pixel_format = "yuv420p", + fast_start = FALSE, + stereo_audio = TRUE) +} diff --git a/R/set_ffmpeg_args.R b/R/set_ffmpeg_args.R deleted file mode 100644 index 3dba1f3..0000000 --- a/R/set_ffmpeg_args.R +++ /dev/null @@ -1,15 +0,0 @@ -set_ffmpeg_args <- function() { - list(frames_per_second = NULL, - video_filters = NULL, - divisible_height = TRUE, - audio_codec = get_audio_codec(), - video_codec = get_video_codec(), - deinterlace = FALSE, - ffmpeg_opts = "", - audio_bitrate = NULL, - video_bitrate = NULL, - video_sync_method = "2", - pixel_format = "yuv420p", - fast_start = FALSE, - stereo_audio = TRUE) -} From 42ba661c6de103e07f7596b49e8c694bd7d3f728 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:36:28 -0700 Subject: [PATCH 42/44] Documentation stuff --- man/ari_burn_subtitles.Rd | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/man/ari_burn_subtitles.Rd b/man/ari_burn_subtitles.Rd index 92a7831..91b2ab6 100644 --- a/man/ari_burn_subtitles.Rd +++ b/man/ari_burn_subtitles.Rd @@ -4,12 +4,7 @@ \alias{ari_burn_subtitles} \title{Burn Subtitles into a video} \usage{ -ari_burn_subtitles( - input_video, - srt, - output_video = tempfile(fileext = ".mp4"), - verbose = FALSE -) +ari_burn_subtitles(input_video, srt, output_video, verbose = FALSE) } \arguments{ \item{input_video}{Path to video in \code{mp4} format} From e14f95926c33449c6f5be196aa9303978d013cd1 Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:55:32 -0700 Subject: [PATCH 43/44] More Documentation stuff --- R/ari_narrate.R | 3 ++- man/ari_narrate.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/ari_narrate.R b/R/ari_narrate.R index bdd9243..c7b1064 100644 --- a/R/ari_narrate.R +++ b/R/ari_narrate.R @@ -36,7 +36,8 @@ #' @examples #' \dontrun{ #' ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), -#' system.file("test", "ari_intro.html", package = "ari")) +#' system.file("test", "ari_intro.html", package = "ari"), +#' output = "test.mp4") #' } ari_narrate <- function(script, slides, output, tts_engine = text2speech::tts, diff --git a/man/ari_narrate.Rd b/man/ari_narrate.Rd index a84b5f1..3473686 100644 --- a/man/ari_narrate.Rd +++ b/man/ari_narrate.Rd @@ -61,6 +61,7 @@ slides created with \code{\link[rmarkdown]{rmarkdown}} or a similar package. \examples{ \dontrun{ ari_narrate(system.file("test", "ari_intro_script.md", package = "ari"), - system.file("test", "ari_intro.html", package = "ari")) + system.file("test", "ari_intro.html", package = "ari"), + output = "test.mp4") } } From d286fde1e3ade562cf2bab37fb651363c9edfa3b Mon Sep 17 00:00:00 2001 From: Howard Baek <50791792+howardbaek@users.noreply.github.com> Date: Mon, 16 Oct 2023 17:16:35 -0700 Subject: [PATCH 44/44] Fix documentation --- R/ari_stitch.R | 29 +++------------------- man/ari_stitch.Rd | 41 +++----------------------------- tests/testthat/test_ari_stitch.R | 21 ++++++++-------- 3 files changed, 16 insertions(+), 75 deletions(-) diff --git a/R/ari_stitch.R b/R/ari_stitch.R index a6887c9..f06317b 100644 --- a/R/ari_stitch.R +++ b/R/ari_stitch.R @@ -17,35 +17,12 @@ #' #' @param images A vector of paths to images. #' @param audio A list of \code{Wave}s from tuneR. -#' @param duration a vector of numeric durations for each audio -#' track. See \code{\link{pad_wav}} #' @param output A path to the video file which will be created. #' @param verbose print diagnostic messages. If > 1, then more are printed #' @param cleanup If \code{TRUE}, interim files are deleted -#' @param ffmpeg_opts additional options to send to \code{ffmpeg}. -#' This is an advanced option, use at your own risk -#' @param divisible_height Make height divisible by 2, which may -#' be required if getting "height not divisible by 2" error. -#' @param audio_codec The audio encoder for the splicing. If this -#' fails, try \code{copy}. -#' @param video_codec The video encoder for the splicing. If this -#' fails, see \code{ffmpeg -codecs} -#' @param audio_bitrate Bit rate for audio. Passed to \code{-b:a}. -#' @param video_bitrate Bit rate for video. Passed to \code{-b:v}. -#' @param video_sync_method Video sync method. Should be -#' "auto" or `"vfr"` or a numeric. See \url{https://ffmpeg.org/ffmpeg.html}. -#' @param pixel_format pixel format to encode for `ffmpeg`. -#' @param fast_start Adding `faststart` flags for YouTube and other sites, -#' see \url{https://trac.ffmpeg.org/wiki/Encode/YouTube} -#' @param deinterlace should the video be de-interlaced, -#' see \url{https://ffmpeg.org/ffmpeg-filters.html}, generally for -#' YouTube -#' @param frames_per_second frames per second of the video, should -#' be an integer -#' @param stereo_audio should the audio be forced to stereo, -#' corresponds to `-ac 2` -#' @param video_filters any options that are passed to \code{-vf} arguments -#' for \code{ffmpeg} +#' @param ffmpeg_args Ffmpeg arguments set by \code{set_ffmpeg_args()} +#' @param duration a vector of numeric durations for each audio +#' track. See \code{\link{pad_wav}} #' @param check_inputs Should the inputs be checked? Almost always should #' be \code{TRUE}, but may be useful if trying to do customized stuff. #' @return A logical value, with the attribute \code{outfile} for the diff --git a/man/ari_stitch.Rd b/man/ari_stitch.Rd index 9314291..3ec8735 100644 --- a/man/ari_stitch.Rd +++ b/man/ari_stitch.Rd @@ -26,48 +26,13 @@ ari_stitch( \item{cleanup}{If \code{TRUE}, interim files are deleted} +\item{ffmpeg_args}{Ffmpeg arguments set by \code{set_ffmpeg_args()}} + \item{duration}{a vector of numeric durations for each audio -track. See \code{\link{pad_wav}}} +track. See \code{\link{pad_wav}}} \item{check_inputs}{Should the inputs be checked? Almost always should be \code{TRUE}, but may be useful if trying to do customized stuff.} - -\item{ffmpeg_opts}{additional options to send to \code{ffmpeg}. -This is an advanced option, use at your own risk} - -\item{divisible_height}{Make height divisible by 2, which may -be required if getting "height not divisible by 2" error.} - -\item{audio_codec}{The audio encoder for the splicing. If this -fails, try \code{copy}.} - -\item{video_codec}{The video encoder for the splicing. If this -fails, see \code{ffmpeg -codecs}} - -\item{audio_bitrate}{Bit rate for audio. Passed to \code{-b:a}.} - -\item{video_bitrate}{Bit rate for video. Passed to \code{-b:v}.} - -\item{video_sync_method}{Video sync method. Should be -"auto" or `"vfr"` or a numeric. See \url{https://ffmpeg.org/ffmpeg.html}.} - -\item{pixel_format}{pixel format to encode for `ffmpeg`.} - -\item{fast_start}{Adding `faststart` flags for YouTube and other sites, -see \url{https://trac.ffmpeg.org/wiki/Encode/YouTube}} - -\item{deinterlace}{should the video be de-interlaced, -see \url{https://ffmpeg.org/ffmpeg-filters.html}, generally for -YouTube} - -\item{frames_per_second}{frames per second of the video, should -be an integer} - -\item{stereo_audio}{should the audio be forced to stereo, -corresponds to `-ac 2`} - -\item{video_filters}{any options that are passed to \code{-vf} arguments -for \code{ffmpeg}} } \value{ A logical value, with the attribute \code{outfile} for the diff --git a/tests/testthat/test_ari_stitch.R b/tests/testthat/test_ari_stitch.R index a37c2c9..d7f6902 100644 --- a/tests/testthat/test_ari_stitch.R +++ b/tests/testthat/test_ari_stitch.R @@ -6,7 +6,7 @@ if (ffmpeg_version_sufficient()) { fdk_enabled = FALSE } else { fdk_enabled = grepl("fdk", res[ res$codec == "aac", "codec_name"]) - } + } } else { fdk_enabled = FALSE } @@ -20,25 +20,24 @@ test_that("ari_stitch() can combine audio and images into a video", { skip_on_cran() # should work without polly temp_dir <- tempdir() - + for (i in 1:3) { jpeg(file.path(temp_dir, paste0("plot", i, ".jpg"))) plot(1:5 * i, 1:5, main = i) dev.off() } - + sound <- replicate( - 3, - tuneR::Wave(round(rnorm(88200, 127, 20)), + 3, + tuneR::Wave(round(rnorm(88200, 127, 20)), samp.rate = 44100, bit = 16)) - + graphs <- file.path(temp_dir, paste0("plot", 1:3, ".jpg")) video <- file.path(temp_dir, "output.mp4") - + on.exit(walk(c(graphs, video), unlink, force = TRUE), add = TRUE) - - ari_stitch(graphs, sound, output = video, - audio_codec = audio_codec, verbose = 2) - + + ari_stitch(graphs, sound, output = video) + expect_true(file.size(video) > 50000) })