PennChopMicrobiomeProgram · kylebittinger · Jun 27, 2024 · Jun 20, 2024 · Jun 20, 2024 · Jun 21, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,3 +6,5 @@
 ^codecov\.yml$
 ^data-raw$
 ^R/abx_index_old\.R$
+^\.github$
+^.*\.Rproj$
diff --git a/.github/.gitignore b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -0,0 +1,52 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:  # triggers: pushes to, and pull requests against, main or master
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: R-CMD-check  # checks the package on every OS / R-version pair in the matrix below
+
+permissions: read-all  # the workflow token is limited to read access
+
+jobs:
+  R-CMD-check:
+    runs-on: ${{ matrix.config.os }}
+
+    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
+
+    strategy:
+      fail-fast: false  # keep the other matrix jobs running when one of them fails
+      matrix:
+        config:  # one job per entry
+          - {os: macos-latest,   r: 'release'}
+          - {os: windows-latest, r: 'release'}
+          - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}  # only entry that sets http-user-agent (forwarded to setup-r); NOTE(review): value inherited from the r-lib template - confirm it is still needed
+          - {os: ubuntu-latest,   r: 'release'}
+          - {os: ubuntu-latest,   r: 'oldrel-1'}
+
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}  # workflow token exposed as GITHUB_PAT; presumably used for authenticated GitHub downloads - confirm
+      R_KEEP_PKG_SOURCE: yes  # presumably keeps source references in installed packages - confirm
+
+    steps:
+      - uses: actions/checkout@v4  # fetch the repository
+
+      - uses: r-lib/actions/setup-pandoc@v2  # presumably needed to build the rmarkdown vignettes - confirm
+
+      - uses: r-lib/actions/setup-r@v2  # install the R version selected by the matrix entry
+        with:
+          r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}  # empty for entries that do not define it
+          use-public-rspm: true  # presumably enables the public Posit package manager repository - confirm
+
+      - uses: r-lib/actions/setup-r-dependencies@v2  # install the package's dependencies plus rcmdcheck
+        with:
+          extra-packages: any::rcmdcheck
+          needs: check  # NOTE(review): looks like it selects the Config/Needs/check dependency group - confirm
+
+      - uses: r-lib/actions/check-r-package@v2  # run the package check itself
+        with:
+          upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'  # R vector literal of build flags: skip the PDF manual, compact vignettes with gs+qpdf
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -0,0 +1,61 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:  # triggers: pushes to, and pull requests against, main or master
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: test-coverage  # measures test coverage with covr and uploads the report to Codecov
+
+permissions: read-all  # the workflow token is limited to read access
+
+jobs:
+  test-coverage:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}  # workflow token exposed as GITHUB_PAT; presumably used for authenticated GitHub downloads - confirm
+
+    steps:
+      - uses: actions/checkout@v4  # fetch the repository
+
+      - uses: r-lib/actions/setup-r@v2  # no r-version given, so the action's default R version is used
+        with:
+          use-public-rspm: true  # presumably enables the public Posit package manager repository - confirm
+
+      - uses: r-lib/actions/setup-r-dependencies@v2  # install the package's dependencies plus covr and xml2
+        with:
+          extra-packages: any::covr, any::xml2
+          needs: coverage  # NOTE(review): looks like it selects the Config/Needs/coverage dependency group - confirm
+
+      - name: Test coverage  # R script: compute coverage, installing under RUNNER_TEMP/package, then export it in Cobertura format
+        run: |
+          cov <- covr::package_coverage(
+            quiet = FALSE,
+            clean = FALSE,
+            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
+          )
+          covr::to_cobertura(cov)
+        shell: Rscript {0}  # run the script above with Rscript instead of the default shell
+
+      - uses: codecov/codecov-action@v4  # upload the coverage report
+        with:
+          fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}  # evaluates to true for every event except pull_request
+          file: ./cobertura.xml  # presumably the file written by covr::to_cobertura() above - confirm default output name
+          plugin: noop  # NOTE(review): presumably disables the uploader's own coverage plugins - confirm
+          disable_search: true  # NOTE(review): presumably restricts the upload to the file named above - confirm
+          token: ${{ secrets.CODECOV_TOKEN }}
+
+      - name: Show testthat output  # prints any testthat.Rout* files found under the install path; "|| true" keeps this step from failing
+        if: always()  # runs even when an earlier step failed
+        run: |
+          ## --------------------------------------------------------------------
+          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
+        shell: bash
+
+      - name: Upload test results  # saves the install directory as an artifact for debugging
+        if: failure()  # only when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-test-failures
+          path: ${{ runner.temp }}/package  # same directory that install_path points at in the coverage step
diff --git a/.travis.yml b/.travis.yml
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,16 +3,19 @@ Title: Modeling the Susceptibility of a Bacterial Community to Antibiotics
 Version: 0.0.1.0
 Author: Vincent Tu <tuv@email.chop.edu>
 Maintainer: Vincent Tu <tuv@email.chop.edu>
-Description: This package calculates an index for a given bacterial community's susceptibility to the specified antibiotics.
+Description: Calculates an index for a given bacterial community's susceptibility to specified antibiotics.
 License: GPL-3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.2
+RoxygenNote: 7.2.3
 Suggests:
     knitr,
     rmarkdown,
     covr,
     testthat
-Depends: 
+Depends:
     R (>= 2.10)
+Imports:
+    whatbacteria
+Remotes: PennChopMicrobiomeProgram/whatbacteria
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,3 +14,4 @@ export(mirix_metronidazole)
 export(mirix_vancomycin)
 export(phenotype_susceptibility)
 export(predict_abundance)
+import(whatbacteria)
diff --git a/R/abx_index.R b/R/abx_index.R
@@ -24,8 +24,8 @@ NULL
 mirix_vancomycin <- function(abundance,
                              lineage,
                              replace_zero = 1e-4,
-                             antibiotic_db = taxon_susceptibility,
-                             phenotype_db = taxon_phenotypes) {
+                             antibiotic_db = whatbacteria::taxon_susceptibility,
+                             phenotype_db = whatbacteria::taxon_phenotypes) {
   susceptibility <- antibiotic_susceptibility_vancomycin(
     lineage, antibiotic_db, phenotype_db)
   mirix(abundance, susceptibility, replace_zero)
@@ -36,7 +36,7 @@ mirix_vancomycin <- function(abundance,
 mirix_doxycycline <- function(abundance,
                                lineage,
                                replace_zero = 1e-4,
-                               antibiotic_db = taxon_susceptibility) {
+                               antibiotic_db = whatbacteria::taxon_susceptibility) {
   susceptibility <- antibiotic_susceptibility_tetracycline(
     lineage, antibiotic_db)
   mirix(abundance, susceptibility, replace_zero)
@@ -47,7 +47,7 @@ mirix_doxycycline <- function(abundance,
 mirix_amoxicillin <- function(abundance,
                              lineage,
                              replace_zero = 1e-4,
-                             antibiotic_db = taxon_susceptibility) {
+                             antibiotic_db = whatbacteria::taxon_susceptibility) {
   susceptibility <- antibiotic_susceptibility_penicillin(
     lineage, antibiotic_db)
   mirix(abundance, susceptibility, replace_zero)
@@ -58,7 +58,7 @@ mirix_amoxicillin <- function(abundance,
 mirix_metronidazole <- function(abundance,
                             lineage,
                             replace_zero = 1e-4,
-                            phenotype_db = taxon_phenotypes) {
+                            phenotype_db = whatbacteria::taxon_phenotypes) {
   susceptibility <- phenotype_susceptibility(
     lineage = lineage,
     phenotype = "aerobic_status",
@@ -75,7 +75,7 @@ mirix_metronidazole <- function(abundance,
 mirix_ciprofloxacin <- function(abundance,
                           lineage,
                           replace_zero = 1e-4,
-                          phenotype_db = taxon_phenotypes) {
+                          phenotype_db = whatbacteria::taxon_phenotypes) {
   susceptibility <- phenotype_susceptibility(
     lineage = lineage,
     phenotype = "aerobic_status",
@@ -92,8 +92,8 @@ mirix_ciprofloxacin <- function(abundance,
 mirix_gentamicin <- function(abundance,
                                  lineage,
                                  replace_zero = 1e-4,
-                                 antibiotic_db = taxon_susceptibility,
-                                 phenotype_db = taxon_phenotypes) {
+                                 antibiotic_db = whatbacteria::taxon_susceptibility,
+                                 phenotype_db = whatbacteria::taxon_phenotypes) {
   susceptibility <- antibiotic_susceptibility_aminoglycoside(
     lineage, antibiotic_db, phenotype_db)
   mirix(abundance, susceptibility, replace_zero)
@@ -111,6 +111,7 @@ mirix_gentamicin <- function(abundance,
 #'   0.5 is typical. For relative abundances, a number that is slightly lower
 #'   than the lowest relative abundance will work.
 #'
+#' @import whatbacteria
 #' @return The MiRIx value
 #' @export
 mirix <- function (abundance, susceptibility, replace_zero = 1e-4) {

diff --git a/R/data.R b/R/data.R
@@ -1,31 +1,3 @@
-#' Gram stain and aerobic status of bacterial taxa
-#' @format A data frame with the following columns:
-#' \describe{
-#'   \item{taxon}{The name of the taxon}
-#'   \item{rank}{The rank of the taxon}
-#'   \item{aerobic_status}{
-#'     The aerobic status. One of "aerobe", "facultative anaerobe", or
-#'     "obligate anaerobe".}
-#'   \item{gram_stain}{
-#'     How the taxon appears when Gram-stained. One of "Gram-positive" or
-#'     "Gram-negative".}
-#'   \item{doi}{DOI of the publication from which the information was obtained.}
-#' }
-"taxon_phenotypes"
-
-#' Antibiotic susceptibility of bacterial taxa
-#' @format A data frame with the following columns:
-#' \describe{
-#'   \item{taxon}{The name of the taxon}
-#'   \item{rank}{The rank of the taxon}
-#'   \item{antibiotic}{The antibiotic or antibiotic class}
-#'   \item{value}{
-#'     The susceptibility of the taxon to the antibiotic, one of "susceptible"
-#'     or "resistant".}
-#'   \item{doi}{DOI of the publication from which the information was obtained.}
-#' }
-"taxon_susceptibility"
-
 #' Example data from Weiss et al.
 #'
 #' @format A data frame with the following columns:

diff --git a/R/match.R b/R/match.R
@@ -28,12 +28,8 @@
 #' @export
 antibiotic_susceptibility <- function (lineage,
                                        antibiotic,
-                                       db = taxon_susceptibility) {
-  is_relevant <- db$antibiotic %in% antibiotic
-  db <- db[is_relevant, c("taxon", "rank", "value")]
-
-  susceptibility_values <- match_annotation(lineage, db)
-  susceptibility_values
+                                       db = whatbacteria::taxon_susceptibility) {
+  whatbacteria::what_antibiotic(lineage, antibiotic, db)
 }
 
 #' Evaluate antibiotic susceptibility based on phenotype
@@ -74,107 +70,9 @@ antibiotic_susceptibility <- function (lineage,
 phenotype_susceptibility <- function (lineage,
                                       phenotype,
                                       susceptibility,
-                                      db = taxon_phenotypes) {
-  is_relevant <- db[[phenotype]] %in% names(susceptibility)
-  db <- db[is_relevant, c("taxon", "rank", phenotype)]
-  # match_annotation() requires a column named "value"
-  colnames(db)[3] <- "value"
-
-  phenotype_values <- match_annotation(lineage, db)
-
+                                      db = whatbacteria::taxon_phenotypes) {
+  phenotype_values <- whatbacteria::what_phenotype(lineage, phenotype, db)
   susceptibility_values <- susceptibility[phenotype_values]
   susceptibility_values <- unname(susceptibility_values)
   susceptibility_values
 }
-
-# Determine the annotation values for each lineage
-#
-# @param lineage A vector of taxonomic assignments or lineages
-# @param db A data frame with columns named "taxon", "rank", and "value"
-# @return A vector of assigned values
-match_annotation <- function (lineage, db) {
-  get_rank_specific_db <- function (r) {
-    rank_is_r <- db[["rank"]] %in% r
-    db[rank_is_r,]
-  }
-  db_ranks <- lapply(rev(taxonomic_ranks), get_rank_specific_db)
-  names(db_ranks) <- rev(taxonomic_ranks)
-
-  get_values_by_rank <- function (rank_specific_db) {
-    taxa_idx <- match_taxa(lineage, rank_specific_db[["taxon"]])
-    rank_specific_db[["value"]][taxa_idx]
-  }
-  values_by_rank <- vapply(
-    db_ranks,
-    get_values_by_rank,
-    rep("a", length(lineage)))
-
-  if (length(lineage) == 1) {
-    assigned_values <- first_non_na_value(values_by_rank)
-  } else {
-    assigned_values <- apply(values_by_rank, 1, first_non_na_value)
-  }
-  assigned_values
-}
-
-# The 'official' taxonomic ranks supported by this package
-taxonomic_ranks <- c(
-  "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
-
-# Return the first value that is not NA. If all values are NA, return NA. The
-# resultant vector will not have names.
-first_non_na_value <- function (x) {
-  unname(x[first_true_idx(!is.na(x))])
-}
-
-# For each lineage, return the index of the taxon that is found within the
-# lineage. If no taxa are found, return NA for that element. If multiple taxa
-# are found, we issue a warning and return the index of the first taxon in the
-# vector of taxa.
-match_taxa <- function (lineages, taxa) {
-  n_lineages <- length(lineages)
-  if (length(taxa) == 0) {
-    return(rep_len(NA_character_, length(lineages)))
-  }
-
-  taxa_patterns <- paste0("(?<=__|\\b)(?:", taxa, ")\\b")
-  lineage_matches <- vapply(
-    X = taxa_patterns,
-    FUN = grepl,
-    FUN.VALUE = rep_len(TRUE, n_lineages),
-    x = lineages,
-    perl = TRUE,
-    USE.NAMES = TRUE)
-
-  # If the user passes only one lineage, lineage_matches will be a vector
-  # rather than an array. After some trial and error, I found that it's better
-  # to deal with this at each stage of the computation, rather than trying to
-  # coerce the vector to an array up front.
-  if (n_lineages == 1) {
-    multi_matches <- sum(lineage_matches) > 1
-  } else {
-    multi_matches <- rowSums(lineage_matches) > 1
-  }
-  if (any(multi_matches)) {
-    warning(
-      "The following lineages match more than one taxon:\n",
-      paste(lineages[multi_matches], collapse = "\n"), "\n")
-  }
-
-  if (n_lineages == 1) {
-    taxon_idx <- first_true_idx(lineage_matches)
-  } else {
-    taxon_idx <- apply(lineage_matches, 1, first_true_idx)
-  }
-  taxon_idx
-}
-
-# Return the first index of a boolean vector that is TRUE. If all elements of
-# the vector are FALSE, return NA. Tempted to call this function minwhich.
-first_true_idx <- function (x) {
-  if (any(x)) {
-    min(which(x == TRUE))
-  } else {
-    NA_integer_
-  }
-}
diff --git a/R/predict.R b/R/predict.R
@@ -1,7 +1,7 @@
 #' Predict taxon abundances at given values of an index
 #'
 #' @param index_value Value or values of the index at which to make predictions.
-#' @param abundances A vector of taxon abundances in a sample.
+#' @param abundance A vector of taxon abundances in a sample.
 #' @param susceptibility A character vector of antibiotic susceptibility, with
 #'   values that are "susceptible", "resistant", or \code{NA}.
 #' @return A new vector of abundances if \code{index_value} has length 1. If