Skip to content

Commit

Permalink
Merge pull request #5 from ahgroup/strain-name-fun
Browse files Browse the repository at this point in the history
Strain name fun
  • Loading branch information
wzbillings authored Aug 12, 2024
2 parents c6c858b + 2d64ce9 commit f921aa0
Show file tree
Hide file tree
Showing 15 changed files with 1,038 additions and 53 deletions.
3 changes: 2 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
^.*\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^README\.Rmd$
^README\.qmd$
^data-raw$
18 changes: 13 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
Package: hgp
Type: Package
Title: HandelGroupPackage
Version: 0.0.1
Version: 0.0.2
Authors@R:
person("Zane", "Billings", , "wz.billings@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-0184-6134"))
Description: Miscellaneous functions used for Handelgroup research
Description: Miscellaneous functions used for Handelgroup research.
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Date: 2023-11-03
Suggests:
ggplot2
URL: https://github.com/ahgroup/hgp
BugReports: https://github.com/ahgroup/hgp/issues
Depends:
R (>= 2.10)
Suggests:
dplyr,
forcats,
ggplot2,
here,
readr,
tibble
License: AGPL (>= 3)
659 changes: 659 additions & 0 deletions LICENSE.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Generated by roxygen2: do not edit by hand

export(replace_strain_names)
export(theme_ms)
16 changes: 16 additions & 0 deletions R/handelgroup_strain_names.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#' Handelgroup Standardized Strain Names Dataset
#'
#' A dataset containing various formats of the names for the influenza strains
#' we use in our research. It is generated from
#' `data-raw/handelgroup-strain-names.csv` by the script
#' `data-raw/strain-names-data-prep.R`.
#'
#' @format ## `handelgroup_strain_names`
#' A data frame with 47 rows and 5 columns. There is one row per strain (46
#' strains), plus a final "Overall" placeholder row that sorts after every
#' real strain:
#' \describe{
#'   \item{subtype}{Factor. Whether the strain is H1N1 or H3N2. Empty for the
#'   "Overall" row.}
#'   \item{analysis_name}{Factor. Strain name format used in clean_data.Rds.}
#'   \item{genbank_strain_name}{Factor. The accepted full strain name, as
#'   found in genbank.}
#'   \item{short_name}{Factor. The abbreviated name, usually 2-4 letters and
#'   the last two digits of the year, useful for saving space in displays.}
#'   \item{factor_order}{Integer. The position used to order the levels of
#'   the three name factors. The "Overall" row uses 9999 so it comes last.}
#' }
"handelgroup_strain_names"
89 changes: 89 additions & 0 deletions R/replace_strain_names.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#' Replace strain names with a different format
#'
#' Using the standardized list of handelgroup strain names from
#' [handelgroup_strain_names], pass in a vector of strain names of arbitrary
#' length and return the same sequence of names in a different format.
#'
#' @param x A vector of strain names.
#' @param from Format of the names in the vector `x`. Should be one of
#'   "analysis", "full", or "short". See [handelgroup_strain_names] for the
#'   allowed names in each of the formats. If you want to transform a strain
#'   that is not currently in the strain list, you will need to add it and
#'   submit a PR to `hgp`!
#' @param to Format of the returned names. Should be one of "short", "full",
#'   "analysis", or "subtype".
#' @param drop If TRUE, levels of the returned factor variable are dropped.
#'   This requires the suggested package `forcats` to be installed. If
#'   FALSE, the level set of the factor will still contain every strain in
#'   [handelgroup_strain_names], which is typically not desirable.
#'
#' @return A factor vector of the same length as `x`. As a special case, when
#'   `from` and `to` are identical, `x` itself is returned unchanged and a
#'   warning is raised.
#' @export
#'
#' @examples
#' replace_strain_names("CA/09", from = "short", to = "analysis")
#'
#' dat <- data.frame(s = c("CA/09", "MI/15"), x = c(1, 2))
#' transform(
#'   dat,
#'   s_long = replace_strain_names(s, from = "short", to = "analysis")
#' )
replace_strain_names <- function(x, from = "analysis", to = "short",
                                 drop = TRUE) {
  # Use the lookup table that ships with the package
  strain_names <- hgp::handelgroup_strain_names

  # Check if from and to are the same
  if (from == to) {
    warning("From and to are the same, returning original vector.")
    return(x)
  }

  # Map each user-facing format name onto its column in the lookup table.
  # "subtype" can only be an output format, because many strains share a
  # subtype and so it cannot identify a strain.
  from_cols <- c(
    analysis = "analysis_name",
    full = "genbank_strain_name",
    short = "short_name"
  )
  to_cols <- c(from_cols, subtype = "subtype")

  # Find the right column for selecting names from
  if (!(from %in% names(from_cols))) {
    stop("'from' should be 'analysis', 'full', or 'short'.")
  }
  from_vec <- strain_names[[from_cols[[from]]]]

  # Make sure all values of x exist in the strain names table
  if (!all(x %in% from_vec)) {
    stop(paste0(
      "'x' should be a vector of ", from, " names that exist in the",
      ' virus-info sheet.'
    ))
  }

  # Make sure the requested output format is one we know about
  if (!(to %in% names(to_cols))) {
    stop("'to' should be 'analysis', 'full', 'short', or 'subtype'.")
  }

  # Row of the lookup table matching each element of x, then the
  # corresponding entries of the requested output column
  locs <- match(x, from_vec)
  vals <- strain_names[[to_cols[[to]]]][locs]

  # If requested, remove unseen factor levels. forcats is only a suggested
  # dependency, so fail with a clear message if it is not installed instead
  # of silently ignoring the result of requireNamespace().
  if (isTRUE(drop)) {
    if (!requireNamespace("forcats", quietly = TRUE)) {
      stop(
        "Package 'forcats' must be installed to use 'drop = TRUE'. ",
        "Install it, or call replace_strain_names() with 'drop = FALSE'."
      )
    }
    vals <- forcats::fct_drop(vals)
  }

  vals
}
37 changes: 20 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@

<!-- README.md is generated from README.qmd. Please edit that file -->

<!-- README.md is generated from README.qmd. Please edit that file -->
<!-- ASCII ART BANNER TITLE -->
```
_ _ _ _ _____ _____ _ ___ __
| | | | | | | |/ ____| | __ \ | | / / | \ \
| |__| | __ _ _ __ __| | ___| | | __ _ __ ___ _ _ _ __ | |__) |_ _ ___| | ____ _ __ _ ___ | || |__ __ _ _ __ | |
| __ |/ _` | '_ \ / _` |/ _ \ | | |_ | '__/ _ \| | | | '_ \| ___/ _` |/ __| |/ / _` |/ _` |/ _ \ | || '_ \ / _` | '_ \| |
| | | | (_| | | | | (_| | __/ | |__| | | | (_) | |_| | |_) | | | (_| | (__| < (_| | (_| | __/ | || | | | (_| | |_) | |
|_| |_|\__,_|_| |_|\__,_|\___|_|\_____|_| \___/ \__,_| .__/|_| \__,_|\___|_|\_\__,_|\__, |\___| | ||_| |_|\__, | .__/| |
| | __/ | \_\ __/ | | /_/
|_| |___/ |___/|_|
```

_ _ _ _ _____ _____ _
| | | | | | | |/ ____| | __ \ | |
| |__| | __ _ _ __ __| | ___| | | __ _ __ ___ _ _ _ __ | |__) |_ _ ___| | ____ _ __ _ ___
| __ |/ _` | '_ \ / _` |/ _ \ | | |_ | '__/ _ \| | | | '_ \| ___/ _` |/ __| |/ / _` |/ _` |/ _ \
| | | | (_| | | | | (_| | __/ | |__| | | | (_) | |_| | |_) | | | (_| | (__| < (_| | (_| | __/
|_| |_|\__,_|_| |_|\__,_|\___|_|\_____|_| \___/ \__,_| .__/|_| \__,_|\___|_|\_\__,_|\__, |\___|
| | __/ |
|_| |___/

<!-- END OF TITLE -->

# `hgp: HandelGroupPackage`
Expand All @@ -20,8 +20,13 @@
<!-- badges: end -->

`hgp` is a package for storing utility functions for use across
handelgroup. Right now, `hgp` only contains a `ggplot2` theme. But we
will update this description as we add more stuff.
handelgroup. Right now, `hgp` contains the following utilities:

- A group-standard `ggplot2` theme function;
- A set of functions for working with HAI data;
- A function to standardize influenza strain names for the strains used
in the datasets we have available to us; along with a corresponding
data set of standardized strain names.

**Because this repo is public, absolutely NO sensitive or confidential
information should be stored here. This repo is strictly for shared
Expand All @@ -33,8 +38,8 @@ You can install the development version of hgp from
[GitHub](https://github.com/) with:

``` r
# install.packages("devtools")
devtools::install_github("ahgroup/hgp")
# install.packages("remotes")
remotes::install_github("ahgroup/hgp")
```

Or if you are in a repository with `renv` enabled (recommended):
Expand Down Expand Up @@ -79,8 +84,6 @@ ggplot2::theme_set(hgp::theme_ms())
- Please run `devtools::check()` before merging any new functionality to
the main branch and fix any resulting messages. It is unnecessary to
`build` the package, we just need to ensure that any checks pass.
- Note that as of `2023-11-03`, we have not decided on a package
license, so you will get one warning in the check results for that.
- `renv` is initialized for this repository using the EXPLICIT snapshot
mode.
- If you need to import a new package, you MUST update the `NAMESPACE`
Expand Down
28 changes: 22 additions & 6 deletions README.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,32 @@ knitr::opts_chunk$set(
)
```

<!-- ASCII ART BANNER TITLE -->
```
_ _ _ _ _____ _____ _
| | | | | | | |/ ____| | __ \ | |
| |__| | __ _ _ __ __| | ___| | | __ _ __ ___ _ _ _ __ | |__) |_ _ ___| | ____ _ __ _ ___
| __ |/ _` | '_ \ / _` |/ _ \ | | |_ | '__/ _ \| | | | '_ \| ___/ _` |/ __| |/ / _` |/ _` |/ _ \
| | | | (_| | | | | (_| | __/ | |__| | | | (_) | |_| | |_) | | | (_| | (__| < (_| | (_| | __/
|_| |_|\__,_|_| |_|\__,_|\___|_|\_____|_| \___/ \__,_| .__/|_| \__,_|\___|_|\_\__,_|\__, |\___|
| | __/ |
|_| |___/
```
<!-- END OF TITLE -->

# `hgp: HandelGroupPackage`

<!-- badges: start -->
<!-- badges: end -->

`hgp` is a package for storing utility functions for use across handelgroup.
Right now, `hgp` only contains a `ggplot2` theme. But we will update this
description as we add more stuff.
Right now, `hgp` contains the following utilities:

* A group-standard `ggplot2` theme function;
* A set of functions for working with HAI data;
* A function to standardize influenza strain names for the strains used in the
datasets we have available to us; along with a corresponding data set of
standardized strain names.

**Because this repo is public, absolutely NO sensitive or confidential
information should be stored here. This repo is strictly for shared utilities.**
Expand All @@ -30,8 +48,8 @@ information should be stored here. This repo is strictly for shared utilities.**
You can install the development version of hgp from [GitHub](https://github.com/) with:

``` r
# install.packages("devtools")
devtools::install_github("ahgroup/hgp")
# install.packages("remotes")
remotes::install_github("ahgroup/hgp")
```

Or if you are in a repository with `renv` enabled (recommended):
Expand Down Expand Up @@ -75,8 +93,6 @@ should use `devtools::document()` to generate the documentation files.
* Please run `devtools::check()` before merging any new functionality to the
main branch and fix any resulting messages. It is unnecessary to `build` the
package, we just need to ensure that any checks pass.
* Note that as of `2023-11-03`, we have not decided on a package license,
so you will get one warning in the check results for that.
* `renv` is initialized for this repository using the EXPLICIT snapshot mode.
* If you need to import a new package, you MUST update the `NAMESPACE` file
appropriately before invoking `renv::snapshot()`.
Expand Down
47 changes: 47 additions & 0 deletions data-raw/handelgroup-strain-names.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
subtype,analysis_name,genbank_strain_name,short_name,factor_order,vaccine_strain
h1,H1N1-South Carolina-1918,A/H1N1/South Carolina/1/1918,SC/18,1,FALSE
h1,H1N1-Puerto Rico-1934,A/H1N1/Puerto Rico/8/1934,PR/34,2,FALSE
h1,H1N1-Weiss-1943,A/H1N1/Weiss/43,Wei/43,3,FALSE
h1,H1N1-Fort Monmouth-1947,A/H1N1/Fort Monmouth/1/1947,FM/47,4,FALSE
h1,H1N1-Denver-1957,A/H1N1/Denver/1957,Den/57,5,FALSE
h1,H1N1-New Jersey-1976,A/H1N1/New Jersey/8/1976,NJ/76,6,FALSE
h1,H1N1-Ussr-1977,A/H1N1/Ussr/90/1977,USSR/77,7,FALSE
h1,H1N1-Brazil-1978,A/H1N1/Brazil/11/1978,Bra/78,8,FALSE
h1,H1N1-California-1978,A/H1N1/California/10/1978,CA/78,9,FALSE
h1,H1N1-Chile-1983,A/H1N1/Chile/1/1983,Chi/83,10,FALSE
h1,H1N1-Singapore-1986,A/H1N1/Singapore/6/1986,Sing/86,11,FALSE
h1,H1N1-Texas-1991,A/H1N1/Texas/36/1991,TX/91,12,FALSE
h1,H1N1-Beijing-1995,A/H1N1/Beijing/262/1995,Bei/95,13,FALSE
h1,H1N1-New Caledonia-1999,A/H1N1/New Caledonia/20/1999,NC/99,14,FALSE
h1,H1N1-Solomon Islands-2006,A/H1N1/Solomon Islands/3/2006,SI/06,15,FALSE
h1,H1N1-Brisbane-2007,A/H1N1/Brisbane/59/2007,Bris/07,16,FALSE
h1,H1N1-California-2009,A/H1N1/California/07/2009,CA/09,17,TRUE
h1,H1N1-Michigan-2015,A/H1N1/Michigan/45/2015,MI/15,18,TRUE
h1,H1N1-Brisbane-2018,A/H1N1/Brisbane/02/2018,Bris/18,19,TRUE
h1,H1N1-Guangdong Maonan-2019,A/H1N1/Guangdong-Maonan/SWL1536/2019,GD/19,20,TRUE
h1,H1N1-Victoria-2019,A/H1N1/Victoria/2570/2019,Vic/19,21,TRUE
h3,H3N2-Hong Kong-1968,A/H3N2/Hong Kong/8/1968,HK/68,22,FALSE
h3,H3N2-Port Chalmers-1973,A/H3N2/Port Chalmers/1/1973,PC/73,23,FALSE
h3,H3N2-Texas-1977,A/H3N2/Texas/1/1977,TX/77,24,FALSE
h3,H3N2-Mississippi-1985,A/H3N2/Mississippi/1/1985,MI/85,25,FALSE
h3,H3N2-Sichuan-1987,A/H3N2/Sichuan/2/1987,Sich/87,26,FALSE
h3,H3N2-Shangdong-1993,A/H3N2/Shangdong/9/1993,Shan/93,27,FALSE
h3,H3N2-Nanchang-1995,A/H3N2/Nanchang/933/1995,Nan/95,28,FALSE
h3,H3N2-Sydney-1997,A/H3N2/Sydney/5/1997,Syd/97,29,FALSE
h3,H3N2-Panama-1999,A/H3N2/Panama/2007/1999,Pan/99,30,FALSE
h3,H3N2-Fujian-2002,A/H3N2/Fujian/411/2002,Fuj/02,31,FALSE
h3,H3N2-New York-2004,A/H3N2/New York/55/2004,NY/04,32,FALSE
h3,H3N2-Brisbane-2007,A/H3N2/Brisbane/10/2007,Br/07,33,TRUE
h3,H3N2-Wisconsin-2005,A/H3N2/Wisconsin/67/2005,WI/05,34,FALSE
h3,H3N2-Uruguay-2007,A/H3N2/Uruguay/716/2007,Uru/07,35,FALSE
h3,H3N2-Perth-2009,A/H3N2/Perth/16/2009,Per/09,36,FALSE
h3,H3N2-Victoria-2011,A/H3N2/Victoria/361/2011,Vic/11,37,FALSE
h3,H3N2-Texas-2012,A/H3N2/Texas/50/2012,TX/12,38,TRUE
h3,H3N2-Switzerland-2013,A/H3N2/Switzerland/9715293/2013,Switz/13,39,TRUE
h3,H3N2-Hong Kong-2014,A/H3N2/Hong Kong/4801/2014,HK/14,40,TRUE
h3,H3N2-Singapore-2016,A/H3N2/Singapore/infimh-16-0019/2016,Sing/16,41,TRUE
h3,H3N2-Kansas-2017,A/H3N2/Kansas/14/2017,KS/17,42,TRUE
h3,H3N2-Hong Kong-2019,A/H3N2/Hong Kong/2671/2019,HK/19,43,TRUE
h3,H3N2-South Australia-2019,A/H3N2/South Australia/34/2019,SA/19,44,TRUE
h3,H3N2-Tasmania-2020,A/H3N2/Tasmania/503/2020,Tas/20,45,TRUE
h3,H3N2-Darwin-2021,A/H3N2/Darwin/9/2021,Dar/21,46,TRUE
38 changes: 38 additions & 0 deletions data-raw/strain-names-data-prep.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
###
# Build the packaged `handelgroup_strain_names` data set from the raw CSV
# Zane Billings
# 2024-08-12
# The raw CSV is a table of strain names, currently the ones used in
# UGAFluVac. Strain names used in other handelgroup datasets are welcome:
# add them to the raw CSV file and re-run this script.
###

# Read the raw table. Column types, in file order: factor, three character
# columns, integer, logical.
handelgroup_strain_names <- readr::read_csv(
  here::here("data-raw", "handelgroup-strain-names.csv"),
  col_types = "fcccil"
)

# Remove the columns that are not shipped with the package
handelgroup_strain_names <- dplyr::select(
  handelgroup_strain_names,
  -c(vaccine_strain)
)

# Append an "Overall" entry with a very large factor_order so that sorting
# the overall entry for CATEs is easy (it always comes last)
handelgroup_strain_names <- tibble::add_row(
  handelgroup_strain_names,
  subtype = "",
  analysis_name = "Overall",
  short_name = "Overall",
  genbank_strain_name = "Overall",
  factor_order = 9999L
)

# Relabel the subtypes, and turn each of the name columns into a factor whose
# levels follow factor_order
handelgroup_strain_names <- dplyr::mutate(
  handelgroup_strain_names,
  subtype = factor(
    as.character(subtype),
    levels = c("h1", "h3", ""),
    labels = c("H1N1", "H3N2", "")
  ),
  analysis_name = forcats::fct_reorder(analysis_name, factor_order),
  genbank_strain_name = forcats::fct_reorder(
    genbank_strain_name,
    factor_order
  ),
  short_name = forcats::fct_reorder(short_name, factor_order)
)

usethis::use_data(handelgroup_strain_names, overwrite = TRUE)
Binary file added data/handelgroup_strain_names.rda
Binary file not shown.
26 changes: 26 additions & 0 deletions man/handelgroup_strain_names.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f921aa0

Please sign in to comment.