-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from ahgroup/strain-name-fun
Strain name fun
- Loading branch information
Showing
15 changed files
with
1,038 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,5 @@ | |
^.*\.Rproj$ | ||
^\.Rproj\.user$ | ||
^LICENSE\.md$ | ||
^README\.Rmd$ | ||
^README\.qmd$ | ||
^data-raw$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,24 @@ | ||
Package: hgp | ||
Type: Package | ||
Title: HandelGroupPackage | ||
Version: 0.0.1 | ||
Version: 0.0.2 | ||
Authors@R: | ||
person("Zane", "Billings", , "wz.billings@gmail.com", role = c("aut", "cre"), | ||
comment = c(ORCID = "0000-0002-0184-6134")) | ||
Description: Miscellaneous functions used for Handelgroup research | ||
Description: Miscellaneous functions used for Handelgroup research. | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 7.2.3 | ||
RoxygenNote: 7.3.2 | ||
Date: 2023-11-03 | ||
Suggests: | ||
ggplot2 | ||
URL: https://github.com/ahgroup/hgp | ||
BugReports: https://github.com/ahgroup/hgp/issues | ||
Depends: | ||
R (>= 2.10) | ||
Suggests: | ||
dplyr, | ||
forcats, | ||
ggplot2, | ||
here, | ||
readr, | ||
tibble | ||
License: AGPL (>= 3) |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(replace_strain_names) | ||
export(theme_ms) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#' Handelgroup Standardized Strain Names Dataset | ||
#' | ||
#' A dataset containing various formats of the names for the influenza strains | ||
#' we use in our research. The table is built from the raw CSV file | ||
#' `data-raw/handelgroup-strain-names.csv` by the preparation script in | ||
#' `data-raw`, which drops the `vaccine_strain` column and appends one extra | ||
#' row named "Overall" that sorts after every real strain. | ||
#' | ||
#' @format ## `handelgroup_strain_names` | ||
#' A data frame with 47 rows (46 strains plus the "Overall" row) and 5 | ||
#' columns: | ||
#' \describe{ | ||
#' \item{subtype}{Whether the strain is H1N1 or H3N2. Empty for the | ||
#' "Overall" row.} | ||
#' \item{analysis_name}{Strain name format used in clean_data.Rds.} | ||
#' \item{genbank_strain_name}{The accepted full strain name, as found in | ||
#' genbank.} | ||
#' \item{short_name}{The abbreviated name, usually 2-4 letters and the | ||
#' last two digits of the year, useful for saving space in displays.} | ||
#' \item{factor_order}{Integer position of the strain in the ordering used | ||
#' for the levels of the three name factors. The "Overall" row uses 9999.} | ||
#' } | ||
"handelgroup_strain_names" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#' Replace strain names with a different format
#'
#' Looks up every element of `x` in the standardized table of handelgroup
#' strain names ([handelgroup_strain_names]) and returns the matching names in
#' a different format, in the same order as `x`.
#'
#' @param x A vector of strain names. Every element must be present in the
#'   `from` column of [handelgroup_strain_names], otherwise an error is
#'   raised. If you want to transform a strain that is not currently in the
#'   strain list, you will need to add it and submit a PR to `hgp`!
#' @param from Format of the names in the vector `x`. Must be a single string,
#'   one of "analysis", "full", or "short". See [handelgroup_strain_names] for
#'   the allowed names in each of the formats.
#' @param to Format of the returned names. Must be a single string, one of
#'   "short", "full", "analysis", or "subtype".
#' @param drop If TRUE, unused levels of the returned factor are dropped
#'   (this requires the `forcats` package). If FALSE, the level set of the
#'   factor will still contain every strain in [handelgroup_strain_names],
#'   which is typically not desirable.
#'
#' @return A factor vector of the same length as `x`. If `from` and `to` are
#'   the same (valid) format, a warning is issued and `x` itself is returned
#'   unchanged.
#' @export
#'
#' @examples
#' replace_strain_names("CA/09", from = "short", to = "analysis")
#'
#' dat <- data.frame(s = c("CA/09", "MI/15"), x = c(1, 2))
#' transform(
#'   dat,
#'   s_long = replace_strain_names(s, from = "short", to = "analysis")
#' )
replace_strain_names <- function(x, from = "analysis", to = "short",
                                 drop = TRUE) {
  # Map each user-facing format keyword to its column in the lookup table.
  # "subtype" is only meaningful as an output format.
  from_columns <- c(
    analysis = "analysis_name",
    full = "genbank_strain_name",
    short = "short_name"
  )
  to_columns <- c(from_columns, subtype = "subtype")

  is_string <- function(arg) {
    is.character(arg) && length(arg) == 1L && !is.na(arg)
  }

  # Validate the format keywords before anything else so that an invalid
  # value is always reported, even when 'from' and 'to' happen to be equal.
  if (!is_string(from) || !(from %in% names(from_columns))) {
    stop("'from' should be 'analysis', 'full', or 'short'.", call. = FALSE)
  }
  if (!is_string(to) || !(to %in% names(to_columns))) {
    stop(
      "'to' should be 'analysis', 'full', 'short', or 'subtype'.",
      call. = FALSE
    )
  }

  # Nothing to translate when both formats agree
  if (from == to) {
    warning("From and to are the same, returning original vector.")
    return(x)
  }

  # Load the strain names data
  strain_table <- hgp::handelgroup_strain_names
  from_vec <- strain_table[[from_columns[[from]]]]

  # Make sure all values of x exist in the virus info table
  if (!all(x %in% from_vec)) {
    stop(
      paste0(
        "'x' should be a vector of ", from, " names that exist in the",
        " virus-info sheet."
      ),
      call. = FALSE
    )
  }

  # Row of the lookup table for each element of x, then the requested column
  locs <- match(x, from_vec)
  vals <- strain_table[[to_columns[[to]]]][locs]

  # If requested, remove unseen factor levels. forcats is only a suggested
  # dependency, so check for it here, where it is actually needed.
  if (isTRUE(drop)) {
    if (!requireNamespace("forcats", quietly = TRUE)) {
      stop(
        "Package 'forcats' is required when 'drop = TRUE'.",
        call. = FALSE
      )
    }
    vals <- forcats::fct_drop(vals)
  }

  vals
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
subtype,analysis_name,genbank_strain_name,short_name,factor_order,vaccine_strain | ||
h1,H1N1-South Carolina-1918,A/H1N1/South Carolina/1/1918,SC/18,1,FALSE | ||
h1,H1N1-Puerto Rico-1934,A/H1N1/Puerto Rico/8/1934,PR/34,2,FALSE | ||
h1,H1N1-Weiss-1943,A/H1N1/Weiss/43,Wei/43,3,FALSE | ||
h1,H1N1-Fort Monmouth-1947,A/H1N1/Fort Monmouth/1/1947,FM/47,4,FALSE | ||
h1,H1N1-Denver-1957,A/H1N1/Denver/1957,Den/57,5,FALSE | ||
h1,H1N1-New Jersey-1976,A/H1N1/New Jersey/8/1976,NJ/76,6,FALSE | ||
h1,H1N1-Ussr-1977,A/H1N1/Ussr/90/1977,USSR/77,7,FALSE | ||
h1,H1N1-Brazil-1978,A/H1N1/Brazil/11/1978,Bra/78,8,FALSE | ||
h1,H1N1-California-1978,A/H1N1/California/10/1978,CA/78,9,FALSE | ||
h1,H1N1-Chile-1983,A/H1N1/Chile/1/1983,Chi/83,10,FALSE | ||
h1,H1N1-Singapore-1986,A/H1N1/Singapore/6/1986,Sing/86,11,FALSE | ||
h1,H1N1-Texas-1991,A/H1N1/Texas/36/1991,TX/91,12,FALSE | ||
h1,H1N1-Beijing-1995,A/H1N1/Beijing/262/1995,Bei/95,13,FALSE | ||
h1,H1N1-New Caledonia-1999,A/H1N1/New Caledonia/20/1999,NC/99,14,FALSE | ||
h1,H1N1-Solomon Islands-2006,A/H1N1/Solomon Islands/3/2006,SI/06,15,FALSE | ||
h1,H1N1-Brisbane-2007,A/H1N1/Brisbane/59/2007,Bris/07,16,FALSE | ||
h1,H1N1-California-2009,A/H1N1/California/07/2009,CA/09,17,TRUE | ||
h1,H1N1-Michigan-2015,A/H1N1/Michigan/45/2015,MI/15,18,TRUE | ||
h1,H1N1-Brisbane-2018,A/H1N1/Brisbane/02/2018,Bris/18,19,TRUE | ||
h1,H1N1-Guangdong Maonan-2019,A/H1N1/Guangdong-Maonan/SWL1536/2019,GD/19,20,TRUE | ||
h1,H1N1-Victoria-2019,A/H1N1/Victoria/2570/2019,Vic/19,21,TRUE | ||
h3,H3N2-Hong Kong-1968,A/H3N2/Hong Kong/8/1968,HK/68,22,FALSE | ||
h3,H3N2-Port Chalmers-1973,A/H3N2/Port Chalmers/1/1973,PC/73,23,FALSE | ||
h3,H3N2-Texas-1977,A/H3N2/Texas/1/1977,TX/77,24,FALSE | ||
h3,H3N2-Mississippi-1985,A/H3N2/Mississippi/1/1985,MI/85,25,FALSE | ||
h3,H3N2-Sichuan-1987,A/H3N2/Sichuan/2/1987,Sich/87,26,FALSE | ||
h3,H3N2-Shangdong-1993,A/H3N2/Shangdong/9/1993,Shan/93,27,FALSE | ||
h3,H3N2-Nanchang-1995,A/H3N2/Nanchang/933/1995,Nan/95,28,FALSE | ||
h3,H3N2-Sydney-1997,A/H3N2/Sydney/5/1997,Syd/97,29,FALSE | ||
h3,H3N2-Panama-1999,A/H3N2/Panama/2007/1999,Pan/99,30,FALSE | ||
h3,H3N2-Fujian-2002,A/H3N2/Fujian/411/2002,Fuj/02,31,FALSE | ||
h3,H3N2-New York-2004,A/H3N2/New York/55/2004,NY/04,32,FALSE | ||
h3,H3N2-Brisbane-2007,A/H3N2/Brisbane/10/2007,Br/07,33,TRUE | ||
h3,H3N2-Wisconsin-2005,A/H3N2/Wisconsin/67/2005,WI/05,34,FALSE | ||
h3,H3N2-Uruguay-2007,A/H3N2/Uruguay/716/2007,Uru/07,35,FALSE | ||
h3,H3N2-Perth-2009,A/H3N2/Perth/16/2009,Per/09,36,FALSE | ||
h3,H3N2-Victoria-2011,A/H3N2/Victoria/361/2011,Vic/11,37,FALSE | ||
h3,H3N2-Texas-2012,A/H3N2/Texas/50/2012,TX/12,38,TRUE | ||
h3,H3N2-Switzerland-2013,A/H3N2/Switzerland/9715293/2013,Switz/13,39,TRUE | ||
h3,H3N2-Hong Kong-2014,A/H3N2/Hong Kong/4801/2014,HK/14,40,TRUE | ||
h3,H3N2-Singapore-2016,A/H3N2/Singapore/infimh-16-0019/2016,Sing/16,41,TRUE | ||
h3,H3N2-Kansas-2017,A/H3N2/Kansas/14/2017,KS/17,42,TRUE | ||
h3,H3N2-Hong Kong-2019,A/H3N2/Hong Kong/2671/2019,HK/19,43,TRUE | ||
h3,H3N2-South Australia-2019,A/H3N2/South Australia/34/2019,SA/19,44,TRUE | ||
h3,H3N2-Tasmania-2020,A/H3N2/Tasmania/503/2020,Tas/20,45,TRUE | ||
h3,H3N2-Darwin-2021,A/H3N2/Darwin/9/2021,Dar/21,46,TRUE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
###
# Code to prepare the strain names data for package inclusion
# Zane Billings
# 2024-08-12
# The strain names data is a table of strain names, currently the ones that
# are used in UGAFluVac. We encourage updates to the raw CSV file to add
# strain names that are used in other handelgroup datasets.
###

# Build the table step by step inside local() so that the only object left
# behind in the calling environment is the final dataset.
handelgroup_strain_names <- local({
  # Column types: factor, three character columns, integer, logical
  raw_names <- readr::read_csv(
    here::here("data-raw", "handelgroup-strain-names.csv"),
    col_types = "fcccil"
  )

  # Remove the useless columns
  kept_columns <- dplyr::select(raw_names, -vaccine_strain)

  # Append a row so sorting the overall entry for CATEs is easy
  with_overall <- tibble::add_row(
    kept_columns,
    subtype = "",
    analysis_name = "Overall",
    genbank_strain_name = "Overall",
    short_name = "Overall",
    factor_order = 9999L
  )

  # Clean up the subtype labels and turn every name column into a factor
  # whose levels follow factor_order
  dplyr::mutate(
    with_overall,
    subtype = factor(
      as.character(subtype),
      levels = c("h1", "h3", ""),
      labels = c("H1N1", "H3N2", "")
    ),
    dplyr::across(
      c(analysis_name, genbank_strain_name, short_name),
      function(strain_name) forcats::fct_reorder(strain_name, factor_order)
    )
  )
})

usethis::use_data(handelgroup_strain_names, overwrite = TRUE)
Binary file not shown.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.