Skip to content

Commit

Permalink
Fixing assign_peptide_type
Browse files Browse the repository at this point in the history
  • Loading branch information
jpquast committed Sep 29, 2024
1 parent 151e795 commit 86ac58c
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 38 deletions.
56 changes: 32 additions & 24 deletions R/assign_peptide_type.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ peptide_type <- function(...) {
#' peptide is located at the N- or C-terminus of a protein and fulfills the criterion to be
#' fully-tryptic otherwise; it is also considered fully-tryptic. Peptides that only fulfill the
#' criterion on one terminus are semi-tryptic peptides. Lastly, peptides that are not fulfilling
#' the criteria for both termini are non-tryptic peptides.
#' the criteria for both termini are non-tryptic peptides. In addition, peptides that lack the initial
#' methionine of a protein (i.e. start at position 2) are considered "tryptic" at that site if no
#' peptide starting at position 1 is present for that protein.
#'
#' @param data a data frame containing at least information about the preceding and C-terminal
#' amino acids of peptides.
Expand All @@ -36,80 +38,86 @@ peptide_type <- function(...) {
#' acid as one letter code.
#' @param protein_id a character column in the \code{data} data frame that contains the protein
#' accession numbers.
#' @param start_pos A numeric column in the \code{data} data frame that contains the start position of
#' each peptide within the corresponding protein. This is used to check if the peptide starts at position 1
#' or position 2, which affects whether the peptide can be considered fully-tryptic.
#'
#' @param start a numeric column in the \code{data} data frame that contains the start position of
#' each peptide within the corresponding protein. This is used to check if the protein is consistently
#' missing the initial methionine, making peptides starting at position 2 "tryptic" at that site.
#'
#' @return A data frame that contains the input data and an additional column with the peptide
#' type information.
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stringr str_detect
#' @export
#'
#' @examples
#' data <- data.frame(
#' aa_before = c("K", "S", "K", "S", "T", "M"),
#' last_aa = c("R", "K", "R", "K", "Y", "K"),
#' aa_after = c("T", "R", "T", "R", "T", "R"),
#' protein_id = c("P1", "P1", "P3", "P3", "P2", "P2"),
#' start_pos = c(2, 2, 1, 2, 1, 2),
#' )
#' aa_before = c("K", "M", "", "M", "S", "M", "-"),
#' last_aa = c("R", "K", "R", "R", "Y", "K", "K"),
#' aa_after = c("T", "R", "T", "R", "T", "R", "T"),
#' protein_id = c("P1", "P1", "P3", "P3", "P2", "P2", "P2"),
#' start = c(38, 2, 1, 2, 10, 2, 1)
#' )
#'
#' assign_peptide_type(data, aa_before, last_aa, aa_after, protein_id, start_pos)
#' assign_peptide_type(data, aa_before, last_aa, aa_after, protein_id, start)
assign_peptide_type <- function(data,
aa_before = aa_before,
last_aa = last_aa,
aa_after = aa_after,
protein_id = protein_id,
start_pos = start_pos) {
start = start) {
# Check if there's any peptide starting at position 1 for each protein
start_summary <- data %>%
dplyr::group_by({{ protein_id }}) %>%
dplyr::summarize(has_start_pos_1 = any({{ start_pos }} == 1), .groups = "drop")
dplyr::summarize(has_start_1 = any({{ start }} == 1), .groups = "drop")

peptide_data <- data %>%
dplyr::distinct({{ aa_before }}, {{ last_aa }}, {{ aa_after }}, {{ protein_id }}, {{ start_pos }}, .keep_all = TRUE) %>%
dplyr::distinct({{ aa_before }}, {{ last_aa }}, {{ aa_after }}, {{ protein_id }}, {{ start }}, .keep_all = TRUE) %>%
dplyr::left_join(start_summary, by = rlang::as_name(rlang::enquo(protein_id))) %>%
# Determine N-terminal trypticity
dplyr::mutate(N_term_tryp = dplyr::if_else(
{{ aa_before }} == "" | {{ aa_before }} == "K" | {{ aa_before }} == "R",
!stringr::str_detect({{ aa_before }}, "[A-Y]") | {{ aa_before }} == "K" | {{ aa_before }} == "R",
TRUE,
FALSE
)) %>%
# Determine C-terminal trypticity
dplyr::mutate(C_term_tryp = dplyr::if_else(
{{ last_aa }} == "K" | {{ last_aa }} == "R" | {{ aa_after }} == "",
{{ last_aa }} == "K" | {{ last_aa }} == "R" | !stringr::str_detect({{ aa_after }}, "[A-Y]"),
TRUE,
FALSE
)) %>%
# Assign peptide type based on N-term and C-term trypticity
dplyr::mutate(pep_type = dplyr::case_when(
N_term_tryp & C_term_tryp ~ "fully-tryptic",
N_term_tryp | C_term_tryp ~ "semi-tryptic",
.data$N_term_tryp & .data$C_term_tryp ~ "fully-tryptic",
.data$N_term_tryp | .data$C_term_tryp ~ "semi-tryptic",
TRUE ~ "non-tryptic"
)) %>%
# Reassign semi-tryptic peptides at position 2 to fully-tryptic if no start_pos == 1
# Reassign semi-tryptic peptides at position 2 to fully-tryptic if no start == 1
dplyr::mutate(pep_type = dplyr::if_else(
pep_type == "semi-tryptic" & {{ start }} == 2 & !.data$has_start_1 & .data$C_term_tryp,
"fully-tryptic",
.data$pep_type
)) %>%
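# Reassign non-tryptic peptides at position 2 to semi-tryptic if no start == 1
# NOTE(review): the replacement value below is "fully-tryptic", not "semi-tryptic" as this comment states — confirm which is intended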
dplyr::mutate(pep_type = dplyr::if_else(
pep_type == "semi-tryptic" & {{ start_pos }} == 2 & !.data$has_start_pos_1 & C_term_tryp,
pep_type == "non-tryptic" & {{ start }} == 2 & !.data$has_start_1 & !.data$C_term_tryp,
"fully-tryptic",
.data$pep_type
)) %>%
# Drop unnecessary columns
dplyr::select(-N_term_tryp, -C_term_tryp, -has_start_pos_1)
dplyr::select(-c("N_term_tryp", "C_term_tryp", "has_start_1"))

# Join back to original data to return the full result
result <- data %>%
dplyr::left_join(
peptide_data %>%
dplyr::select({{ aa_before }}, {{ last_aa }}, {{ aa_after }}, {{ protein_id }}, {{ start_pos }}, pep_type),
dplyr::select({{ aa_before }}, {{ last_aa }}, {{ aa_after }}, {{ protein_id }}, {{ start }}, .data$pep_type),
by = c(
rlang::as_name(rlang::enquo(aa_before)),
rlang::as_name(rlang::enquo(last_aa)),
rlang::as_name(rlang::enquo(aa_after)),
rlang::as_name(rlang::enquo(protein_id)),
rlang::as_name(rlang::enquo(start_pos))
rlang::as_name(rlang::enquo(start))
)
)

Expand Down
30 changes: 16 additions & 14 deletions man/assign_peptide_type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 86ac58c

Please sign in to comment.