Skip to content

Commit

Permalink
replace apply with vectorized approach and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kriemo committed Aug 31, 2023
1 parent f3fa628 commit 35e9ada
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 16 deletions.
49 changes: 35 additions & 14 deletions R/calc_AEI.R
Original file line number Diff line number Diff line change
Expand Up @@ -417,20 +417,10 @@ correct_strand <- function(rse, genes_gr) {

rowData(rse)$REF[flip_rows] <- comp_bases(rowData(rse)$REF[flip_rows])

flipped_variants <- vector(mode = "list", ncol(rse))
to_flip <- assay(rse, "ALT")[flip_rows, , drop = FALSE]
flipped_variants <- apply(to_flip, c(1, 2), function(x) {
vapply(
strsplit(x, ","), function(y) {
paste0(comp_bases(y),
collapse = ","
)
},
character(1)
)
})

assay(rse, "ALT")[flip_rows, ] <- flipped_variants
# complement the ALT variants
alts <- assay(rse, "ALT")[flip_rows, , drop = FALSE]
comp_alts <- complement_variant_matrix(alts)
assay(rse, "ALT")[flip_rows, ] <- comp_alts

# complement the nucleotide counts by reordering the assays
assays_to_swap <- c("nA", "nT", "nC", "nG")
Expand All @@ -450,3 +440,34 @@ correct_strand <- function(rse, genes_gr) {
rse
}


# complement the ALT assay matrix, including multiallelics (e.g. c("A", "A,T", "C))
complement_variant_matrix <- function(alt_mat) {
stopifnot(all(!is.na(alt_mat)))

# find multiallelics
n_alleles <- lengths(regmatches(alt_mat, gregexpr(",", alt_mat)))

# convert to a vector for processing
alts <- as.vector(alt_mat)
comp_alts <- alts

# single allele sites
comp_alts[n_alleles == 0] <- comp_bases(alts[n_alleles == 0])

# split and complement multiallelics
multialts <- alts[n_alleles > 0]
multialts <- vapply(strsplit(multialts, ","), function(y) {
paste0(comp_bases(y), collapse = ",")
},
character(1)
)
comp_alts[n_alleles > 0] <- multialts

# get back to a matrix
comp_alts <- matrix(comp_alts,
nrow = nrow(alt_mat),
ncol = ncol(alt_mat),
dimnames = dimnames(alt_mat))
comp_alts
}
2 changes: 1 addition & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ knitr::opts_chunk$set(

<!-- badges: start -->
[![R-CMD-check-bioc](https://github.com/rnabioco/raer/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/rnabioco/raer/actions/workflows/check-bioc.yml)
[![Codecov test coverage](https://codecov.io/gh/rnabioco/raer/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rnabioco/raer?branch=main)
[![Codecov test coverage](https://codecov.io/gh/rnabioco/raer/branch/devel/graph/badge.svg)](https://app.codecov.io/gh/rnabioco/raer?branch=devel)
<!-- badges: end -->

raer facilitates analysis of RNA adenosine editing in the
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

[![R-CMD-check-bioc](https://github.com/rnabioco/raer/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/rnabioco/raer/actions/workflows/check-bioc.yml)
[![Codecov test
coverage](https://codecov.io/gh/rnabioco/raer/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rnabioco/raer?branch=main)
coverage](https://codecov.io/gh/rnabioco/raer/branch/devel/graph/badge.svg)](https://app.codecov.io/gh/rnabioco/raer?branch=devel)
<!-- badges: end -->

raer facilitates analysis of RNA adenosine editing in the
Expand Down
52 changes: 52 additions & 0 deletions tests/testthat/test_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,55 @@ test_that("filter mispriming works", {
expect_true(width(res) == 3L)
})


test_that("correct_strand works", {
bamfn <- raer_example("SRR5564269_Aligned.sortedByCoord.out.md.bam")
fafn <- raer_example("human.fasta")
fp <- FilterParam(library_type = "unstranded")
rse <- pileup_sites(bamfn, fafn, param = fp)

genes <- GRanges(c(
"DHFR:200-400:+",
"SPCS3:100-200:-"
))

ans <- correct_strand(rse, genes)
expect_true("-" %in% strand(ans))

genic_rse <- subsetByOverlaps(rse, genes, ignore.strand = TRUE)
expect_equal(nrow(ans), nrow(genic_rse))

# sites overlapping ambiguous regions are discarded
genes <- GRanges(c(
"DHFR:200-400:+",
"DHFR:200-400:-"
))
ans <- correct_strand(rse, genes)
expect_true(nrow(ans) == 0L)

# - strand alts are complemented
genes <- GRanges(c(
"DHFR:200-400:-"
))

ans <- correct_strand(rse, genes)
alts <- assay(ans, "ALT")[, 1]
alts <- alts[alts != "-" & as.vector(strand(ans)) == "-"]
og_alts <- assay(rse[names(alts), ], "ALT")[, 1]
expect_true(all(og_alts != alts))
expect_true(all(nchar(og_alts) == nchar(alts)))

# check all sites are retained if non-disjoint genic regions overlap all sites
all_regions <- GRanges(seqinfo(rse))
strand(all_regions) <- c("+", "-", "+")

ans <- correct_strand(rse, all_regions)
expect_equal(nrow(ans), nrow(rse))
minus_rse <- subsetByOverlaps(ans, all_regions[2])
expect_true(all(strand(minus_rse) == "-"))

alts <- assay(minus_rse, "ALT")[, 1]
alts <- alts[alts != "-" & as.vector(strand(minus_rse)) == "-"]
og_alts <- assay(rse[names(alts), ], "ALT")[, 1]
expect_true(all(comp_bases(alts) == og_alts))
})

0 comments on commit 35e9ada

Please sign in to comment.