Skip to content

Commit

Permalink
use txdbmaker package for makeTxDb... functions (#119)
Browse files Browse the repository at this point in the history
* use txdbmaker package for makeTxDb... functions

* use devel containers for gh actions
  • Loading branch information
kriemo authored Mar 13, 2024
1 parent 4dbccb8 commit ee5157e
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 138 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ jobs:
fail-fast: false
matrix:
config:
- { os: ubuntu-latest, r: '4.3', bioc: '3.18', cont: "bioconductor/bioconductor_docker:RELEASE_3_18", rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest" }
- { os: windows-latest, r: '4.3', bioc: '3.18'}
- { os: ubuntu-latest, r: '4.4', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest" }
- { os: windows-latest, r: '4.4', bioc: 'devel'}
## Check https://github.com/r-lib/actions/tree/master/examples
## for examples using the http-user-agent
env:
Expand Down Expand Up @@ -106,16 +106,16 @@ jobs:
uses: actions/cache@v3
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-

- name: Cache R packages on Linux
if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' "
uses: actions/cache@v3
with:
path: /home/runner/work/_temp/Library
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-
key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-devel-r-4.4-

- name: Install Linux system dependencies
if: runner.os == 'Linux'
Expand Down Expand Up @@ -289,7 +289,7 @@ jobs:
if: failure()
uses: actions/upload-artifact@master
with:
name: ${{ runner.os }}-biocversion-RELEASE_3_18-r-4.3-results
name: ${{ runner.os }}-biocversion-devel-r-4.4-results
path: check

## Note that DOCKER_PASSWORD is really a token for your dockerhub
Expand Down
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: raer
Type: Package
Title: RNA editing tools in R
Version: 1.1.4
Version: 1.1.5
Authors@R: c(
person("Kent", "Riemondy", , "kent.riemondy@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0750-1273")),
Expand Down Expand Up @@ -55,7 +55,8 @@ Suggests:
scuttle,
AnnotationHub,
covr,
raerdata
raerdata,
txdbmaker
LinkingTo:
Rhtslib
SystemRequirements:
Expand Down
81 changes: 42 additions & 39 deletions R/filter_rse.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,25 +106,27 @@ get_splice_sites <- function(txdb, slop = 4) {
#' @family se-filters
#'
#' @examples
#' library(GenomicFeatures)
#' rse_adar_ifn <- mock_rse()
#'
#' # mock up a txdb with genes
#' gr <- GRanges(c(
#' "DHFR:310-330:-",
#' "DHFR:410-415:-",
#' "SSR3:100-155:-",
#' "SSR3:180-190:-"
#' ))
#' gr$source <- "raer"
#' gr$type <- "exon"
#' gr$source <- NA
#' gr$phase <- NA_integer_
#' gr$gene_id <- c(1, 1, 2, 2)
#' gr$transcript_id <- rep(c("1.1", "2.1"), each = 2)
#' txdb <- makeTxDbFromGRanges(gr)
#'
#' filter_splice_variants(rse_adar_ifn, txdb)
#' if(require("txdbmaker")) {
#' rse_adar_ifn <- mock_rse()
#'
#' # mock up a txdb with genes
#' gr <- GRanges(c(
#' "DHFR:310-330:-",
#' "DHFR:410-415:-",
#' "SSR3:100-155:-",
#' "SSR3:180-190:-"
#' ))
#' gr$source <- "raer"
#' gr$type <- "exon"
#' gr$source <- NA
#' gr$phase <- NA_integer_
#' gr$gene_id <- c(1, 1, 2, 2)
#' gr$transcript_id <- rep(c("1.1", "2.1"), each = 2)
#' txdb <- txdbmaker::makeTxDbFromGRanges(gr)
#'
#' filter_splice_variants(rse_adar_ifn, txdb)
#' }
#'
#'
#' @returns `SummarizedExperiment::SummarizedExperiment` with sites
#' adjacent to splice sites removed.
Expand Down Expand Up @@ -183,27 +185,28 @@ filter_splice_variants <- function(rse, txdb,
#' variants
#'
#' @examples
#' library(GenomicFeatures)
#'
#' rse_adar_ifn <- mock_rse()
#' rse <- rse_adar_ifn[seqnames(rse_adar_ifn) == "SPCS3"]
#'
#' # mock up a txdb with genes
#' gr <- GRanges(c(
#' "SPCS3:100-120:-",
#' "SPCS3:325-350:-"
#' ))
#' gr$source <- "raer"
#' gr$type <- "exon"
#' gr$source <- NA
#' gr$phase <- NA_integer_
#' gr$gene_id <- c(1, 2)
#' gr$transcript_id <- c("1.1", "2.1")
#' txdb <- makeTxDbFromGRanges(gr)
#'
#' rse <- filter_multiallelic(rse)
#' filter_clustered_variants(rse, txdb, variant_dist = 10)
#' if(require("txdbmaker")){
#' rse_adar_ifn <- mock_rse()
#' rse <- rse_adar_ifn[seqnames(rse_adar_ifn) == "SPCS3"]
#'
#' # mock up a txdb with genes
#' gr <- GRanges(c(
#' "SPCS3:100-120:-",
#' "SPCS3:325-350:-"
#' ))
#' gr$source <- "raer"
#' gr$type <- "exon"
#' gr$source <- NA
#' gr$phase <- NA_integer_
#' gr$gene_id <- c(1, 2)
#' gr$transcript_id <- c("1.1", "2.1")
#' txdb <- txdbmaker::makeTxDbFromGRanges(gr)
#'
#' rse <- filter_multiallelic(rse)
#' filter_clustered_variants(rse, txdb, variant_dist = 10)
#'
#' }
#'
#' @family se-filters
#'
#' @return `SummarizedExperiment::SummarizedExperiment` with sites removed from
Expand Down
39 changes: 20 additions & 19 deletions man/filter_clustered_variants.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 20 additions & 18 deletions man/filter_splice_variants.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

107 changes: 55 additions & 52 deletions tests/testthat/test_rse.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
pkgs <- c(
"GenomicRanges", "GenomicFeatures",
"SummarizedExperiment", "rtracklayer"
"GenomicRanges", "SummarizedExperiment", "rtracklayer", "GenomicFeatures"
)

msg <- lapply(pkgs, function(x) {
Expand All @@ -16,7 +15,7 @@ sites <- import(bedfn)
res <- pileup_sites(bamfn, fafn)

test_that("annot_snps works", {
if (require(SNPlocs.Hsapiens.dbSNP144.GRCh38)) {
if (require("SNPlocs.Hsapiens.dbSNP144.GRCh38")) {
gr <- GRanges(rep("22", 10),
IRanges(
seq(10510077,
Expand All @@ -43,7 +42,7 @@ test_that("annot_snps works", {
expect_true(is(res, "RangedSummarizedExperiment"))
expect_true("RefSNP_id" %in% colnames(mcols(res)))

if (require(BSgenome.Hsapiens.NCBI.GRCh38)) {
if (require("BSgenome.Hsapiens.NCBI.GRCh38")) {
res <- annot_snps(se,
dbsnp = SNPlocs.Hsapiens.dbSNP144.GRCh38,
genome = BSgenome.Hsapiens.NCBI.GRCh38
Expand Down Expand Up @@ -121,62 +120,66 @@ mock_txdb <- function() {
gr$phase <- NA_integer_
gr$gene_id <- c(1, 1, 2, 2)
gr$transcript_id <- rep(c("1.1", "2.1"), each = 2)
makeTxDbFromGRanges(gr)
txdbmaker::makeTxDbFromGRanges(gr)
}

test_that("get_splice_sites and filtering works", {
txdb <- mock_txdb()
spl_sites <- get_splice_sites(txdb,
slop = 3
)
expect_true(all(width(spl_sites) == 6))
expect_error(get_splice_sites(spl_sites))

rse_adar_ifn <- mock_rse()
expect_message(rse <- filter_splice_variants(rse_adar_ifn, txdb))
n_removed <- nrow(subsetByOverlaps(rse_adar_ifn, rse, invert = TRUE))
expect_equal(n_removed, 5L)
expect_true("site_DHFR_328_2" %in% rownames(rse_adar_ifn))
expect_false("site_DHFR_328_2" %in% rownames(rse))
if("txdbmaker" %in% rownames(installed.packages())) {
txdb <- mock_txdb()
spl_sites <- get_splice_sites(txdb,
slop = 3
)
expect_true(all(width(spl_sites) == 6))
expect_error(get_splice_sites(spl_sites))

rse_adar_ifn <- mock_rse()
expect_message(rse <- filter_splice_variants(rse_adar_ifn, txdb))
n_removed <- nrow(subsetByOverlaps(rse_adar_ifn, rse, invert = TRUE))
expect_equal(n_removed, 5L)
expect_true("site_DHFR_328_2" %in% rownames(rse_adar_ifn))
expect_false("site_DHFR_328_2" %in% rownames(rse))
}
})

test_that("removing clustered variants works", {
txdb <- mock_txdb()

rse_adar_ifn <- mock_rse()
expect_message(rse <- filter_multiallelic(rse_adar_ifn))

gr <- rowRanges(rse)
nd <- distanceToNearest(gr)
gr1 <- gr[mcols(nd)$distance > 100]
expect_warning(expect_warning(gr2 <- filter_clustered_variants(rse, txdb)))
expect_true(identical(rowRanges(gr2), gr1))

# check that transcript based removal works
# DHFR_328 -> DHFR_423 overlaps 310-330 -> 410-430 splice
ex <- exons(txdb)
rse_ex <- subsetByOverlaps(rse, ex)

expect_warning(
gr2 <- filter_clustered_variants(rse_ex,
txdb,
regions = "transcript",
variant_dist = 20
if("txdbmaker" %in% rownames(installed.packages())) {
txdb <- mock_txdb()

rse_adar_ifn <- mock_rse()
expect_message(rse <- filter_multiallelic(rse_adar_ifn))

gr <- rowRanges(rse)
nd <- distanceToNearest(gr)
gr1 <- gr[mcols(nd)$distance > 100]
expect_warning(expect_warning(gr2 <- filter_clustered_variants(rse, txdb)))
expect_true(identical(rowRanges(gr2), gr1))

# check that transcript based removal works
# DHFR_328 -> DHFR_423 overlaps 310-330 -> 410-430 splice
ex <- exons(txdb)
rse_ex <- subsetByOverlaps(rse, ex)

expect_warning(
gr2 <- filter_clustered_variants(rse_ex,
txdb,
regions = "transcript",
variant_dist = 20
)
)
)

expt <- rowRanges(subsetByOverlaps(rse_ex, gr2, invert = T))
ds <- distanceToNearest(mapToTranscripts(expt, txdb,
extractor.fun = exonsBy
))
expect_true(all(mcols(ds)$distance < 20))

expect_warning(
gr2 <- filter_clustered_variants(rse_ex, txdb,
variant_dist = 25
expt <- rowRanges(subsetByOverlaps(rse_ex, gr2, invert = T))
ds <- distanceToNearest(mapToTranscripts(expt, txdb,
extractor.fun = exonsBy
))
expect_true(all(mcols(ds)$distance < 20))
expect_warning(
gr2 <- filter_clustered_variants(rse_ex, txdb,
variant_dist = 25
)
)
)
expect_equal(length(gr2), 3L)
expect_equal(length(gr2), 3L)
}
})

test_that("calc_confidence works", {
Expand Down
2 changes: 1 addition & 1 deletion vignettes/raer.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ region of chr4.

- `TxDb.Hsapiens.UCSC.hg38.knownGene`, a database of transcript models.
Alternatively these can be generated from a `.gtf` file using
`makeTxDbFromGRanges()` from the `GenomicFeatures` package.
`makeTxDbFromGRanges()` from the `txdbmaker` package.

- RepeatMasker annotations, which can be obtained from the `AnnotationHub()`
for hg38, as shown in the [bulk RNA-seq](#bulk) tutorial.
Expand Down

0 comments on commit ee5157e

Please sign in to comment.