Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
KrystynaGrzesiak committed Aug 28, 2024
2 parents f922367 + e1ba4d7 commit 4da04db
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 69 deletions.
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(add_motifs)
export(contains_motif)
export(count_seq_kmers)
export(filter_chisq)
export(filter_fcbf)
Expand All @@ -11,7 +12,7 @@ export(filter_quipt)
export(generate_kmer_data)
export(generate_motif)
export(generate_motifs)
export(generate_sequence)
export(generate_negative_sequence)
export(generate_sequence_data)
export(get_target_additive)
export(get_target_interactions)
Expand Down
2 changes: 1 addition & 1 deletion R/motifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ add_motifs <- function(motifs, sequence) {

for(iter in na.omit(iterations)) {

if(length(iterations) == 1)
if(length(motifs) == 1)
motifs_grid <- data.frame(unlist(maximum_motifs_positions))
else
motifs_grid <- t(sapply(1:iter, function(i){
Expand Down
102 changes: 92 additions & 10 deletions R/sequences.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,40 @@

#' Simple motifs check
#'
#' This function check if at least one of provided motifs is contained in the
#' provided sequence.
#'
#' @inheritParams generate_kmer_data
#' @param sequence a vector of characters
#'
#' @return randomly generated sequences
#'
#' @examples
#' alph <- 1:4
#' motifs <- generate_motifs(alph, 3, 3, 3, 2)
#' sequence <- sample(alph, 100, replace = TRUE)
#' contains_motif(sequence, motifs)
#'
#' @export

contains_motif <- function(sequence, motifs) {

sequence <- paste0(sequence, collapse = "")

for(i in 1:length(motifs)) {
ith_motif <- motifs[[i]]
ith_motif[ith_motif == "_"] <- "."
ith_motif <- paste0(ith_motif, collapse = "")

find_motifs <- regexec(ith_motif, sequence)[[1]]

if(find_motifs != -1)
return(TRUE)
}
FALSE
}


#' Sampling from alphabet
#'
#' This function generates sequence of elements from alphabet with replacement
Expand All @@ -7,14 +44,58 @@
#' @return randomly generated sequences
#'
#' @examples
#' generate_sequence(5, 1L:4)
#' generate_sequence(10, c("a", "b", "c"))
#' generate_sequence(10, c("a", "b", "c"), c(0.6, 0.2, 0.2))
#' set.seed(2)
#' alph <- 1:4
#' motifs <- generate_motifs(alph, 3, 3, 3, 2)
#' generate_negative_sequence(5, alph, motifs)
#' generate_negative_sequence(10, c("a", "b", "c"), motifs)
#' generate_negative_sequence(10, c("a", "b", "c"), c(0.6, 0.2, 0.2), motifs)
#'
#' @export

generate_sequence <- function(sequence_length, alphabet, seqProbs = NULL){
sample(alphabet, size = sequence_length, replace = TRUE, prob = seqProbs)
generate_negative_sequence <- function(sequence_length,
alphabet,
motifs,
seqProbs = NULL,
attempts = 100){

candidate <- sample(alphabet, size = sequence_length,
replace = TRUE, prob = seqProbs)

if(contains_motif(candidate, motifs)) {

mask <- rep(FALSE, sequence_length)

for(i in 1:length(motifs)) {
ith_motif <- motifs[[i]]
ith_motif[ith_motif == "_"] <- "."
ith_motif <- paste0(ith_motif, collapse = "")

found_motifs <- gregexpr(ith_motif, paste0(candidate, collapse = ""))[[1]]

if(length(found_motifs) == 1 && found_motifs == -1) {
next
} else {
motifs_ids <- as.vector(
sapply(found_motifs, function(x) x:(x + nchar(ith_motif) - 1))
)
mask[motifs_ids] <- TRUE
}
}

attempt <- 1

while (contains_motif(candidate, motifs) & attempt <= attempts) {
attempt <- attempt + 1
candidate[mask] <- sample(candidate[mask])
}

if(attempt == attempts)
stop(paste0("It was impossible to generate negative sequence under
provided assumptions within ", attempts, " attempts."))
}

candidate
}

#' Sequences generation
Expand Down Expand Up @@ -60,17 +141,18 @@ generate_sequence_data <- function(n_seq,
motifs_ids <- sample(1:length(motifs), n_injections[i])
motifs_map[i, motifs_ids] <- 1
selected_motifs <- motifs[motifs_ids]

new_seq <- add_motifs(selected_motifs,
generate_sequence(sequence_length,
alphabet, seqProbs))
generate_negative_sequence(sequence_length,
alphabet, seqProbs))
list_of_masks[[i]] <- attr(new_seq, "masks")
sequences[i, ] <- new_seq
}

for (i in 1:(n_seq - n_pos)) {
sequences[n_pos + i, ] <- generate_sequence(sequence_length,
alphabet,
seqProbs)
sequences[n_pos + i, ] <- generate_negative_sequence(sequence_length,
alphabet,
seqProbs)
}
attr(sequences, "max_injection") <- max_injection
attr(sequences, "motifs_map") <- motifs_map
Expand Down
27 changes: 27 additions & 0 deletions man/contains_motif.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/generate_negative_sequence.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 0 additions & 27 deletions man/generate_sequence.Rd

This file was deleted.

12 changes: 6 additions & 6 deletions tests/testthat/test-motifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ test_that("generate list of motifs", {

motifs <- generate_motifs(alphabet, 1, 1, n, d, weights, validate = FALSE)

expect_equal(motifs, list(c("c", "_", "_", "a", "a", "a")))
expect_equal(motifs, list(c("c", "_", "a", "_", "a")))
})

test_that("motif injection", {
Expand All @@ -81,13 +81,13 @@ test_that("motif injection", {
motifs <- generate_motifs(letters[1:4], 2, 2, 4, 2)
injected_sequence <- add_motifs(motifs, sequence)
expect_equal(injected_sequence,
structure(c("a", "c", "b", "d", "c", "c", "d", "d"),
structure(c("d", "c", "a", "c", "c", "c", "c", "d"),
motifs = list(c("d", "c"),
c("a", "c", "_", "_", "c")),
masks = list(c(FALSE, FALSE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE),
c(TRUE, TRUE, FALSE, FALSE,
TRUE, FALSE, FALSE, FALSE))))
masks = list(c(TRUE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE),
c(FALSE, FALSE, TRUE, TRUE, FALSE,
FALSE, TRUE, FALSE))))
})

test_that("set of motifs cannot be injected", {
Expand Down
51 changes: 27 additions & 24 deletions tests/testthat/test-sequences.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,23 @@ library(testthat)

context("Sequence generation")

test_that("generate single sequence", {

test_that("generate single sequence", {
set.seed(42)
seq <- generate_sequence(5, 1L:4)
expect_equal(seq, c(1L, 1L, 1L, 1L, 2L))

motifs <- generate_motifs(1:4, 3, 1, n = 4, d = 6)
sequence <- generate_negative_sequence(5, 1L:4, motifs)
expect_equal(sequence, c(3L, 2L, 1L, 2L, 4L))
})

test_that("sequence probabilites", {
set.seed(42)
seqProbs <- runif(6)
seqProbs <- seqProbs / sum(seqProbs)

long_sequence <- generate_sequence(10e6, 1:6, seqProbs)
motifs <- generate_motifs(1:4, 3, 1, n = 4, d = 6)

long_sequence <- generate_negative_sequence(10e5, 1:6, motifs, seqProbs)
seqFreqs <- c(table(long_sequence) / length(long_sequence))
names(seqFreqs) <- NULL
expect_equal(seqProbs, c(seqFreqs), tolerance = 10e-4)
Expand Down Expand Up @@ -64,7 +69,7 @@ test_that("k-mer counts", {

kmers <- count_seq_kmers(seq_data, alph)

expect_equal(dim(kmers), c(20L, 6189L))
expect_equal(dim(kmers), c(20L, 6201L))

kmers <- count_seq_kmers(seq_data, alph, n = 1, d = 0)
expect_equal(ncol(kmers), length(alph))
Expand All @@ -87,8 +92,7 @@ test_that("k-mer data", {
kmer_data <- generate_kmer_data(n_seq, sequence_length, alph, motifs,
n_injections, fraction)


expect_equal(dim(kmer_data), c(20L, 5718L))
expect_equal(dim(kmer_data), c(20L, 5022L))
expect_equal(n_seq, nrow(kmer_data))

# correct fraction of positive sequences
Expand All @@ -113,18 +117,17 @@ test_that("Interaction model works", {
results <- generate_kmer_data(n_seq, sequence_length, alph,
motifs, n_injections = 3)
expect_identical(
c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 0L, 0L, 0L), get_target_interactions(results)
c(1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L), get_target_interactions(results)
)

expect_equal(
c(3.2669325505849, 3.2669325505849, 0.824915252858773, 3.2669325505849,
0.824915252858773, 1.01497719204053, 0.745780731318519,
2.05459328205325, 1.16902287956327, 1.77926696231589,
-1.69982326356694, -1.69982326356694, -1.69982326356694,
-1.69982326356694, -1.69982326356694, -1.69982326356694,
-1.69982326356694, -1.69982326356694, -1.69982326356694,
-1.69982326356694), get_target_interactions(results, binary = FALSE)
c(2.3877928876318, 4.19229120924138, 2.12166230170988, 0.988599377218634,
4.47515172231942, 1.58585822791792, 1.17262414400466, 0.266789596760646,
4.73356275283732, 2.3877928876318, -1.03832712979056, -1.03832712979056,
-1.03832712979056, -1.03832712979056, -1.03832712979056, -1.03832712979056,
-1.03832712979056, -1.03832712979056, -1.03832712979056, -1.03832712979056
), get_target_interactions(results, binary = FALSE)
)
})

Expand All @@ -141,12 +144,12 @@ test_that("Additive model works", {
results <- generate_kmer_data(n_seq, sequence_length, alph,
motifs, n_injections = 3)
expect_identical(
c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 1L), get_target_additive(results)
c(0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 0L), get_target_additive(results)
)

expect_equal(
c(2.05890567833558, 2.05890567833558, 0.39565183990635),
c(0.675145752727985, 1.19026312511414, 1.00235199206509),
get_target_additive(results, binary = FALSE)[1:3]
)

Expand All @@ -168,13 +171,13 @@ test_that("Logic model works", {
results <- generate_kmer_data(n_seq, sequence_length, alph,
motifs, n_injections = 3)
expect_identical(
c(0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 1L), get_target_logic(results)
c(1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), get_target_logic(results)
)

expect_equal(
c(0.854367734864354, 0.854367734864354, 0.854367734864354,
0.854367734864354), get_target_logic(results, binary = FALSE)[1:4]
c(0.583639514865354, 1.4445967883803, 1.4445967883803, 0.583639514865354
), get_target_logic(results, binary = FALSE)[1:4]
)

expressions = matrix(rbinom(n_seq*2, 1, .5), nrow = n_seq)
Expand All @@ -185,7 +188,7 @@ test_that("Logic model works", {
)

expect_identical(
c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
c(0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L),
get_target_logic(results, expressions = expressions, weights = c(1, 2))
)
Expand Down

0 comments on commit 4da04db

Please sign in to comment.