Skip to content

Commit

Permalink
returns scores instead of chosen k-mers; adds some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
KrystynaGrzesiak committed Sep 10, 2024
1 parent cc7a02e commit add67a3
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 35 deletions.
37 changes: 21 additions & 16 deletions R/filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
#' @param target a numeric response variable
#' @param kmers a matrix of kmers with named columns or an object obtained via
#' \code{\link{generate_kmer_data}} function.
#' @param significance_level a number from 0-1 interval denoting significance
#' level for testing.
#' @param ... other arguments for \code{\link[biogram]{test_features}} function.
#'
#' @return a character vector of names of selected kmers
#' @return a numeric vector of named p-values corresponding to k-mers in the
#' feature space.
#'
#' @details
#' This function uses \code{\link[biogram]{test_features}}
Expand All @@ -29,11 +28,11 @@
#'
#' @export

filter_quipt <- function(target, kmers, significance_level = 0.05, ...) {
filter_quipt <- function(target, kmers, ...) {

pvals <- biogram::test_features(target, kmers, ...)

names(pvals[which(pvals < significance_level)])
pvals
}


Expand All @@ -45,7 +44,8 @@ filter_quipt <- function(target, kmers, significance_level = 0.05, ...) {
#'
#' @inheritParams filter_quipt
#'
#' @return a character vector of names of selected kmers
#' @return a numeric vector of named p-values corresponding to k-mers in the
#' feature space.
#'
#' @details
#' This function uses \code{\link[stats]{chisq.test}}
Expand All @@ -62,9 +62,9 @@ filter_quipt <- function(target, kmers, significance_level = 0.05, ...) {
#'
#' @export

filter_chisq <- function(target, kmers, significance_level = 0.05) {
filter_chisq <- function(target, kmers) {

pvals <- unlist(lapply(1:ncol(kmers[, 1:10]), function(i) {
pvals <- unlist(lapply(1:ncol(kmers), function(i) {

ith_kmer <- as.vector(kmers[, i])
suppressWarnings({
Expand All @@ -74,7 +74,9 @@ filter_chisq <- function(target, kmers, significance_level = 0.05) {
pval
}))

colnames(kmers)[which(pvals < significance_level)]
names(pvals) <- colnames(kmers)

pvals
}


Expand Down Expand Up @@ -131,9 +133,9 @@ filter_fcbf <- function(target, kmers, thresh = 0.25) {
#' @param method a character name of a filter type. One of "infogain",
#' "gainratio" or "symuncert". For more details see
#' \code{\link[FSelectorRcpp]{information_gain}}.
#' @param thresh a numeric threshold for variable selection.
#'
#' @return a character vector of names of selected kmers
#' @return a numeric vector of named p-values corresponding to k-mers in the
#' feature space.
#'
#' @details
#' This function uses \code{\link[FSelectorRcpp]{information_gain}}
Expand All @@ -150,14 +152,18 @@ filter_fcbf <- function(target, kmers, thresh = 0.25) {
#'
#' @export

filter_ig <- function(target, kmers, method, thresh) {
filter_ig <- function(target, kmers, method) {

method <- match.arg(method, c("infogain", "gainratio", "symuncert"))

res <- information_gain(x = kmers, y = target, discIntegers = FALSE,
type = "gainratio")

res[["attributes"]][res[["importance"]] < thresh]
scores <- res[["importance"]]

names(scores) <- res[["attributes"]]

scores
}


Expand All @@ -179,7 +185,6 @@ filter_ig <- function(target, kmers, method, thresh) {
#'
#' @param method a character name of a filter type. One of "MIM", "MRMR", "JMI",
#' "JMIM", "DISR", "NJMIM", "CMIM", "CMI". See details for more information.
#' @param thresh a threshold for corresponfing score.
#'
#' @return a character vector of names of selected kmers
#'
Expand All @@ -206,7 +211,7 @@ filter_ig <- function(target, kmers, method, thresh) {
#'
#' @export

filter_praznik <- function(target, kmers, method, thresh) {
filter_praznik <- function(target, kmers, method) {

praznik_methods <-
c("MIM", "MRMR", "JMI", "JMIM", "DISR", "NJMIM", "CMIM", "CMI")
Expand All @@ -215,7 +220,7 @@ filter_praznik <- function(target, kmers, method, thresh) {

res <- get(method)(X = as.data.frame(as.matrix(kmers)), Y = target,
k = ncol(kmers))
names(res[["score"]])[res[["score"]] < thresh]
res[["score"]]
}


Expand Down
8 changes: 3 additions & 5 deletions man/filter_chisq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/filter_ic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions man/filter_ig.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 1 addition & 3 deletions man/filter_praznik.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 3 additions & 5 deletions man/filter_quipt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions tests/testthat/test-filtering.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@

library(kmerFilters)
library(testthat)


test_that("list filtering methods works", {
expect_equal(list_filters(),
c("filter_chisq", "filter_fcbf", "filter_ic", "filter_ig",
"filter_praznik", "filter_quipt"))
})



n_seq <- 50
sequence_length <- 10
alph <- letters[1:4]
motifs <- generate_motifs(alph, 4, 4, 4, 6)
kmers <- generate_kmer_data(n_seq, sequence_length, alph,
motifs, n_injections = 4)
target <- get_target_additive(kmers)


test_that("quipt filter works", {
res <- filter_quipt(target, kmers)

expect_equal(res[1:10],
c(a_0 = 0.433460286708944, d_0 = 0.537216985854405,
b_0 = 0.862413139242882, d.c_0 = 1, d.b_0 = 0.904876795162511,
d.d_0 = 0.0668944277862377, b.a_0 = 0.904876795162511,
a.d_0 = 0.741734233008649, a.a_0 = 0.904876795162511,
c.b_0 = 0.904876795162511))
})


test_that("chisq filter works", {
res <- filter_chisq(target, kmers)

expect_equal(res[1:10],
c(a_0 = 0.359163595525953, d_0 = 0.619081488098256,
b_0 = 0.772148036951171, c_0 = 1, d.c_0 = 0.999999999999999,
d.b_0 = 0.693010969821401, d.d_0 = 0.00137293131428169,
b.a_0 = 0.504838137702313, a.d_0 = 0.113732669593295,
a.a_0 = 0.438514651784921))
})


test_that("fcbf filter works", {
res <- filter_fcbf(target, kmers)

expect_equal(res, c("d.d.c_0.4", "d.d.b_0.1"))
})


test_that("FSelectorRcpp filter works", {
res <- filter_ig(target, kmers, method = "infogain")

expect_equal(res[1:5], c(a_0 = 0.0926487792836429, d_0 = NaN, b_0 = NaN,
c_0 = NaN, d.a_0 = 0.0318668917281004))
})


test_that("FSelectorRcpp filter works", {
res <- filter_ig(target, kmers, method = "infogain")

expect_equal(res[1:5], c(a_0 = 0.0926487792836429, d_0 = NaN, b_0 = NaN,
c_0 = NaN, d.a_0 = 0.0318668917281004))
})


test_that("praznik filter works", {
res <- filter_praznik(target, kmers, method = "MIM")

expect_equal(res[1:5], c(d.d.c_1.4 = 0.309448173049744,
b.c.a.d_0.0.1 = 0.309448173049744,
b.c.b.c_0.5.1 = 0.309448173049744,
b.c.d.b_0.2.2 = 0.309448173049744,
b.c.d_0.2 = 0.29557289101432))
})

0 comments on commit add67a3

Please sign in to comment.