Skip to content

Commit

Permalink
refactor filterparam interface with c functions
Browse files Browse the repository at this point in the history
  • Loading branch information
kriemo committed Sep 1, 2023
1 parent e23a5ce commit 769ec35
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 196 deletions.
97 changes: 52 additions & 45 deletions R/pileup.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,7 @@ pileup_sites <- function(bamfiles,
)
}

fp <- .adjustParams(param, n_files)
fp <- .c_args_FilterParam(fp)
fp <- cfilterParam(param, n_files)

if (!is.null(region)) {
chroms_to_process <- region
Expand Down Expand Up @@ -549,44 +548,6 @@ empty_plp_record <- function() {
}


.adjust_arg_length <- function(obj, name, len) {
if (length(slot(obj, name)) != len) {
if (length(slot(obj, name)) == 1) {
slot(obj, name) <- rep(slot(obj, name), len)
} else {
stop(
"%s requires either 1 value, or individual values,",
"for all input bamfiles", slot
)
}
}
slot(obj, name)
}
## Check validity and adjust
.adjustParams <- function(filterParam, nFiles) {
if (!inherits(filterParam, "FilterParam")) {
stop(
"'filterParam' must inherit from 'FilterParam', got '%s'",
class(filterParam)
)
}
filterParam@min_mapq <- .adjust_arg_length(
filterParam,
"min_mapq",
nFiles
)
filterParam@only_keep_variants <- .adjust_arg_length(
filterParam,
"only_keep_variants",
nFiles
)
filterParam@library_type <- .adjust_arg_length(
filterParam,
"library_type",
nFiles
)
filterParam
}


#' @importFrom methods slot slot<- slotNames
Expand Down Expand Up @@ -624,7 +585,7 @@ setMethod(show, "FilterParam", function(object) {
})


.encode_libtype <- function(library_type = c("unstranded",
encode_libtype <- function(library_type = c("unstranded",
"fr-first-strand",
"fr-second-strand"),
n_files) {
Expand All @@ -647,8 +608,48 @@ setMethod(show, "FilterParam", function(object) {
as.integer(lib_code)
}

.c_args_FilterParam <- function(x, ...) {
fp <-.as.list_FilterParam(x)
adjust_arg_length <- function(obj, name, len) {
if (length(slot(obj, name)) != len) {
if (length(slot(obj, name)) == 1) {
slot(obj, name) <- rep(slot(obj, name), len)
} else {
stop(
"%s requires either 1 value, or individual values,",
"for all input bamfiles", slot
)
}
}
slot(obj, name)
}
## Check validity and adjust
adjustParams <- function(filterParam, nFiles) {
if (!inherits(filterParam, "FilterParam")) {
stop(
"'filterParam' must inherit from 'FilterParam', got '%s'",
class(filterParam)
)
}
filterParam@min_mapq <- adjust_arg_length(
filterParam,
"min_mapq",
nFiles
)
filterParam@only_keep_variants <- adjust_arg_length(
filterParam,
"only_keep_variants",
nFiles
)
filterParam@library_type <- adjust_arg_length(
filterParam,
"library_type",
nFiles
)
filterParam
}


c_args_FilterParam <- function(x, ...) {
fp <-as_list_FilterParam(x)

# consistent length args are populated into vectors
# note that unlisting will increase vector size greater than number of args
Expand Down Expand Up @@ -691,12 +692,18 @@ setMethod(show, "FilterParam", function(object) {
)
}

.as.list_FilterParam <- function(x, ...) {
as_list_FilterParam <- function(x, ...) {
slotnames <- slotNames(x)
names(slotnames) <- slotnames
lapply(slotnames, slot, object = x)
}

cfilterParam <- function(param, nfiles) {
fp <- adjustParams(param, nfiles)
fp <- c_args_FilterParam(fp)
fp
}

#' @param min_depth min read depth needed to report site
#' @param max_depth maximum read depth considered at each site
#' @param min_base_quality min base quality score to consider read for pileup
Expand Down Expand Up @@ -801,7 +808,7 @@ FilterParam <-
}
}

library_type <- .encode_libtype(library_type)
library_type <- encode_libtype(library_type)

## creation
.FilterParam(
Expand Down
46 changes: 12 additions & 34 deletions R/sc-pileup.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,23 +172,6 @@ pileup_cells <- function(bamfiles,

sites <- sites[seqnames(sites) %in% chroms_to_process, ]

fp <- .adjustParams(param, 1)
fp <- .as.list_FilterParam(fp)

lib_code <- fp$library_type

event_filters <- unlist(fp[c(
"trim_5p",
"trim_3p",
"splice_dist",
"indel_dist",
"homopolymer_len",
"max_mismatch_type",
"min_read_qual",
"min_splice_overhang",
"min_variant_reads"
)])

if (verbose) cli::cli_alert("Beginning pileup")
bf <- path.expand(path(bamfiles))
bfi <- path.expand(index(bamfiles))
Expand All @@ -210,9 +193,7 @@ pileup_cells <- function(bamfiles,
outfile_prefix = output_directory,
cb_tag = cb_tag,
umi_tag = umi_tag,
libtype_code = lib_code,
event_filters = event_filters,
fp = fp,
param = param,
pe = paired_end,
verbose = verbose
),
Expand All @@ -232,9 +213,7 @@ pileup_cells <- function(bamfiles,
outfile_prefix = output_directory,
cb_tag = cb_tag,
umi_tag = umi_tag,
libtype_code = lib_code,
event_filters = event_filters,
fp = fp,
param = param,
pe = paired_end,
verbose = verbose
),
Expand Down Expand Up @@ -278,8 +257,8 @@ pileup_cells <- function(bamfiles,

get_sc_pileup <- function(bamfn, index, id, sites, barcodes,
outfile_prefix, chrom,
umi_tag, cb_tag, libtype_code,
event_filters, fp, pe, verbose) {
umi_tag, cb_tag, param,
pe, verbose) {
if (length(chrom) > 0) {
sites <- sites[seqnames(sites) %in% chrom, ]
}
Expand All @@ -293,8 +272,11 @@ get_sc_pileup <- function(bamfn, index, id, sites, barcodes,
plp_outfns <- path.expand(plp_outfns)
on.exit(unlink(plp_outfns))

if (verbose) cli::cli_alert("working on group: {id}")
fp <- cfilterParam(param, 1)
lst <- gr_to_regions(sites)

if (verbose) cli::cli_alert("working on group: {id}")

res <- .Call(
".scpileup",
bamfn,
Expand All @@ -303,17 +285,13 @@ get_sc_pileup <- function(bamfn, index, id, sites, barcodes,
lst,
barcodes,
cb_tag,
event_filters,
fp$min_mapq,
fp$max_depth,
fp$min_base_quality,
fp$read_bqual,
as.integer(libtype_code),
fp$bam_flags,
fp[["int_args"]],
fp[["numeric_args"]],
fp[["library_type"]],
plp_outfns,
umi_tag,
pe,
max(fp$min_variant_reads, fp$min_depth)
max(fp$int_args["min_variant_reads"], fp$int_args["min_depth"])
)

if (res < 0) cli::cli_abort("pileup failed")
Expand Down
4 changes: 2 additions & 2 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
/* .Call calls */
extern SEXP get_region(SEXP);
extern SEXP pileup(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP scpileup(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP scpileup(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);

static const R_CallMethodDef CallEntries[] = {
{".get_region", (DL_FUNC) &get_region, 1},
{".pileup",(DL_FUNC) &pileup, 15},
{".scpileup",(DL_FUNC) &scpileup, 17},
{".scpileup",(DL_FUNC) &scpileup, 13},
{NULL, NULL, 0}
};

Expand Down
68 changes: 35 additions & 33 deletions src/plp.c
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,10 @@ static int store_counts(PLP_DATA pd, pcounts* pc, const char* ctig,
// predicated on the true or false values in only_variants
for (i = 0; i < conf->nbam; ++i) {
// check depth
if ((pc + i)->pc->total >= conf->min_depth && (pc + i)->pc->nv >= conf->min_var_reads) {
if ((pc + i)->pc->total >= conf->min_depth && (pc + i)->pc->nv >= conf->ef.min_var_reads) {
write_p = 1;
}
if ((pc + i)->mc->total >= conf->min_depth && (pc + i)->mc->nv >= conf->min_var_reads) {
if ((pc + i)->mc->total >= conf->min_depth && (pc + i)->mc->nv >= conf->ef.min_var_reads) {
write_m = 1;
}

Expand Down Expand Up @@ -542,30 +542,30 @@ static int check_read_filters(const bam_pileup1_t* p, mplp_conf_t* conf, int baq
}

// check if pos is within x dist from 5' end of read, qpos is 0-based
if (conf->trim.f5p > 0 || conf->trim.f3p > 0) {
res = check_variant_fpos(p->b, p->qpos, conf->trim.f5p, conf->trim.f3p);
if (conf->ef.trim_f5p > 0 || conf->ef.trim_f3p > 0) {
res = check_variant_fpos(p->b, p->qpos, conf->ef.trim_f5p, conf->ef.trim_f3p);
if (res < 0) return -1; // error
if (res > 0) return 1;
}

if (conf->trim.i5p > 0 || conf->trim.i3p > 0) {
res = check_variant_pos(p->b, p->qpos, conf->trim.i5p, conf->trim.i3p);
if (conf->ef.trim_i5p > 0 || conf->ef.trim_i3p > 0) {
res = check_variant_pos(p->b, p->qpos, conf->ef.trim_i5p, conf->ef.trim_i3p);
if (res < 0) return -1; // error
if (res > 0) return 1;
}

// check for splice in alignment nearby
if (conf->splice_dist && dist_to_splice(p->b, p->qpos, conf->splice_dist) >= 0) return (1);
if (conf->ef.splice_dist && dist_to_splice(p->b, p->qpos, conf->ef.splice_dist) >= 0) return (1);

// check if site in splice overhang and > min_overhang
if (conf->min_overhang) {
res = check_splice_overhang(p->b, p->qpos, conf->min_overhang);
if (conf->ef.min_overhang) {
res = check_splice_overhang(p->b, p->qpos, conf->ef.min_overhang);
if (res == -2) return -1; // error
if (res > 0) return (1);
}

// check if indel event nearby
if (conf->indel_dist && dist_to_indel(p->b, p->qpos, conf->indel_dist) >= 0) return (1);
if (conf->ef.indel_dist && dist_to_indel(p->b, p->qpos, conf->ef.indel_dist) >= 0) return (1);

return 0;
}
Expand Down Expand Up @@ -769,7 +769,7 @@ while ((ret = bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
}

// check if site is in a homopolymer
if (conf->nmer > 0 && check_simple_repeat(&ref, &ref_len, pos, conf->nmer)) continue;
if (conf->ef.nmer > 0 && check_simple_repeat(&ref, &ref_len, pos, conf->ef.nmer)) continue;

// check if read count less than min_depth
int pass_reads = 0;
Expand Down Expand Up @@ -824,9 +824,9 @@ while ((ret = bam_mplp64_auto(iter, &tid, &pos, n_plp, plp)) > 0) {
if (invert) ci = (char)comp_base[(unsigned char)c];

// check read for >= mismatch different types and at least n_mm mismatches
if (conf->n_mm_type > 0 || conf->n_mm > 0) {
if (conf->ef.n_mm_type > 0 || conf->ef.n_mm > 0) {
if ((invert && mref_b != ci) || pref_b != ci) {
int m = parse_mismatches(p->b, conf->n_mm_type, conf->n_mm);
int m = parse_mismatches(p->b, conf->ef.n_mm_type, conf->ef.n_mm);
if (m == -1) {
ret = -1;
goto fail;
Expand Down Expand Up @@ -1031,26 +1031,28 @@ static int set_mplp_conf(mplp_conf_t* conf, int n_bams,
}
}

conf->max_depth = i_args[0];
conf->min_depth = i_args[1];
conf->min_bq = i_args[2];
conf->trim.i5p = i_args[3];
conf->trim.i3p = i_args[4];
conf->indel_dist = i_args[5];
conf->splice_dist = i_args[6];
conf->min_overhang = i_args[7];
conf->nmer = i_args[8];
conf->min_var_reads = i_args[9];
conf->n_mm_type = i_args[10];
conf->n_mm = i_args[11];
conf->keep_flag[0] = i_args[12];
conf->keep_flag[1] = i_args[13];

conf->trim.f5p = d_args[0];
conf->trim.f3p = d_args[1];
conf->min_af = d_args[2];
conf->read_qual.pct = d_args[3];
conf->read_qual.minq = d_args[4];
memset(&conf->ef, 0, sizeof(efilter_t));

conf->max_depth = i_args[0];
conf->min_depth = i_args[1];
conf->min_bq = i_args[2];
conf->ef.trim_i5p = i_args[3];
conf->ef.trim_i3p = i_args[4];
conf->ef.indel_dist = i_args[5];
conf->ef.splice_dist = i_args[6];
conf->ef.min_overhang = i_args[7];
conf->ef.nmer = i_args[8];
conf->ef.min_var_reads = i_args[9];
conf->ef.n_mm_type = i_args[10];
conf->ef.n_mm = i_args[11];
conf->keep_flag[0] = i_args[12];
conf->keep_flag[1] = i_args[13];

conf->ef.trim_f5p = d_args[0];
conf->ef.trim_f3p = d_args[1];
conf->min_af = d_args[2];
conf->read_qual.pct = d_args[3];
conf->read_qual.minq = d_args[4];

conf->report_multiallelics = b_args[0];

Expand Down
Loading

0 comments on commit 769ec35

Please sign in to comment.