From 9ff82a820afa8c3437db22dbeb3dd41fd7a81b8a Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Wed, 27 Mar 2024 19:59:31 -0700 Subject: [PATCH] clean up print_clonotypes (#409) Tidies up the implementation of print_clonotypes in preparation for additional refactoring. * Replace mutable arguments with a return data structure. * Refactor orbit traversal to use Result and a struct. * Clean up field naming and remove needless level of Vec nesting. * Remove unused gex_low value. * Collect most gene_scan args to simplify calling. * Excise some of the gene_scan stuff into enclone proper. * Replace mutability with itertools. * Use an option for the loupe clonotype. --- enclone_args/src/load_gex.rs | 2 +- enclone_args/src/proc_args_post.rs | 16 +- enclone_args/src/process_special_arg2.rs | 10 +- enclone_core/src/defs.rs | 11 +- enclone_print/src/finish_table.rs | 15 +- enclone_print/src/gene_scan.rs | 183 +++++++-------- enclone_print/src/print_clonotypes.rs | 284 +++++++++-------------- enclone_print/src/print_utils1.rs | 6 +- enclone_print/src/print_utils2.rs | 17 +- enclone_print/src/print_utils4.rs | 23 +- enclone_ranger/src/stop.rs | 17 -- 11 files changed, 242 insertions(+), 342 deletions(-) diff --git a/enclone_args/src/load_gex.rs b/enclone_args/src/load_gex.rs index 47f81399a..ad4370ac6 100644 --- a/enclone_args/src/load_gex.rs +++ b/enclone_args/src/load_gex.rs @@ -52,7 +52,7 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { &mut json_metrics, &mut metrics, )?; - if ctl.gen_opt.gene_scan_test.is_some() && !ctl.gen_opt.accept_inconsistent { + if ctl.gen_opt.gene_scan.is_some() && !ctl.gen_opt.accept_inconsistent { let mut allf = gex_features.clone(); unique_sort(&mut allf); if allf.len() != 1 { diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 8b2b1bed2..2f250da15 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -425,7 +425,7 @@ pub fn proc_args_post( "\nIf you use ALIGN_JALIGN_CONSISTENCY, you should also use PLAIN.\n".to_string(), ); } - if ctl.gen_opt.gene_scan_exact && ctl.gen_opt.gene_scan_test.is_none() { + if ctl.gen_opt.gene_scan_exact && ctl.gen_opt.gene_scan.is_none() { return Err( "\nIt doesn't make sense to specify SCAN_EXIT unless SCAN is also specified.\n" .to_string(), @@ -680,17 +680,9 @@ pub fn proc_args_post( for i in 0..ctl.clono_filt_opt.bounds.len() { ctl.clono_filt_opt.bounds[i].require_valid_variables(ctl)?; } - if ctl.gen_opt.gene_scan_test.is_some() { - ctl.gen_opt - .gene_scan_test - .as_ref() - .unwrap() - .require_valid_variables(ctl)?; - ctl.gen_opt - .gene_scan_control - .as_ref() - .unwrap() - .require_valid_variables(ctl)?; + if let Some(gene_scan_opts) = &ctl.gen_opt.gene_scan { + gene_scan_opts.test.require_valid_variables(ctl)?; + gene_scan_opts.control.require_valid_variables(ctl)?; } Ok(()) } diff --git a/enclone_args/src/process_special_arg2.rs b/enclone_args/src/process_special_arg2.rs index a0bfe6cac..6ebf1c9f6 100644 --- a/enclone_args/src/process_special_arg2.rs +++ b/enclone_args/src/process_special_arg2.rs @@ -3,7 +3,7 @@ // Process a special argument, i.e. one that does not fit into a neat bucket. use crate::proc_args2::{is_f64_arg, is_usize_arg}; -use enclone_core::defs::EncloneControl; +use enclone_core::defs::{EncloneControl, GeneScanOpts}; use enclone_core::linear_condition::LinearCondition; use enclone_core::{require_readable_file, tilde_expand_me}; use evalexpr::build_operator_tree; @@ -363,15 +363,17 @@ pub fn process_special_arg2( if x.len() != 3 { return Err("\nArgument to SCAN must have three components.\n".to_string()); } - ctl.gen_opt.gene_scan_test = Some(LinearCondition::new(x[0])?); - ctl.gen_opt.gene_scan_control = Some(LinearCondition::new(x[1])?); let threshold = LinearCondition::new(x[2])?; for i in 0..threshold.var.len() { if threshold.var[i] != *"t" && threshold.var[i] != *"c" { return Err("\nIllegal variable in threshold for scan.\n".to_string()); } } - ctl.gen_opt.gene_scan_threshold = Some(threshold); + ctl.gen_opt.gene_scan = Some(GeneScanOpts { + test: LinearCondition::new(x[0])?, + control: LinearCondition::new(x[1])?, + threshold, + }); } else if arg.starts_with("PLOT=") { *using_plot = true; let x = arg.after("PLOT=").split(',').collect::>(); diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 407132063..b3948dcfe 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -158,9 +158,7 @@ pub struct GeneralOpt { pub summary_csv: bool, pub cr_version: String, pub nwarn: bool, - pub gene_scan_test: Option, - pub gene_scan_control: Option, - pub gene_scan_threshold: Option, + pub gene_scan: Option, pub gene_scan_exact: bool, pub clonotype_group_names: Option, pub origin_color_map: HashMap, @@ -251,6 +249,13 @@ pub struct GeneralOpt { pub session_narrative: String, } +#[derive(Clone, PartialEq)] +pub struct GeneScanOpts { + pub test: LinearCondition, + pub control: LinearCondition, + pub threshold: LinearCondition, +} + // Some plot options. Note that plot options are not allowed to affect intermediate computation. #[derive(Clone, Default)] diff --git a/enclone_print/src/finish_table.rs b/enclone_print/src/finish_table.rs index 6b036b459..e62f0e3bc 100644 --- a/enclone_print/src/finish_table.rs +++ b/enclone_print/src/finish_table.rs @@ -17,6 +17,8 @@ pub struct Sr { pub subrows: Vec>, } +/// Return the string "picture" of this clonotype. +/// Also mutates out_data in some unclear way. pub fn finish_table( n: usize, ctl: &EncloneControl, @@ -31,7 +33,6 @@ pub fn finish_table( dref: &[DonorReferenceItem], peer_groups: &[Vec<(usize, u8, u32)>], mlog: &mut Vec, - logz: &mut String, stats: &[(String, Vec)], sr: Vec, extra_args: &[String], @@ -40,7 +41,7 @@ pub fn finish_table( rord: &[usize], pass: usize, cdr3_con: &[Vec], -) { +) -> String { // Fill in exact_subclonotype_id, reorder. let nexacts = exacts.len(); @@ -242,7 +243,8 @@ pub fn finish_table( justify.push(justification(&rsi.cvars[cx][m])); } } - make_table(ctl, &mut rows, &justify, mlog, logz); + let mut logz = String::new(); + make_table(ctl, &mut rows, &justify, mlog, &mut logz); // Add phylogeny. @@ -304,9 +306,9 @@ pub fn finish_table( } if (d1 == 0) ^ (d2 == 0) { if d1 == 0 { - write!(*logz, "{} ==> {}", u1 + 1, u2 + 1).unwrap(); + write!(logz, "{} ==> {}", u1 + 1, u2 + 1).unwrap(); } else { - write!(*logz, "{} ==> {}", u2 + 1, u1 + 1).unwrap(); + write!(logz, "{} ==> {}", u2 + 1, u1 + 1).unwrap(); } let s = format!( "; u1 = {}, u2 = {}, d1 = {}, d2 = {}, d = {}\n", @@ -316,9 +318,10 @@ pub fn finish_table( d2, d ); - *logz += &s; + logz += &s; } } } } + logz } diff --git a/enclone_print/src/gene_scan.rs b/enclone_print/src/gene_scan.rs index c5b2fca53..8bdf16fb7 100644 --- a/enclone_print/src/gene_scan.rs +++ b/enclone_print/src/gene_scan.rs @@ -1,121 +1,106 @@ // Copyright (c) 2021 10X Genomics, Inc. All rights reserved. -use enclone_core::defs::EncloneControl; +use enclone_core::{defs::GeneScanOpts, linear_condition::LinearCondition}; + +pub struct InSet { + pub test: bool, + pub control: bool, +} pub fn gene_scan_test( - ctl: &EncloneControl, + opts: &GeneScanOpts, + exact: bool, stats: &[(String, Vec)], stats_orig: &[(String, Vec)], nexacts: usize, n: usize, - in_test: &mut Vec, - in_control: &mut Vec, -) { +) -> Vec { // See if we're in the test and control sets for gene scan (non-exact case). - if let Some(ref scan_test) = ctl.gen_opt.gene_scan_test { - if !ctl.gen_opt.gene_scan_exact { - let x = scan_test; - let means = x - .var - .iter() - .take(x.n()) - .map(|xn| { - stats - .iter() - .find_map(|stat| { - if stat.0 == *xn { - Some( - stat.1 - .iter() - .filter_map(|k| k.parse::().ok()) - .sum::(), - ) - } else { - None - } - }) - .unwrap_or_default() - / n as f64 - }) - .collect::>(); - - in_test.push(x.satisfied(&means)); - let x = ctl.gen_opt.gene_scan_control.as_ref().unwrap(); - let means = x - .var - .iter() - .take(x.n()) - .map(|xn| { - stats - .iter() - .find_map(|stat| { - if stat.0 == *xn { - Some( - stat.1 - .iter() - .filter_map(|k| k.parse::().ok()) - .sum::(), - ) - } else { - None - } - }) - .unwrap_or_default() - / n as f64 - }) - .collect::>(); - in_control.push(x.satisfied(&means)); - } - } - - // See if we're in the test and control sets for gene scan (exact case). - - if ctl.gen_opt.gene_scan_test.is_some() && ctl.gen_opt.gene_scan_exact { - let x = ctl.gen_opt.gene_scan_test.clone().unwrap(); - for k in 0..nexacts { - let mut means = Vec::::new(); - for xn in x.var.iter().take(x.n()) { - let mut vals = Vec::::new(); - let mut count = 0; - for stat in stats_orig { - if stat.0 == *xn { - if count == k { - for k in &stat.1 { - if let Ok(v) = k.parse::() { - vals.push(v); + if !exact { + let in_set = |cond: &LinearCondition| { + cond.satisfied( + &cond + .var + .iter() + .take(cond.n()) + .map(|xn| { + stats + .iter() + .find_map(|stat| { + if stat.0 == *xn { + Some( + stat.1 + .iter() + .filter_map(|k| k.parse::().ok()) + .sum::(), + ) + } else { + None } + }) + .unwrap_or_default() + / n as f64 + }) + .collect::>(), + ) + }; + vec![InSet { + test: in_set(&opts.test), + control: in_set(&opts.control), + }] + } else { + // See if we're in the test and control sets for gene scan (exact case). + (0..nexacts) + .map(|k| { + let x = &opts.test; + let mut means = Vec::::new(); + for xn in x.var.iter().take(x.n()) { + let mut vals = Vec::::new(); + let mut count = 0; + for stat in stats_orig { + if stat.0 == *xn { + if count == k { + for k in &stat.1 { + if let Ok(v) = k.parse::() { + vals.push(v); + } + } + break; } - break; + count += 1; } - count += 1; } + let n = vals.len() as f64; + means.push(vals.into_iter().sum::() / n); } - let n = vals.len() as f64; - means.push(vals.into_iter().sum::() / n); - } - in_test.push(x.satisfied(&means)); - let x = ctl.gen_opt.gene_scan_control.clone().unwrap(); - let mut means = Vec::::new(); - for xn in x.var.iter().take(x.n()) { - let mut vals = Vec::::new(); - let mut count = 0; - for stat in stats_orig { - if stat.0 == *xn { - if count == k { - for k in &stat.1 { - if let Ok(v) = k.parse::() { - vals.push(v); + let in_test = x.satisfied(&means); + let x = &opts.control; + let mut means = Vec::::new(); + for xn in x.var.iter().take(x.n()) { + let mut vals = Vec::::new(); + let mut count = 0; + for stat in stats_orig { + if stat.0 == *xn { + if count == k { + for k in &stat.1 { + if let Ok(v) = k.parse::() { + vals.push(v); + } } + break; } - break; + count += 1; } - count += 1; } + means.push(vals.into_iter().sum::() / n as f64); + } + let in_control = x.satisfied(&means); + InSet { + test: in_test, + control: in_control, } - means.push(vals.into_iter().sum::() / n as f64); - } - in_control.push(x.satisfied(&means)); - } + }) + .collect() } } diff --git a/enclone_print/src/print_clonotypes.rs b/enclone_print/src/print_clonotypes.rs index 54118a714..c9d488215 100644 --- a/enclone_print/src/print_clonotypes.rs +++ b/enclone_print/src/print_clonotypes.rs @@ -5,17 +5,17 @@ // // Problem: stack traces from this file consistently do not go back to the main program. -use crate::define_mat::{define_mat, Od}; +use crate::define_mat::define_mat; use crate::filter::survives_filter; use crate::finish_table::{finish_table, Sr}; -use crate::gene_scan::gene_scan_test; +use crate::gene_scan::{gene_scan_test, InSet}; use crate::loupe::{loupe_out, make_loupe_clonotype}; use crate::print_utils1::{compute_field_types, extra_args, start_gen}; use crate::print_utils2::row_fill; use crate::print_utils3::{ consensus_codon_cdr3, define_column_info, get_extra_parseables, process_complete, }; -use crate::print_utils4::{build_show_aa, compute_bu, compute_some_stats}; +use crate::print_utils4::{build_show_aa, compute_bu, compute_some_stats, SomeStats}; use crate::print_utils5::{delete_weaks, vars_and_shares}; use enclone_args::proc_args_check::involves_gex_fb; use enclone_core::allowed_vars::{CVARS_ALLOWED, CVARS_ALLOWED_PCELL, LVARS_ALLOWED}; @@ -26,10 +26,9 @@ use enclone_core::set_speakers::set_speakers; use enclone_proto::types::{Clonotype, DonorReferenceItem}; use equiv::EquivRel; use hdf5::Reader; -use itertools::izip; +use itertools::{izip, Itertools}; use qd::Double; use rayon::prelude::*; -use std::cmp::max; use std::collections::{HashMap, HashSet}; use std::fs::File; use std::io::BufWriter; @@ -37,19 +36,28 @@ use string_utils::TextUtils; use vdj_ann::refx::RefData; use vector_utils::{bin_member, bin_position, erase_if, next_diff12_3, unique_sort}; -// Print clonotypes. A key challenge here is to define the columns that represent shared -// chains. This is given below by the code that forms an equivalence relation on the CDR3_AAs. -// -// This code carries out a second function, which is to filter out exact subclonotypes in orbits -// that appear to be junk. Exactly how these should be reflected in files is TBD. -// -// Some inputs for this section: -// refdata = reference sequence data -// ctl = control parameters -// exact_clonotypes = the vector of all exact subclonotypes -// info = vector of clonotype info -// eq = equivalence relation on info +#[derive(Default)] +pub struct PrintClonotypesResult { + pub pics: Vec, + pub exacts: Vec>, + pub in_center: Vec, + pub rsi: Vec, + pub out_datas: Vec>>, + pub gene_scan_result: Vec>, +} +/// Print clonotypes. A key challenge here is to define the columns that represent shared +/// chains. This is given below by the code that forms an equivalence relation on the CDR3_AAs. +/// +/// This code carries out a second function, which is to filter out exact subclonotypes in orbits +/// that appear to be junk. Exactly how these should be reflected in files is TBD. +/// +/// Some inputs for this section: +/// refdata = reference sequence data +/// ctl = control parameters +/// exact_clonotypes = the vector of all exact subclonotypes +/// info = vector of clonotype info +/// eq = equivalence relation on info pub fn print_clonotypes( is_bcr: bool, to_bc: &HashMap<(usize, usize), Vec>, @@ -66,16 +74,9 @@ pub fn print_clonotypes( d_readers: &[Option>], ind_readers: &[Option>], h5_data: &[(usize, Vec, Vec)], - pics: &mut Vec, - exacts: &mut Vec>, - in_center: &mut Vec, - rsi: &mut Vec, - out_datas: &mut Vec>>, - tests: &mut Vec, - controls: &mut Vec, fate: &mut [HashMap], allele_data: &AlleleData, -) -> Result<(), String> { +) -> Result { let lvars = &ctl.clono_print_opt.lvars; // Compute extra args. @@ -105,11 +106,11 @@ pub fn print_clonotypes( // Define parseable output columns. The entire machinery for parseable output is controlled // by macros that begin with "speak". - let mut max_chains = 4; + let max_chains = 4; // This seems like a bug, since rsi is uninitialized upon entry to print_clonotypes. - for r in rsi.iter() { - max_chains = max(max_chains, r.mat.len()); - } + // for r in rsi.iter() { + // max_chains = max(max_chains, r.mat.len()); + // } let mut parseable_fields = Vec::::new(); set_speakers(ctl, &mut parseable_fields, max_chains); let pcols_sort = &ctl.parseable_opt.pcols_sort; @@ -182,55 +183,40 @@ pub fn print_clonotypes( // Traverse the orbits. + #[derive(Default)] + struct TraverseResult { + subdata: Option, + loupe_clonotype: Option, + out_data: Vec>, + num_cells: isize, + gene_scan_membership: Vec, + } + + /// All of the fields appear or do not, together. + struct TraverseResultSubdata { + pic: String, + exacts: Vec, + rsi: ColInfo, + in_center: bool, + } + // 0: index in reps // 1: vector of clonotype pictures // 2: vector of some clonotype info // [parallel to 1] // next to last three entries = whitelist contam, denominator for that, low gex count // added out_datas (used to be next to last three, now one more) - let mut results = Vec::<( - usize, - Vec, - Vec<(Vec, ColInfo)>, - usize, - usize, - usize, - Vec, - Vec>>, - isize, - Vec, - Vec, - Vec<(usize, String, BarcodeFate)>, - Vec, - String, - )>::new(); - for i in 0..orbits.len() { - results.push(( - i, - Vec::::new(), - Vec::<(Vec, ColInfo)>::new(), - 0, - 0, - 0, - Vec::::new(), - Vec::>>::new(), - 0, - Vec::::new(), - Vec::::new(), - Vec::new(), - Vec::new(), - String::new(), - )); - } - results.par_iter_mut().for_each(|res| { - let i = res.0; - let o = &orbits[i]; - let mut od = Vec::::new(); - for id in o { - let x: &CloneInfo = &info[*id as usize]; - od.push((x.origin.clone(), x.clonotype_id, *id)); - } - od.sort(); + let result_iter = orbits.par_iter().map(|o| { + let mut res = TraverseResult::default(); + + let od: Vec<_> = o + .iter() + .map(|id| { + let x: &CloneInfo = &info[*id as usize]; + (x.origin.clone(), x.clonotype_id, *id) + }) + .sorted() + .collect(); // Reconstruct the participating clones. This is needed because most exact subclonotypes // having more than two chains have been split up. @@ -242,7 +228,6 @@ pub fn print_clonotypes( let mut exacts = Vec::::new(); let mut mults = Vec::::new(); let mut j = 0; - let loupe_clonotypes = &mut res.6; while j < od.len() { let k = next_diff12_3(&od, j as i32) as usize; let mut mult = 0_usize; @@ -261,7 +246,8 @@ pub fn print_clonotypes( let mut bads = vec![false; exacts.len()]; let mut stats_pass1 = Vec::)>>::new(); - for pass in 1..=2 { + + for pass in [1, 2] { // Delete weak exact subclonotypes. if pass == 2 && !ctl.clono_filt_opt.protect_bads { @@ -359,7 +345,7 @@ pub fn print_clonotypes( // Generate Loupe data. if (!ctl.gen_opt.binary.is_empty() || !ctl.gen_opt.proto.is_empty()) && pass == 2 { - loupe_clonotypes.push(make_loupe_clonotype( + res.loupe_clonotype = Some(make_loupe_clonotype( exact_clonotypes, &exacts, &rsi, @@ -371,8 +357,6 @@ pub fn print_clonotypes( // Set up for parseable output. - let mut out_data = Vec::>::new(); - // Print the orbit. // ◼ An assumption of this code is that a productive pair does not have two contigs // ◼ having identical CDR3_AA sequences. At present this is not enforced by the @@ -380,8 +364,11 @@ pub fn print_clonotypes( // ◼ some unsavory workarounds below. let mut mlog = Vec::::new(); - if n >= ctl.clono_filt_opt.ncells_low - || ctl.clono_group_opt.asymmetric_center == "from_filters" + if !(n >= ctl.clono_filt_opt.ncells_low + || ctl.clono_group_opt.asymmetric_center == "from_filters") + { + continue; + } { // Start to generate parseable output. @@ -390,7 +377,7 @@ pub fn print_clonotypes( ctl, &exacts, exact_clonotypes, - &mut out_data, + &mut res.out_data, &mut mlog, &extra_args, ); @@ -414,7 +401,7 @@ pub fn print_clonotypes( &mut vars_amino, &mut shares_amino, &mut ref_diff_pos, - &mut out_data, + &mut res.out_data, ); // Mark some weak exact subclonotypes for deletion. @@ -575,11 +562,7 @@ pub fn print_clonotypes( // Compute some stats; - let mut cred = Vec::>::new(); - let mut pe = Vec::>::new(); - let mut ppe = Vec::>::new(); - let mut npe = Vec::>::new(); - compute_some_stats( + let SomeStats { cred, pe, ppe, npe } = compute_some_stats( ctl, &lvars, &exacts, @@ -587,10 +570,6 @@ pub fn print_clonotypes( gex_info, vdj_cells, &n_vdj_gex, - &mut cred, - &mut pe, - &mut ppe, - &mut npe, ); // Precompute for near and far. @@ -617,7 +596,6 @@ pub fn print_clonotypes( for u in 0..nexacts { let mut typex = vec![false; cols]; let mut row = Vec::::new(); - let mut gex_low = 0; let mut cx = Vec::>::new(); for col in 0..cols { cx.push(vec![String::new(); rsi.cvars[col].len()]); @@ -627,7 +605,7 @@ pub fn print_clonotypes( let mut d_all = vec![Vec::::new(); ex.clones.len()]; let mut ind_all = vec![Vec::::new(); ex.clones.len()]; let mut these_stats = Vec::<(String, Vec)>::new(); - let resx = row_fill( + row_fill( pass, u, ctl, @@ -643,9 +621,8 @@ pub fn print_clonotypes( &ref_diff_pos, &field_types, &mut bads, - &mut gex_low, &mut row, - &mut out_data, + &mut res.out_data, &mut cx, &mut d_all, &mut ind_all, @@ -669,16 +646,12 @@ pub fn print_clonotypes( fate, &cdr3_con, allele_data, - ); + )?; stats.append(&mut these_stats.clone()); if pass == 1 { stats_pass1.push(these_stats.clone()); } these_stats.sort_by(|a, b| a.0.cmp(&b.0)); - if let Err(e) = resx { - res.13 = e; - return; - } let mut bli = ex .clones .iter() @@ -695,7 +668,6 @@ pub fn print_clonotypes( for mut cxr in cx { row.append(&mut cxr); } - res.5 = gex_low; // Compute per-cell entries. @@ -852,21 +824,20 @@ pub fn print_clonotypes( } // See if we're in the test and control sets for gene scan. - - gene_scan_test( - ctl, - &stats, - &stats_orig, - nexacts, - n, - &mut res.9, - &mut res.10, - ); + if let Some(gene_scan_opts) = &ctl.gen_opt.gene_scan { + res.gene_scan_membership = gene_scan_test( + gene_scan_opts, + ctl.gen_opt.gene_scan_exact, + &stats, + &stats_orig, + nexacts, + n, + ); + } // Make the table. - let mut logz = String::new(); - finish_table( + let clonotype_pic = finish_table( n, ctl, &exacts, @@ -880,54 +851,35 @@ pub fn print_clonotypes( dref, &peer_groups, &mut mlog, - &mut logz, &stats, sr, &extra_args, pcols_sort, - &mut out_data, + &mut res.out_data, &rord, pass, &cdr3_con, ); // Save. - - res.1.push(logz); - res.2.push((exacts.clone(), rsi.clone())); - res.12.push(in_center); + res.subdata = Some(TraverseResultSubdata { + pic: clonotype_pic, + exacts: exacts.clone(), + rsi: rsi.clone(), + in_center, + }); for u in 0..exacts.len() { - res.8 += exact_clonotypes[exacts[u]].ncells() as isize; + res.num_cells += exact_clonotypes[exacts[u]].ncells() as isize; } } - if pass == 2 { - res.7.push(out_data); - } } + Ok(res) }); - for r in &results { - if !r.13.is_empty() { - return Err(r.13.clone()); - } - } - - for ri in &results { - for vj in &ri.11 { - fate[vj.0].insert(vj.1.clone(), vj.2.clone()); - } - } + let mut results: Vec<_> = result_iter.collect::, String>>()?; // Sort results in descending order by number of cells. - results.sort_by_key(|x| -x.8); - - // Write loupe output. - - let mut all_loupe_clonotypes = Vec::::new(); - for r in &mut results { - all_loupe_clonotypes.append(&mut r.6); - } - loupe_out(ctl, all_loupe_clonotypes, refdata, dref); + results.sort_by_key(|x| -x.num_cells); // Write out the fate of each filtered barcode. if !ctl.gen_opt.fate_file.is_empty() { @@ -937,43 +889,23 @@ pub fn print_clonotypes( serde_json::to_writer_pretty(&mut wtr, fate).map_err(|e| e.to_string())?; } - // Set up to group and print clonotypes. + let mut out = PrintClonotypesResult::default(); + let mut all_loupe_clonotypes = Vec::::new(); - for ri in results.iter_mut().take(orbits.len()) { - for (v1, (v2, &v12)) in ri.1.iter().zip(ri.2.iter().zip(ri.12.iter())) { - pics.push(v1.clone()); - exacts.push(v2.0.clone()); - rsi.push(v2.1.clone()); - in_center.push(v12); + for ri in results { + if let Some(subdata) = ri.subdata { + out.pics.push(subdata.pic); + out.exacts.push(subdata.exacts); + out.rsi.push(subdata.rsi); + out.in_center.push(subdata.in_center); } - out_datas.append(&mut ri.7); + out.out_datas.push(ri.out_data); + out.gene_scan_result.push(ri.gene_scan_membership); + all_loupe_clonotypes.extend(ri.loupe_clonotype); } - // Gather some data for gene scan. + // Write loupe output. + loupe_out(ctl, all_loupe_clonotypes, refdata, dref); - if ctl.gen_opt.gene_scan_test.is_some() && !ctl.gen_opt.gene_scan_exact { - for (i, r) in results.iter().take(orbits.len()).enumerate() { - for (&v9, &v10) in r.9.iter().zip(r.10.iter()) { - if v9 { - tests.push(i); - } - if v10 { - controls.push(i); - } - } - } - } - if ctl.gen_opt.gene_scan_test.is_some() && ctl.gen_opt.gene_scan_exact { - for (r, e) in results.iter().zip(exacts.iter()) { - for (&ej, (&v9, &v10)) in e.iter().zip(r.9.iter().zip(r.10.iter())) { - if v9 { - tests.push(ej); - } - if v10 { - controls.push(ej); - } - } - } - } - Ok(()) + Ok(out) } diff --git a/enclone_print/src/print_utils1.rs b/enclone_print/src/print_utils1.rs index 44ac7de9c..89259cf62 100644 --- a/enclone_print/src/print_utils1.rs +++ b/enclone_print/src/print_utils1.rs @@ -869,9 +869,9 @@ pub fn extra_args(ctl: &EncloneControl) -> Vec { for i in 0..ctl.clono_filt_opt.bounds.len() { extra_args.append(&mut ctl.clono_filt_opt.bounds[i].var.clone()); } - if ctl.gen_opt.gene_scan_test.is_some() { - extra_args.append(&mut ctl.gen_opt.gene_scan_test.as_ref().unwrap().var.clone()); - extra_args.append(&mut ctl.gen_opt.gene_scan_control.as_ref().unwrap().var.clone()); + if let Some(gene_scan_opts) = &ctl.gen_opt.gene_scan { + extra_args.extend(gene_scan_opts.test.var.iter().cloned()); + extra_args.extend(gene_scan_opts.control.var.iter().cloned()); } extra_args.append(&mut ctl.plot_opt.sim_mat_plot_vars.clone()); for i in 0..ctl.gen_opt.var_def.len() { diff --git a/enclone_print/src/print_utils2.rs b/enclone_print/src/print_utils2.rs index d5b362ae0..06b841185 100644 --- a/enclone_print/src/print_utils2.rs +++ b/enclone_print/src/print_utils2.rs @@ -26,12 +26,11 @@ use vdj_ann::refx::RefData; use vector_utils::next_diff12_4; use vector_utils::{bin_member, bin_position, unique_sort}; -// The following code creates a row in the enclone output table for a clonotype. Simultaneously -// it generates a row of parseable output. And it does some other things that are not described -// here. -// -// TODO: Awful interface, should work to improve. - +/// The following code creates a row in the enclone output table for a clonotype. Simultaneously +/// it generates a row of parseable output. And it does some other things that are not described +/// here. +/// +/// TODO: Awful interface, should work to improve. pub fn row_fill( pass: usize, u: usize, @@ -48,7 +47,6 @@ pub fn row_fill( ref_diff_pos: &[Vec>], field_types: &[Vec], bads: &mut [bool], - gex_low: &mut usize, row: &mut Vec, // row of human-readable output out_data: &mut [HashMap], // row of parseable output cx: &mut [Vec], @@ -261,11 +259,6 @@ pub fn row_fill( } gex_counts_unsorted = counts.clone(); counts.sort_unstable(); - for n in &counts { - if *n < 100 { - *gex_low += 1; - } - } if !counts.is_empty() { gex_sum = gex_fcounts_unsorted.iter().sum::(); gex_mean = gex_sum / gex_fcounts_unsorted.len() as f64; diff --git a/enclone_print/src/print_utils4.rs b/enclone_print/src/print_utils4.rs index 82331a6cc..e8ea0f67d 100644 --- a/enclone_print/src/print_utils4.rs +++ b/enclone_print/src/print_utils4.rs @@ -251,6 +251,13 @@ pub fn build_show_aa( // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ +pub struct SomeStats { + pub cred: Vec>, + pub pe: Vec>, + pub ppe: Vec>, + pub npe: Vec>, +} + pub fn compute_some_stats( ctl: &EncloneControl, lvars: &[String], @@ -259,16 +266,12 @@ pub fn compute_some_stats( gex_info: &GexInfo, vdj_cells: &[Vec], n_vdj_gex: &[usize], - cred: &mut Vec>, - pe: &mut Vec>, - ppe: &mut Vec>, - npe: &mut Vec>, -) { +) -> SomeStats { let nexacts = exacts.len(); // Compute "cred" stats (credibility/# of neighboring cells that are also B cells). - *cred = vec![Vec::::new(); lvars.len()]; + let mut cred = vec![Vec::::new(); lvars.len()]; for (lvar, cred) in lvars.iter().zip(cred.iter_mut()) { if lvar == "cred" { for &clonotype_id in exacts.iter().take(nexacts) { @@ -307,7 +310,7 @@ pub fn compute_some_stats( // Compute pe (PCA distance). - *pe = vec![Vec::::new(); lvars.len()]; + let mut pe = vec![Vec::::new(); lvars.len()]; for k in 0..lvars.len() { if lvars[k].starts_with("pe") { let n = lvars[k].after("pe").force_usize(); @@ -364,7 +367,7 @@ pub fn compute_some_stats( // Compute ppe (PCA distance). - *ppe = vec![Vec::::new(); lvars.len()]; + let mut ppe = vec![Vec::::new(); lvars.len()]; for k in 0..lvars.len() { if lvars[k].starts_with("ppe") { let n = lvars[k].after("ppe").force_usize(); @@ -435,7 +438,7 @@ pub fn compute_some_stats( // Compute npe (PCA distance). - *npe = vec![Vec::::new(); lvars.len()]; + let mut npe = vec![Vec::::new(); lvars.len()]; for k in 0..lvars.len() { if lvars[k].starts_with("npe") { let n = lvars[k].after("npe").force_usize(); @@ -482,6 +485,8 @@ pub fn compute_some_stats( } } } + + SomeStats { cred, pe, ppe, npe } } // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ diff --git a/enclone_ranger/src/stop.rs b/enclone_ranger/src/stop.rs index dad6d7ecb..84709820a 100644 --- a/enclone_ranger/src/stop.rs +++ b/enclone_ranger/src/stop.rs @@ -1,11 +1,9 @@ // Copyright (c) 2021 10X Genomics, Inc. All rights reserved. -use enclone_core::defs::ColInfo; use enclone_core::enclone_structs::EncloneIntermediates; use enclone_print::print_clonotypes::print_clonotypes; use hdf5::Reader; use rayon::prelude::*; -use std::collections::HashMap; pub fn main_enclone_stop_ranger(mut inter: EncloneIntermediates) -> Result<(), String> { // Unpack inputs. @@ -53,14 +51,6 @@ pub fn main_enclone_stop_ranger(mut inter: EncloneIntermediates) -> Result<(), S }); // Find and print clonotypes. (But we don't actually print them here.) - - let mut pics = Vec::::new(); - let mut exacts = Vec::>::new(); // ugly reuse of name - let mut in_center = Vec::::new(); - let mut rsi = Vec::::new(); // ditto - let mut out_datas = Vec::>>::new(); - let mut tests = Vec::::new(); - let mut controls = Vec::::new(); print_clonotypes( is_bcr, to_bc, @@ -77,13 +67,6 @@ pub fn main_enclone_stop_ranger(mut inter: EncloneIntermediates) -> Result<(), S &d_readers, &ind_readers, &h5_data, - &mut pics, - &mut exacts, - &mut in_center, - &mut rsi, - &mut out_datas, - &mut tests, - &mut controls, fate, allele_data, )?;