diff --git a/.cargo/config b/.cargo/config index 23702a96e..d2621da5d 100644 --- a/.cargo/config +++ b/.cargo/config @@ -60,7 +60,6 @@ rustflags = [ # --- lint allow --- "-A", "clippy::comparison_chain", # TODO: burn down these allow exceptions and then deny them - "-A", "clippy::type_complexity", "-A", "clippy::too_many_arguments", "-A", "clippy::needless_range_loop", ] diff --git a/enclone/src/join.rs b/enclone/src/join.rs index 62b976702..a4fcae3c4 100644 --- a/enclone/src/join.rs +++ b/enclone/src/join.rs @@ -7,11 +7,12 @@ // contigs that represent the sequence of the "other" allele. This does not look easy to // execute. +use enclone_core::enclone_structs::JoinInfo; use vdj_ann::{annotate, refx}; use self::annotate::print_annotations; use self::refx::RefData; -use crate::join2::finish_join; +use crate::join2::{finish_join, JoinResult}; use crate::join_core::join_core; use debruijn::dna_string::DnaString; use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype, PotentialJoin}; @@ -34,7 +35,7 @@ pub fn join_exacts( ctl: &EncloneControl, exact_clonotypes: &[ExactClonotype], info: &[CloneInfo], - join_info: &mut Vec<(usize, usize, bool, Vec)>, + join_info: &mut Vec, raw_joins: &mut Vec<(i32, i32)>, sr: &[Vec], dref: &[DonorReferenceItem], @@ -66,14 +67,7 @@ pub fn join_exacts( // Find potential joins. 
let mut i = 0; - let mut results = Vec::<( - usize, // i - usize, // j - usize, // joins - usize, // errors - Vec<(usize, usize, bool, Vec)>, // log+ (index1, index2, err?, log) - Vec<(usize, usize)>, // joinlist - )>::new(); + let mut results = Vec::::new(); while i < info.len() { let mut j = i + 1; while j < info.len() { @@ -90,32 +84,18 @@ pub fn join_exacts( } j += 1; } - results.push(( - i, - j, - 0, - 0, - Vec::<(usize, usize, bool, Vec)>::new(), - Vec::<(usize, usize)>::new(), - )); + results.push(JoinResult::new(i, j)); i = j; } if !ctl.silent { println!("comparing {} simple clonotypes", info.len()); } - let joinf = |r: &mut ( - usize, - usize, - usize, - usize, - Vec<(usize, usize, bool, Vec)>, - Vec<(usize, usize)>, - )| { - let (i, j) = (r.0, r.1); - let joins = &mut r.2; - let errors = &mut r.3; - let logplus = &mut r.4; + let joinf = |r: &mut JoinResult| { + let (i, j) = (r.i, r.j); + let joins = &mut r.joins; + let errors = &mut r.errors; + let logplus = &mut r.join_info; let mut pot = Vec::>::new(); // Main join logic. If you change par_iter_mut to iter_mut above, and run happening, @@ -228,7 +208,7 @@ pub fn join_exacts( // Save join and tally stats. 
- r.5.push((k1, k2)); + r.join_list.push((k1, k2)); *joins += 1; if err { *errors += 1; @@ -592,16 +572,21 @@ pub fn join_exacts( } } */ - logplus.push((info[k1].clonotype_index, info[k2].clonotype_index, err, log)); + logplus.push(JoinInfo { + index1: info[k1].clonotype_index, + index2: info[k2].clonotype_index, + err, + log, + }); } }; results.par_iter_mut().for_each(joinf); for r in &results { - for &j in &r.5 { + for &j in &r.join_list { raw_joins.push((j.0 as i32, j.1 as i32)); } } - finish_join(ctl, info, &results, join_info) + finish_join(ctl, info, results, join_info) } diff --git a/enclone/src/join2.rs b/enclone/src/join2.rs index 1097da8a8..a96801596 100644 --- a/enclone/src/join2.rs +++ b/enclone/src/join2.rs @@ -2,39 +2,54 @@ // This file provides the tail end code for join.rs, plus a small function used there. -use enclone_core::defs::{CloneInfo, EncloneControl}; +use enclone_core::{ + defs::{CloneInfo, EncloneControl}, + enclone_structs::JoinInfo, +}; use equiv::EquivRel; use stats_utils::percent_ratio; use vector_utils::next_diff1_2; -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ +pub struct JoinResult { + pub i: usize, + pub j: usize, + pub joins: usize, + pub errors: usize, + pub join_info: Vec, + pub join_list: Vec<(usize, usize)>, +} + +impl JoinResult { + pub fn new(i: usize, j: usize) -> Self { + Self { + i, + j, + joins: 0, + errors: 0, + join_info: Default::default(), + join_list: Default::default(), + } + } +} pub fn finish_join( ctl: &EncloneControl, info: &[CloneInfo], - results: &[( - usize, - usize, - usize, - usize, - Vec<(usize, usize, bool, Vec)>, - Vec<(usize, usize)>, - )], - join_info: &mut Vec<(usize, usize, bool, Vec)>, + results: Vec, + join_info: &mut Vec, ) -> EquivRel { // Tally results. - + // Make equivalence relation. 
let (mut joins, mut errors) = (0, 0); + let mut eq: EquivRel = EquivRel::new(info.len() as i32); + for r in results { - joins += r.2; - errors += r.3; - for i in &r.4 { - let u1 = i.0; - let u2 = i.1; - let err = i.2; - let log = i.3.clone(); - join_info.push((u1, u2, err, log)); + joins += r.joins; + errors += r.errors; + join_info.extend(r.join_info.into_iter()); + for j in &r.join_list { + eq.join(j.0 as i32, j.1 as i32); } } if !ctl.silent { @@ -43,14 +58,12 @@ pub fn finish_join( println!("{errors} errors"); } } - - // Make equivalence relation. - - let mut eq: EquivRel = EquivRel::new(info.len() as i32); - for r in results { - for j in &r.5 { - eq.join(j.0 as i32, j.1 as i32); - } + // Report whitelist contamination. + // WARNING: THIS ONLY WORKS IF YOU RUN WITH CLONES=1 AND NO OTHER FILTERS. + // TODO: we should actually make an assertion that this is true. + if ctl.clono_filt_opt_def.whitef || ctl.clono_print_opt.cvars.iter().any(|var| var == "white") { + let bad_rate = percent_ratio(joins, errors); + println!("whitelist contamination rate = {bad_rate:.2}%"); } // Join orbits that cross subclones of a clone. This arose because we split up multi-chain @@ -70,25 +83,5 @@ pub fn finish_join( i = j; } - // Tally whitelist contamination. - // WARNING: THIS ONLY WORKS IF YOU RUN WITH CLONES=1 AND NO OTHER FILTERS. 
- - let mut white = ctl.clono_filt_opt_def.whitef; - for j in 0..ctl.clono_print_opt.cvars.len() { - if ctl.clono_print_opt.cvars[j] == "white" { - white = true; - } - } - if white { - let mut bads = 0; - let mut denom = 0; - for r in results { - bads += r.2; - denom += r.3; - } - let bad_rate = percent_ratio(bads, denom); - println!("whitelist contamination rate = {bad_rate:.2}%"); - } - eq } diff --git a/enclone_core/src/enclone_structs.rs b/enclone_core/src/enclone_structs.rs index 6eec61a06..ce8be4fde 100644 --- a/enclone_core/src/enclone_structs.rs +++ b/enclone_core/src/enclone_structs.rs @@ -57,10 +57,18 @@ pub struct EncloneExacts { pub info: Vec, pub orbits: Vec>, pub vdj_cells: Vec>, - pub join_info: Vec<(usize, usize, bool, Vec)>, + pub join_info: Vec, pub drefs: Vec, pub sr: Vec>, pub fate: Vec>, // GETS MODIFIED SUBSEQUENTLY pub is_bcr: bool, pub allele_data: AlleleData, } + +#[derive(Clone)] +pub struct JoinInfo { + pub index1: usize, + pub index2: usize, + pub err: bool, + pub log: Vec, +} diff --git a/enclone_core/src/mammalian_fixed_len.rs b/enclone_core/src/mammalian_fixed_len.rs index 6ab76175c..d54ebd8c6 100644 --- a/enclone_core/src/mammalian_fixed_len.rs +++ b/enclone_core/src/mammalian_fixed_len.rs @@ -9,6 +9,7 @@ use vdj_ann::vdj_features::{cdr1_start, cdr2_start, cdr3_start, fr1_start, fr2_s // {chain, feature, len, {{(count, amino_acid)}}} +#[allow(clippy::type_complexity)] pub fn mammalian_fixed_len() -> Vec<(&'static str, &'static str, usize, Vec>)> { const X: &str = include_str!("mammalian_fixed_len.table"); X.lines() diff --git a/enclone_print/src/define_mat.rs b/enclone_print/src/define_mat.rs index 8c6e6dc03..4e75d2992 100644 --- a/enclone_print/src/define_mat.rs +++ b/enclone_print/src/define_mat.rs @@ -53,11 +53,11 @@ fn joiner( e } -pub fn setup_define_mat( - orbit: &[i32], - info: &[CloneInfo], -) -> (Vec<(Vec, usize, i32)>, Vec) { - let mut od = Vec::<(Vec, usize, i32)>::new(); +// TODO: refactor this into a struct +pub type Od 
= (Vec, usize, i32); + +pub fn setup_define_mat(orbit: &[i32], info: &[CloneInfo]) -> (Vec, Vec) { + let mut od = Vec::::new(); for id in orbit { let x: &CloneInfo = &info[*id as usize]; od.push((x.origin.clone(), x.clonotype_id, *id)); @@ -83,7 +83,7 @@ pub fn define_mat( ctl: &EncloneControl, exact_clonotypes: &[ExactClonotype], exacts: &[usize], - od: &[(Vec, usize, i32)], + od: &[Od], info: &[CloneInfo], raw_joins: &[Vec], refdata: &RefData, diff --git a/enclone_print/src/finish_table.rs b/enclone_print/src/finish_table.rs index 1e3f174f6..6b036b459 100644 --- a/enclone_print/src/finish_table.rs +++ b/enclone_print/src/finish_table.rs @@ -12,6 +12,11 @@ use string_utils::TextUtils; use vdj_ann::refx::RefData; use vector_utils::bin_member; +pub struct Sr { + pub row: Vec, + pub subrows: Vec>, +} + pub fn finish_table( n: usize, ctl: &EncloneControl, @@ -28,7 +33,7 @@ pub fn finish_table( mlog: &mut Vec, logz: &mut String, stats: &[(String, Vec)], - sr: &mut [(Vec, Vec>, Vec>, usize)], + sr: Vec, extra_args: &[String], pcols_sort: &[String], out_data: &mut Vec>, @@ -144,10 +149,10 @@ pub fn finish_table( // Finish building table content. - for (j, srj) in sr.iter_mut().enumerate() { - srj.0[0] = format!("{}", j + 1); // row number (#) - rows.push(srj.0.clone()); - rows.extend(srj.1.clone()); + for (j, mut srj) in sr.into_iter().enumerate() { + srj.row[0] = format!("{}", j + 1); // row number (#) + rows.push(srj.row); + rows.extend(srj.subrows); } // Add sum and mean rows. diff --git a/enclone_print/src/print_clonotypes.rs b/enclone_print/src/print_clonotypes.rs index 186dcd8aa..54118a714 100644 --- a/enclone_print/src/print_clonotypes.rs +++ b/enclone_print/src/print_clonotypes.rs @@ -5,9 +5,9 @@ // // Problem: stack traces from this file consistently do not go back to the main program. 
-use crate::define_mat::define_mat; +use crate::define_mat::{define_mat, Od}; use crate::filter::survives_filter; -use crate::finish_table::finish_table; +use crate::finish_table::{finish_table, Sr}; use crate::gene_scan::gene_scan_test; use crate::loupe::{loupe_out, make_loupe_clonotype}; use crate::print_utils1::{compute_field_types, extra_args, start_gen}; @@ -225,7 +225,7 @@ pub fn print_clonotypes( results.par_iter_mut().for_each(|res| { let i = res.0; let o = &orbits[i]; - let mut od = Vec::<(Vec, usize, i32)>::new(); + let mut od = Vec::::new(); for id in o { let x: &CloneInfo = &info[*id as usize]; od.push((x.origin.clone(), x.clonotype_id, *id)); @@ -524,7 +524,7 @@ pub fn print_clonotypes( // Now build table content. - let mut sr = Vec::<(Vec, Vec>, Vec>, usize)>::new(); + let mut sr = Vec::::new(); let mut groups = HashMap::>::new(); for lvar in &lvars { if let Some(Ok(d)) = lvar.strip_prefix('g').map(str::parse::) { @@ -712,7 +712,6 @@ pub fn print_clonotypes( exact_clonotypes, &mut row, &mut subrows, - &varmat, have_gex, gex_info, &rsi, @@ -883,7 +882,7 @@ pub fn print_clonotypes( &mut mlog, &mut logz, &stats, - &mut sr, + sr, &extra_args, pcols_sort, &mut out_data, diff --git a/enclone_print/src/print_utils4.rs b/enclone_print/src/print_utils4.rs index 591a65356..82331a6cc 100644 --- a/enclone_print/src/print_utils4.rs +++ b/enclone_print/src/print_utils4.rs @@ -13,6 +13,8 @@ use string_utils::TextUtils; use vdj_ann::refx::RefData; use vector_utils::{bin_member, bin_position, bin_position1_2, unique_sort}; +use crate::finish_table::Sr; + // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ pub fn get_gex_matrix_entry( @@ -495,11 +497,10 @@ pub fn compute_bu( exact_clonotypes: &[ExactClonotype], row: &mut [String], subrows: &mut Vec>, - varmat: &[Vec>], have_gex: bool, gex_info: &GexInfo, rsi: &ColInfo, - sr: &mut Vec<(Vec, Vec>, Vec>, usize)>, + sr: &mut Vec, fate: &[HashMap], nd_fields: &[String], 
alt_bcs: &[String], @@ -870,5 +871,8 @@ pub fn compute_bu( subrows.push(row); } } - sr.push((row.to_vec(), subrows.clone(), varmat[u].clone(), u)); + sr.push(Sr { + row: row.to_vec(), + subrows: subrows.clone(), + }); } diff --git a/enclone_stuff/src/disintegrate.rs b/enclone_stuff/src/disintegrate.rs index f74d6dcf1..70e887dc6 100644 --- a/enclone_stuff/src/disintegrate.rs +++ b/enclone_stuff/src/disintegrate.rs @@ -3,7 +3,10 @@ // If NWEAK_ONESIES is not specified, disintegrate certain onesie clonotypes into single cell // clonotypes. This requires editing of exact_clonotypes, info, eq, join_info and raw_joins. -use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype}; +use enclone_core::{ + defs::{CloneInfo, EncloneControl, ExactClonotype}, + enclone_structs::JoinInfo, +}; use equiv::EquivRel; use std::collections::HashMap; @@ -15,7 +18,7 @@ pub fn disintegrate_onesies( eq: &mut EquivRel, exact_clonotypes: &mut Vec, info: &mut Vec, - join_info: &mut Vec<(usize, usize, bool, Vec)>, + join_info: &mut Vec, raw_joins: &mut Vec<(i32, i32)>, ) { if ctl.clono_filt_opt_def.weak_onesies { @@ -55,13 +58,13 @@ pub fn disintegrate_onesies( } let mut join_info2 = Vec::new(); for ji in join_info.iter() { - let (u1, u2) = (ji.0, ji.1); + let (u1, u2) = (ji.index1, ji.index2); for v1 in &to_exact_new[u1] { join_info2.reserve(to_exact_new[u2].len()); for v2 in &to_exact_new[u2] { let mut x = ji.clone(); - x.0 = *v1; - x.1 = *v2; + x.index1 = *v1; + x.index2 = *v2; join_info2.push(x); } } diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs index 5199b4024..4aed8210a 100644 --- a/enclone_stuff/src/start.rs +++ b/enclone_stuff/src/start.rs @@ -21,9 +21,9 @@ use enclone::misc3::sort_tig_bc; use enclone_args::read_json::{parse_json_annotations_files, Annotations}; use enclone_core::barcode_fate::BarcodeFate; use enclone_core::defs::{AlleleData, CloneInfo}; -use enclone_core::enclone_structs::{EncloneExacts, EncloneIntermediates, EncloneSetup}; +use 
enclone_core::enclone_structs::{EncloneExacts, EncloneIntermediates, EncloneSetup, JoinInfo}; use enclone_core::hcomp::heavy_complexity; -use enclone_print::define_mat::{define_mat, setup_define_mat}; +use enclone_print::define_mat::{define_mat, setup_define_mat, Od}; use enclone_print::loupe::make_donor_refs; use equiv::EquivRel; use io_utils::{fwriteln, open_for_read}; @@ -360,7 +360,7 @@ pub fn main_enclone_start(setup: EncloneSetup) -> Result)>::new(); + let mut join_info = Vec::::new(); let mut raw_joins = Vec::<(i32, i32)>::new(); let mut eq: EquivRel = join_exacts( is_bcr, @@ -623,7 +623,7 @@ pub fn main_enclone_start(setup: EncloneSetup) -> Result, usize, i32)>::new(); + let mut od = Vec::::new(); for id in o { let x: &CloneInfo = &info[*id as usize]; od.push((x.origin.clone(), x.clonotype_id, *id));