diff --git a/Cargo.lock b/Cargo.lock index cc73fa959..33ef4cd76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,7 +405,6 @@ dependencies = [ "hdf5", "io_utils", "itertools", - "pager", "petgraph", "qd", "rayon", @@ -514,6 +513,7 @@ dependencies = [ name = "enclone_ranger" version = "0.5.219" dependencies = [ + "anyhow", "enclone", "enclone_args", "enclone_core", @@ -1201,16 +1201,6 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "pager" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2599211a5c97fbbb1061d3dc751fa15f404927e4846e07c643287d6d1f462880" -dependencies = [ - "errno", - "libc", -] - [[package]] name = "parking_lot" version = "0.12.1" diff --git a/enclone/Cargo.toml b/enclone/Cargo.toml index 6764478dd..62586ba2f 100644 --- a/enclone/Cargo.toml +++ b/enclone/Cargo.toml @@ -38,9 +38,6 @@ string_utils = { path = "../string_utils" } vdj_ann = { path = "../vdj_ann" } vector_utils = { path = "../vector_utils" } -[target.'cfg(not(windows))'.dependencies] -pager = "0.16" - [target.'cfg(not(windows))'.dependencies.hdf5] git = "https://github.com/10XGenomics/hdf5-rust.git" branch = "conda_nov2021" diff --git a/enclone/src/UNDOC_OPTIONS b/enclone/src/UNDOC_OPTIONS index 9f4489ebc..0e450927c 100644 --- a/enclone/src/UNDOC_OPTIONS +++ b/enclone/src/UNDOC_OPTIONS @@ -48,7 +48,6 @@ Optional arguments governing input and output files: Optional arguments that control printing of individual clonotypes: - white = percent of sequences implicated in whitelist expansion. -- CHAIN_BRIEF: show abbreviated chain column headers - DEBUG_TABLE_PRINTING: add print lines to help debug printing of tables. - NOTE_SIMPLE: note if the first sequence for the chain is simple, in the sense that it exactly equals the concatenation of the right-truncated V with the full J segment. @@ -56,8 +55,6 @@ Optional arguments that control printing of individual clonotypes: Other optional arguments: - FORCE: make joins even if redundant -- EXP: exploratory code for exact clonotyping on -- WEAK: for EXP, print all and show weaks - GRAPH: show logging from light-heavy graph construction - UTR_CON: run experimental UTR consensus code - CON_CON: run experimental constant region consensus code @@ -67,7 +64,6 @@ Other optional arguments: 2. You want to see the effect of changed annotation code. - NPLAIN: reverses PLAIN - INDELS: search for and list CDR3s from clonotypes with possible SHM indels (exploratory) -- NOPRETTY: turn off pretty trace entirely - HEAVY_CHAIN_REUSE: look for instances of heavy chain reuse - BINARY=filename: generate binary output file - PROTO=filename: generate proto output file @@ -83,13 +79,6 @@ expanded out. CELLRANGER: for use if called from cellranger -- changes failure message and prevents exit upon normal completion -EXT=filename: -Given output of an external clonotyping algorithm which took as inputs the pipeline outputs -for the lenas in enclone.testdata, for each exact subclonotype found by enclone, report its -composition in the external clonotyping, as clonotype_id[count], ... -The input file should have lines of the form: -sample barcode clonotype_id. 
- SUMMARY_CLEAN: if SUMMARY specified, don't show computational performance stats, so we can regress on output diff --git a/enclone/src/graph_filter.rs b/enclone/src/graph_filter.rs index f82f8bdd4..474740c65 100644 --- a/enclone/src/graph_filter.rs +++ b/enclone/src/graph_filter.rs @@ -379,7 +379,7 @@ pub fn graph_filter( .insert(tig_bc[i][0].barcode.clone(), BarcodeFate::GraphFilter); } } - if !ctl.gen_opt.ngraph_filter { + if !ctl.cr_opt.ngraph_filter { erase_if(tig_bc, &to_delete); } if graph { diff --git a/enclone/src/info.rs b/enclone/src/info.rs index 87b80684d..f9cc03470 100644 --- a/enclone/src/info.rs +++ b/enclone/src/info.rs @@ -222,10 +222,6 @@ pub fn build_info( } else { // maybe can't happen vs.push(rt.clone()); - // At one point there was a bug in which the following line was missing. - // This caused a traceback on "enclone 123085 RE". It is interesting because - // the traceback did not get back to the main program, even with - // "enclone 123085 RE NOPRETTY". vs_notes.push(String::new()); vsnx = String::new(); } diff --git a/enclone/src/misc1.rs b/enclone/src/misc1.rs index bfe0c586a..c66ef1a9c 100644 --- a/enclone/src/misc1.rs +++ b/enclone/src/misc1.rs @@ -9,8 +9,6 @@ use enclone_core::{ }; use equiv::EquivRel; use itertools::Itertools; -#[cfg(not(target_os = "windows"))] -use pager::Pager; use std::time::Instant; use string_utils::stringme; @@ -20,44 +18,6 @@ use vector_utils::{ // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// This section contains a function that supports paging. It does not work under Windows, and -// we describe here all the *known* problems with getting enclone to work under Windows. -// 1. It does not compile for us. When we tried, there was a problem with libhdf-5. -// 2. Paging is turned off, because the pager crate doesn't compile under Windows, and porting -// it to Windows appears nontrivial. -// 3. ANSI escape characters are not handled correctly, at least by default. -// In addition, we have some concerns about what it would mean to properly test enclone on Windows, -// given that some users might have older OS installs, and support for ANSI escape characters -// appears to have been changed in 2018. This is not made easier by the Windows Subsystem for -// Linux. - -#[cfg(not(target_os = "windows"))] -pub fn setup_pager(pager: bool) { - // If the output is going to a terminal, set up paging so that output is in effect piped to - // "less -R -F -X -K". - // - // ∙ The option -R is used to render ANSI escape characters correctly. We do not use - // -r instead because if you navigate backwards in less -r, stuff gets screwed up, - // which is consistent with the scary stuff in the man page for less at -r. However -R will - // not display all unicode characters correctly, so those have to be picked carefully, - // by empirically testing that e.g. "echo ◼ | less -R -F -X" renders correctly. - // - // ∙ The -F option makes less exit immediately if all the output can be seen in one screen. - // - // ∙ The -X option is needed because we found that in full screen mode on OSX Catalina, output - // was sent to the alternate screen, and hence it appeared that one got no output at all - // from enclone. This is really bad, so do not turn off this option! 
- - if pager { - Pager::with_pager("less -R -F -X -K").setup(); - } -} - -#[cfg(target_os = "windows")] -pub fn setup_pager(_pager: bool) {} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - // Lookup for heavy chain reuse (special purpose experimental option). // This is interesting but not likely to yield interesting examples of heavy chain reuse // because biologically it doesn't make sense that one would have both H-L1 and H-L2 expanded. diff --git a/enclone_args/src/lib.rs b/enclone_args/src/lib.rs index 45cf34085..8c6460b31 100644 --- a/enclone_args/src/lib.rs +++ b/enclone_args/src/lib.rs @@ -1,8 +1,6 @@ // Copyright (c) 2021 10x Genomics, Inc. All rights reserved. #![allow(clippy::needless_range_loop)] -use io_utils::path_exists; - pub mod load_gex; pub mod load_gex_core; pub mod load_gex_util; @@ -14,38 +12,3 @@ pub mod proc_args_post; pub mod process_special_arg1; pub mod process_special_arg2; pub mod read_json; - -// parse_csv_pure: same as parse_csv, but don't strip out quotes - -pub fn parse_csv_pure(x: &str) -> Vec<&str> { - let w = x.char_indices().collect::>(); - let mut y = Vec::new(); - let (mut quotes, mut i) = (0, 0); - while i < w.len() { - let mut j = i; - while j < w.len() { - if quotes % 2 == 0 && w[j].1 == ',' { - break; - } - if w[j].1 == '"' { - quotes += 1; - } - j += 1; - } - let (start, stop) = (w[i].0, w.get(j).map_or(x.len(), |(ind, _)| *ind)); - y.push(&x[start..stop]); - i = j + 1; - } - if !w.is_empty() && w.last().unwrap().1 == ',' { - y.push(""); - } - y -} - -pub fn fnx(outs: &str, name: &str) -> String { - let mut file = format!("{outs}/../{name}"); - if !path_exists(&file) { - file = format!("{outs}/{name}"); - } - file -} diff --git a/enclone_args/src/load_gex.rs b/enclone_args/src/load_gex.rs index ad4370ac6..b6c2f0123 100644 --- a/enclone_args/src/load_gex.rs +++ b/enclone_args/src/load_gex.rs @@ -19,7 +19,6 @@ use vector_utils::{bin_position, unique_sort}; pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { let mut gex_features = Vec::>::new(); let mut gex_barcodes = Vec::>::new(); - let mut feature_refs = Vec::::new(); let mut cluster = Vec::>::new(); let mut cell_type = Vec::>::new(); let mut cell_type_specified = Vec::::new(); @@ -32,12 +31,10 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { let mut h5_paths = Vec::::new(); let mut feature_metrics = Vec::>::new(); let mut json_metrics = Vec::>::new(); - let mut metrics = Vec::::new(); load_gex( ctl, &mut gex_features, &mut gex_barcodes, - &mut feature_refs, &mut cluster, &mut cell_type, &mut cell_type_specified, @@ -50,7 +47,6 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { &mut h5_paths, &mut feature_metrics, &mut json_metrics, - &mut metrics, )?; if ctl.gen_opt.gene_scan.is_some() && !ctl.gen_opt.accept_inconsistent { let mut allf = gex_features.clone(); @@ -137,7 +133,6 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { Ok(GexInfo { gex_features, gex_barcodes, - feature_refs, cluster, cell_type, cell_type_specified, @@ -154,6 +149,5 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { have_fb, feature_metrics, json_metrics, - metrics, }) } diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs index ca6a80fdf..6ee47b239 100644 --- a/enclone_args/src/load_gex_core.rs +++ b/enclone_args/src/load_gex_core.rs @@ -3,17 +3,14 @@ // Load gene expression and feature barcoding (antibody, antigen) data from Cell Ranger outputs. 
use crate::load_gex_util::{ - find_cluster_file, find_feature_metrics_file, find_json_metrics_file, find_metrics_file, - find_pca_file, + find_cluster_file, find_feature_metrics_file, find_json_metrics_file, find_pca_file, }; -use crate::{fnx, parse_csv_pure}; use enclone_core::defs::EncloneControl; use enclone_core::slurp::slurp_h5; use io_utils::{dir_list, open_for_read, open_userfile_for_read, path_exists}; -use itertools::Itertools; use rayon::prelude::*; use serde_json::Value; -use std::{collections::HashMap, fmt::Write, fs::read_to_string, io::BufRead}; +use std::{collections::HashMap, io::BufRead}; use string_utils::{parse_csv, TextUtils}; use vector_utils::{unique_sort, VecUtils}; @@ -30,18 +27,14 @@ struct LoadResult { cell_type_specified: bool, error: String, h5_path: String, - f15: Vec, feature_metrics: HashMap<(String, String), String>, json_metrics: HashMap, - metrics: String, - feature_refs: String, } pub fn load_gex( ctl: &mut EncloneControl, gex_features: &mut Vec>, gex_barcodes: &mut Vec>, - feature_refs: &mut Vec, cluster: &mut Vec>, cell_type: &mut Vec>, cell_type_specified: &mut Vec, @@ -54,7 +47,6 @@ pub fn load_gex( h5_paths: &mut Vec, feature_metrics: &mut Vec>, json_metrics: &mut Vec>, - metrics: &mut Vec, ) -> Result<(), String> { let mut results = Vec::<(usize, LoadResult)>::new(); for i in 0..ctl.origin_info.gex_path.len() { @@ -70,7 +62,6 @@ pub fn load_gex( // somehow the parallelism is not working. // 2. We know where the time is spent in the loop, and this is marked below. results.par_iter_mut().for_each(|(i, r)| { - let pathlist = &mut r.f15; let i = *i; if !gex_outs[i].is_empty() { // First define the path where the GEX files should live, and make sure that the path @@ -102,7 +93,6 @@ pub fn load_gex( for x in &h5p { let p = format!("{outs}/{x}"); if path_exists(&p) { - pathlist.push(p.clone()); h5_path = p; break; } @@ -141,15 +131,21 @@ pub fn load_gex( // Find files. - let pca_file = find_pca_file(ctl, &outs, &analysis, pathlist); - let json_metrics_file = find_json_metrics_file(ctl, &outs, &analysis, pathlist); - let feature_metrics_file = find_feature_metrics_file(ctl, &outs, &analysis, pathlist); - let metrics_file = find_metrics_file(ctl, &outs, &analysis, pathlist); - let cluster_file = find_cluster_file(ctl, &outs, &analysis, pathlist); + let pca_file = find_pca_file(&analysis); + let cluster_file = find_cluster_file(&analysis); + + let (json_metrics_file, feature_metrics_file) = if !ctl.cr_opt.cellranger {( + find_json_metrics_file(&analysis), + find_feature_metrics_file(&analysis), + )} else { + Default::default() + }; // Proceed. for f in &[pca_file.clone(), cluster_file.clone()] { + + if !path_exists(f) { r.error = format!( "\nThe file\n{f}\ndoes not exist. \ @@ -166,7 +162,6 @@ pub fn load_gex( ); return; } - pathlist.push(f.to_string()); } // Find metrics summary file. @@ -188,7 +183,6 @@ pub fn load_gex( for c in &csvs { if path_exists(c) { csv = c.clone(); - pathlist.push(c.to_string()); break; } } @@ -204,7 +198,6 @@ pub fn load_gex( // Read cell types. if path_exists(&types_file) { - pathlist.push(types_file.clone()); let f = open_userfile_for_read(&types_file); let mut count = 0; for line in f.lines() { @@ -246,38 +239,6 @@ pub fn load_gex( } } - // Read and parse metrics file. Rewrite as metrics class, metric name, metric value. 
- - if !metrics_file.is_empty() { - let m = std::fs::read_to_string(&metrics_file).unwrap(); - let fields = parse_csv_pure(m.before("\n")); - let (mut class, mut name, mut value) = (None, None, None); - for field in fields { - if field == "Library Type" { - class = Some(i); - } else if field == "Metric Name" { - name = Some(i); - } else if field == "Metric Value" { - value = Some(i); - } - } - let (class, name, value) = (class.unwrap(), name.unwrap(), value.unwrap()); - let mut lines = Vec::::new(); - let mut first = true; - for line in m.lines() { - if first { - first = false; - } else { - let fields = parse_csv_pure(line); - lines.push(format!( - "{},{},{}", - fields[class], fields[name], fields[value] - )); - } - } - r.metrics = format!("{}\n", lines.iter().format("\n")); - } - // Read feature metrics file. Note that we do not enforce the requirement of this // file, so it may not be present. @@ -529,14 +490,6 @@ pub fn load_gex( r.gex_mult = gene_mult; r.fb_mult = fb_mult; - // Read the feature reference file. - - let fref_file = fnx(&outs, "feature_reference.csv"); - if path_exists(&fref_file) { - pathlist.push(fref_file.clone()); - r.feature_refs = read_to_string(&fref_file).unwrap(); - } - // Read the feature barcode matrix file. if let Err(err) = slurp_h5( &h5_path, @@ -549,9 +502,6 @@ pub fn load_gex( } unique_sort(&mut r.gex_cell_barcodes); }); - for (_, r) in &results { - ctl.pathlist.extend(r.f15.iter().cloned()); - } // Test for error. @@ -573,42 +523,6 @@ pub fn load_gex( } h5_paths.extend(results.iter().map(|(_, r)| r.h5_path.clone())); - // Add some metrics. - - let extras = [ - ( - "ANTIBODY_G_perfect_homopolymer_frac", - "Antibody Capture,G Homopolymer Frac", - ), - ( - "GRCh38_raw_rpc_20000_subsampled_filtered_bcs_median_unique_genes_detected", - "Gene Expression,GRCh38 Median genes per cell (20k raw reads per cell)", - ), - ( - "GRCh38_raw_rpc_20000_subsampled_filtered_bcs_median_counts", - "Gene Expression,GRCh38 Median UMI counts per cell (20k raw reads per cell)", - ), - ]; - for x in &extras { - let metric_name = x.0.to_string(); - let metric_display_name = x.1.to_string(); - let mut have = false; - for (_, result) in &results { - if result.json_metrics.contains_key(&metric_name) { - have = true; - } - } - if have { - for (_, result) in &mut results { - let mut value = String::new(); - if result.json_metrics.contains_key(&metric_name) { - value = format!("{:.3}", result.json_metrics[&metric_name]); - } - writeln!(result.metrics, "{metric_display_name},{value}").unwrap(); - } - } - } - for (_, r) in results { gex_features.push(r.gex_features); gex_barcodes.push(r.gex_barcodes); @@ -621,8 +535,6 @@ pub fn load_gex( cell_type_specified.push(r.cell_type_specified); feature_metrics.push(r.feature_metrics); json_metrics.push(r.json_metrics); - metrics.push(r.metrics); - feature_refs.push(r.feature_refs); } // Done. diff --git a/enclone_args/src/load_gex_util.rs b/enclone_args/src/load_gex_util.rs index bec9577c5..e7abc2a54 100644 --- a/enclone_args/src/load_gex_util.rs +++ b/enclone_args/src/load_gex_util.rs @@ -1,108 +1,57 @@ // Copyright (c) 2022 10X Genomics, Inc. All rights reserved. 
-use enclone_core::defs::EncloneControl; -use io_utils::{dir_list, path_exists}; -use vector_utils::VecUtils; +use io_utils::path_exists; -pub fn find_pca_file( - _ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_pca_file(analysis: &[String]) -> String { let mut pca_file = String::new(); for x in analysis { pca_file = format!("{x}/pca/10_components/projection.csv"); if path_exists(&pca_file) { - pathlist.push(pca_file.clone()); break; } pca_file = format!("{x}/pca/gene_expression_10_components/projection.csv"); if path_exists(&pca_file) { - pathlist.push(pca_file.clone()); break; } } pca_file } -pub fn find_json_metrics_file( - ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_json_metrics_file(analysis: &[String]) -> String { let mut json_metrics_file = String::new(); - if !ctl.gen_opt.cellranger { - for x in analysis { - let f = format!("{x}/metrics_summary_json.json"); - if path_exists(&f) { - json_metrics_file = f.clone(); - pathlist.push(f); - break; - } + for x in analysis { + let f = format!("{x}/metrics_summary_json.json"); + if path_exists(&f) { + json_metrics_file = f.clone(); + break; } } + json_metrics_file } -pub fn find_feature_metrics_file( - ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_feature_metrics_file(analysis: &[String]) -> String { let mut feature_metrics_file = String::new(); - if !ctl.gen_opt.cellranger { - for x in analysis { - let f = format!("{x}/per_feature_metrics.csv"); - if path_exists(&f) { - feature_metrics_file = f.clone(); - pathlist.push(f); - break; - } + for x in analysis { + let f = format!("{x}/per_feature_metrics.csv"); + if path_exists(&f) { + feature_metrics_file = f.clone(); + break; } } - feature_metrics_file -} -pub fn find_metrics_file( - ctl: &EncloneControl, - outs: &str, - _analysis: &[String], - pathlist: &mut Vec, -) -> String { - let mut metrics_file = String::new(); - if !ctl.gen_opt.cellranger { - let summary_dir = format!("{outs}/../multi_web_summary_json/metrics_summary_csv"); - if path_exists(&summary_dir) { - let list = dir_list(&summary_dir); - if list.solo() { - let path = format!("{summary_dir}/{}", list[0]); - pathlist.push(path.clone()); - metrics_file = path; - } - } - } - metrics_file + feature_metrics_file } -pub fn find_cluster_file( - _ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_cluster_file(analysis: &[String]) -> String { let mut cluster_file = String::new(); for x in analysis { cluster_file = format!("{x}/clustering/graphclust/clusters.csv"); if path_exists(&cluster_file) { - pathlist.push(cluster_file.clone()); break; } cluster_file = format!("{x}/clustering/gene_expression_graphclust/clusters.csv"); if path_exists(&cluster_file) { - pathlist.push(cluster_file.clone()); break; } } diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index fadd910c8..34aea000f 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -1,14 +1,12 @@ // Copyright (c) 2021 10X Genomics, Inc. All rights reserved. 
-use crate::proc_args2::{
-    is_f64_arg, is_i32_arg, is_simple_arg, is_string_arg, is_usize_arg, test_writeable,
-};
+use crate::proc_args2::{is_f64_arg, is_i32_arg, is_simple_arg, is_string_arg, is_usize_arg};
 use crate::proc_args_post::proc_args_post;
 use crate::process_special_arg1::process_special_arg1;
 use crate::process_special_arg2::process_special_arg2;
 use enclone_core::defs::{ClonotypeHeuristics, EncloneControl};
 use enclone_core::test_def::replace_at_test;
-use enclone_core::{require_readable_file, tilde_expand_me};
+use enclone_core::{require_readable_file, test_writeable, tilde_expand_me};
 use itertools::Itertools;
 use std::fmt::Write;
 use std::process::Command;
@@ -123,15 +121,9 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String
     ctl.clono_filt_opt.min_umi = 0;
     ctl.clono_filt_opt.max_chains = 1000000;
     ctl.clono_filt_opt.qual_filter = true;
-    ctl.clono_filt_opt_def.signature = true;
-    ctl.clono_filt_opt_def.weak_chains = true;
     ctl.clono_filt_opt_def.weak_onesies = true;
-    ctl.clono_filt_opt_def.weak_foursies = true;
-    ctl.clono_filt_opt_def.doublet = true;
     ctl.clono_filt_opt_def.bc_dup = true;
     ctl.clono_filt_opt.max_datasets = 1000000000;
-    ctl.clono_filt_opt_def.umi_filt = true;
-    ctl.clono_filt_opt_def.umi_ratio_filt = true;
     ctl.clono_filt_opt.max_exacts = 1_000_000_000;

     ctl.clono_print_opt.amino = vec![
@@ -175,7 +167,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String
     let mut have_meta = false;
     let mut gex = String::new();
     let mut bc = String::new();
-    let mut metas = Vec::<String>::new();
     let mut metaxs = Vec::<String>::new();
     let mut xcrs = Vec::<String>::new();
     for i in 1..args.len() {
@@ -189,7 +180,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String
             have_bcr = true;
         } else if args[i].starts_with("GEX=") {
             have_gex = true;
-        } else if args[i].starts_with("META=") || args[i].starts_with("METAX=") {
+        } else if !ctl.cr_opt.metas.is_empty() || args[i].starts_with("METAX=") {
             have_meta = true;
         }
         if args[i].starts_with("GEX=") {
@@ -335,10 +326,11 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String
     }

     // Preprocess NALL and NALL_GEX.
-
-    for i in 1..args.len() {
-        if args[i] == *"NALL" || args[i] == "NALL_CELL" || args[i] == "NALL_GEX" {
-            let f = [
+    // FIXME: these should be implemented as a direct action on opt rather than
+    // pushing additional command line args.
+ for arg in &args[1..].to_vec() { + if arg == "NALL" || arg == "NALL_CELL" || arg == "NALL_GEX" { + for arg_to_append in [ "NCELL", "NGEX", "NCROSS", @@ -356,18 +348,17 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "MIX_DONORS", "NIMPROPER", "NSIG", - ]; - for j in 0..f.len() { - if f[j] == "NCELL" { - if args[i] != "NALL_CELL" { - args.push(f[j].to_string()); + ] { + if arg_to_append == "NCELL" { + if arg != "NALL_CELL" { + args.push(arg_to_append.to_string()); } - } else if f[j] == "NGEX" { - if args[i] != "NALL_GEX" { - args.push(f[j].to_string()); + } else if arg_to_append == "NGEX" { + if arg != "NALL_GEX" { + args.push(arg_to_append.to_string()); } } else { - args.push(f[j].to_string()); + args.push(arg_to_append.to_string()); } } break; @@ -393,7 +384,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("BCJOIN", &mut ctl.join_alg_opt.bcjoin), ("BUILT_IN", &mut ctl.gen_opt.built_in), ("CDIFF", &mut ctl.clono_filt_opt.cdiff), - ("CHAIN_BRIEF", &mut ctl.clono_print_opt.chain_brief), ("COMPLETE", &mut ctl.gen_opt.complete), ("CON", &mut ctl.allele_print_opt.con), ("CON_CON", &mut ctl.gen_opt.con_con), @@ -401,7 +391,10 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("CONP", &mut ctl.clono_print_opt.conp), ("CONX", &mut ctl.clono_print_opt.conx), ("CURRENT_REF", &mut ctl.gen_opt.current_ref), - ("DEBUG_TABLE_PRINTING", &mut ctl.debug_table_printing), + ( + "DEBUG_TABLE_PRINTING", + &mut ctl.clono_print_opt.debug_table_printing, + ), ("DEL", &mut ctl.clono_filt_opt.del), ("DESCRIP", &mut ctl.gen_opt.descrip), ("D_INCONSISTENT", &mut ctl.clono_filt_opt.d_inconsistent), @@ -414,7 +407,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("FOLD_HEADERS", &mut ctl.gen_opt.fold_headers), ("FORCE", &mut ctl.force), ("FULL_SEQC", &mut ctl.clono_print_opt.full_seqc), - ("GAMMA_DELTA", &mut ctl.gen_opt.gamma_delta), ("GRAPH", &mut ctl.gen_opt.graph), ( "GROUP_CDR3H_LEN_VAR", @@ -441,8 +433,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("NCROSS", &mut ctl.clono_filt_opt_def.ncross), ("NEWICK", &mut ctl.gen_opt.newick), ("NGEX", &mut ctl.clono_filt_opt_def.ngex), - ("NOGRAY", &mut ctl.nogray), - ("NGRAPH_FILTER", &mut ctl.gen_opt.ngraph_filter), + ("NOGRAY", &mut ctl.clono_print_opt.nogray), ("NGROUP", &mut ctl.clono_group_opt.ngroup), ("NIMPROPER", &mut ctl.merge_all_impropers), ("NMAX", &mut ctl.clono_filt_opt_def.nmax), @@ -501,7 +492,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("UTR_CON", &mut ctl.gen_opt.utr_con), ("VDUP", &mut ctl.clono_filt_opt.vdup), ("VIS_DUMP", &mut ctl.gen_opt.vis_dump), - ("WEAK", &mut ctl.gen_opt.weak), ("WHITEF", &mut ctl.clono_filt_opt_def.whitef), ]; @@ -510,15 +500,9 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String let mut set_false = vec![ ("H5_SLICE", &mut ctl.gen_opt.h5_pre), ("NBC_DUP", &mut ctl.clono_filt_opt_def.bc_dup), - ("NDOUBLET", &mut ctl.clono_filt_opt_def.doublet), - ("NFOURSIE_KILL", &mut ctl.clono_filt_opt_def.weak_foursies), ("NMERGE_ONESIES", &mut ctl.join_alg_opt.merge_onesies_ctl), ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), - ("NSIG", &mut ctl.clono_filt_opt_def.signature), ("NSILENT", &mut ctl.silent), - ("NUMI", &mut ctl.clono_filt_opt_def.umi_filt), - ("NUMI_RATIO", &mut ctl.clono_filt_opt_def.umi_ratio_filt), - ("NWEAK_CHAINS", &mut ctl.clono_filt_opt_def.weak_chains), 
("NWEAK_ONESIES", &mut ctl.clono_filt_opt_def.weak_onesies), ("PRINT_FAILED_JOINS", &mut ctl.join_print_opt.quiet), ]; @@ -584,16 +568,10 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "AG_DIST_BOUND", &mut ctl.clono_group_opt.asymmetric_dist_bound, ), - ( - "AG_DIST_FORMULA", - &mut ctl.clono_group_opt.asymmetric_dist_formula, - ), ("CLUSTAL_AA", &mut ctl.gen_opt.clustal_aa), ("CLUSTAL_DNA", &mut ctl.gen_opt.clustal_dna), - ("EXT", &mut ctl.gen_opt.ext), ("GROUP_CDR3", &mut ctl.clono_group_opt.cdr3), ("PCHAINS", &mut ctl.parseable_opt.pchains), - ("SESSION_NAME", &mut ctl.gen_opt.session_name), ("TRACE_BARCODE", &mut ctl.gen_opt.trace_barcode), ]; @@ -601,10 +579,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String let set_string_writeable = [ ("BINARY", &mut ctl.gen_opt.binary), - ("DONOR_REF_FILE", &mut ctl.gen_opt.dref_file), - ("FATE_FILE", &mut ctl.gen_opt.fate_file), ("HONEY_OUT", &mut ctl.plot_opt.honey_out), - ("PROTO", &mut ctl.gen_opt.proto), ("SUBSET_JSON", &mut ctl.gen_opt.subset_json), ]; @@ -625,7 +600,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String &mut ctl.gen_opt.clonotype_group_names, ), ("HONEY_IN", &mut ctl.plot_opt.honey_in), - ("PROTO_METADATA", &mut ctl.gen_opt.proto_metadata), ]; // Define arguments that set something to a string that is an input file name, not represented @@ -635,15 +609,14 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("BC_JOINT", &mut ctl.gen_opt.bc_joint), ("EXTERNAL_REF", &mut ctl.gen_opt.external_ref), ("POST_FILTER", &mut ctl.gen_opt.post_filter), - ("REF", &mut ctl.gen_opt.refname), ]; // Define arguments that do nothing (because already parsed), and which have no "= value" part. let set_nothing_simple = [ - "CELLRANGER", "DUMP_INTERNAL_IDS", "EVIL_EYE", + "FORCE_EXTERNAL", "LONG_HELP", "MARKED_B", "MARK_STATS", @@ -651,9 +624,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "NALL", "NALL_CELL", "NALL_GEX", - "NO_KILL", "NOPAGER", - "NOPRETTY", "PLAIN", "PRINT_CPU", "PRINT_CPU_INFO", @@ -908,22 +879,8 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String if processed[i] { continue; } - if !process_special_arg1( - &args[i], - ctl, - &mut metas, - &mut metaxs, - &mut xcrs, - &mut using_plot, - )? { - process_special_arg2( - &args[i], - ctl, - &mut metas, - &mut metaxs, - &mut xcrs, - &mut using_plot, - )?; + if !process_special_arg1(&args[i], ctl)? { + process_special_arg2(&args[i], ctl, &mut metaxs, &mut xcrs, &mut using_plot)?; } } @@ -931,19 +888,17 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String // Do residual argument processing. - if ctl.gen_opt.gamma_delta && !have_tcrgd || !ctl.gen_opt.gamma_delta && have_tcrgd { + if ctl.cr_opt.gamma_delta && !have_tcrgd || !ctl.cr_opt.gamma_delta && have_tcrgd { return Err( "\n. GAMMA_DELTA flag has to be enabled for using TCRGD= and vice versa.\n".to_string(), ); } - if ctl.gen_opt.gamma_delta && (have_bcr || have_gex || have_meta || have_tcr) { + if ctl.cr_opt.gamma_delta && (have_bcr || have_gex || have_meta || have_tcr) { return Err( "\n. Unsupported input type in GAMMA_DELTA mode. 
Only TCRGD= input is supported.\n" .to_string(), ); } - proc_args_post( - ctl, &args, &metas, &metaxs, &xcrs, have_gex, &gex, &bc, using_plot, - )?; + proc_args_post(ctl, &args, &metaxs, &xcrs, have_gex, &gex, &bc, using_plot)?; Ok(()) } diff --git a/enclone_args/src/proc_args2.rs b/enclone_args/src/proc_args2.rs index e0c0ecd09..ae06c2253 100644 --- a/enclone_args/src/proc_args2.rs +++ b/enclone_args/src/proc_args2.rs @@ -3,47 +3,12 @@ use enclone_core::defs::EncloneControl; use io_utils::{open_userfile_for_read, path_exists}; use rayon::prelude::*; -use std::fmt::Write; -use std::fs::{remove_file, File}; use std::io::BufRead; use string_utils::TextUtils; use vector_utils::next_diff; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// Test a file for writeability by writing and then deleting it. - -pub fn test_writeable(val: &str, evil_eye: bool) -> Result<(), String> { - if evil_eye { - println!("creating file {val} to test writability"); - } - let f = File::create(val); - if f.is_err() { - let mut msgx = - format!("\nYou've specified an output file\n{val}\nthat cannot be written.\n"); - if val.contains('/') { - let dir = val.rev_before("/"); - let msg = if path_exists(dir) { - "exists" - } else { - "does not exist" - }; - writeln!(msgx, "Note that the path {dir} {msg}.").unwrap(); - } - return Err(msgx); - } - if evil_eye { - println!("removing file {val}"); - } - remove_file(val).unwrap_or_else(|_| panic!("could not remove file {val}")); - if evil_eye { - println!("removal of file {val} complete"); - } - Ok(()) -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - // Simple arguments. We test for e.g. PLAIN or PLAIN=, the latter to allow for the case // where the argument has been set by an environment variable. @@ -144,7 +109,7 @@ pub fn proc_args_tail(ctl: &mut EncloneControl, args: &[String]) -> Result<(), S if !ctl.clono_print_opt.amino.is_empty() { ctl.clono_print_opt.cvars.insert(0, "amino".to_string()); } - if ctl.gen_opt.mouse && !ctl.gen_opt.refname.is_empty() { + if ctl.gen_opt.mouse && !ctl.cr_opt.refname.is_empty() { return Err( "\nIf you specify REF, please do not also specify MOUSE. It is enough to\n\ set REF to a mouse reference sequence.\n" diff --git a/enclone_args/src/proc_args3.rs b/enclone_args/src/proc_args3.rs index 12aeb1fb6..4d5169095 100644 --- a/enclone_args/src/proc_args3.rs +++ b/enclone_args/src/proc_args3.rs @@ -68,14 +68,14 @@ fn expand_analysis_sets(x: &str) -> Result { // Functions to find the path to data. pub fn get_path_fail(p: &str, ctl: &EncloneControl, source: &str) -> Result { - for x in &ctl.gen_opt.pre { + for x in &ctl.cr_opt.pre { let pp = format!("{x}/{p}"); if path_exists(&pp) { return Ok(pp); } } if !path_exists(p) { - if ctl.gen_opt.pre.is_empty() { + if ctl.cr_opt.pre.is_empty() { let path = std::env::current_dir().unwrap(); return Err(format!( "\nIn directory {}, unable to find the path {}. 
This came from the {} argument.\n", @@ -86,7 +86,7 @@ pub fn get_path_fail(p: &str, ctl: &EncloneControl, source: &str) -> Result Result Result String { *ok = false; - for x in &ctl.gen_opt.pre { + for x in &ctl.cr_opt.pre { let mut pp = format!("{x}/{p}"); if pp.starts_with('~') { tilde_expand_me(&mut pp); @@ -314,14 +314,14 @@ pub fn proc_xcr( } let val = expand_integer_ranges(val); let val = expand_analysis_sets(&val)?; - let donor_groups = if ctl.gen_opt.cellranger { + let donor_groups = if ctl.cr_opt.cellranger { vec![&val[..]] } else { val.split(';').collect::>() }; let mut gex2 = expand_integer_ranges(gex); gex2 = expand_analysis_sets(&gex2)?; - let donor_groups_gex = if ctl.gen_opt.cellranger { + let donor_groups_gex = if ctl.cr_opt.cellranger { vec![&gex2[..]] } else { gex2.split(';').collect::>() @@ -353,14 +353,14 @@ pub fn proc_xcr( } for (id, d) in donor_groups.iter().enumerate() { - let origin_groups = if ctl.gen_opt.cellranger { + let origin_groups = if ctl.cr_opt.cellranger { vec![&d[..]] } else { (*d).split(':').collect::>() }; let mut origin_groups_gex = Vec::<&str>::new(); if have_gex { - if ctl.gen_opt.cellranger { + if ctl.cr_opt.cellranger { origin_groups_gex = vec![donor_groups_gex[id]]; } else { origin_groups_gex = donor_groups_gex[id].split(':').collect::>(); @@ -389,7 +389,7 @@ pub fn proc_xcr( } } for (is, s) in origin_groups.iter().enumerate() { - let mut datasets = if ctl.gen_opt.cellranger { + let mut datasets = if ctl.cr_opt.cellranger { vec![&s[..]] } else { (*s).split(',').collect::>() @@ -402,7 +402,7 @@ pub fn proc_xcr( let datasets_gex: Vec<&str>; let mut datasets_bc = Vec::<&str>::new(); if have_gex { - if ctl.gen_opt.cellranger { + if ctl.cr_opt.cellranger { datasets_gex = vec![origin_groups_gex[is]]; } else { datasets_gex = origin_groups_gex[is].split(',').collect::>(); diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 714037f7d..000af447a 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -147,7 +147,6 @@ fn parse_bc_joint(ctl: &mut EncloneControl) -> Result<(), String> { pub fn proc_args_post( ctl: &mut EncloneControl, args: &[String], - metas: &[String], metaxs: &[String], xcrs: &[String], have_gex: bool, @@ -246,7 +245,7 @@ pub fn proc_args_post( &mut ctl.plot_opt.plot_file, &mut ctl.gen_opt.fasta_filename, &mut ctl.gen_opt.fasta_aa_filename, - &mut ctl.gen_opt.dref_file, + &mut ctl.cr_opt.dref_file, &mut ctl.parseable_opt.pout, ]; for f in &mut files { @@ -328,23 +327,21 @@ pub fn proc_args_post( if ctl.clono_group_opt.style == "asymmetric" && (ctl.clono_group_opt.asymmetric_center.is_empty() - || ctl.clono_group_opt.asymmetric_dist_formula.is_empty() || ctl.clono_group_opt.asymmetric_dist_bound.is_empty()) { return Err( "\nIf the AGROUP option is used to specify asymmetric grouping, then all\n\ - of the options AG_CENTER, AG_DIST_FORMULA and AG_DIST_BOUND must also be \ + of the options AG_CENTER and AG_DIST_BOUND must also be \ specified.\n" .to_string(), ); } if (!ctl.clono_group_opt.asymmetric_center.is_empty() - || !ctl.clono_group_opt.asymmetric_dist_formula.is_empty() || !ctl.clono_group_opt.asymmetric_dist_bound.is_empty()) && ctl.clono_group_opt.style == "symmetric" { return Err("\nIf any of the asymmetric grouping options AG_CENTER or \ - AG_DIST_FORMULA or\nAG_DIST_BOUND are specified, then the option AGROUP \ + AG_DIST_BOUND are specified, then the option AGROUP \ must also be specified, to turn on asymmetric grouping.\n" .to_string()); } @@ -358,11 
+355,6 @@ pub fn proc_args_post( .to_string(), ); } - if ctl.clono_group_opt.asymmetric_dist_formula != "cdr3_edit_distance" { - return Err( - "\nThe only allowed form for AG_DIST_FORMULA is cdr3_edit_distance.\n".to_string(), - ); - } let ok1 = ctl .clono_group_opt .asymmetric_dist_bound @@ -473,14 +465,14 @@ pub fn proc_args_post( "\nIf the value of PHYLIP_DNA is not stdout, it must end in .tar.\n".to_string(), ); } - if ctl.clono_filt_opt_def.umi_filt && ctl.clono_filt_opt_def.umi_filt_mark { + if ctl.cr_opt.umi_filt && ctl.clono_filt_opt_def.umi_filt_mark { return Err( "\nIf you use UMI_FILT_MARK, you should also use NUMI, to turn off \ the filter,\nas otherwise nothing will be marked.\n" .to_string(), ); } - if ctl.clono_filt_opt_def.umi_ratio_filt && ctl.clono_filt_opt_def.umi_ratio_filt_mark { + if ctl.cr_opt.umi_ratio_filt && ctl.clono_filt_opt_def.umi_ratio_filt_mark { return Err( "\nIf you use UMI_RATIO_FILT_MARK, you should also use NUMI_RATIO, to turn off \ the filter,\nas otherwise nothing will be marked.\n" @@ -488,17 +480,17 @@ pub fn proc_args_post( ); } - // Process TCR, BCR and META. + // Process TCR, BCR and metas. check_cvars(ctl)?; - if !metas.is_empty() { - let mut v = Vec::::with_capacity(metas.len()); - for meta in metas { + if !ctl.cr_opt.metas.is_empty() { + let mut v = Vec::::with_capacity(ctl.cr_opt.metas.len()); + for meta in &ctl.cr_opt.metas { let f = get_path_fail(meta, ctl, "META")?; if f.contains('/') { let d = f.rev_before("/").to_string(); - if !ctl.gen_opt.pre.contains(&d) { - ctl.gen_opt.pre.push(d); + if !ctl.cr_opt.pre.contains(&d) { + ctl.cr_opt.pre.push(d); } } v.push(f); diff --git a/enclone_args/src/process_special_arg1.rs b/enclone_args/src/process_special_arg1.rs index 161e15327..04280d6b0 100644 --- a/enclone_args/src/process_special_arg1.rs +++ b/enclone_args/src/process_special_arg1.rs @@ -2,31 +2,23 @@ // Process a special argument, i.e. one that does not fit into a neat bucket. -use crate::proc_args2::test_writeable; use crate::proc_args2::{is_simple_arg, is_usize_arg}; use enclone_core::cell_color::{ CellColor, ColorByCategoricalVariableValue, ColorByDataset, ColorByVariableValue, }; use enclone_core::defs::EncloneControl; -use enclone_core::tilde_expand_me; +use enclone_core::{test_writeable, tilde_expand_me}; use enclone_vars::encode_arith; use evalexpr::build_operator_tree; use expr_tools::test_functions_in_node; use io_utils::path_exists; use itertools::Itertools; use std::fmt::Write; -use std::fs::{read_to_string, remove_file, File}; +use std::fs::{remove_file, File}; use string_utils::TextUtils; use vector_utils::{unique_sort, VecUtils}; -pub fn process_special_arg1( - arg: &str, - ctl: &mut EncloneControl, - _metas: &mut [String], - _metaxs: &mut [String], - _xcrs: &mut [String], - _using_plot: &mut bool, -) -> Result { +pub fn process_special_arg1(arg: &str, ctl: &mut EncloneControl) -> Result { // Process the argument. if is_simple_arg(arg, "SEQ")? 
{ @@ -65,32 +57,6 @@ pub fn process_special_arg1( return Err(format!("\nArgument {arg} is not properly specified.\n")); } ctl.gen_opt.chains_to_jun_align2.push(n.force_usize()); - } else if arg.starts_with("STATE_NARRATIVE=") { - let mut narrative = arg.after("STATE_NARRATIVE=").to_string(); - if narrative.starts_with('@') { - let filename = narrative.after("@"); - if !path_exists(filename) { - return Err( - "\nThe file referenced by your STATE_NARRATIVE argument could not be found.\n" - .to_string(), - ); - } - narrative = read_to_string(filename).unwrap(); - ctl.gen_opt.state_narrative = narrative; - } - } else if arg.starts_with("SESSION_NARRATIVE=") { - let mut narrative = arg.after("SESSION_NARRATIVE=").to_string(); - if narrative.starts_with('@') { - let filename = narrative.after("@"); - if !path_exists(filename) { - return Err( - "\nThe file referenced by your SESSION_NARRATIVE argument could not be found.\n" - .to_string() - ); - } - narrative = read_to_string(filename).unwrap(); - ctl.gen_opt.session_narrative = narrative; - } } else if arg.starts_with("JOIN_BASIC=") { let val = arg.after("JOIN_BASIC="); if val.parse::().is_err() || val.force_f64() < 0.0 || val.force_f64() > 100.0 { @@ -339,34 +305,6 @@ pub fn process_special_arg1( return Err(format!("\nArgument {arg} is not properly specified.\n")); } ctl.gen_opt.chains_to_jun_align.push(n.force_usize()); - } else if arg.starts_with("FB_SHOW=") { - let fields = arg.after("FB_SHOW=").split(','); - let mut found_k = false; - let mut ok = true; - for field in fields { - if field.parse::().is_ok() { - if found_k { - return Err("\nFB_SHOW argument contains more than one integer.\n".to_string()); - } - found_k = true; - } else { - if field.len() != 15 { - ok = false; - } - for c in field.chars() { - if c != 'A' && c != 'C' && c != 'G' && c != 'T' { - ok = false; - } - } - } - } - if !ok { - return Err("\nFB_SHOW argument must be a comma-separated list \ - containing at most one nonnegative integer and zero or more DNA \ - sequences of length 15 (in the alphabet A,C,G,T).\n" - .to_string()); - } - ctl.gen_opt.fb_show = arg.after("FB_SHOW=").to_string(); } else if arg.starts_with("POUT=") { let val = arg.after("POUT="); ctl.parseable_opt.pout = val.to_string(); diff --git a/enclone_args/src/process_special_arg2.rs b/enclone_args/src/process_special_arg2.rs index 6ebf1c9f6..1ba72480d 100644 --- a/enclone_args/src/process_special_arg2.rs +++ b/enclone_args/src/process_special_arg2.rs @@ -5,7 +5,7 @@ use crate::proc_args2::{is_f64_arg, is_usize_arg}; use enclone_core::defs::{EncloneControl, GeneScanOpts}; use enclone_core::linear_condition::LinearCondition; -use enclone_core::{require_readable_file, tilde_expand_me}; +use enclone_core::require_readable_file; use evalexpr::build_operator_tree; use io_utils::open_for_read; use regex::Regex; @@ -16,7 +16,6 @@ use vector_utils::unique_sort; pub fn process_special_arg2( arg: &str, ctl: &mut EncloneControl, - metas: &mut Vec, metaxs: &mut Vec, xcrs: &mut Vec, using_plot: &mut bool, @@ -389,25 +388,6 @@ pub fn process_special_arg2( .origin_color_map .insert(xj.before("->").to_string(), xj.after("->").to_string()); } - } else if arg.starts_with("PLOT2=") { - *using_plot = true; - let x = arg.after("PLOT2=").split(',').collect::>(); - if x.is_empty() { - return Err("\nArgument to PLOT is invalid.\n".to_string()); - } - if x.len() % 2 != 1 { - return Err("\nArgument to PLOT is invalid.\n".to_string()); - } - ctl.plot_opt.plot_file = x[0].to_string(); - for j in (1..x.len()).step_by(2) { - let 
condition = x[j].to_string(); - let color = x[j + 1].to_string(); - if !condition.contains('=') { - return Err("\nArgument to PLOT is invalid.\n".to_string()); - } - ctl.plot_opt.plot_conditions.push(condition); - ctl.plot_opt.plot_colors.push(color); - } } else if arg.starts_with("PLOT_BY_ISOTYPE=") { ctl.plot_opt.plot_by_isotype = true; ctl.plot_opt.plot_file = arg.after("PLOT_BY_ISOTYPE=").to_string(); @@ -665,13 +645,6 @@ pub fn process_special_arg2( } else if is_usize_arg(arg, "CELLS")? { ctl.clono_filt_opt.ncells_low = arg.after("CELLS=").force_usize(); ctl.clono_filt_opt.ncells_high = ctl.clono_filt_opt.ncells_low; - } else if arg.starts_with("META=") { - let v = arg.after("META=").split(','); - for f in v { - let mut f = f.to_string(); - tilde_expand_me(&mut f); - metas.push(f); - } } else if arg.starts_with("METAX=") { let f = arg.after("METAX="); let f = f.chars().filter(|c| !c.is_whitespace()).collect(); diff --git a/enclone_args/src/read_json.rs b/enclone_args/src/read_json.rs index 3fb875359..be58717a3 100644 --- a/enclone_args/src/read_json.rs +++ b/enclone_args/src/read_json.rs @@ -201,7 +201,7 @@ fn process_json_annotation( chain_type = refdata.name[t][0..3].to_string(); if chain_type == *"IGH" || chain_type == *"TRB" - || (chain_type == *"TRD" && ctl.gen_opt.gamma_delta) + || (chain_type == *"TRD" && ctl.cr_opt.gamma_delta) { left = true; } @@ -291,7 +291,7 @@ fn process_json_annotation( cdr3_start -= tig_start as usize; if chain == VdjChain::IGH || chain == VdjChain::TRB - || (chain == VdjChain::TRD && ctl.gen_opt.gamma_delta) + || (chain == VdjChain::TRD && ctl.cr_opt.gamma_delta) { left = true; } @@ -697,7 +697,7 @@ pub fn parse_json_annotations_files( refdata: &RefData, ) -> Result { // Note: only tracking truncated seq and quals initially - let ann = if !ctl.gen_opt.cellranger { + let ann = if !ctl.cr_opt.cellranger { "all_contig_annotations.json" } else { "contig_annotations.json" diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index f3833704c..2ccc9d813 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -2,16 +2,19 @@ use crate::cell_color::CellColor; use crate::linear_condition::LinearCondition; +use crate::{require_readable_file, test_writeable, tilde_expand_me}; +use anyhow::{anyhow, ensure, Result}; use debruijn::dna_string::DnaString; use evalexpr::Node; use hdf5::Dataset; +use itertools::Itertools; use regex::Regex; use std::cmp::max; use std::collections::HashMap; use std::sync::atomic::AtomicBool; -use std::time::{Instant, SystemTime}; +use std::time::Instant; use vdj_ann::annotate::Annotation; use vector_utils::unique_sort; @@ -91,20 +94,175 @@ impl OriginInfo { } } +/// The subset of configuration options used by Cellranger. +#[derive(PartialEq)] +pub struct CellrangerOpt { + /// True if enclone is being called from Cellranger. + // FIXME: always true when called from CR... we should figure out how to + // eliminate this. + pub cellranger: bool, + pub pre: Vec, + /// Path to reference. + pub refname: String, + /// Paths to optional metadata files. + pub metas: Vec, + + /// Path to donor reference output file. + pub dref_file: String, + /// Path to protobuf output file. + pub proto: String, + /// Optional path to a json file containing metadata. + pub proto_metadata: String, + /// Optional path to write out barcode fate. + pub fate_file: String, + + /// True if running in gamma-delta mode. + pub gamma_delta: bool, + + pub ngraph_filter: bool, + + // Clonotype filtering options. 
+    // TODO: split these back out into a separate struct?
+    /// umi count filter
+    pub umi_filt: bool,
+    /// umi ratio filter
+    pub umi_ratio_filt: bool,
+    /// filter weak chains from clonotypes
+    pub weak_chains: bool,
+    /// filter weak foursies
+    pub weak_foursies: bool,
+    /// filter putative doublets
+    pub doublet: bool,
+    /// signature filtering
+    pub signature: bool,
+}
+
+impl Default for CellrangerOpt {
+    fn default() -> Self {
+        Self {
+            cellranger: Default::default(),
+            pre: Default::default(),
+            refname: Default::default(),
+            metas: Default::default(),
+            dref_file: Default::default(),
+            proto: Default::default(),
+            proto_metadata: Default::default(),
+            fate_file: Default::default(),
+            gamma_delta: Default::default(),
+            ngraph_filter: Default::default(),
+            umi_filt: true,
+            umi_ratio_filt: true,
+            weak_chains: true,
+            weak_foursies: true,
+            doublet: true,
+            signature: true,
+        }
+    }
+}
+
+impl CellrangerOpt {
+    /// Process command line arguments relevant to cellranger.
+    /// Any unused arguments are returned for further processing.
+    pub fn from_args(args: Vec<String>) -> Result<(Self, Vec<String>)> {
+        let mut cr_opts = Self::default();
+        let mut unused_args = Vec::new();
+        for arg in args {
+            let mut pieces = arg.split('=');
+            let arg_name = pieces.next().unwrap();
+            let mut get_rest = || {
+                let result = pieces.join("=");
+                ensure!(!result.is_empty(), "no value provided for {arg_name}");
+                Ok(result)
+            };
+            match arg_name {
+                "CELLRANGER" => {
+                    cr_opts.cellranger = true;
+                }
+                "PRE" => {
+                    cr_opts.pre = get_rest()?.split(',').map(str::to_string).collect();
+                }
+                "REF" => {
+                    cr_opts.refname = ensure_readable_file(get_rest()?, arg_name)?;
+                }
+                "META" => {
+                    for meta in get_rest()?.split(',') {
+                        let mut f = meta.to_string();
+                        tilde_expand_me(&mut f);
+                        cr_opts.metas.push(f);
+                    }
+                }
+                "DONOR_REF_FILE" => {
+                    cr_opts.dref_file = ensure_writable_file(get_rest()?)?;
+                }
+                "PROTO" => {
+                    cr_opts.proto = ensure_writable_file(get_rest()?)?;
+                }
+                "PROTO_METADATA" => {
+                    cr_opts.proto_metadata = ensure_readable_file(get_rest()?, arg_name)?;
+                }
+                "FATE_FILE" => {
+                    cr_opts.fate_file = ensure_writable_file(get_rest()?)?;
+                }
+                "GAMMA_DELTA" => {
+                    cr_opts.gamma_delta = true;
+                }
+                "NGRAPH_FILTER" => {
+                    cr_opts.ngraph_filter = true;
+                }
+                "NUMI" => {
+                    cr_opts.umi_filt = false;
+                }
+                "NUMI_RATIO" => {
+                    cr_opts.umi_ratio_filt = false;
+                }
+                "NWEAK_CHAINS" => {
+                    cr_opts.weak_chains = false;
+                }
+                "NFOURSIE_KILL" => {
+                    cr_opts.weak_foursies = false;
+                }
+                "NDOUBLET" => {
+                    cr_opts.doublet = false;
+                }
+                "NSIG" => {
+                    cr_opts.signature = false;
+                }
+                _ => {
+                    unused_args.push(arg.clone());
+                }
+            }
+        }
+        Ok((cr_opts, unused_args))
+    }
+}
+
+/// Ensure that a path points to a readable file.
+/// Expand ~ into home directories.
+fn ensure_readable_file(mut path: String, arg: &str) -> Result<String> {
+    tilde_expand_me(&mut path);
+    require_readable_file(&path, arg).map_err(|e| anyhow!(e))?;
+    Ok(path)
+}
+
+/// Ensure that we can write to a path by writing an empty file there.
+/// Expand ~ into home directories.
+fn ensure_writable_file(mut path: String) -> Result<String> {
+    tilde_expand_me(&mut path);
+    test_writeable(&path, false).map_err(|e| anyhow!(e))?;
+    Ok(path)
+}
+
+
 // Miscellaneous general options.
#[derive(Default, PartialEq)] pub struct GeneralOpt { - pub pre: Vec, pub indels: bool, pub reannotate: bool, pub heavy_chain_reuse: bool, - pub ngraph_filter: bool, pub graph: bool, pub utr_con: bool, pub con_con: bool, pub nwhitef: bool, - pub weak: bool, pub tcr: bool, pub bcr: bool, pub tcrgd: bool, @@ -122,19 +280,10 @@ pub struct GeneralOpt { pub complete: bool, pub exact: Option, pub binary: String, - pub proto: String, - pub fate_file: String, - // Optional path to a json file containing metadata - pub proto_metadata: Option, pub h5_pre: bool, pub accept_reuse: bool, pub descrip: bool, - pub ext: String, - pub extc: HashMap<(String, String), String>, - pub extn: HashMap, - pub dref_file: String, pub mouse: bool, - pub refname: String, pub noprint: bool, pub noprintx: bool, pub required_fps: Option, @@ -146,11 +295,9 @@ pub struct GeneralOpt { pub required_three_chain_clonotypes: Option, pub required_four_chain_clonotypes: Option, pub required_datasets: Option, - pub cellranger: bool, pub summary: bool, pub summary_clean: bool, pub summary_csv: bool, - pub cr_version: String, pub nwarn: bool, pub gene_scan: Option, pub gene_scan_exact: bool, @@ -194,7 +341,6 @@ pub struct GeneralOpt { pub subset_json: String, pub fold_headers: bool, pub no_uncap_sim: bool, - pub nopager: bool, pub info: Option, pub info_fields: Vec, pub info_data: HashMap>, @@ -220,11 +366,9 @@ pub struct GeneralOpt { pub toy: bool, // toy with phylogeny pub group_post_filter: Option>, pub no_newline: bool, - pub fb_show: String, pub var_def: Vec<(String, String, Node, String)>, // {(variable, value, compiled value, expr)} pub nospaces: bool, pub subsample: f64, - pub gamma_delta: bool, pub pre_eval: bool, pub pre_eval_show: bool, pub external_ref: String, @@ -234,9 +378,6 @@ pub struct GeneralOpt { pub mix_only: bool, pub no_alt_alleles: bool, pub vis_dump: bool, - pub session_name: String, - pub state_narrative: String, - pub session_narrative: String, } impl GeneralOpt { @@ -283,8 +424,6 @@ pub struct PlotOpt { pub plot_xy_x_log10: bool, pub plot_xy_y_log10: bool, pub plot_xy_sym: bool, - pub plot_conditions: Vec, - pub plot_colors: Vec, pub plot_file: String, pub plot_by_isotype: bool, pub plot_by_isotype_nolegend: bool, @@ -323,8 +462,6 @@ pub struct AllelePrintOpt { #[derive(Clone, Default)] pub struct AlleleData { pub alt_refs: Vec<(usize, usize, DnaString, usize, bool)>, - pub var_pos: Vec>, - pub var_bases: Vec>>, } // Join printing options. 
@@ -358,7 +495,6 @@ pub struct JoinAlgOpt { pub basicx: bool, pub join_full_diff: bool, pub join_cdr3_ident: f64, - pub join_cdr12h_ident: f64, pub fwr1_cdr12_delta: f64, pub cdr3_normal_len: usize, pub auto_share: usize, @@ -377,21 +513,15 @@ pub struct JoinAlgOpt { pub struct ClonoFiltOptDefault { pub marked_b: bool, // only print clonotypes having a mark and which are typed as B cells pub donor: bool, // allow cells from different donors to be placed in the same clonotype - pub weak_foursies: bool, // filter weak foursies pub ngex: bool, // turn off gex filtering, pub non_cell_mark: bool, pub weak_onesies: bool, // filter weak onesies - pub doublet: bool, // filter putative doublets pub fcell: Vec, // constraints from FCELL - pub umi_filt: bool, // umi count filter pub umi_filt_mark: bool, // umi count filter (but only mark) - pub umi_ratio_filt: bool, // umi ratio filter pub umi_ratio_filt_mark: bool, // umi ratio filter (but only mark) - pub weak_chains: bool, // filter weak chains from clonotypes pub whitef: bool, // only show clonotypes exhibiting whitelist contamination pub ncross: bool, // turn off cross filtering, pub bc_dup: bool, // filter duplicated barcodes within an exact subclonotype - pub signature: bool, // signature filtering pub nmax: bool, // turn off max contigs filter } @@ -410,7 +540,6 @@ pub struct ClonoFiltOpt { pub cdr3: Option, // only show clonotypes whose CDR3_AA matches regular expression pub cdr3_lev: String, // only show clonotypes whose CDR3_AA matches Levenshtein dist pattern pub protect_bads: bool, // protect bads from deletion - pub fail_only: bool, // only print fails pub seg: Vec>, // only show clonotypes using one of these VDJ segment names pub segn: Vec>, // only show clonotypes using one of these VDJ segment numbers pub nseg: Vec>, // do not show clonotypes using one of these VDJ segment names @@ -450,11 +579,14 @@ pub struct ClonoPrintOpt { pub cvars: Vec, // per-chain per-exact-clonotype columns pub lvars: Vec, // per-exact-clonotype ('lead') columns pub regex_match: Vec>>, // matching features for _g etc. - pub chain_brief: bool, // show abbreviated chain headers pub sum: bool, // print sum row pub mean: bool, // print mean row pub conx: bool, pub conp: bool, + /// don't gray in per cell lines + pub nogray: bool, + /// turn on debugging for table printing + pub debug_table_printing: bool, } // Clonotype grouping options. @@ -493,10 +625,7 @@ pub struct ClonoGroupOpt { // ASYMMETRIC GROUPING CONTROLS pub asymmetric_center: String, // definition of center for asymmetric grouping - pub asymmetric_dist_formula: String, // definition of distance formula for asymmetric grouping pub asymmetric_dist_bound: String, // definition of distance bound for asymmetric grouping - // DEPRECATED - pub vj_refname_strong: bool, // group by having the same VJ reference names, but stronger } // Parseable output options. 
@@ -519,28 +648,44 @@ pub struct ParseableOpt {

 #[derive(Default)]
 pub struct EncloneControl {
-    pub start_time: Option, // enclone start time
-    pub gen_opt: GeneralOpt, // miscellaneous general options
-    pub plot_opt: PlotOpt, // plot options
-    pub pretty: bool, // use escape characters to enhance view
-    pub nogray: bool, // don't gray in per cell lines
-    pub silent: bool, // turn off extra logging
-    pub force: bool, // make joins even if redundant
-    pub debug_table_printing: bool, // turn on debugging for table printing
-    pub merge_all_impropers: bool, // merge all improper exact subclonotypes
-    pub heur: ClonotypeHeuristics, // algorithmic heuristics
-    pub origin_info: OriginInfo, // origin (sample) info
-    pub allele_alg_opt: AlleleAlgOpt, // algorithmic options for allele finding
-    pub allele_print_opt: AllelePrintOpt, // print options for allele finding
-    pub join_alg_opt: JoinAlgOpt, // algorithmic options for join
-    pub join_print_opt: JoinPrintOpt, // printing options for join operations
-    pub clono_filt_opt_def: ClonoFiltOptDefault, // default filtering options for clonotypes
-    pub clono_filt_opt: ClonoFiltOpt, // filtering options for clonotypes
-    pub clono_print_opt: ClonoPrintOpt, // printing options for clonotypes
-    pub clono_group_opt: ClonoGroupOpt, // grouping options for clonotypes
-    pub parseable_opt: ParseableOpt, // parseable output options
-    pub pathlist: Vec, // list of input files
-    pub last_modified: Vec, // last modified for pathlist
+    /// enclone start time
+    pub start_time: Option,
+    /// miscellaneous general options
+    pub gen_opt: GeneralOpt,
+    /// Config options used by cellranger.
+    pub cr_opt: CellrangerOpt,
+    /// plot options
+    pub plot_opt: PlotOpt,
+    /// use escape characters to enhance view
+    pub pretty: bool,
+    /// turn off extra logging
+    pub silent: bool,
+    /// make joins even if redundant
+    pub force: bool,
+    /// merge all improper exact subclonotypes
+    pub merge_all_impropers: bool,
+    /// algorithmic heuristics
+    pub heur: ClonotypeHeuristics,
+    /// origin (sample) info
+    pub origin_info: OriginInfo,
+    /// algorithmic options for allele finding
+    pub allele_alg_opt: AlleleAlgOpt,
+    /// print options for allele finding
+    pub allele_print_opt: AllelePrintOpt,
+    /// algorithmic options for join
+    pub join_alg_opt: JoinAlgOpt,
+    /// printing options for join operations
+    pub join_print_opt: JoinPrintOpt,
+    /// default filtering options for clonotypes
+    pub clono_filt_opt_def: ClonoFiltOptDefault,
+    /// filtering options for clonotypes
+    pub clono_filt_opt: ClonoFiltOpt,
+    /// printing options for clonotypes
+    pub clono_print_opt: ClonoPrintOpt,
+    /// grouping options for clonotypes
+    pub clono_group_opt: ClonoGroupOpt,
+    /// parseable output options
+    pub parseable_opt: ParseableOpt,
 }

 // Set up data structure to track clonotype data.  A TigData is for one contig;
@@ -763,7 +908,6 @@ pub struct CloneInfo {
 pub struct GexInfo {
     pub gex_features: Vec>,
     pub gex_barcodes: Vec>,
-    pub feature_refs: Vec,
     pub gex_cell_barcodes: Vec>,
     pub cluster: Vec>,
     pub cell_type: Vec>,
@@ -780,7 +924,6 @@ pub struct GexInfo {
     pub have_fb: bool,
     pub feature_metrics: Vec>,
     pub json_metrics: Vec>,
-    pub metrics: Vec,
 }

 // Every entry in a ColInfo is a vector whose number of entries is the number of chains
diff --git a/enclone_core/src/lib.rs b/enclone_core/src/lib.rs
index 04ed6399b..48e519c2d 100644
--- a/enclone_core/src/lib.rs
+++ b/enclone_core/src/lib.rs
@@ -20,9 +20,12 @@ pub mod stringulate;
 pub mod test_def;
 pub mod var_reg;

+use io_utils::path_exists;
 use std::cmp::max;
 use std::fmt::Write;
+use std::fs::{remove_file, File};
 use std::io::BufRead;
+use string_utils::TextUtils;

 #[cfg(not(target_os = "windows"))]
 use string_utils::stringme;
@@ -133,10 +136,9 @@ pub fn parse_bsv(x: &str) -> Vec<&str> {
     args
 }

-// Test to see if a line can be read from the given file f. If not, return an error message
-// the references arg, which is supposed to be the name of a command line argument from which
-// f originated.
-
+/// Test to see if a line can be read from the given file f. If not, return an error message
+/// the references arg, which is supposed to be the name of a command line argument from which
+/// f originated.
 pub fn require_readable_file(f: &str, arg: &str) -> Result<(), String> {
     let x = std::fs::File::open(f);
     if x.is_err() {
@@ -163,3 +165,33 @@ pub fn require_readable_file(f: &str, arg: &str) -> Result<(), String> {
     }
     Ok(())
 }
+
+/// Test a file for writeability by writing and then deleting it.
+pub fn test_writeable(val: &str, evil_eye: bool) -> Result<(), String> {
+    if evil_eye {
+        println!("creating file {val} to test writability");
+    }
+    let f = File::create(val);
+    if f.is_err() {
+        let mut msgx =
+            format!("\nYou've specified an output file\n{val}\nthat cannot be written.\n");
+        if val.contains('/') {
+            let dir = val.rev_before("/");
+            let msg = if path_exists(dir) {
+                "exists"
+            } else {
+                "does not exist"
+            };
+            writeln!(msgx, "Note that the path {dir} {msg}.").unwrap();
+        }
+        return Err(msgx);
+    }
+    if evil_eye {
+        println!("removing file {val}");
+    }
+    remove_file(val).unwrap_or_else(|_| panic!("could not remove file {val}"));
+    if evil_eye {
+        println!("removal of file {val} complete");
+    }
+    Ok(())
+}
diff --git a/enclone_process/src/filter.rs b/enclone_process/src/filter.rs
index 72a2b4bce..155a0b3de 100644
--- a/enclone_process/src/filter.rs
+++ b/enclone_process/src/filter.rs
@@ -511,9 +511,6 @@ pub fn survives_filter(
         })
         .collect::>();
     unique_sort(&mut donors);
-    if ctl.clono_filt_opt.fail_only && donors.len() <= 1 {
-        return false;
-    }

     // Inconsistent D genes.
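The new test_writeable helper added to enclone_core above is exported as a plain pub fn, so a caller can validate a user-supplied output path before doing any expensive work. A minimal usage sketch follows (a hypothetical call site, not part of this diff; passing false simply leaves the evil_eye progress printing off):

```rust
use enclone_core::test_writeable;

// Check an output path (e.g. a FATE_FILE value) up front; on failure this
// returns the "cannot be written" message built by test_writeable, including
// a note on whether the parent directory exists.
fn check_output_path(path: &str) -> Result<(), String> {
    test_writeable(path, false)
}
```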
diff --git a/enclone_process/src/loupe.rs b/enclone_process/src/loupe.rs
index 896e3dc7a..fdbe1efef 100644
--- a/enclone_process/src/loupe.rs
+++ b/enclone_process/src/loupe.rs
@@ -430,7 +430,7 @@ pub fn loupe_out(
     refdata: &RefData,
     dref: &[DonorReferenceItem],
 ) {
-    if !ctl.gen_opt.binary.is_empty() || !ctl.gen_opt.proto.is_empty() {
+    if !ctl.gen_opt.binary.is_empty() || !ctl.cr_opt.proto.is_empty() {
         let mut uref = Vec::new();
         for i in 0..refdata.refs.len() {
             uref.push(UniversalReferenceItem {
@@ -447,13 +447,15 @@ pub fn loupe_out(
                 nt_sequence: refdata.refs[i].to_ascii_vec(),
             });
         }
-        let metadata = match &ctl.gen_opt.proto_metadata {
-            Some(fname) => serde_json::from_reader(
+        let metadata = if !ctl.cr_opt.proto_metadata.is_empty() {
+            let fname = &ctl.cr_opt.proto_metadata;
+            serde_json::from_reader(
                 std::fs::File::open(fname)
                     .unwrap_or_else(|_| panic!("Error while reading {fname}")),
             )
-            .unwrap_or_else(|_| panic!("Unable to deserialize Metadata from {fname}")),
-            None => Metadata::default(),
+            .unwrap_or_else(|_| panic!("Unable to deserialize Metadata from {fname}"))
+        } else {
+            Metadata::default()
         };
         let enclone_outputs = EncloneOutputs {
             version: PROTO_VERSION.into(),
@@ -468,8 +470,8 @@ pub fn loupe_out(
         if !ctl.gen_opt.binary.is_empty() {
             write_obj(&enclone_outputs, &ctl.gen_opt.binary);
         }
-        if !ctl.gen_opt.proto.is_empty() {
-            write_proto(enclone_outputs, &ctl.gen_opt.proto).unwrap();
+        if !ctl.cr_opt.proto.is_empty() {
+            write_proto(enclone_outputs, &ctl.cr_opt.proto).unwrap();
         }
     }
 }
diff --git a/enclone_process/src/process_clonotypes.rs b/enclone_process/src/process_clonotypes.rs
index c1f6badb2..5f3427ce9 100644
--- a/enclone_process/src/process_clonotypes.rs
+++ b/enclone_process/src/process_clonotypes.rs
@@ -185,7 +185,7 @@ pub fn process_clonotypes(

     // Generate Loupe data.

-    let loupe_clonotype = (!ctl.gen_opt.binary.is_empty() || !ctl.gen_opt.proto.is_empty())
+    let loupe_clonotype = (!ctl.gen_opt.binary.is_empty() || !ctl.cr_opt.proto.is_empty())
         .then(|| make_loupe_clonotype(exact_clonotypes, &exacts, &rsi, refdata, dref, ctl));

     // Let n be the total number of cells in this pass.
@@ -227,9 +227,9 @@ pub fn process_clonotypes(
     results.sort_by_key(|(num_cells, _, _)| Reverse(*num_cells));

     // Write out the fate of each filtered barcode.
-    if !ctl.gen_opt.fate_file.is_empty() {
+    if !ctl.cr_opt.fate_file.is_empty() {
         let mut wtr = BufWriter::new(
-            File::create(&ctl.gen_opt.fate_file).expect("Unable to open FATE_FILE for writing"),
+            File::create(&ctl.cr_opt.fate_file).expect("Unable to open FATE_FILE for writing"),
         );
         serde_json::to_writer_pretty(&mut wtr, &fate).map_err(|e| e.to_string())?;
     }
diff --git a/enclone_ranger/Cargo.toml b/enclone_ranger/Cargo.toml
index 569060864..c3be101bf 100644
--- a/enclone_ranger/Cargo.toml
+++ b/enclone_ranger/Cargo.toml
@@ -20,6 +20,7 @@ publish = false
 # in the root of the enclone repo.

 [dependencies]
+anyhow.workspace = true
 enclone_core = { path = "../enclone_core" }
 enclone_process = { path = "../enclone_process" }
 enclone_stuff = { path = "../enclone_stuff" }
diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs
index 0a708905f..a562add03 100644
--- a/enclone_ranger/src/main_enclone.rs
+++ b/enclone_ranger/src/main_enclone.rs
@@ -5,10 +5,11 @@
 use self::refx::{make_vdj_ref_data_core, RefData};
 use crate::USING_PAGER;
+use anyhow::anyhow;
 use enclone::innate::species;
 use enclone_args::load_gex::get_gex_info;
 use enclone_args::proc_args::proc_args;
-use enclone_core::defs::EncloneControl;
+use enclone_core::defs::{CellrangerOpt, EncloneControl};
 use enclone_core::enclone_structs::EncloneSetup;
 use enclone_process::process_clonotypes::{process_clonotypes, OrbitProcessor};
 use enclone_stuff::start::main_enclone_start;
@@ -21,33 +22,31 @@ use std::{
 use string_utils::TextUtils;
 use vdj_ann::refx;

-pub fn main_enclone_ranger(args: &[String]) -> Result<(), String> {
-    const REQUIRED_ARGS: [&str; 8] = [
-        "CELLRANGER",
-        "DONOR_REF_FILE",
-        "MAX_CORES",
-        "NOPAGER",
-        "NOPRINT",
-        "PRE",
-        "PROTO",
-        "REF",
+pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> {
+    const REQUIRED_ARGS: [&str; 7] = [
+        "CELLRANGER", // done
+        "DONOR_REF_FILE", // done
+        "MAX_CORES", // FIXME: move this behavior into enclone and set thread count in CR when calling
+        "NOPRINT", // now unused in enclone_ranger
+        "PRE", // done
+        "PROTO", // done
+        "REF", // done
     ];
-    const ALLOWED_ARGS: [&str; 17] = [
+    const ALLOWED_ARGS: [&str; 16] = [
         "BCR",
-        "META",
-        "NOPRETTY",
-        "PROTO_METADATA",
+        "META", // done
+        "PROTO_METADATA", // done
         "TCR",
         "TCRGD",
-        "GAMMA_DELTA",
-        "FATE_FILE",
-        "NUMI",
-        "NUMI_RATIO",
-        "NGRAPH_FILTER",
-        "NWEAK_CHAINS",
-        "NFOURSIE_KILL",
-        "NDOUBLET",
-        "NSIG",
+        "GAMMA_DELTA", // done
+        "FATE_FILE", // done
+        "NUMI", // done
+        "NUMI_RATIO", // done
+        "NGRAPH_FILTER", // done
+        "NWEAK_CHAINS", // done
+        "NFOURSIE_KILL", // done
+        "NDOUBLET", // done
+        "NSIG", // done
         "SPLIT_MAX_CHAINS",
         "NCROSS",
     ];
@@ -75,43 +74,39 @@ pub fn main_enclone_ranger(args: &[String]) -> Result<(), String> {
         }
     }
     let setup = main_enclone_setup_ranger(args)?;
-    let (exacts, fate) = main_enclone_start(&setup)?;
+    let (exacts, fate) = main_enclone_start(&setup).map_err(|e| anyhow!(e))?;
     let gex_readers = setup.create_gex_readers();
     process_clonotypes::<(), ()>(&setup, &exacts, &gex_readers, &fate, NoOpProc)
+        .map_err(|e| anyhow!(e))?;
+    Ok(())
 }

-pub fn main_enclone_setup_ranger(args: &[String]) -> Result {
+pub fn main_enclone_setup_ranger(args: Vec) -> anyhow::Result {
     let tall = Instant::now();

-    // Set up stuff, read args, etc.
+    let (cr_opt, args) = CellrangerOpt::from_args(args)?;
+
+    let mut ctl = EncloneControl {
+        cr_opt,
+        ..Default::default()
+    };

-    let mut ctl = EncloneControl::default();
-    ctl.gen_opt.cellranger = true;
-    for arg in args.iter().skip(1) {
-        if arg.starts_with("PRE=") {
-            ctl.gen_opt.pre.clear();
-            ctl.gen_opt
-                .pre
-                .extend(arg.after("PRE=").split(',').map(str::to_string));
-        }
-    }
     ctl.start_time = Some(tall);
     ctl.gen_opt.cpu_all_start = 0;
     ctl.gen_opt.cpu_this_start = 0;
-    ctl.gen_opt.nopager = true;
     ctl.pretty = true;
     USING_PAGER.store(false, SeqCst);
-    proc_args(&mut ctl, args)?;
+    proc_args(&mut ctl, &args).map_err(|e| anyhow!(e))?;

     // Get gene expression and feature barcode counts.
-    let gex_info = get_gex_info(&mut ctl)?;
+    let gex_info = get_gex_info(&mut ctl).map_err(|e| anyhow!(e))?;

     // Determine the reference sequence that is to be used.

     let mut refx = String::new();
     let ann = "contig_annotations.json";
-    let fx = File::open(&ctl.gen_opt.refname);
+    let fx = File::open(&ctl.cr_opt.refname);
     let f = BufReader::new(fx.unwrap());
     for line in f.lines() {
         let s = line.unwrap();
diff --git a/enclone_stuff/src/doublets.rs b/enclone_stuff/src/doublets.rs
index f11c4c10d..3e3e4b098 100644
--- a/enclone_stuff/src/doublets.rs
+++ b/enclone_stuff/src/doublets.rs
@@ -31,7 +31,7 @@ pub fn delete_doublets(
     dref: &[DonorReferenceItem],
     fate: &mut [BarcodeFates],
 ) {
-    if ctl.clono_filt_opt_def.doublet {
+    if ctl.cr_opt.doublet {
         // Define pure subclonotypes. To do this we break each clonotype up by chain signature.
         // Note duplication of code with print_clonotypes.rs. And this is doing some
         // superfluous compute.
diff --git a/enclone_stuff/src/filter_umi.rs b/enclone_stuff/src/filter_umi.rs
index ab2e5ac0e..1c4ac43d6 100644
--- a/enclone_stuff/src/filter_umi.rs
+++ b/enclone_stuff/src/filter_umi.rs
@@ -192,7 +192,7 @@ pub fn filter_umi(
                     .insert(ex.clones[i][0].barcode.clone(), BarcodeFate::Umi);
             }
         }
-        if ctl.clono_filt_opt_def.umi_filt {
+        if ctl.cr_opt.umi_filt {
             erase_if(&mut ex.clones, &to_delete);
         }
     }
@@ -299,7 +299,7 @@ pub fn filter_umi(
                        .insert(ex.clones[i][0].barcode.clone(), BarcodeFate::UmiRatio);
                }
            }
-            if ctl.clono_filt_opt_def.umi_ratio_filt {
+            if ctl.cr_opt.umi_ratio_filt {
                erase_if(&mut ex.clones, &to_delete[j]);
                if ex.ncells() == 0 {
                    to_deletex[j] = true;
@@ -308,7 +308,7 @@ pub fn filter_umi(
            }
        }
        if pass == 2 {
-            if ctl.clono_filt_opt_def.umi_ratio_filt {
+            if ctl.cr_opt.umi_ratio_filt {
                erase_if(&mut o, &to_deletex);
            }
            if !o.is_empty() {
diff --git a/enclone_stuff/src/populate_features.rs b/enclone_stuff/src/populate_features.rs
index 4a2176fda..a3efeb95d 100644
--- a/enclone_stuff/src/populate_features.rs
+++ b/enclone_stuff/src/populate_features.rs
@@ -139,7 +139,7 @@ pub fn populate_features(
     // the user supplied the wrong reference, so there is no value in criticizing the reference
     // in that case.

-    if !log.is_empty() && !ctl.gen_opt.cellranger && !ctl.gen_opt.accept_broken {
+    if !log.is_empty() && !ctl.cr_opt.cellranger && !ctl.gen_opt.accept_broken {
         let mut log = Vec::::new();
         fwriteln!(
             log,
diff --git a/enclone_stuff/src/some_filters.rs b/enclone_stuff/src/some_filters.rs
index bcd0d48d7..b16752e09 100644
--- a/enclone_stuff/src/some_filters.rs
+++ b/enclone_stuff/src/some_filters.rs
@@ -147,7 +147,7 @@ pub fn some_filters(
                     t.push(col);
                 }
             }
-            if dels.contains(&t) && ctl.clono_filt_opt_def.signature {
+            if dels.contains(&t) && ctl.cr_opt.signature {
                 res.2.push(exacts[u]);
                 let ex = &exact_clonotypes[exacts[u]];
                 for i in 0..ex.ncells() {
diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs
index ee715a803..1823d42f3 100644
--- a/enclone_stuff/src/start.rs
+++ b/enclone_stuff/src/start.rs
@@ -249,7 +249,7 @@ pub fn main_enclone_start(
             }
         }
     }
-    if ctl.clono_filt_opt_def.weak_foursies {
+    if ctl.cr_opt.weak_foursies {
         erase_if(&mut exact_clonotypes, &to_delete);
     }

@@ -286,12 +286,12 @@ pub fn main_enclone_start(
         alt_refs = find_alleles(refdata, ctl, &exact_clonotypes);
     }

-    if !ctl.gen_opt.dref_file.is_empty() {
-        let f = File::create(&ctl.gen_opt.dref_file);
+    if !ctl.cr_opt.dref_file.is_empty() {
+        let f = File::create(&ctl.cr_opt.dref_file);
         if f.is_err() {
             eprintln!(
-                "\nError trying to write ctl.gen_opt.dref_file = {}.",
-                ctl.gen_opt.dref_file
+                "\nError trying to write ctl.cr_opt.dref_file = {}.",
+                ctl.cr_opt.dref_file
             );
         }
         let mut f = BufWriter::new(f.unwrap());
@@ -475,7 +475,7 @@ pub fn main_enclone_start(
     for (clone, d) in ex.clones.iter().take(ex.ncells()).zip(to_delete.iter_mut()) {
         let li = clone[0].dataset_index;
         let bc = &clone[0].barcode;
-        if ctl.gen_opt.cellranger {
+        if ctl.cr_opt.cellranger {
             if gex_cells_specified[li] && !bin_member(&gex_cells[li], bc) {
                 *d = true;
                 fate[li].insert(bc.clone(), BarcodeFate::NotGexCell);
@@ -880,11 +880,7 @@ pub fn main_enclone_start(
             join_info,
             drefs,
             sr,
-            allele_data: AlleleData {
-                alt_refs,
-                var_pos: Vec::new(),
-                var_bases: Vec::new(),
-            },
+            allele_data: AlleleData { alt_refs },
         },
         fate,
     ))
diff --git a/enclone_stuff/src/weak_chains.rs b/enclone_stuff/src/weak_chains.rs
index 98ca876f6..5cee4cf90 100644
--- a/enclone_stuff/src/weak_chains.rs
+++ b/enclone_stuff/src/weak_chains.rs
@@ -75,7 +75,7 @@ pub fn weak_chains(
             for j in 0..cols {
                 if ncells[j] <= 20 && 8 * ncells[j] < total_cells {
                     for d in &col_entries[j] {
-                        if ctl.clono_filt_opt_def.weak_chains {
+                        if ctl.cr_opt.weak_chains {
                             res.2.push(exacts[*d]);
                         }
                         let ex = &exact_clonotypes[exacts[*d]];
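The some_filters.rs, start.rs, and weak_chains.rs hunks above follow the same pattern as the doublet and UMI filters earlier in the diff: candidate deletions are computed regardless, and the ctl.cr_opt.* toggle only decides whether they are actually erased; with the toggle off, the affected barcodes are only recorded (e.g. in fate). A minimal stand-alone sketch of that pattern, using Vec::retain in place of enclone's erase_if (an illustration, not enclone code):

```rust
/// Drop the rows flagged in `to_delete`, but only if the corresponding
/// CellrangerOpt-style toggle is enabled; otherwise leave the data untouched
/// so the candidates can still be reported.
fn apply_filter<T>(rows: &mut Vec<T>, to_delete: &[bool], filter_enabled: bool) {
    assert_eq!(rows.len(), to_delete.len());
    if filter_enabled {
        let mut flags = to_delete.iter();
        // retain visits rows in order, so zip each row with its deletion flag.
        rows.retain(|_| !*flags.next().unwrap());
    }
}
```

In the hunks above, erase_if plays the role of the retain call, removing the entries whose to_delete flag is set.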