From 3a2f1e6c30f8a0a161258e73583cb505c4af30c2 Mon Sep 17 00:00:00 2001
From: Erik Borgstrom <erik.borgstrom@10xgenomics.com>
Date: Fri, 15 Mar 2024 10:33:13 +0100
Subject: [PATCH] refactor: clarify CDR3 annotation algorithms (#399)

---
 amino/src/lib.rs                        |  10 +-
 enclone/src/info.rs                     |   8 +-
 enclone/src/misc2.rs                    |   4 +-
 enclone_args/src/read_json.rs           |  20 +--
 enclone_core/src/mammalian_fixed_len.rs |   4 +-
 enclone_print/src/print_utils2.rs       |  16 +-
 enclone_print/src/proc_cvar_auto.rs     |  28 +--
 enclone_print/src/proc_lvar_auto.rs     |  62 +++----
 enclone_stuff/src/flag_defective.rs     |  14 +-
 enclone_stuff/src/populate_features.rs  |   4 +-
 vdj_ann/src/annotate.rs                 | 228 ++++++++++++++----------
 vdj_ann/src/transcript.rs               |   9 +-
 12 files changed, 231 insertions(+), 176 deletions(-)
diff --git a/amino/src/lib.rs b/amino/src/lib.rs
index d9e456659..7e029eae4 100644
--- a/amino/src/lib.rs
+++ b/amino/src/lib.rs
@@ -132,14 +132,14 @@ pub fn codon_to_aa(codon: &[u8]) -> u8 {
 
 // Convert a given DNA sequence to amino acids, starting at a given position.
 
-pub fn aa_seq(x: &[u8], start: usize) -> Vec<u8> {
+pub fn nucleotide_to_aminoacid_sequence(dna_seq: &[u8], start: usize) -> Vec<u8> {
     let mut a = Vec::<u8>::new();
-    if x.len() >= 3 {
-        for j in (start..x.len() - 3 + 1).step_by(3) {
-            if x[j] == b'-' && x[j + 1] == b'-' && x[j + 2] == b'-' {
+    if dna_seq.len() >= 3 {
+        for pos in (start..dna_seq.len() - 3 + 1).step_by(3) {
+            if dna_seq[pos] == b'-' && dna_seq[pos + 1] == b'-' && dna_seq[pos + 2] == b'-' {
                 a.push(b'-');
             } else {
-                a.push(codon_to_aa(&x[j..j + 3]));
+                a.push(codon_to_aa(&dna_seq[pos..pos + 3]));
             }
         }
     }
diff --git a/enclone/src/info.rs b/enclone/src/info.rs
index b52c881ce..e08f34c41 100644
--- a/enclone/src/info.rs
+++ b/enclone/src/info.rs
@@ -6,7 +6,7 @@ use enclone_core::barcode_fate::BarcodeFate;
 use vdj_ann::refx;
 
 use self::refx::RefData;
-use amino::{aa_seq, codon_to_aa};
+use amino::{codon_to_aa, nucleotide_to_aminoacid_sequence};
 use ansi_escape::emit_end_escape;
 use debruijn::{dna_string::DnaString, Mer};
 use enclone_core::defs::{CloneInfo, EncloneControl, ExactClonotype};
@@ -130,8 +130,8 @@ pub fn build_info(
 
                 // Optimize to compute entry in aa_mod_indel and the inserted aa sequence.
 
-                let aa_full = aa_seq(&x.seq, 0);
-                let ref_aa = aa_seq(&refdata.refs[vid].to_ascii_vec(), 0);
+                let aa_full = nucleotide_to_aminoacid_sequence(&x.seq, 0);
+                let ref_aa = nucleotide_to_aminoacid_sequence(&refdata.refs[vid].to_ascii_vec(), 0);
                 let ins_len_aa = ins_len / 3;
                 const EXT: usize = 10;
                 let ins_pos_low = if ins_pos / 3 < EXT {
@@ -180,7 +180,7 @@ pub fn build_info(
                 lens.push(x.seq.len());
                 tigs.push(x.seq.clone());
                 tigs_amino.push(x.seq.clone());
-                aa_mod_indel.push(aa_seq(&x.seq, 0));
+                aa_mod_indel.push(nucleotide_to_aminoacid_sequence(&x.seq, 0));
                 tigs_ins.push(Vec::new());
             }
 
diff --git a/enclone/src/misc2.rs b/enclone/src/misc2.rs
index 7d6d3d010..911ace84e 100644
--- a/enclone/src/misc2.rs
+++ b/enclone/src/misc2.rs
@@ -4,7 +4,7 @@
 
 use crate::innate::mark_innate;
 use crate::misc3::study_consensus;
-use amino::aa_seq;
+use amino::nucleotide_to_aminoacid_sequence;
 use debruijn::dna_string::DnaString;
 use enclone_core::barcode_fate::BarcodeFate;
 use enclone_core::defs::{EncloneControl, ExactClonotype, Junction, TigData, TigData0, TigData1};
@@ -180,7 +180,7 @@ pub fn create_exact_subclonotype_core(
         // are the same, which in principle they should be, but this is not actually always true.
         // However this is hard to fix.
 
-        let aa = aa_seq(tig_bc[r][m].seq(), 0);
+        let aa = nucleotide_to_aminoacid_sequence(tig_bc[r][m].seq(), 0);
         let mut d_start = None;
         if tig_bc[r][m].d_start.is_some() {
             d_start = Some(tig_bc[r][m].d_start.unwrap() + utr.len() - tig_bc[r][m].v_start);
diff --git a/enclone_args/src/read_json.rs b/enclone_args/src/read_json.rs
index a2e9880a3..d66fceb31 100644
--- a/enclone_args/src/read_json.rs
+++ b/enclone_args/src/read_json.rs
@@ -191,10 +191,10 @@ fn process_json_annotation(
         } else if !is_productive_contig(&x, refdata, &ann1).0 {
             return Ok(res);
         }
-        let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new();
-        get_cdr3_using_ann(&x, refdata, &ann1, &mut cdr3);
-        cdr3_aa = stringme(&cdr3[0].1);
-        cdr3_start = cdr3[0].0;
+        let found_cdr3s = get_cdr3_using_ann(&x, refdata, &ann1);
+        let cdr3 = found_cdr3s.first().unwrap();
+        cdr3_aa = stringme(&cdr3.aa_seq);
+        cdr3_start = cdr3.start_position_on_contig;
         cdr3_dna = x
             .slice(cdr3_start, cdr3_start + 3 * cdr3_aa.len())
             .to_string();
@@ -403,23 +403,23 @@ fn process_json_annotation(
         // than is used in the current version of enclone.  This could result in internal
         // inconsistencies, leading to an assert somewhere downstream.
 
-        let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new();
         let x = DnaString::from_dna_string(&ann.sequence);
-        get_cdr3_using_ann(&x, refdata, &annv, &mut cdr3);
-        if cdr3.is_empty() {
+        let found_cdr3s = get_cdr3_using_ann(&x, refdata, &annv);
+        if found_cdr3s.is_empty() {
             return Ok(res);
         }
-        let cdr3_aa_alt = stringme(&cdr3[0].1);
+        let cdr3 = found_cdr3s.first().unwrap();
+        let cdr3_aa_alt = stringme(&cdr3.aa_seq);
         if cdr3_aa != cdr3_aa_alt {
             // This is particularly pathological and rare:
 
-            if tig_start as usize > cdr3[0].0 {
+            if tig_start as usize > cdr3.start_position_on_contig {
                 return Ok(res);
             }
 
             // Define start.
 
-            cdr3_start = cdr3[0].0 - tig_start as usize;
+            cdr3_start = cdr3.start_position_on_contig - tig_start as usize;
 
             // Define cdr3.
 
diff --git a/enclone_core/src/mammalian_fixed_len.rs b/enclone_core/src/mammalian_fixed_len.rs
index 15ba7037d..6ab76175c 100644
--- a/enclone_core/src/mammalian_fixed_len.rs
+++ b/enclone_core/src/mammalian_fixed_len.rs
@@ -1,6 +1,6 @@
 // Copyright (c) 2021 10X Genomics, Inc. All rights reserved.
 
-use amino::aa_seq;
+use amino::nucleotide_to_aminoacid_sequence;
 use std::collections::HashMap;
 use string_utils::TextUtils;
 use superslice::Ext;
@@ -54,7 +54,7 @@ pub fn mammalian_fixed_len_peer_groups(refdata: &RefData) -> Vec<Vec<(usize, u8,
     let mut pg = vec![Vec::<(usize, u8, u32)>::new(); refdata.refs.len()];
     for (i, pgi) in pg.iter_mut().enumerate() {
         if refdata.is_v(i) {
-            let aa = aa_seq(&refdata.refs[i].to_ascii_vec(), 0);
+            let aa = nucleotide_to_aminoacid_sequence(&refdata.refs[i].to_ascii_vec(), 0);
             let rtype = refdata.rtype[i];
             let chain_type = if rtype == 0 {
                 "IGH"
diff --git a/enclone_print/src/print_utils2.rs b/enclone_print/src/print_utils2.rs
index 1541c3fdc..ca03c1152 100644
--- a/enclone_print/src/print_utils2.rs
+++ b/enclone_print/src/print_utils2.rs
@@ -7,7 +7,7 @@ use crate::print_utils1::color_codon;
 use crate::proc_cvar_auto::proc_cvar_auto;
 use crate::proc_lvar2::proc_lvar2;
 use crate::proc_lvar_auto::proc_lvar_auto;
-use amino::{aa_seq, codon_to_aa};
+use amino::{codon_to_aa, nucleotide_to_aminoacid_sequence};
 use enclone_core::allowed_vars::LVARS_ALLOWED;
 use enclone_core::barcode_fate::BarcodeFate;
 use enclone_core::defs::{AlleleData, ColInfo, EncloneControl, ExactClonotype, GexInfo, POUT_SEP};
@@ -564,7 +564,12 @@ pub fn row_fill(
         // Speak some other column entries.
 
         let xm = &ex.share[mid];
-        speakc!(u, col, "vj_aa".to_string(), stringme(&aa_seq(&xm.seq, 0)));
+        speakc!(
+            u,
+            col,
+            "vj_aa".to_string(),
+            stringme(&nucleotide_to_aminoacid_sequence(&xm.seq, 0))
+        );
         speakc!(u, col, "vj_seq".to_string(), stringme(&xm.seq));
         let mut dna = Vec::<u8>::new();
         for p in xm.fr1_start..xm.seq_del_amino.len() {
@@ -578,7 +583,12 @@ pub fn row_fill(
                 dna.push(xm.seq_del_amino[p]);
             }
         }
-        speakc!(u, col, "vj_aa_nl".to_string(), stringme(&aa_seq(&dna, 0)));
+        speakc!(
+            u,
+            col,
+            "vj_aa_nl".to_string(),
+            stringme(&nucleotide_to_aminoacid_sequence(&dna, 0))
+        );
         speakc!(u, col, "vj_seq_nl".to_string(), stringme(&dna));
         speakc!(u, col, "seq".to_string(), stringme(&xm.full_seq));
         let mut vv = Vec::<usize>::new();
diff --git a/enclone_print/src/proc_cvar_auto.rs b/enclone_print/src/proc_cvar_auto.rs
index 6cabbb453..d526b1a2f 100644
--- a/enclone_print/src/proc_cvar_auto.rs
+++ b/enclone_print/src/proc_cvar_auto.rs
@@ -6,7 +6,7 @@ use crate::print_utils1::{
     test_internal_error_seq,
 };
 use crate::print_utils3::comp_edit;
-use amino::{aa_seq, codon_to_aa};
+use amino::{codon_to_aa, nucleotide_to_aminoacid_sequence};
 use enclone_core::align_to_vdj_ref::{align_to_vdj_ref, cigar};
 use enclone_core::defs::{AlleleData, ColInfo, EncloneControl, ExactClonotype, POUT_SEP};
 use enclone_core::median::rounded_median;
@@ -212,7 +212,7 @@ pub fn proc_cvar_auto(
                 let dna = refdata.refs[x.v_ref_id].to_ascii_vec()
                     [x.cdr1_start.unwrap()..x.fr2_start.unwrap()]
                     .to_vec();
-                y = stringme(&aa_seq(&dna, 0));
+                y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
             }
         } else if x.cdr2_start.is_some()
             && x.fr3_start.is_some()
@@ -221,7 +221,7 @@ pub fn proc_cvar_auto(
             let dna = refdata.refs[x.v_ref_id].to_ascii_vec()
                 [x.cdr2_start.unwrap()..x.fr3_start.unwrap()]
                 .to_vec();
-            y = stringme(&aa_seq(&dna, 0));
+            y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
         }
 
         (y, Vec::new(), "clono".to_string())
@@ -264,9 +264,9 @@ pub fn proc_cvar_auto(
         let arg1 = vname.between2("cdr", "_aa").force_i64();
         let x = &ex.share[mid];
         let y = if arg1 == 1 {
-            get_cdr1(x, 0, 0).map(|c| stringme(&aa_seq(c.as_bytes(), 0)))
+            get_cdr1(x, 0, 0).map(|c| stringme(&nucleotide_to_aminoacid_sequence(c.as_bytes(), 0)))
         } else if arg1 == 2 {
-            get_cdr2(x, 0, 0).map(|c| stringme(&aa_seq(c.as_bytes(), 0)))
+            get_cdr2(x, 0, 0).map(|c| stringme(&nucleotide_to_aminoacid_sequence(c.as_bytes(), 0)))
         } else {
             Some(x.cdr3_aa.clone())
         }
@@ -291,7 +291,7 @@ pub fn proc_cvar_auto(
             get_cdr3(x, -1, -1)
         };
         let y = if let Some(c) = c {
-            stringme(&aa_seq(c.as_bytes(), 0))
+            stringme(&nucleotide_to_aminoacid_sequence(c.as_bytes(), 0))
         } else {
             "unknown".to_string()
         };
@@ -348,7 +348,7 @@ pub fn proc_cvar_auto(
                     }
                 }
                 test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?;
-                y = stringme(&aa_seq(&dna, 0));
+                y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
             }
         } else if arg1 == 2 {
             if x.cdr2_start.is_some()
@@ -372,7 +372,7 @@ pub fn proc_cvar_auto(
                     }
                 }
                 test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?;
-                y = stringme(&aa_seq(&dna, 0));
+                y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
             }
         } else if x.cdr3_start as i64 - left >= 0
             && x.cdr3_start as i64 - left < x.seq_del_amino.len() as i64
@@ -395,7 +395,7 @@ pub fn proc_cvar_auto(
                 }
             }
             test_internal_error_seq(&x.seq, &dna, &x.cdr3_aa)?;
-            y = stringme(&aa_seq(&dna, 0));
+            y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
         }
 
         (y, Vec::new(), "exact".to_string())
@@ -848,7 +848,7 @@ pub fn proc_cvar_auto(
             Some(stringme(dna))
         };
         let y = if let Some(c) = c {
-            stringme(&aa_seq(c.as_bytes(), 0))
+            stringme(&nucleotide_to_aminoacid_sequence(c.as_bytes(), 0))
         } else {
             "unknown".to_string()
         };
@@ -868,21 +868,21 @@ pub fn proc_cvar_auto(
                 let dna = refdata.refs[x.v_ref_id].to_ascii_vec()
                     [x.fr1_start..x.cdr1_start.unwrap()]
                     .to_vec();
-                y = stringme(&aa_seq(&dna, 0));
+                y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
             }
         } else if arg1 == 2 {
             if x.fr2_start.unwrap() <= x.cdr2_start.unwrap() {
                 let dna = refdata.refs[x.v_ref_id].to_ascii_vec()
                     [x.fr2_start.unwrap()..x.cdr2_start.unwrap()]
                     .to_vec();
-                y = stringme(&aa_seq(&dna, 0));
+                y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
             }
         } else if arg1 == 3 {
             if x.fr3_start.is_some() && x.fr3_start.unwrap() <= x.cdr3_start - x.ins_len() {
                 let dna = refdata.refs[x.v_ref_id].to_ascii_vec();
                 if x.cdr3_start <= dna.len() {
                     let dna = dna[x.fr3_start.unwrap()..x.cdr3_start - x.ins_len()].to_vec();
-                    y = stringme(&aa_seq(&dna, 0));
+                    y = stringme(&nucleotide_to_aminoacid_sequence(&dna, 0));
                 }
             }
         } else {
@@ -890,7 +890,7 @@ pub fn proc_cvar_auto(
             let aa_len = if heavy { 10 } else { 9 };
             let dna = refdata.refs[x.j_ref_id].to_ascii_vec();
             let dna = &dna[dna.len() - 1 - 3 * aa_len..dna.len() - 1];
-            y = stringme(&aa_seq(dna, 0));
+            y = stringme(&nucleotide_to_aminoacid_sequence(dna, 0));
         }
 
         (y, Vec::new(), "clono".to_string())
diff --git a/enclone_print/src/proc_lvar_auto.rs b/enclone_print/src/proc_lvar_auto.rs
index f4edd177c..8cce1db10 100644
--- a/enclone_print/src/proc_lvar_auto.rs
+++ b/enclone_print/src/proc_lvar_auto.rs
@@ -1,7 +1,7 @@
 // Copyright (c) 2021 10x Genomics, Inc. All rights reserved.
 // This file is auto-generated by the crate enclone_vars, please do not edit.
 
-use amino::{aa_seq, codon_to_aa};
+use amino::{codon_to_aa, nucleotide_to_aminoacid_sequence};
 use enclone_core::barcode_fate::BarcodeFate;
 use enclone_core::defs::{ColInfo, EncloneControl, ExactClonotype, GexInfo, POUT_SEP};
 use enclone_core::median::{median_f64, rounded_median};
@@ -151,7 +151,7 @@ pub fn proc_lvar_auto(
         let reg = Regex::new(vname.between2("count_", "")).unwrap();
         let mut n = 0;
         for j in 0..ex.share.len() {
-            let aa = aa_seq(&ex.share[j].seq, 0); // seems inefficient
+            let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq, 0); // seems inefficient
             n += reg.find_iter(strme(&aa)).count();
         }
 
@@ -168,7 +168,7 @@ pub fn proc_lvar_auto(
         let reg = Regex::new(vname.between2("count_", "_cell")).unwrap();
         let mut n = 0;
         for j in 0..ex.share.len() {
-            let aa = aa_seq(&ex.share[j].seq, 0); // seems inefficient
+            let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq, 0); // seems inefficient
             n += reg.find_iter(strme(&aa)).count();
         }
 
@@ -190,7 +190,7 @@ pub fn proc_lvar_auto(
                 let cdr1 = ex.share[j].cdr1_start.unwrap();
                 let fwr2 = ex.share[j].fr2_start.unwrap();
                 if cdr1 < fwr2 {
-                    let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr1..fwr2], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
@@ -198,13 +198,13 @@ pub fn proc_lvar_auto(
                 let cdr2 = ex.share[j].cdr2_start.unwrap();
                 let fwr3 = ex.share[j].fr3_start.unwrap();
                 if cdr2 < fwr3 {
-                    let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr2..fwr3], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
             let cdr3 = ex.share[j].cdr3_start;
             let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len();
-            let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0);
+            let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr3..fwr4], 0);
             n += reg.find_iter(strme(&aa)).count();
         }
 
@@ -225,7 +225,7 @@ pub fn proc_lvar_auto(
                 let cdr1 = ex.share[j].cdr1_start.unwrap();
                 let fwr2 = ex.share[j].fr2_start.unwrap();
                 if cdr1 < fwr2 {
-                    let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr1..fwr2], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
@@ -233,13 +233,13 @@ pub fn proc_lvar_auto(
                 let cdr2 = ex.share[j].cdr2_start.unwrap();
                 let fwr3 = ex.share[j].fr3_start.unwrap();
                 if cdr2 < fwr3 {
-                    let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr2..fwr3], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
             let cdr3 = ex.share[j].cdr3_start;
             let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len();
-            let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0);
+            let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr3..fwr4], 0);
             n += reg.find_iter(strme(&aa)).count();
         }
 
@@ -267,7 +267,7 @@ pub fn proc_lvar_auto(
                     let cdr1 = ex.share[j].cdr1_start.unwrap();
                     let fwr2 = ex.share[j].fr2_start.unwrap();
                     if cdr1 < fwr2 {
-                        let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr1..fwr2], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -278,7 +278,7 @@ pub fn proc_lvar_auto(
                     let cdr2 = ex.share[j].cdr2_start.unwrap();
                     let fwr3 = ex.share[j].fr3_start.unwrap();
                     if cdr2 < fwr3 {
-                        let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr2..fwr3], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -287,7 +287,7 @@ pub fn proc_lvar_auto(
             for j in 0..ex.share.len() {
                 let cdr3 = ex.share[j].cdr3_start;
                 let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len();
-                let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0);
+                let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr3..fwr4], 0);
                 n += reg.find_iter(strme(&aa)).count();
             }
         }
@@ -318,7 +318,7 @@ pub fn proc_lvar_auto(
                     let cdr1 = ex.share[j].cdr1_start.unwrap();
                     let fwr2 = ex.share[j].fr2_start.unwrap();
                     if cdr1 < fwr2 {
-                        let aa = aa_seq(&ex.share[j].seq[cdr1..fwr2], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr1..fwr2], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -329,7 +329,7 @@ pub fn proc_lvar_auto(
                     let cdr2 = ex.share[j].cdr2_start.unwrap();
                     let fwr3 = ex.share[j].fr3_start.unwrap();
                     if cdr2 < fwr3 {
-                        let aa = aa_seq(&ex.share[j].seq[cdr2..fwr3], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr2..fwr3], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -338,7 +338,7 @@ pub fn proc_lvar_auto(
             for j in 0..ex.share.len() {
                 let cdr3 = ex.share[j].cdr3_start;
                 let fwr4 = cdr3 + 3 * ex.share[j].cdr3_aa.len();
-                let aa = aa_seq(&ex.share[j].seq[cdr3..fwr4], 0);
+                let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[cdr3..fwr4], 0);
                 n += reg.find_iter(strme(&aa)).count();
             }
         }
@@ -361,7 +361,7 @@ pub fn proc_lvar_auto(
                 let fwr1 = ex.share[j].fr1_start;
                 let cdr1 = ex.share[j].cdr1_start.unwrap();
                 if fwr1 < cdr1 {
-                    let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr1..cdr1], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
@@ -369,7 +369,7 @@ pub fn proc_lvar_auto(
                 let fwr2 = ex.share[j].fr2_start.unwrap();
                 let cdr2 = ex.share[j].cdr2_start.unwrap();
                 if fwr2 < cdr2 {
-                    let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr2..cdr2], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
@@ -377,12 +377,12 @@ pub fn proc_lvar_auto(
                 let fwr3 = ex.share[j].fr3_start.unwrap();
                 let cdr3 = ex.share[j].cdr3_start;
                 if fwr3 < cdr3 {
-                    let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr3..cdr3], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
             let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len();
-            let aa = aa_seq(&ex.share[j].seq[fwr4..], 0);
+            let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr4..], 0);
             n += reg.find_iter(strme(&aa)).count();
         }
 
@@ -403,7 +403,7 @@ pub fn proc_lvar_auto(
                 let fwr1 = ex.share[j].fr1_start;
                 let cdr1 = ex.share[j].cdr1_start.unwrap();
                 if fwr1 < cdr1 {
-                    let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr1..cdr1], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
@@ -411,7 +411,7 @@ pub fn proc_lvar_auto(
                 let fwr2 = ex.share[j].fr2_start.unwrap();
                 let cdr2 = ex.share[j].cdr2_start.unwrap();
                 if fwr2 < cdr2 {
-                    let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr2..cdr2], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
@@ -419,12 +419,12 @@ pub fn proc_lvar_auto(
                 let fwr3 = ex.share[j].fr3_start.unwrap();
                 let cdr3 = ex.share[j].cdr3_start;
                 if fwr3 < cdr3 {
-                    let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0);
+                    let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr3..cdr3], 0);
                     n += reg.find_iter(strme(&aa)).count();
                 }
             }
             let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len();
-            let aa = aa_seq(&ex.share[j].seq[fwr4..], 0);
+            let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr4..], 0);
             n += reg.find_iter(strme(&aa)).count();
         }
 
@@ -452,7 +452,7 @@ pub fn proc_lvar_auto(
                     let fwr1 = ex.share[j].fr1_start;
                     let cdr1 = ex.share[j].cdr1_start.unwrap();
                     if fwr1 < cdr1 {
-                        let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr1..cdr1], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -463,7 +463,7 @@ pub fn proc_lvar_auto(
                     let fwr2 = ex.share[j].fr2_start.unwrap();
                     let cdr2 = ex.share[j].cdr2_start.unwrap();
                     if fwr2 < cdr2 {
-                        let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr2..cdr2], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -474,7 +474,7 @@ pub fn proc_lvar_auto(
                     let fwr3 = ex.share[j].fr3_start.unwrap();
                     let cdr3 = ex.share[j].cdr3_start;
                     if fwr3 < cdr3 {
-                        let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr3..cdr3], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -482,7 +482,7 @@ pub fn proc_lvar_auto(
         } else {
             for j in 0..ex.share.len() {
                 let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len();
-                let aa = aa_seq(&ex.share[j].seq[fwr4..], 0);
+                let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr4..], 0);
                 n += reg.find_iter(strme(&aa)).count();
             }
         }
@@ -513,7 +513,7 @@ pub fn proc_lvar_auto(
                     let fwr1 = ex.share[j].fr1_start;
                     let cdr1 = ex.share[j].cdr1_start.unwrap();
                     if fwr1 < cdr1 {
-                        let aa = aa_seq(&ex.share[j].seq[fwr1..cdr1], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr1..cdr1], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -524,7 +524,7 @@ pub fn proc_lvar_auto(
                     let fwr2 = ex.share[j].fr2_start.unwrap();
                     let cdr2 = ex.share[j].cdr2_start.unwrap();
                     if fwr2 < cdr2 {
-                        let aa = aa_seq(&ex.share[j].seq[fwr2..cdr2], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr2..cdr2], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -535,7 +535,7 @@ pub fn proc_lvar_auto(
                     let fwr3 = ex.share[j].fr3_start.unwrap();
                     let cdr3 = ex.share[j].cdr3_start;
                     if fwr3 < cdr3 {
-                        let aa = aa_seq(&ex.share[j].seq[fwr3..cdr3], 0);
+                        let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr3..cdr3], 0);
                         n += reg.find_iter(strme(&aa)).count();
                     }
                 }
@@ -543,7 +543,7 @@ pub fn proc_lvar_auto(
         } else {
             for j in 0..ex.share.len() {
                 let fwr4 = ex.share[j].cdr3_start + 3 * ex.share[j].cdr3_aa.len();
-                let aa = aa_seq(&ex.share[j].seq[fwr4..], 0);
+                let aa = nucleotide_to_aminoacid_sequence(&ex.share[j].seq[fwr4..], 0);
                 n += reg.find_iter(strme(&aa)).count();
             }
         }
diff --git a/enclone_stuff/src/flag_defective.rs b/enclone_stuff/src/flag_defective.rs
index 78b5ff066..657b6b2f5 100644
--- a/enclone_stuff/src/flag_defective.rs
+++ b/enclone_stuff/src/flag_defective.rs
@@ -2,7 +2,7 @@
 
 // Flag defective reference sequences.
 
-use amino::aa_seq;
+use amino::nucleotide_to_aminoacid_sequence;
 use enclone_core::defs::EncloneControl;
 use io_utils::fwriteln;
 use itertools::Itertools;
@@ -93,8 +93,8 @@ pub fn flag_defective(
             // Continue.
 
             let seq = refs.to_ascii_vec();
-            let aa0 = aa_seq(&seq, 0);
-            let aa2 = aa_seq(&seq, 2);
+            let aa0 = nucleotide_to_aminoacid_sequence(&seq, 0);
+            let aa2 = nucleotide_to_aminoacid_sequence(&seq, 2);
             if aa2.contains(&b'*') && !aa0.contains(&b'*') {
                 count += 1;
                 *broken = true;
@@ -142,7 +142,7 @@ pub fn flag_defective(
             // Test for broken.
 
             let seq = refs.to_ascii_vec();
-            let aa = aa_seq(&seq, 0);
+            let aa = nucleotide_to_aminoacid_sequence(&seq, 0);
             let mut reasons = Vec::<&'static str>::new();
             if !aa.starts_with(b"M") {
                 reasons.push("does not begin with a start codon");
@@ -171,7 +171,7 @@ pub fn flag_defective(
                         let mut seqx = seq.clone();
                         for _ in 1..=2 {
                             let _ = seqx.remove(3 * j);
-                            let aax = aa_seq(&seqx, 0);
+                            let aax = nucleotide_to_aminoacid_sequence(&seqx, 0);
                             if !aax.contains(&b'*') {
                                 fixable = true;
                             }
@@ -184,7 +184,7 @@ pub fn flag_defective(
             }
             if aa.len() >= 31 {
                 for del in 1..=2 {
-                    let aad = aa_seq(&seq, del);
+                    let aad = nucleotide_to_aminoacid_sequence(&seq, del);
                     if cdr3_score(&aad, chain_type, false) > 4 + cdr3_score(&aa, chain_type, false)
                     {
                         reasons.push("appears to be frameshifted");
@@ -208,7 +208,7 @@ pub fn flag_defective(
                 let score = cdr3_score(&aa, chain_type, false);
                 let mut frameshift = false;
                 for del in 1..=2 {
-                    let aad = aa_seq(&seq, del);
+                    let aad = nucleotide_to_aminoacid_sequence(&seq, del);
                     if score <= 6 && cdr3_score(&aad, chain_type, false) >= 3 + score {
                         frameshift = true;
                     }
diff --git a/enclone_stuff/src/populate_features.rs b/enclone_stuff/src/populate_features.rs
index 755c06bf2..4a2176fda 100644
--- a/enclone_stuff/src/populate_features.rs
+++ b/enclone_stuff/src/populate_features.rs
@@ -2,7 +2,7 @@
 
 // Populate features.
 
-use amino::aa_seq;
+use amino::nucleotide_to_aminoacid_sequence;
 use enclone_core::defs::EncloneControl;
 use io_utils::fwriteln;
 use std::fmt::Write as _;
@@ -34,7 +34,7 @@ pub fn populate_features(
             if broken[i] && ctl.gen_opt.require_unbroken_ok {
                 continue;
             }
-            let aa = aa_seq(&refdata.refs[i].to_ascii_vec(), 0);
+            let aa = nucleotide_to_aminoacid_sequence(&refdata.refs[i].to_ascii_vec(), 0);
             let rtype = refdata.rtype[i];
             let chain_type = if rtype == 0 {
                 "IGH"
diff --git a/vdj_ann/src/annotate.rs b/vdj_ann/src/annotate.rs
index bccc4de59..ed0587a47 100644
--- a/vdj_ann/src/annotate.rs
+++ b/vdj_ann/src/annotate.rs
@@ -6,7 +6,7 @@
 use crate::transcript::is_productive_contig;
 use crate::{refx::RefData, transcript::ContigStatus};
 use align_tools::affine_align;
-use amino::{aa_seq, have_start};
+use amino::{have_start, nucleotide_to_aminoacid_sequence};
 use bio_edit::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
 use debruijn::{
     dna_string::{DnaString, DnaStringSlice},
@@ -2654,72 +2654,115 @@ pub fn cdr3_motif_right() -> Vec<Vec<u8>> {
     vec![b"LTFG.GTRVTV".to_vec(), b"LIWG.GSKLSI".to_vec()]
 }
 
-pub fn cdr3_min_len() -> usize {
-    5
+const CDR3_MIN_LEN: usize = 5;
+const LEFT_FLANK_MIN_SCORE: usize = 3;
+const RIGHT_FLANK_MIN_SCORE: usize = 4;
+
+pub struct CDR3Annotation {
+    pub start_position_on_contig: usize,
+    pub aa_seq: Vec<u8>,
+    left_flank_score: usize,
+    right_flank_score: usize,
 }
 
-pub fn get_cdr3(tig: &DnaStringSlice, cdr3: &mut Vec<(usize, Vec<u8>, usize, usize)>) {
+pub fn get_cdr3(contig: &DnaStringSlice) -> Vec<CDR3Annotation> {
     const MIN_TOTAL_CDR3_SCORE: usize = 10; // about as high as one can go
-    let (left, right) = (cdr3_motif_left(), cdr3_motif_right());
-    cdr3.clear();
-    let x = tig.to_owned().to_ascii_vec();
-    for i in 0..3 {
-        // go through three frames
-        let a = aa_seq(&x, i);
-        if a.len() < 4 {
-            return;
-        }
-        for j in 0..a.len() - min(a.len(), (cdr3_min_len() + 3) + 1) {
-            if a[j] == b'C' {
-                // CDR3 starts at position j on a
-                let first_f = j + (cdr3_min_len() - 3);
-                let last_f = a.len() - 4;
-                for k in first_f..last_f {
-                    if k + right[0].len() > a.len() {
+
+    let left_motifs = cdr3_motif_left();
+    let right_motifs = cdr3_motif_right();
+
+    let contig_seq = contig.to_owned().to_ascii_vec();
+
+    let mut found_cdr3s: Vec<CDR3Annotation> = Vec::new();
+    for frame_idx in 0..3 {
+        // Go through three frames.
+
+        // Convert the DNA sequence + frame to an amino acid sequence.
+        let amino_acid_seq = nucleotide_to_aminoacid_sequence(&contig_seq, frame_idx);
+        if amino_acid_seq.len() < 4 {
+            continue;
+        }
+
+        // Check each position in the AA seq to see if we find a CDR3 there.
+        for cdr3_start_pos in
+            0..amino_acid_seq.len() - min(amino_acid_seq.len(), (CDR3_MIN_LEN + 3) + 1)
+        {
+            // The CDR3 has to start with a Cysteine.
+            if amino_acid_seq[cdr3_start_pos] == b'C' {
+                // Search for the right flank set up start and end positions for the search.
+                let first_f = cdr3_start_pos + (CDR3_MIN_LEN - 3);
+                let last_f = amino_acid_seq.len() - RIGHT_FLANK_MIN_SCORE;
+                for right_motif_start_pos in first_f..last_f {
+                    // Don't look further if the remaining part of the amino_acid_seq is to short to find the full right flank motif.
+                    if right_motif_start_pos + right_motifs[0].len() > amino_acid_seq.len() {
                         break;
                     }
-                    let mut rscore = 0;
-                    for m in 0..right[0].len() {
+
+                    // Match the right flank and calculate the score.
+                    let mut right_flank_score = 0;
+                    for right_motif_col in 0..right_motifs[0].len() {
                         let mut hit = false;
-                        for r in 0..right.len() {
-                            if a[k + m] == right[r][m] {
+                        for right_motif_row in &right_motifs {
+                            if amino_acid_seq[right_motif_start_pos + right_motif_col]
+                                == right_motif_row[right_motif_col]
+                            {
                                 hit = true;
                             }
                         }
                         if hit {
-                            rscore += 1;
+                            right_flank_score += 1;
                         }
                     }
-                    if rscore >= 4 {
-                        let mut st = false;
-                        for l in j + 1..k + 2 {
-                            if a[l] == b'*' {
-                                st = true;
+
+                    // If right flank score is larger than RIGHT_FLANK_MIN_SCORE, continue and attempt to match left flank,
+                    // otherwise continue with next possible CDR3 start position.
+                    if right_flank_score >= RIGHT_FLANK_MIN_SCORE {
+                        // Check if there is a stop codon in the CDR3.
+                        let mut stop_codon = false;
+                        for aa in amino_acid_seq
+                            .iter()
+                            .take(right_motif_start_pos + 2)
+                            .skip(cdr3_start_pos + 1)
+                        {
+                            if aa == &b'*' {
+                                stop_codon = true;
                             }
                         }
-                        let ll = left[0].len();
-                        if !st && j >= ll {
-                            let mut lscore = 0;
-                            for m in 0..ll {
+
+                        // If there is no stop codon and there is room for the full left motif in the AA seq, match left flank.
+                        let ll = left_motifs[0].len();
+                        if !stop_codon && cdr3_start_pos >= ll {
+                            let mut left_flank_score = 0;
+                            for left_motif_col in 0..ll {
                                 let mut hit = false;
-                                for r in 0..left.len() {
-                                    if a[j - ll + m] == left[r][m] {
+                                for left_motif_row in &left_motifs {
+                                    if amino_acid_seq[cdr3_start_pos - ll + left_motif_col]
+                                        == left_motif_row[left_motif_col]
+                                    {
                                         hit = true;
                                     }
                                 }
                                 if hit {
-                                    lscore += 1;
+                                    left_flank_score += 1;
                                 }
                             }
-                            // ◼ It's possible that the lscore + rscore
+
+                            // If the left flank score and total score is above cutoff push the CDR3 to the vec.
+                            // ◼ It's possible that the left_flank_score + right_flank_score
                             // ◼ bound should be increased.
-                            if lscore >= 3 && lscore + rscore >= MIN_TOTAL_CDR3_SCORE {
-                                cdr3.push((
-                                    tig.start + i + 3 * j,
-                                    a[j..k + 2 + 1].to_vec(),
-                                    lscore,
-                                    rscore,
-                                ));
+                            if left_flank_score >= LEFT_FLANK_MIN_SCORE
+                                && left_flank_score + right_flank_score >= MIN_TOTAL_CDR3_SCORE
+                            {
+                                found_cdr3s.push(CDR3Annotation {
+                                    start_position_on_contig: contig.start
+                                        + frame_idx
+                                        + 3 * cdr3_start_pos,
+                                    aa_seq: amino_acid_seq
+                                        [cdr3_start_pos..right_motif_start_pos + 2 + 1]
+                                        .to_vec(),
+                                    left_flank_score: left_flank_score,
+                                    right_flank_score: right_flank_score,
+                                });
                             }
                         }
                     }
@@ -2729,33 +2772,39 @@ pub fn get_cdr3(tig: &DnaStringSlice, cdr3: &mut Vec<(usize, Vec<u8>, usize, usi
     }
 
     // Only return cdr3s having the maximum score.
-
-    let m = cdr3.iter().map(|cdi| cdi.2 + cdi.3).max().unwrap_or(0);
-    let to_delete = cdr3.iter().map(|cdi| cdi.2 + cdi.3 < m).collect::<Vec<_>>();
-    erase_if(cdr3, &to_delete);
-    cdr3.sort();
+    let max_score = found_cdr3s
+        .iter()
+        .map(|cdr3| cdr3.left_flank_score + cdr3.right_flank_score)
+        .max()
+        .unwrap_or(0);
+    let to_delete = found_cdr3s
+        .iter()
+        .map(|cdr3| cdr3.left_flank_score + cdr3.right_flank_score < max_score)
+        .collect::<Vec<_>>();
+    erase_if(&mut found_cdr3s, &to_delete);
+    found_cdr3s.sort_by_key(|cdr3| cdr3.start_position_on_contig);
 
     // Prefer later start and prefer longer CDR3.
-
-    if cdr3.len() > 1 {
+    if found_cdr3s.len() > 1 {
         // ◼ This is awkward.
-        let n = cdr3.len();
-        cdr3.swap(0, n - 1);
-        cdr3.truncate(1);
-    }
+        let n = found_cdr3s.len();
+        found_cdr3s.swap(0, n - 1);
+        found_cdr3s.truncate(1);
+    };
+
+    found_cdr3s
 }
 
 pub fn print_cdr3(tig: &DnaStringSlice, log: &mut Vec<u8>) {
-    let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new();
-    get_cdr3(tig, &mut cdr3);
-    for i in 0..cdr3.len() {
+    let cdr3_anns = get_cdr3(tig);
+    for cdr3 in cdr3_anns {
         fwriteln!(
             log,
             "cdr3 = {} at {}, score = {} + {}",
-            strme(&cdr3[i].1),
-            cdr3[i].0,
-            cdr3[i].2,
-            cdr3[i].3
+            strme(&cdr3.aa_seq),
+            cdr3.start_position_on_contig,
+            cdr3.left_flank_score,
+            cdr3.right_flank_score
         );
     }
 }
@@ -2769,7 +2818,7 @@ pub fn print_cdr3(tig: &DnaStringSlice, log: &mut Vec<u8>) {
 // ◼ of the V segment is not right.
 
 pub fn cdr3_loc<'a>(
-    tig: &'a DnaString,
+    contig: &'a DnaString,
     refdata: &RefData,
     ann: &[Annotation],
 ) -> DnaStringSlice<'a> {
@@ -2777,25 +2826,25 @@ pub fn cdr3_loc<'a>(
     // except possibly for changes less than ten.
     const LOW_RELV_CDR3: isize = -40;
     if ann.is_empty() {
-        return tig.slice(0, 0);
+        return contig.slice(0, 0);
     }
     let mut i = ann.len() - 1;
     loop {
         let t = ann[i].ref_id as usize;
         if !refdata.rheaders[t].contains("segment") && refdata.is_v(t) {
             let (l, p) = (ann[i].tig_start as isize, ann[i].ref_start as isize);
-            let vstop_on_tig = l + refdata.refs[t].len() as isize - p;
-            let mut start = vstop_on_tig + LOW_RELV_CDR3;
+            let vstop_on_contig = l + refdata.refs[t].len() as isize - p;
+            let mut start = vstop_on_contig + LOW_RELV_CDR3;
             if start < 0 {
                 start = 0;
             }
-            if start > tig.len() as isize {
-                start = tig.len() as isize;
+            if start > contig.len() as isize {
+                start = contig.len() as isize;
             }
-            return tig.slice(start as usize, tig.len());
+            return contig.slice(start as usize, contig.len());
         }
         if i == 0 {
-            return tig.slice(0, 0);
+            return contig.slice(0, 0);
         }
         i -= 1;
     }
@@ -2809,8 +2858,7 @@ pub fn get_cdr3_using_ann(
     tig: &DnaString,
     refdata: &RefData,
     ann: &[Annotation],
-    cdr3: &mut Vec<(usize, Vec<u8>, usize, usize)>,
-) {
+) -> Vec<CDR3Annotation> {
     let window = cdr3_loc(tig, refdata, ann);
 
     // Enlarge the window because get_cdr3 looks for motifs to the left and right
@@ -2828,8 +2876,7 @@ pub fn get_cdr3_using_ann(
     let window2 = tig.slice(start as usize, stop as usize);
 
     // Now find the CDR3.
-
-    get_cdr3(&window2, cdr3)
+    get_cdr3(&window2)
 }
 
 pub fn print_cdr3_using_ann(
@@ -2838,16 +2885,15 @@ pub fn print_cdr3_using_ann(
     ann: &[Annotation],
     log: &mut Vec<u8>,
 ) {
-    let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new();
-    get_cdr3_using_ann(tig, refdata, ann, &mut cdr3);
-    for i in 0..cdr3.len() {
+    let found_cdr3s = get_cdr3_using_ann(tig, refdata, ann);
+    for cdr3 in found_cdr3s {
         fwriteln!(
             log,
             "cdr3 = {} at {}, score = {} + {}",
-            strme(&cdr3[i].1),
-            cdr3[i].0,
-            cdr3[i].2,
-            cdr3[i].3
+            strme(&cdr3.aa_seq),
+            cdr3.start_position_on_contig,
+            cdr3.left_flank_score,
+            cdr3.right_flank_score
         );
     }
 }
@@ -3150,7 +3196,7 @@ impl ContigAnnotation {
         let mut stop = -1_i32;
         let x = b.to_owned().to_ascii_vec();
         if vstart >= 0 {
-            let y = aa_seq(&x, vstart as usize);
+            let y = nucleotide_to_aminoacid_sequence(&x, vstart as usize);
             aa = stringme(&y);
             for i in 0..y.len() {
                 if y[i] == b'*' {
@@ -3161,15 +3207,15 @@ impl ContigAnnotation {
         }
         let (mut cdr3x, mut cdr3x_dna) = (String::new(), String::new());
         let (mut cdr3x_start, mut cdr3x_stop) = (-1_i32, -1_i32);
-        let mut cdr3y = Vec::<(usize, Vec<u8>, usize, usize)>::new();
-        if !refdata.refs.is_empty() {
-            get_cdr3_using_ann(b, refdata, ann, &mut cdr3y);
+        let found_cdr3s = if !refdata.refs.is_empty() {
+            get_cdr3_using_ann(b, refdata, ann)
         } else {
-            get_cdr3(&b.slice(0, b.len()), &mut cdr3y);
-        }
-        if !cdr3y.is_empty() {
-            cdr3x = stringme(&cdr3y[0].1);
-            let start = cdr3y[0].0;
+            get_cdr3(&b.slice(0, b.len()))
+        };
+        if !found_cdr3s.is_empty() {
+            let cdr3 = found_cdr3s.first().unwrap();
+            cdr3x = stringme(&cdr3.aa_seq);
+            let start = cdr3.start_position_on_contig;
             for i in start..start + 3 * cdr3x.len() {
                 cdr3x_dna.push(x[i] as char);
             }
diff --git a/vdj_ann/src/transcript.rs b/vdj_ann/src/transcript.rs
index 2d29d3f57..36e73c231 100644
--- a/vdj_ann/src/transcript.rs
+++ b/vdj_ann/src/transcript.rs
@@ -170,13 +170,12 @@ fn evaluate_contig_status(
         )
     };
 
-    let mut cdr3 = Vec::<(usize, Vec<u8>, usize, usize)>::new();
-    get_cdr3_using_ann(contig, reference, ann, &mut cdr3);
-    contig_status.has_cdr3 = Some(!cdr3.is_empty());
+    let found_cdr3s = get_cdr3_using_ann(contig, reference, ann);
+    contig_status.has_cdr3 = Some(!found_cdr3s.is_empty());
 
-    if let (Some((vstart, jstop)), Some(cdr3)) = (inframe_pair, cdr3.first()) {
+    if let (Some((vstart, jstop)), Some(cdr3)) = (inframe_pair, found_cdr3s.first()) {
         let expected_len = (refs[vstart.ref_id].len() + refs[jstop.ref_id].len()) as i32
-            + (3 * cdr3.1.len() as i32)
+            + (3 * cdr3.aa_seq.len() as i32)
             - 20;
         let observed_len = jstop.tig_stop as i32 - vstart.tig_start as i32;
         let delta = expected_len - observed_len;