From 7e88f7d9e010513efd09e3f78795b0f703d1ae1c Mon Sep 17 00:00:00 2001 From: Rutger Vos Date: Mon, 25 Nov 2024 22:35:21 +0100 Subject: [PATCH] the MGE FASTA files seem to have developed underscores as some magic sequence character :-/ --- barcode_validator/alignment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/barcode_validator/alignment.py b/barcode_validator/alignment.py index 7cec526..361f15b 100644 --- a/barcode_validator/alignment.py +++ b/barcode_validator/alignment.py @@ -120,15 +120,15 @@ def unalign_sequence(self, sequence): self.logger.info("Removing gaps from aligned sequence") if isinstance(sequence, SeqRecord): # Convert Seq to string, remove gaps, then convert back to Seq - unaligned_sequence = str(sequence.seq).replace('-', '').replace('~', '') + unaligned_sequence = str(sequence.seq).replace('-', '').replace('~', '').replace('_', '') sequence.seq = Seq(unaligned_sequence) return sequence elif isinstance(sequence, Seq): # If it's just a Seq object, convert to string, remove gaps, then back to Seq - return Seq(str(sequence).replace('-', '').replace('~', '')) + return Seq(str(sequence).replace('-', '').replace('~', '').replace('_', '')) elif isinstance(sequence, str): # If it's a string, just remove the gaps - return sequence.replace('-', '').replace('~', '') + return sequence.replace('-', '').replace('~', '').replace('_', '') else: raise TypeError(f"Unexpected type for sequence: {type(sequence)}")