diff --git a/htsinfer/get_library_type.py b/htsinfer/get_library_type.py index 3781179..3dd562c 100644 --- a/htsinfer/get_library_type.py +++ b/htsinfer/get_library_type.py @@ -158,59 +158,66 @@ def _align_mates(self): previous_seq_id2 = None reads1 = [] # List to store alignments for one read from file1 - mate1 = [] # List to store alignments for each read + mate1 = [] # List to store alignments for each read from file1 reads2 = [] # List to store alignments for one read from file2 + mate2 = [] # List to store alignments for each read from file2 concordant = 0 for read1 in samfile1: - # ensure that "unmapped" flag is not set and query name is set - if not read1.flag & (1 << 2) and isinstance(read1.query_name, str): - seq_id1 = read1.query_name - if ( - seq_id1 != previous_seq_id1 and - previous_seq_id1 is not None - ): - mate1.append(reads1.copy()) - reads1.clear() - if read1.reference_end: - reads1.append(read1) + seq_id1 = read1.query_name + if ( + seq_id1 != previous_seq_id1 and + previous_seq_id1 is not None + ): + mate1.append(reads1.copy()) + reads1.clear() + if read1.reference_end: + reads1.append(read1) previous_seq_id1 = seq_id1 mate1.append(reads1.copy()) read_counter = 0 for read2 in samfile2: - # ensure that "unmapped" flag is not set and query name is set - if not read2.flag & (1 << 2) and isinstance(read2.query_name, str): - seq_id2 = read2.query_name - if seq_id2 != previous_seq_id2 \ - and previous_seq_id2 is not None: - if read_counter < len(mate1) and self._compare_alignments( - mate1[read_counter], reads2 - ): - concordant += 1 - reads2.clear() - read_counter += 1 - if read2.reference_end: - reads2.append(read2) + seq_id2 = read2.query_name + if ( + seq_id2 != previous_seq_id2 and + previous_seq_id2 is not None + ): + mate2.append(reads2.copy()) + if self._compare_alignments( + mate1[read_counter], reads2 + ): + concordant += 1 + reads2.clear() + read_counter += 1 + if read2.reference_end: + reads2.append(read2) previous_seq_id2 = seq_id2 - - if read_counter < len(mate1) and self._compare_alignments( + mate2.append(reads2.copy()) + if self._compare_alignments( mate1[read_counter], reads2 ): concordant += 1 - LOGGER.debug(f"Number of aligned reads file 1: {len(mate1)}") - LOGGER.debug(f"Number of aligned reads file 2: {read_counter}") + + aligned_mate1 = len(list(filter(None, mate1))) + aligned_mate2 = len(list(filter(None, mate2))) + + LOGGER.debug(f"Number of aligned reads file 1: {aligned_mate1}") + LOGGER.debug(f"Number of aligned reads file 2: {aligned_mate2}") LOGGER.debug(f"Number of concordant reads: {concordant}") - self._update_relationship(concordant, read_counter) + + self._update_relationship( + concordant, min(aligned_mate1, aligned_mate2) + ) samfile1.close() samfile2.close() - def _update_relationship(self, concordant, read_counter): + def _update_relationship(self, concordant, aligned_reads): """Helper function to update relationship based on alignment.""" try: - ratio = concordant / read_counter + ratio = concordant / aligned_reads except ZeroDivisionError: self.results.relationship = ( StatesTypeRelationship.not_available diff --git a/htsinfer/mapping.py b/htsinfer/mapping.py index cb53f7a..e1cef46 100644 --- a/htsinfer/mapping.py +++ b/htsinfer/mapping.py @@ -299,6 +299,7 @@ def build_star_command( "--runThreadN", f"{str(self.threads_star)}", "--genomeDir", f"{str(index_dir)}", "--outFilterMultimapNmax", "50", + "--outSAMorder", "PairedKeepInputOrder", "--outSAMunmapped", "Within", "KeepPairs", ] cmd: List[str] = cmd_base[:] diff --git a/tests/test_mapping.py b/tests/test_mapping.py index 33798be..1bdecbb 100644 --- a/tests/test_mapping.py +++ b/tests/test_mapping.py @@ -187,7 +187,8 @@ def test_prepare_star_alignment_commands(self, tmpdir): file1_alignment_path = tmpdir / 'alignments/file_1' cmd = "STAR --alignIntronMax 1 --alignEndsType Local --runThreadN 1" \ + " --genomeDir " + str(index_dir) + " --outFilterMultimapNmax " \ - + "50 --outSAMunmapped Within KeepPairs --readFilesIn " \ + + "50 --outSAMorder PairedKeepInputOrder " \ + + "--outSAMunmapped Within KeepPairs --readFilesIn " \ + str(FILE_2000_RECORDS) + " --outFileNamePrefix " \ + str(file1_alignment_path) + "/" results = test_instance.prepare_star_alignment_commands(