Skip to content

Commit

Permalink
refactor: update concordant read counting, mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
balajtimate committed Jan 17, 2024
1 parent b5b80ca commit 31af8ec
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 33 deletions.
71 changes: 39 additions & 32 deletions htsinfer/get_library_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,59 +158,66 @@ def _align_mates(self):
previous_seq_id2 = None

reads1 = [] # List to store alignments for one read from file1
mate1 = [] # List to store alignments for each read
mate1 = [] # List to store alignments for each read from file1
reads2 = [] # List to store alignments for one read from file2
mate2 = [] # List to store alignments for each read from file2

concordant = 0

for read1 in samfile1:
# ensure that "unmapped" flag is not set and query name is set
if not read1.flag & (1 << 2) and isinstance(read1.query_name, str):
seq_id1 = read1.query_name
if (
seq_id1 != previous_seq_id1 and
previous_seq_id1 is not None
):
mate1.append(reads1.copy())
reads1.clear()
if read1.reference_end:
reads1.append(read1)
seq_id1 = read1.query_name
if (
seq_id1 != previous_seq_id1 and
previous_seq_id1 is not None
):
mate1.append(reads1.copy())
reads1.clear()
if read1.reference_end:
reads1.append(read1)
previous_seq_id1 = seq_id1
mate1.append(reads1.copy())

read_counter = 0
for read2 in samfile2:
# ensure that "unmapped" flag is not set and query name is set
if not read2.flag & (1 << 2) and isinstance(read2.query_name, str):
seq_id2 = read2.query_name
if seq_id2 != previous_seq_id2 \
and previous_seq_id2 is not None:
if read_counter < len(mate1) and self._compare_alignments(
mate1[read_counter], reads2
):
concordant += 1
reads2.clear()
read_counter += 1
if read2.reference_end:
reads2.append(read2)
seq_id2 = read2.query_name
if (
seq_id2 != previous_seq_id2 and
previous_seq_id2 is not None
):
mate2.append(reads2.copy())
if self._compare_alignments(
mate1[read_counter], reads2
):
concordant += 1
reads2.clear()
read_counter += 1
if read2.reference_end:
reads2.append(read2)
previous_seq_id2 = seq_id2

if read_counter < len(mate1) and self._compare_alignments(
mate2.append(reads2.copy())
if self._compare_alignments(
mate1[read_counter], reads2
):
concordant += 1
LOGGER.debug(f"Number of aligned reads file 1: {len(mate1)}")
LOGGER.debug(f"Number of aligned reads file 2: {read_counter}")

aligned_mate1 = len(list(filter(None, mate1)))
aligned_mate2 = len(list(filter(None, mate2)))

LOGGER.debug(f"Number of aligned reads file 1: {aligned_mate1}")
LOGGER.debug(f"Number of aligned reads file 2: {aligned_mate2}")
LOGGER.debug(f"Number of concordant reads: {concordant}")
self._update_relationship(concordant, read_counter)

self._update_relationship(
concordant, min(aligned_mate1, aligned_mate2)
)

samfile1.close()
samfile2.close()

def _update_relationship(self, concordant, read_counter):
def _update_relationship(self, concordant, aligned_reads):
"""Helper function to update relationship based on alignment."""
try:
ratio = concordant / read_counter
ratio = concordant / aligned_reads
except ZeroDivisionError:
self.results.relationship = (
StatesTypeRelationship.not_available
Expand Down
1 change: 1 addition & 0 deletions htsinfer/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ def build_star_command(
"--runThreadN", f"{str(self.threads_star)}",
"--genomeDir", f"{str(index_dir)}",
"--outFilterMultimapNmax", "50",
"--outSAMorder", "PairedKeepInputOrder",
"--outSAMunmapped", "Within", "KeepPairs",
]
cmd: List[str] = cmd_base[:]
Expand Down
3 changes: 2 additions & 1 deletion tests/test_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ def test_prepare_star_alignment_commands(self, tmpdir):
file1_alignment_path = tmpdir / 'alignments/file_1'
cmd = "STAR --alignIntronMax 1 --alignEndsType Local --runThreadN 1" \
+ " --genomeDir " + str(index_dir) + " --outFilterMultimapNmax " \
+ "50 --outSAMunmapped Within KeepPairs --readFilesIn " \
+ "50 --outSAMorder PairedKeepInputOrder " \
+ "--outSAMunmapped Within KeepPairs --readFilesIn " \
+ str(FILE_2000_RECORDS) + " --outFileNamePrefix " \
+ str(file1_alignment_path) + "/"
results = test_instance.prepare_star_alignment_commands(
Expand Down

0 comments on commit 31af8ec

Please sign in to comment.