Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: output read orientation fractions to json #169

Merged
merged 8 commits into from
Oct 9, 2024
133 changes: 123 additions & 10 deletions htsinfer/get_read_orientation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import (Any, DefaultDict, Dict, List)

import pysam # type: ignore
import pandas as pd # type: ignore

from htsinfer.exceptions import (
FileProblem,
Expand Down Expand Up @@ -58,6 +59,7 @@ def __init__(
self.library_source = config.results.library_source
self.transcripts_file = config.args.t_file_processed
self.tmp_dir = config.args.tmp_dir
self.out_dir = config.args.out_dir
self.min_mapped_reads = config.args.read_orientation_min_mapped_reads
self.min_fraction = config.args.read_orientation_min_fraction
self.mapping = mapping
Expand Down Expand Up @@ -176,13 +178,31 @@ def process_single(
else:
orientation = StatesOrientation.unstranded

# write log messages and return result
orient_df = pd.DataFrame([{
'Number of mapped reads': reads,
'Fraction SF': fractions[0].get(
StatesOrientation.stranded_forward
),
'Fraction SR': fractions[0].get(
StatesOrientation.stranded_reverse
),
'Orientation': orientation.value
}])

orient_df = self.create_orient_df(
reads, fractions_all_states, orientation, paired=False
)
balajtimate marked this conversation as resolved.
Show resolved Hide resolved

LOGGER.debug(
f"Required number of mapped reads pairs: {self.min_mapped_reads}"
f"Required number of mapped reads: {self.min_mapped_reads}"
)
LOGGER.debug(f"Number of reads mapped: {reads}")
LOGGER.debug(f"Fraction of states: {fractions_all_states}")
LOGGER.debug(f"Orientation: {orientation}")
LOGGER.debug(f"Number of mapped reads: {orient_df.iloc[0, 0]}")
LOGGER.debug(f"Fraction of SF: {orient_df.iloc[0, 1]}")
LOGGER.debug(f"Fraction of SR: {orient_df.iloc[0, 2]}")
LOGGER.debug(f"Orientation: {orient_df.iloc[0, 3]}")
balajtimate marked this conversation as resolved.
Show resolved Hide resolved

self.write_orientation_to_json(orient_df, self.paths[0].name)

return orientation

def process_paired( # pylint: disable=R0912,R0915
Expand Down Expand Up @@ -293,13 +313,32 @@ def process_paired( # pylint: disable=R0912,R0915
orientation.file_1 = StatesOrientation.unstranded
orientation.file_2 = StatesOrientation.unstranded

# write log messages and return result
orient_df_1 = self.create_orient_df(
reads, fractions_all_states, orientation, paired=True, file_index=1
)
orient_df_2 = self.create_orient_df(
reads, fractions_all_states, orientation, paired=True, file_index=2
)

LOGGER.debug(
f"Required number of mapped read pairs: {self.min_mapped_reads}"
f"Required number of mapped reads: {self.min_mapped_reads}"
)
LOGGER.debug(f"Number of reads mapped: {reads}")
LOGGER.debug(f"Fraction of states: {fractions_all_states}")
LOGGER.debug(f"Orientation: {orientation}")
LOGGER.debug(f"Number of mapped reads: {orient_df_1.iloc[0, 0]}")
LOGGER.debug(f"Fraction of ISF: {orient_df_1.iloc[0, 1]}")
LOGGER.debug(f"Fraction of ISR: {orient_df_1.iloc[0, 2]}")
LOGGER.debug(f"Orientation file 1: {orient_df_1.iloc[0, 3]}")
LOGGER.debug(f"Orientation file 2: {orient_df_2.iloc[0, 3]}")
LOGGER.debug(
f"Orientation relationship: {orient_df_1.iloc[0, 4]}"
)

self.write_orientation_to_json(
orient_df_1, getattr(self.paths[0], 'name')
)
self.write_orientation_to_json(
orient_df_2, getattr(self.paths[1], 'name')
)

return orientation

@staticmethod
Expand Down Expand Up @@ -338,3 +377,77 @@ def sum_dicts(*dicts: Dict[Any, float]) -> Dict[Any, float]:
for key, num in dct.items():
result[key] += num
return dict(result)

def create_orient_df(  # pylint: disable=R0917
    self,
    reads,
    fractions_all_states,
    orientation,
    paired: bool,
    file_index=None
):
    """Build a one-row DataFrame summarising read orientation results.

    The column order is significant: callers read the values back by
    position (``iloc``), so the keys below must keep their order.

    Args:
        reads: Number of mapped reads.
        fractions_all_states: Mapping from orientation state to the
            fraction observed for that state; looked up with ``.get``,
            so missing states yield ``None``.
        orientation: Orientation result. For single-end data its
            ``value`` attribute is read; for paired-end data its
            ``file_1``/``file_2`` and ``relationship`` attributes are
            read.
        paired: Indicates if the sequencing data is paired-end.
        file_index: For paired-end data, ``1`` selects
            ``orientation.file_1``; any other value (including the
            default ``None``) selects ``orientation.file_2``. Ignored
            for single-end data.

    Returns:
        pd.DataFrame: A single-row DataFrame containing the orientation
        details.
    """
    # Single-end case first, so the paired-end logic below is unnested.
    if not paired:
        forward = fractions_all_states.get(
            StatesOrientation.stranded_forward
        )
        reverse = fractions_all_states.get(
            StatesOrientation.stranded_reverse
        )
        single_row = {
            'Number of mapped reads': reads,
            'Fraction SF': forward,
            'Fraction SR': reverse,
            'Orientation': orientation.value,
        }
        return pd.DataFrame([single_row])

    inward_forward = fractions_all_states.get(
        StatesOrientationRelationship.inward_stranded_forward
    )
    inward_reverse = fractions_all_states.get(
        StatesOrientationRelationship.inward_stranded_reverse
    )
    # Only file index 1 maps to the first mate; everything else falls
    # through to the second mate.
    if file_index == 1:
        mate_state = orientation.file_1
    else:
        mate_state = orientation.file_2
    paired_row = {
        'Number of mapped reads': reads,
        'Fraction ISF': inward_forward,
        'Fraction ISR': inward_reverse,
        # ``getattr`` with a default tolerates states without ``value``.
        'Orientation': getattr(mate_state, 'value', None),
        'Relationship': getattr(orientation.relationship, 'value', None),
    }
    return pd.DataFrame([paired_row])

def write_orientation_to_json(self, orient_df, filename):
    """Dump an orientation dataframe to a JSON file in the output directory.

    The target path is ``<self.out_dir>/read_orientation_<filename>.json``.
    The dataframe is serialized with the ``split`` orientation, without
    its index, and with indentation enabled.

    Args:
        orient_df: The dataframe containing orientation details.
        filename: Name embedded in the output file name.

    Returns:
        None
    """
    json_name = f"read_orientation_{filename}.json"
    destination = Path(self.out_dir).joinpath(json_name)
    LOGGER.debug(f"Writing results to file: {destination}")
    # NOTE(review): pandas documents ``indent`` as an int; ``True`` is
    # presumably treated as 1 here -- confirm before changing.
    orient_df.to_json(
        destination,
        orient='split',
        index=False,
        indent=True,
    )
Loading