Skip to content

Commit

Permalink
Fix case where the cram file was exchanged
Browse files Browse the repository at this point in the history
  • Loading branch information
aghozlane committed Apr 15, 2024
1 parent 419fd74 commit 5453950
Showing 1 changed file with 23 additions and 5 deletions.
28 changes: 23 additions & 5 deletions meteor/strain.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class Strain(Session):
MIN_MIN_SNP_DEPTH: ClassVar[int] = 1
MAX_MIN_SNP_DEPTH: ClassVar[int] = 10000
DEFAULT_MIN_SNP_DEPTH: ClassVar[int] = 3
DEFAULT_MIN_FREQUENCY_NON_REFERENCE : ClassVar[float] = 0.8
DEFAULT_MIN_FREQUENCY_NON_REFERENCE: ClassVar[float] = 0.8
MIN_MIN_MSP_COVERAGE: ClassVar[int] = 1
MAX_MIN_MSP_COVERAGE: ClassVar[int] = 100
DEFAULT_MIN_MSP_COVERAGE: ClassVar[int] = 50
Expand All @@ -57,7 +57,9 @@ def __post_init__(self) -> None:
self.meteor.tmp_dir = Path(mkdtemp(dir=self.meteor.tmp_path))
self.meteor.strain_dir.mkdir(exist_ok=True, parents=True)

def filter_coverage(self, cram_file: Path, bed_file: Path) -> pd.DataFrame:
def filter_coverage(
self, cram_file: Path, bed_file: Path, reference_file: Path
) -> pd.DataFrame:
"""Filter gene coverage
:param cram_file: (Path) Path to the cram file
:param bed_file: (Path) Path to the bed file
Expand All @@ -76,7 +78,13 @@ def filter_coverage(self, cram_file: Path, bed_file: Path) -> pd.DataFrame:
)
cov_df = pd.read_csv(
StringIO(
pysam.coverage("-d", str(self.max_depth), str(cram_file.resolve())) # type: ignore[attr-defined]
pysam.coverage(
"--reference",
str(reference_file.resolve()),
"-d",
str(self.max_depth),
str(cram_file.resolve()),
) # type: ignore[attr-defined]
),
sep="\t",
header=1,
Expand Down Expand Up @@ -109,6 +117,7 @@ def get_msp_variant(
msp_file: Path,
cram_file: Path,
bed_file: Path,
reference_file: Path,
) -> None:
"""Produce meaning full msp variants
:param consensus_file: (Path) A path to consensus file
Expand All @@ -134,7 +143,9 @@ def get_msp_variant(
# filtered_count = count[count["value"] >= self.min_gene_count]
# Filter for gene with a minimum count
if self.min_gene_coverage:
filtered_coverage = self.filter_coverage(cram_file, bed_file)
filtered_coverage = self.filter_coverage(
cram_file, bed_file, reference_file
)
joined_df = msp_content.merge(filtered_coverage, on="gene_id")
# filtered_coverage = filtered_count.groupby("gene_id")
# Join the two DataFrames based on gene_id
Expand Down Expand Up @@ -273,14 +284,21 @@ def execute(self) -> None:
self.json_data["mapped_sample_dir"]
/ f"{sample_info['sample_name']}.cram"
)
reference_file = (
self.meteor.ref_dir
/ self.json_data["reference"]["reference_file"]["fasta_dir"]
/ self.json_data["reference"]["reference_file"]["fasta_filename"]
)
bed_file = (
self.meteor.ref_dir
/ self.json_data["reference"]["reference_file"]["database_dir"]
/ self.json_data["reference"]["annotation"]["bed"]["filename"]
)
start = perf_counter()
# count_file,
self.get_msp_variant(consensus_file, msp_file, cram_file, bed_file)
self.get_msp_variant(
consensus_file, msp_file, cram_file, bed_file, reference_file
)
logging.info(
"Completed strain analysis in %f seconds", perf_counter() - start
)
Expand Down

0 comments on commit 5453950

Please sign in to comment.