Skip to content

Commit

Permalink
extra validity tests + debug
Browse files: browse the repository at this point in the history
  • Loading branch information
semenko committed Jan 19, 2024
1 parent 7cd4f04 commit d856cb6
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
8 changes: 3 additions & 5 deletions src/bam2tensor/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,11 @@ def main(
"""Bam2Tensor."""
time_start = time.time()
# Print run information
print(f"Genome name: {genome_name}")
print(f"Reference fasta: {reference_fasta}")
print(f"Expected chromosomes: {expected_chromosomes}")
print(f"Input path: {input_path}")

print(f"\nLoading (or generating) methylation embedding named: {reference_fasta}")

# Convert expected_chromosomes to a list
print(f"\tExpected chromosomes: {expected_chromosomes}")
print(f"\nLoading (or generating) methylation embedding for: {genome_name}")

# Create (or load) a GenomeMethylationEmbedding object
genome_methylation_embedding = GenomeMethylationEmbedding(
Expand Down
22 changes: 15 additions & 7 deletions src/bam2tensor/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ def __init__(
if not skip_cache:
try:
cache_available = self.load_embedding_cache()
assert (
expected_chromosomes == self.expected_chromosomes
), "Expected chromosomes do not match cached chromosomes!"
assert (
window_size == self.window_size
), "Window size does not match cached window size!"
except FileNotFoundError as e:
if self.verbose:
print("Could not load methylation embedding from cache: " + str(e))
Expand All @@ -108,7 +114,10 @@ def __init__(
# How many CpG sites are there?
self.total_cpg_sites = sum([len(v) for v in self.cpg_sites_dict.values()])
if self.verbose:
print(f"\t\tTotal CpG sites: {self.total_cpg_sites:,}")
print(f"\tTotal CpG sites: {self.total_cpg_sites:,}")
print(
f"\tTotal number of windows (at window_size = {self.window_size}): {len(self.windowed_cpg_sites_dict):,}"
)

# Create a dictionary of chromosome -> CpG site -> index (embedding) for efficient lookup
self.chr_to_cpg_to_embedding_dict = {
Expand All @@ -127,7 +136,7 @@ def __init__(
)

if verbose:
print(f"Loaded methylation embedding for: {self.genome_name}")
print("Loaded methylation embedding.")

def save_embedding_cache(self):
"""Save a cache of expensive objects as our methylation embedding."""
Expand Down Expand Up @@ -163,12 +172,9 @@ def load_embedding_cache(self) -> bool:
If the cached CpG site file cannot be found.
"""

if self.verbose:
print(f"\tLoading embedding data for: {self.genome_name}")

if os.path.exists(self.cache_file):
if self.verbose:
print(f"\t\tReading embedding from cache: {self.cache_file}")
print(f"\tReading embedding from cache: {self.cache_file}")

# TODO: Add type hinting via TypedDicts?
# e.g. https://stackoverflow.com/questions/51291722/define-a-jsonable-type-using-mypy-pep-526
Expand All @@ -195,8 +201,10 @@ def load_embedding_cache(self) -> bool:
].items()
}

if self.verbose:
print(f"\tCached genome fasta source: {self.fasta_source}")
else:
raise FileNotFoundError("\tNo cache of embedding found.")
raise FileNotFoundError("No cache of embedding found.")

return True

Expand Down

0 comments on commit d856cb6

Please sign in to comment.