Skip to content

Commit

Permalink
Rename functions
Browse files — browse the repository at this point in the history
  • Loading branch information
fplazaonate committed May 2, 2024
1 parent 5f7d0c9 commit a2c0b04
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 51 deletions.
3 changes: 3 additions & 0 deletions crocodeel/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import os
# Disable implicit parallelism in numpy
os.environ["OMP_NUM_THREADS"] = "1"
78 changes: 38 additions & 40 deletions crocodeel/ab_table_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,48 +4,46 @@
import sys
from typing import TextIO

class SpeciesAbTableUtils:
@staticmethod
def load(fh: TextIO) -> pd.DataFrame:
# Read table
species_ab_table = pd.read_csv(fh, sep="\t", header=0, index_col=0)
logging.info(
"Abundance table quantifies %d species in %d samples", species_ab_table.shape[0], species_ab_table.shape[1]

def load(fh: TextIO) -> pd.DataFrame:
# Read table
species_ab_table = pd.read_csv(fh, sep="\t", header=0, index_col=0)
logging.info(
"Abundance table quantifies %d species in %d samples", species_ab_table.shape[0], species_ab_table.shape[1]
)

# Check species names
species_names_type = species_ab_table.index.inferred_type
if species_names_type not in ("integer", "string"):
logging.error(
"Species names in first column are of the '%s' type but should be 'string' or 'integer'",
species_names_type,
)
sys.exit(1)
# Convert species names to strings when they are integers
species_ab_table.index = species_ab_table.index.astype(str)

# Check species abundance type
bad_format_samples = [
sample
for sample in species_ab_table.columns
if not pd.api.types.is_numeric_dtype(species_ab_table[sample].dtype)
]
if bad_format_samples:
logging.error("Species abundance in the following samples is not numeric: %s", " ".join(bad_format_samples))
sys.exit(1)



# Check species names
species_names_type = species_ab_table.index.inferred_type
if species_names_type not in ("integer", "string"):
logging.error(
"Species names in first column are of the '%s' type but should be 'string' or 'integer'",
species_names_type,
)
sys.exit(1)
# Convert species names to strings when they are integers
species_ab_table.index = species_ab_table.index.astype(str)

# Check species abundance type
bad_format_samples = [
sample
for sample in species_ab_table.columns
if not pd.api.types.is_numeric_dtype(species_ab_table[sample].dtype)
]
if bad_format_samples:
logging.error("Species abundance in the following samples is not numeric: %s", " ".join(bad_format_samples))
sys.exit(1)



return species_ab_table
return species_ab_table

@staticmethod
def normalize(species_ab_table: pd.DataFrame) -> pd.DataFrame :
# Normalize to relative abundance
species_ab_table = species_ab_table.div(species_ab_table.sum(axis=0), axis=1)
def normalize(species_ab_table: pd.DataFrame) -> pd.DataFrame :
# Normalize to relative abundance
species_ab_table = species_ab_table.div(species_ab_table.sum(axis=0), axis=1)

# Perform log10 transformation
with np.errstate(divide="ignore"):
species_ab_table = species_ab_table.apply(np.log10)
# Perform log10 transformation
with np.errstate(divide="ignore"):
species_ab_table = species_ab_table.apply(np.log10)

logging.info("Species abundance table normalized and log-transformed")
return species_ab_table
logging.info("Species abundance table normalized and log-transformed")
return species_ab_table
6 changes: 3 additions & 3 deletions crocodeel/easy_wf.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Any
from crocodeel.species_ab_table import SpeciesAbTableUtils
import crocodeel.ab_table_utils as ab_table_utils
from crocodeel.search_conta import run_search_conta
from crocodeel.plot_conta import run_plot_conta, Defaults as plot_conta_defaults

def run_easy_wf(args: dict[str,Any]):
species_ab_table = SpeciesAbTableUtils.load(args["species_ab_table_fh"])
species_ab_table = ab_table_utils.load(args["species_ab_table_fh"])
args["species_ab_table_fh"].close()
args["species_ab_table"] = SpeciesAbTableUtils.normalize(species_ab_table)
args["species_ab_table"] = ab_table_utils.normalize(species_ab_table)

run_search_conta(args)

Expand Down
6 changes: 3 additions & 3 deletions crocodeel/plot_conta.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
import logging
from time import perf_counter
from crocodeel.conta_event import ContaminationEvent, ContaminationEventIO
from crocodeel.species_ab_table import SpeciesAbTableUtils
import crocodeel.ab_table_utils as ab_table_utils


def run_plot_conta(args: dict[str, Any]):
if "species_ab_table" in args:
species_ab_table = args["species_ab_table"]
else:
species_ab_table = SpeciesAbTableUtils.load(args["species_ab_table_fh"])
species_ab_table = ab_table_utils.load(args["species_ab_table_fh"])
args["species_ab_table_fh"].close()
species_ab_table = SpeciesAbTableUtils.normalize(species_ab_table)
species_ab_table = ab_table_utils.normalize(species_ab_table)

conta_events = list(ContaminationEventIO.read_tsv(args["conta_events_fh"]))
if args["conta_events_fh"].mode == 'r':
Expand Down
8 changes: 3 additions & 5 deletions crocodeel/search_conta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os
os.environ["OMP_NUM_THREADS"] = "1"
from multiprocessing import Pool
from functools import partial
from itertools import product
Expand All @@ -13,7 +11,7 @@
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import NearestNeighbors
from scipy.stats import spearmanr
from crocodeel.species_ab_table import SpeciesAbTableUtils
import crocodeel.ab_table_utils as ab_table_utils
from crocodeel.conta_event import ContaminationEvent, ContaminationEventIO
from crocodeel.rf_model import RandomForestModel

Expand All @@ -23,9 +21,9 @@ def run_search_conta(args: dict[str,Any]):
if "species_ab_table" in args:
species_ab_table = args["species_ab_table"]
else:
species_ab_table = SpeciesAbTableUtils.load(args["species_ab_table_fh"])
species_ab_table = ab_table_utils.load(args["species_ab_table_fh"])
args["species_ab_table_fh"].close()
species_ab_table = SpeciesAbTableUtils.normalize(species_ab_table)
species_ab_table = ab_table_utils.normalize(species_ab_table)

start = perf_counter()
logging.info("Search for contaminations started")
Expand Down

0 comments on commit a2c0b04

Please sign in to comment.