Skip to content

Commit

Permalink
fixed issue with incorrect parsing of reaction times from dir names
Browse files Browse the repository at this point in the history
  • Loading branch information
haeussma committed Oct 9, 2024
1 parent 533ad42 commit 3f0c653
Show file tree
Hide file tree
Showing 12 changed files with 246 additions and 277 deletions.
95 changes: 44 additions & 51 deletions chromatopy/readers/abstractreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def _parse_time_and_unit(

try:
filenames = [Path(f) for f in data["file_paths"]]
if len(filenames) == 0:
raise KeyError
except KeyError:
path = Path(data["dirpath"])
if not path.exists():
Expand All @@ -101,71 +103,62 @@ def _parse_time_and_unit(

        # get all filenames of regular files in the directory, excluding hidden files

filenames = [
f for f in path.iterdir() if f.is_file() and not f.name.startswith(".")
]
filenames = [f for f in path.iterdir() if not f.name.startswith(".")]

pattern = r"(\d+(\.\d+)?)\s*[_-]?\s*(min|minutes?|sec|seconds?|hours?)"
pattern = r".*?(\d+(\.\d+)?)\s*[_-]?\s*(min|minutes?|sec|seconds?|hours?)"

# check if all filenames contain a reaction time and unit
if all(re.search(pattern, f.name) for f in filenames):
# extract all reaction times and units from the filenames
rctn_time_path_dict: dict[float, str] = {}
units = []
# extract all reaction times and units from the filenames or parent directories
rctn_time_path_dict: dict[float, str] = {}
units = []

for file in filenames:
match = re.search(pattern, file.name)
assert (
match is not None
), f"Could not parse reaction time from '{file.name}'."
for file in filenames:
match = re.search(pattern, file.name)
if not match:
match = re.search(pattern, file.parent.name)
if match:
reaction_time = float(match.group(1))
units.append(match.group(3))
unit = match.group(3)
if reaction_time in rctn_time_path_dict:
logger.debug(
f"Duplicate reaction time '{reaction_time}' found in directory '{data['dirpath']}'."
)
raise MetadataExtractionError(
f"Reaction times in directory '{data['dirpath']}' are not unique."
)
rctn_time_path_dict[reaction_time] = str(file.absolute())

if not len(list(rctn_time_path_dict.keys())) == len(filenames):
logger.debug(
f"Reaction times in directory '{data['dirpath']}' are not unique."
)
units.append(unit)
else:
logger.debug(f"Could not parse reaction time from '{file}'.")
raise MetadataExtractionError(
f"Reaction times in directory '{data['dirpath']}' are not unique."
f"Could not parse reaction time from '{file}'."
)

# check if all units are the same
if not all(unit == units[0] for unit in units):
logger.debug(
f"Units of reaction times in directory '{data['dirpath']}' are not consistent: {units}"
)
raise UnitConsistencyError(
f"Units of reaction times in directory '{data['dirpath']}' are not consistent: {units}"
)

try:
unit_definition = AbstractReader._map_unit_str_to_UnitDefinition(
units[0]
)
except ValueError:
logger.debug(
f"Unit {units[0]} from directory '{data['dirpath']}' not recognized."
)
raise MetadataExtractionError(
f"Unit {units[0]} from directory '{data['dirpath']}' not recognized."
)

data["file_paths"] = []
data["reaction_times"] = []
for time, full_path in sorted(rctn_time_path_dict.items()):
data["reaction_times"].append(time)
data["file_paths"].append(full_path)
data["time_unit"] = unit_definition
# check if all units are the same
if not all(unit == units[0] for unit in units):
logger.debug(
f"Units of reaction times in directory '{data['dirpath']}' are not consistent: {units}"
)
raise UnitConsistencyError(
f"Units of reaction times in directory '{data['dirpath']}' are not consistent: {units}"
)

else:
try:
unit_definition = AbstractReader._map_unit_str_to_UnitDefinition(units[0])
except ValueError:
logger.debug(
f"Reaction times and units could not be parsed from filenames in directory '{data['dirpath']}'."
f"Unit {units[0]} from directory '{data['dirpath']}' not recognized."
)
raise MetadataExtractionError(
f"Reaction times and units could not be parsed from filenames in directory '{data['dirpath']}'."
f"Unit {units[0]} from directory '{data['dirpath']}' not recognized."
)

data["file_paths"] = []
data["reaction_times"] = []
for time, full_path in sorted(rctn_time_path_dict.items()):
data["reaction_times"].append(time)
data["file_paths"].append(full_path)
data["time_unit"] = unit_definition

@staticmethod
def _map_unit_str_to_UnitDefinition(
unit_str: str,
Expand Down
47 changes: 3 additions & 44 deletions chromatopy/readers/agilent_csv.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,25 @@
from pathlib import Path
from typing import Any

import pandas as pd
from loguru import logger

from chromatopy.model import Chromatogram, Measurement, Peak
from chromatopy.readers.abstractreader import AbstractReader


class AgilentCSVReader(AbstractReader):
def model_post_init(self, __context: Any) -> None:
if not self.reaction_times or not self.time_unit or not self.file_paths:
logger.debug(
"Collecting file paths without reaction time and unit parsing."
)
self._get_file_paths()

def read(self) -> list[Measurement]:
"""Reads chromatographic data from the specified Agilent CSV files.
Returns:
list[Measurement]: A list of Measurement objects representing the chromatographic data.
"""
csv_paths = self._get_file_paths()

assert len(self.reaction_times) == len(csv_paths), f"""
assert len(self.reaction_times) == len(self.file_paths), f"""
The number of reaction times {len(self.reaction_times)} does not match the number of
'RESULTS.CSV' files {len(csv_paths)}.
'RESULTS.CSV' files {len(self.file_paths)}.
"""

measurements = []
for path_idx, (csv_path, reaction_time) in enumerate(
zip(csv_paths, self.reaction_times)
zip(self.file_paths, self.reaction_times)
):
peaks = self._read_peaks_from_csv(csv_path)
chromatogram = Chromatogram(peaks=peaks)
Expand All @@ -53,35 +41,6 @@ def read(self) -> list[Measurement]:

return measurements

def _get_file_paths(self) -> list[str]:
"""Collects the file paths of the Agilent CSV files."""
directory = Path(self.dirpath)
target_paths = []

if directory.is_dir():
dirs = sorted(directory.iterdir())
found_count = 0

for folder in dirs:
if (
folder.is_dir()
and folder.name.endswith(".D")
and not folder.name.startswith(".")
):
for file in folder.iterdir():
if file.name == "RESULTS.CSV" and not file.name.startswith("."):
found_count += 1
target_paths.append(str(file.absolute()))

if found_count == 0:
raise FileNotFoundError(
f"No 'RESULTS.CSV' file found in '{self.dirpath}'."
)
else:
target_paths = [self.dirpath]

return sorted(target_paths)

def _read_peaks_from_csv(self, path: str, skiprows: int = 6) -> list[Peak]:
"""Reads peaks from an Agilent CSV file."""
peaks = []
Expand Down
19 changes: 15 additions & 4 deletions chromatopy/tools/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,14 +465,15 @@ def read_agilent(
from chromatopy.readers.agilent_txt import AgilentTXTReader

directory = Path(path)

txt_paths = [
str(f.absolute())
for f in directory.rglob("*.TXT")
for f in directory.rglob("Report.TXT")
if f.parent.parent == directory
]
csv_paths = [
str(f.absolute())
for f in directory.rglob("*.csv")
for f in directory.rglob("RESULTS.CSV")
if f.parent.parent == directory
]

Expand All @@ -491,12 +492,14 @@ def read_agilent(
if data["reaction_times"] is None:
data.pop("reaction_times")

try:
if not csv_paths and txt_paths:
data["file_paths"] = txt_paths # type: ignore
measurements = AgilentTXTReader(**data).read() # type: ignore
except FileNotFoundError:
elif csv_paths and not txt_paths:
data["file_paths"] = csv_paths # type: ignore
measurements = AgilentCSVReader(**data).read() # type: ignore
else:
raise IOError(f"No 'REPORT.TXT' or 'RESULTS.CSV' files found in '{path}'.")

if id is None:
id = path
Expand Down Expand Up @@ -1153,3 +1156,11 @@ def visualize_spectra(self, dark_mode: bool = False) -> go.Figure:
)

return fig


if __name__ == "__main__":
    # NOTE(review): ad-hoc debug driver committed with the fix — hard-coded
    # absolute path to a local dataset; will fail on any other machine.
    # Consider removing before release or guarding behind an env variable.
    path = "/Users/max/Documents/GitHub/eyring-kinetics/data/hetero/RAU-R503"

    # Read an Agilent results directory and print the parsed reaction time of
    # each measurement, to manually verify the dir-name time parsing fix.
    ana = ChromAnalyzer.read_agilent(path, ph=7.4, temperature=37)
    for meas in ana.measurements:
        print(meas.reaction_time)
Binary file removed docs/examples/data/asm.zip
Binary file not shown.
Loading

0 comments on commit 3f0c653

Please sign in to comment.