Skip to content

Commit

Permalink
Add agilent rdl (#20)
Browse files Browse the repository at this point in the history
* added agilent rdl

* updated docs

* Bump version to 0.9.6 in pyproject.toml

* fixed unicode decode error

* added debug print

* Fix debug print statement and update file path string formatting

* Update pytest command to include verbose output

* Update pytest command to include verbose output

* Fix assertion error in test_windows

* Refactor debug print statements and flush output in analyzer.py

* Refactor debug print statements and flush output in analyzer.py

* Refactor debug print statements and improve error handling in analyzer.py

* Refactor debug print statements and add error handling in analyzer.py

* Refactor file reading in AgilentRDLReader to specify UTF-8 encoding

* Refactor debug print statements and remove unnecessary variables in analyzer.py

* Refactor debug print statements and update error handling in analyzer.py

* Refactor debug print statements and update error handling in analyzer.py

* Refactor error handling in analyzer.py
  • Loading branch information
haeussma authored Oct 25, 2024
1 parent b790f6e commit d21576d
Show file tree
Hide file tree
Showing 16 changed files with 295,815 additions and 295,476 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ jobs:
- name: Run tests with pytest
run: |
poetry run pytest --nbval-lax
poetry run pytest --nbval-lax -s
2 changes: 1 addition & 1 deletion chromatopy/readers/abstractreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def read(self) -> list[Measurement]:

def print_success(self, n_measurement_objects: int) -> None:
"""Prints a success message."""
print(f" Loaded {n_measurement_objects} chromatograms.")
print(f" Loaded {n_measurement_objects} chromatograms.")


if __name__ == "__main__":
Expand Down
2 changes: 0 additions & 2 deletions chromatopy/readers/agilent_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ def read(self) -> list[Measurement]:
'RESULTS.CSV' files {len(self.file_paths)}.
"""

print(self.file_paths)

measurements = []
for path_idx, csv_path in enumerate(self.file_paths):
peaks = self._read_peaks_from_csv(csv_path)
Expand Down
115 changes: 115 additions & 0 deletions chromatopy/readers/agilent_rdl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import re
from typing import Tuple

from chromatopy.model import Chromatogram, Data, Measurement, Peak
from chromatopy.readers.abstractreader import AbstractReader


class AgilentRDLReader(AbstractReader):
    """Reader for Agilent RDL report exports rendered as box-drawing tables.

    Each report is a plain-text file in which table cells are separated by
    the ``│`` character and a logical row may be wrapped over two physical
    lines (for example ``53.`` on the first line and ``0992`` on the second).
    The reader re-joins such wrapped rows and converts every peak row into a
    ``Peak``.
    """

    def read(self) -> list[Measurement]:
        """Parse every file in ``self.file_paths`` into a ``Measurement``.

        Returns:
            One ``Measurement`` per input file, in the order of
            ``self.file_paths``. Each holds a single ``Chromatogram`` with
            the peaks found in that file.

        Raises:
            ValueError: If a file lacks the ``Sample Name`` or ``Signal:``
                row (propagated from ``extract_information``).
        """
        measurements = []
        for path_id, path in enumerate(self.file_paths):
            lines = self.read_file(path)

            # The sample name row is located by extract_information but is
            # not stored on the measurement, so it is not processed further.
            peak_rows, _sample_name, signal = self.extract_information(lines)

            peaks = [
                self.map_peak(self.align_and_concatenate_columns(*pair))
                for pair in peak_rows
            ]
            wavelength = self.extract_wavelength(signal)

            data = Data(
                value=self.values[path_id],
                unit=self.unit,
                data_type=self.mode,
            )

            chromatogram = Chromatogram(peaks=peaks, wavelength=wavelength)

            measurements.append(
                Measurement(
                    id=f"m{path_id}",
                    chromatograms=[chromatogram],
                    temperature=self.temperature,
                    temperature_unit=self.temperature_unit,
                    ph=self.ph,
                    data=data,
                )
            )

        if not self.silent:
            self.print_success(len(measurements))

        return measurements

    @staticmethod
    def read_file(file_path: str) -> list[str]:
        """Return all lines of ``file_path`` decoded as UTF-8.

        Line endings are kept. ``UnicodeDecodeError`` is deliberately not
        handled here so that callers can react to files that are not UTF-8.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            return file.readlines()

    @staticmethod
    def extract_information(
        lines: list[str],
    ) -> Tuple[list[list[str]], list[str], str]:
        """Locate the peak rows, the sample name row and the signal line.

        Args:
            lines: The report, one string per physical line.

        Returns:
            A tuple ``(peak_rows, sample_name, signal)`` where ``peak_rows``
            is a list of ``[line, continuation]`` pairs, ``sample_name`` is
            one such pair and ``signal`` is the raw signal line. The
            continuation is ``""`` when the row is not wrapped. If a marker
            occurs more than once, the last occurrence is returned.

        Raises:
            ValueError: If no ``Sample Name`` row or no ``Signal:`` row is
                present.
        """
        # A peak row opens with two whitespace characters, a cell border and
        # a decimal number, which excludes header, separator and "Sum" rows.
        peak_row = re.compile(r"^\s{2}│\s+\d+\.\d+\s+│")

        def with_continuation(index: int) -> list[str]:
            # Slicing (rather than indexing) keeps a row on the very last
            # line of the file from raising IndexError.
            following = lines[index + 1 : index + 2]
            if following and following[0].startswith(" │"):
                return [lines[index], following[0]]
            return [lines[index], ""]

        data: list[list[str]] = []
        sample_name: list[str] | None = None
        signal: str | None = None

        for i, line in enumerate(lines):
            if peak_row.search(line):
                data.append(with_continuation(i))

            if "│Sample Name │" in line:
                sample_name = with_continuation(i)

            if "│Signal:│" in line:
                signal = line

        if sample_name is None:
            raise ValueError(
                "No 'Sample Name' row found; the file does not look like an "
                "Agilent RDL report."
            )
        if signal is None:
            raise ValueError(
                "No 'Signal:' row found; the file does not look like an "
                "Agilent RDL report."
            )

        return data, sample_name, signal

    @staticmethod
    def extract_wavelength(line: str) -> int | None:
        """Return the integer following ``Sig=`` in ``line``, or ``None``."""
        match = re.search(r"Sig=(\d+)", line)
        return int(match.group(1)) if match else None

    @staticmethod
    def align_and_concatenate_columns(row1: str, row2: str) -> list[str]:
        """Join the cells of a table row that is wrapped over two lines.

        Both lines are split on ``│``, empty fragments are discarded, the
        remaining cells are stripped and then concatenated position by
        position. The first and last resulting cells are dropped before
        returning.

        Args:
            row1: First physical line of the row.
            row2: Continuation line, or ``""`` if the row is not wrapped.

        Returns:
            The merged cell texts without the first and last cell.
        """
        first = [cell.strip() for cell in row1.split("│") if cell]
        second = [cell.strip() for cell in row2.split("│") if cell]

        # Pad the shorter line with empty cells: a missing continuation line
        # must leave the first line's cells intact instead of discarding
        # them, which is what a plain zip() would do.
        width = max(len(first), len(second))
        first += [""] * (width - len(first))
        second += [""] * (width - len(second))

        merged = [f"{upper}{lower}".strip() for upper, lower in zip(first, second)]

        return merged[1:-1]

    @staticmethod
    def map_peak(peak_list: list[str]) -> Peak:
        """Build a ``Peak`` from the merged cells of one peak row.

        Args:
            peak_list: Cell texts in the order retention time, type, width,
                area, amplitude, percent area. Further cells are ignored.

        Raises:
            IndexError: If fewer than six cells are supplied.
            ValueError: If a numeric cell cannot be converted to ``float``.
        """
        return Peak(
            retention_time=float(peak_list[0]),
            type=peak_list[1],
            width=float(peak_list[2]),
            area=float(peak_list[3]),
            amplitude=float(peak_list[4]),
            percent_area=float(peak_list[5]),
        )
31 changes: 29 additions & 2 deletions chromatopy/tools/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,10 +486,15 @@ def read_agilent(
ChromAnalyzer: ChromAnalyzer object containing the measurements.
"""
from chromatopy.readers.agilent_csv import AgilentCSVReader
from chromatopy.readers.agilent_rdl import AgilentRDLReader
from chromatopy.readers.agilent_txt import AgilentTXTReader

directory = Path(path)

txt_paths = []
csv_paths = []
rdl_paths = []

txt_paths = [
str(f.absolute())
for f in directory.rglob("Report.TXT")
Expand All @@ -500,6 +505,24 @@ def read_agilent(
for f in directory.rglob("RESULTS.CSV")
if f.parent.parent == directory
]
rdl_paths = []

try:
txt_path = next(directory.rglob("*.txt"))
print("sole path: ", txt_path, flush=True)
print(f"all txt paths: {list(directory.rglob('*.txt'))}", flush=True)
print(f"everything: {directory.rglob('*')}", flush=True)
try:
lines = AgilentRDLReader.read_file(str(txt_path))
if lines[0].startswith("┌─────"):
rdl_paths = [str(f.absolute()) for f in directory.rglob("*.txt")]
else:
txt_paths = txt_paths
except UnicodeDecodeError:
txt_paths = txt_paths

except StopIteration:
txt_paths = txt_paths

data = {
"dirpath": path,
Expand All @@ -512,9 +535,13 @@ def read_agilent(
"mode": mode,
}

if not csv_paths and txt_paths:
if rdl_paths:
data["file_paths"] = rdl_paths # type: ignore
reader = AgilentRDLReader(**data)
measurements = reader.read() # type: ignore
elif not csv_paths and txt_paths:
data["file_paths"] = txt_paths # type: ignore
reader = AgilentTXTReader(**data)
reader = AgilentTXTReader(**data) # type: ignore
measurements = reader.read() # type: ignore
elif csv_paths and not txt_paths:
data["file_paths"] = csv_paths # type: ignore
Expand Down
40 changes: 40 additions & 0 deletions docs/examples/data/agilent_rdl/M2_MJ_100_min.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
┌──────────────────────────────────────────────────────────────────────────────┐
│Cross Sequence Summary Report │
└──────────────────────────────────────────────────────────────────────────────┘
┌────────────────┬──────────────────────────────────────────────────────────┐
│Sample Name │M2_MJ_ │
│ │100_min │
└────────────────┴──────────────────────────────────────────────────────────┘
┌───────┬────────────────────────────────────────────────────────────────────┐
│Signal:│DAD1A,Sig=254,4 Ref=360,100 │
└───────┴────────────────────────────────────────────────────────────────────┘
┌───────┬───┬────────┬────────┬───────┬────────┬─────────────────────────────┐
│ RT │Typ│ Width │ Area │ Height│ Area% │Name │
│ [min] │e │ [min] │ │ │ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 0.698 │BV │ 0.4062 │ 53. │ 7.2642│ 0.3671 │ │
│ │ │ │ 0992 │ │ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 1.169 │VV │ 0.8468 │ 6094. │ 783.│ 42. │AMP@1,169min │
│ │ │ │ 3336 │ 0775│ 1289 │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 2.756 │VB │ 0.4315 │ 14. │ 1.9341│ 0.0982 │ │
│ │ │ │ 2114 │ │ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 3.331 │BV │ 0.7823 │ 7620. │ 925.│ 52. │ADP@3,327min │
│ │ │ │ 7030 │ 5433│ 6804 │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 3.974 │VB │ 1.5648 │ 381. │ 55.│ 2.6397 │ │
│ │ │ │ 8593 │ 0016│ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 5.770 │BB │ 1.0452 │ 301. │ 22.│ 2.0856 │ATP@5,720min │
│ │ │ │ 7014 │ 1066│ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ │ │ Sum │ 14465. │ │ │ │
│ │ │ │ 9079 │ │ │ │
└───────┴───┴────────┴────────┴───────┴────────┴─────────────────────────────┘
┌───────────────────────────┬────────────────────────────────┬─────────────────┐
│D:\CDSProjects\JNS\Report │ Printed: 2024-10-20 │ Page 1 of 1 │
│Templates\Export_for_ │ 16:25:44+02:00 │ │
│python.rdl [Rev. 2.0] │ │ │
════════════════════════════════════════════════════════════════════════════════
40 changes: 40 additions & 0 deletions docs/examples/data/agilent_rdl/M3_102_min.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
┌──────────────────────────────────────────────────────────────────────────────┐
│Cross Sequence Summary Report │
└──────────────────────────────────────────────────────────────────────────────┘
┌────────────────┬──────────────────────────────────────────────────────────┐
│Sample Name │M2_MJ_ │
│ │100_min │
└────────────────┴──────────────────────────────────────────────────────────┘
┌───────┬────────────────────────────────────────────────────────────────────┐
│Signal:│DAD1A,Sig=254,4 Ref=360,100 │
└───────┴────────────────────────────────────────────────────────────────────┘
┌───────┬───┬────────┬────────┬───────┬────────┬─────────────────────────────┐
│ RT │Typ│ Width │ Area │ Height│ Area% │Name │
│ [min] │e │ [min] │ │ │ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 0.698 │BV │ 0.4062 │ 53. │ 7.2642│ 0.3671 │ │
│ │ │ │ 0992 │ │ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 1.169 │VV │ 0.8468 │ 6094. │ 783.│ 42. │AMP@1,169min │
│ │ │ │ 3336 │ 0775│ 1289 │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 2.756 │VB │ 0.4315 │ 14. │ 1.9341│ 0.0982 │ │
│ │ │ │ 2114 │ │ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 3.331 │BV │ 0.7823 │ 7620. │ 925.│ 52. │ADP@3,327min │
│ │ │ │ 7030 │ 5433│ 6804 │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 3.974 │VB │ 1.5648 │ 381. │ 55.│ 2.6397 │ │
│ │ │ │ 8593 │ 0016│ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ 5.770 │BB │ 1.0452 │ 301. │ 22.│ 2.0856 │ATP@5,720min │
│ │ │ │ 7014 │ 1066│ │ │
├───────┼───┼────────┼────────┼───────┼────────┼─────────────────────────────┤
│ │ │ Sum │ 14465. │ │ │ │
│ │ │ │ 9079 │ │ │ │
└───────┴───┴────────┴────────┴───────┴────────┴─────────────────────────────┘
┌───────────────────────────┬────────────────────────────────┬─────────────────┐
│D:\CDSProjects\JNS\Report │ Printed: 2024-10-20 │ Page 1 of 1 │
│Templates\Export_for_ │ 16:25:44+02:00 │ │
│python.rdl [Rev. 2.0] │ │ │
════════════════════════════════════════════════════════════════════════════════
Loading

0 comments on commit d21576d

Please sign in to comment.