added "biomarker" module

mad-lab-fau · Nov 24, 2023 · bb26446 · bb26446
1 parent 6001357
commit bb26446
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 22 deletions.
diff --git a/src/biopsykit/io/biomarker.py b/src/biopsykit/io/biomarker.py
@@ -4,20 +4,19 @@
 
 import numpy as np
 import pandas as pd
-
 from biopsykit.io.io import _apply_index_cols
 from biopsykit.utils._datatype_validation_helper import _assert_file_extension, _assert_has_columns
 from biopsykit.utils._types import path_t
 from biopsykit.utils.datatype_helper import (
+    BiomarkerRawDataFrame,
     SalivaRawDataFrame,
     SubjectConditionDataFrame,
+    _BiomarkerRawDataFrame,
     _SalivaRawDataFrame,
     _SubjectConditionDataFrame,
+    is_biomarker_raw_dataframe,
     is_saliva_raw_dataframe,
     is_subject_condition_dataframe,
-    BiomarkerRawDataFrame,
-    _BiomarkerRawDataFrame,
-    is_biomarker_raw_dataframe,
 )
 
 __all__ = ["load_saliva_plate", "save_saliva", "load_saliva_wide_format", "load_biomarker_results"]
@@ -105,8 +104,7 @@ def load_saliva_plate(
         if imported data can not be parsed to a SalivaRawDataFrame
 
     """
-
-    if regex_str == None:
+    if regex_str is None:
         regex_str = r"(Vp\d+) (S\d)"
 
     return _SalivaRawDataFrame(
@@ -272,10 +270,10 @@ def load_biomarker_results(
     condition_list: Optional[Union[Sequence, Dict[str, Sequence], pd.Index]] = None,
     **kwargs,
 ) -> BiomarkerRawDataFrame:
-    """Load biomarker results from Excel file.
+    r"""Load biomarker results from Excel file.
 
-    Parameters:
-    -----------
+    Parameters
+    ----------
     file_path: :class:`~pathlib.Path` or str
         path to file
     dbs_type: str, optional
@@ -288,7 +286,7 @@ def load_biomarker_results(
         names of the extracted ID column names. ``None`` to use the default column names (['subject', 'day', 'sample'])
     regex_str: str, optional
         regular expression to extract subject, day, and sample ID from sample ID column.
-        ``None`` to use the default regular expression ``r"(VP\d+)-(T\w)-(B\w)"``.
+        ``None`` to use the default regular expression ``r"(VP\\d+)-(T\\w)-(B\\w)"``.
     sample_times: list of int, optional
         times at which samples were collected or ``None`` if no sample times should be specified.
         Default: ``None``
@@ -309,7 +307,6 @@ def load_biomarker_results(
     :exc:`~biopsykit.utils.exceptions.FileExtensionError`
         if file is no Excel file
     """
-
     # ensure pathlib
     file_path = Path(file_path)
     _assert_file_extension(file_path, (".xls", ".xlsx"))
@@ -363,7 +360,7 @@ def load_biomarker_results(
 
 def _get_index_cols(condition_col: str, index_cols: Sequence[str], additional_index_cols: Sequence[str]):
     if condition_col is not None:
-        index_cols = [condition_col] + index_cols
+        index_cols = [condition_col, *index_cols]
 
     if additional_index_cols is None:
         additional_index_cols = []
@@ -470,13 +467,12 @@ def _get_id_columns(id_col_names: Sequence[str], extracted_cols: pd.DataFrame):
         id_col_names = ["subject", "sample"]
         if len(extracted_cols.columns) == 3:
             id_col_names = ["subject", "day", "sample"]
-    else:
-        if len(id_col_names) != len(extracted_cols.columns):
-            raise ValueError(
-                "Number of 'id_col_names' must match length of extracted index columns! Expected {}, got {}.".format(
-                    len(extracted_cols), len(id_col_names)
-                )
+    elif len(id_col_names) != len(extracted_cols.columns):
+        raise ValueError(
+            "Number of 'id_col_names' must match length of extracted index columns! Expected {}, got {}.".format(
+                len(extracted_cols), len(id_col_names)
             )
+        )
 
     return id_col_names
 

diff --git a/src/biopsykit/io/saliva.py b/src/biopsykit/io/saliva.py
@@ -1,14 +1,13 @@
 """Module wrapping biopsykit.io.biomarker including only I/O functions for saliva data."""
 from pathlib import Path
-from typing import Optional, Union, Sequence, Dict, Tuple
+from typing import Dict, Optional, Sequence, Tuple, Union
 
 import pandas as pd
 from biopsykit.io import biomarker
 
 __all__ = ["load_saliva_plate", "save_saliva", "load_saliva_wide_format"]
 
 from biopsykit.utils._types import path_t
-
 from biopsykit.utils.datatype_helper import SalivaRawDataFrame, SubjectConditionDataFrame
 
 
@@ -186,7 +185,7 @@ def _get_index_cols(condition_col: str, index_cols: Sequence[str], additional_in
 
 
 def _read_dataframe(file_path: Path, **kwargs):
-    return biomarker._read_dataframe(file_path)
+    return biomarker._read_dataframe(file_path, **kwargs)
 
 
 def _check_num_samples(num_samples: int, num_subjects: int):

diff --git a/src/biopsykit/utils/datatype_helper.py b/src/biopsykit/utils/datatype_helper.py
@@ -1017,7 +1017,7 @@ def is_biomarker_raw_dataframe(
         _assert_is_dtype(data, pd.DataFrame)
         _assert_has_multiindex(data, nlevels=2, nlevels_atleast=True)
         _assert_has_index_levels(data, index_levels=["subject", "sample"], match_atleast=True, match_order=False)
-        _assert_has_columns(data, [biomarker_type, biomarker_type + ["time"]])
+        _assert_has_columns(data, [biomarker_type, [*biomarker_type, "time"]])
     except ValidationError as e:
         if raise_exception is True:
             raise ValidationError(