rename metadata_path -> metadata_path_or_df #336

Merged
3 commits merged on Sep 20, 2024

Changes from all commits
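For quick context, a minimal sketch of the user-facing change, assuming the `MaxQuantLoader` setup shown in the existing docs below (the proteinGroups path is a placeholder). Only the keyword name changes; it still accepts either a metadata file path or an already loaded `pandas.DataFrame`.

```python
import pandas as pd
import alphastats

# Loader setup as in docs/import_data.md; the file path here is a placeholder.
maxquant_data = alphastats.MaxQuantLoader(file="testfiles/maxquant/proteinGroups.txt")

# Before this PR the keyword was `metadata_path=`; after the rename, a path ...
dataset = alphastats.DataSet(
    loader=maxquant_data,
    metadata_path_or_df="testfiles/maxquant/metadata.xlsx",
    sample_column="sample",
)

# ... or an already loaded DataFrame is accepted under the same keyword.
metadata_df = pd.read_excel("testfiles/maxquant/metadata.xlsx")
dataset = alphastats.DataSet(
    loader=maxquant_data,
    metadata_path_or_df=metadata_df,
    sample_column="sample",
)
```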
6 changes: 3 additions & 3 deletions alphastats/DataSet.py
@@ -47,14 +47,14 @@ class DataSet:
     def __init__(
         self,
         loader: BaseLoader,
-        metadata_path: Optional[Union[str, pd.DataFrame]] = None,
+        metadata_path_or_df: Optional[Union[str, pd.DataFrame]] = None,
         sample_column: Optional[str] = None,
     ):
         """Create DataSet

         Args:
             loader (_type_): loader of class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader, SpectronautLoader
-            metadata_path (str or pd.DataFrame, optional): path to metadata file or an actual df. Defaults to None.
+            metadata_path_or_df (str or pd.DataFrame, optional): path to metadata file or an actual df. Defaults to None.
             sample_column (str, optional): column in metadata file indicating the sample IDs. Defaults to None.

         Attributes of a DataSet instance:
@@ -86,7 +86,7 @@ def __init__(
             rawinput=self.rawinput,
             index_column=self.index_column,
             intensity_column=self._intensity_column,
-            metadata_path=metadata_path,
+            metadata_path_or_df=metadata_path_or_df,
             sample_column=sample_column,
         )

8 changes: 4 additions & 4 deletions alphastats/dataset_factory.py
@@ -15,14 +15,14 @@ def __init__(
         rawinput: pd.DataFrame,
         index_column: str,
         intensity_column: Union[List[str], str],
-        metadata_path: Union[str, pd.DataFrame],
+        metadata_path_or_df: Union[str, pd.DataFrame],
         sample_column: str,
     ):
         self.rawinput: pd.DataFrame = rawinput
         self.sample_column: str = sample_column
         self.index_column: str = index_column
         self.intensity_column: Union[List[str], str] = intensity_column
-        self.metadata_path: Union[str, pd.DataFrame] = metadata_path
+        self.metadata_path_or_df: Union[str, pd.DataFrame] = metadata_path_or_df

     def create_matrix_from_rawinput(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
         """Creates a matrix: features (Proteins) as columns, samples as rows."""
@@ -61,9 +61,9 @@ def _check_matrix_values(mat: pd.DataFrame) -> None:
     def create_metadata(self, mat: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
         """Create metadata DataFrame from metadata file or DataFrame."""

-        if self.metadata_path is not None:
+        if self.metadata_path_or_df is not None:
             sample = self.sample_column
-            metadata = self._load_metadata(file_path=self.metadata_path)
+            metadata = self._load_metadata(file_path=self.metadata_path_or_df)
             metadata = self._remove_missing_samples_from_metadata(mat, metadata, sample)
         else:
             sample = "sample"
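The factory now forwards `metadata_path_or_df` (a path or a DataFrame) straight to `_load_metadata`, whose body is outside this diff. A purely hypothetical sketch of such a dispatch, for illustration only and not the actual implementation:

```python
from typing import Union
import pandas as pd

def load_metadata_sketch(file_path: Union[str, pd.DataFrame]) -> pd.DataFrame:
    """Hypothetical helper mirroring _load_metadata's contract: accept either input type."""
    if isinstance(file_path, pd.DataFrame):
        # Already a DataFrame: use it as-is.
        return file_path
    if str(file_path).endswith((".xlsx", ".xls")):
        return pd.read_excel(file_path)
    return pd.read_csv(file_path)
```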
2 changes: 1 addition & 1 deletion alphastats/gui/pages/02_Import Data.py
@@ -159,7 +159,7 @@ def _finalize_data_loading(

     dataset = DataSet(
         loader=loader,
-        metadata_path=metadatafile_df,
+        metadata_path_or_df=metadatafile_df,
         sample_column=sample_column,
     )
     metadata_columns = metadatafile_df.columns.to_list()
8 changes: 6 additions & 2 deletions alphastats/gui/utils/import_helper.py
@@ -109,15 +109,19 @@ def load_example_data():

     loader = MaxQuantLoader(file=filepath)
     # TODO why is this done twice?
-    dataset = DataSet(loader=loader, metadata_path=metadatapath, sample_column="sample")
+    dataset = DataSet(
+        loader=loader, metadata_path_or_df=metadatapath, sample_column="sample"
+    )
     metadatapath = (
         os.path.join(_parent_directory, "sample_data", "metadata.xlsx")
         .replace("pages/", "")
         .replace("pages\\", "")
     )

     loader = MaxQuantLoader(file=filepath)
-    dataset = DataSet(loader=loader, metadata_path=metadatapath, sample_column="sample")
+    dataset = DataSet(
+        loader=loader, metadata_path_or_df=metadatapath, sample_column="sample"
+    )

     dataset.metadata = dataset.metadata[
         [
4 changes: 2 additions & 2 deletions docs/import_data.md
@@ -14,7 +14,7 @@ maxquant_data = alphastats.MaxQuantLoader(

 dataset = alphastats.DataSet(
     loader = maxquant_data,
-    metadata_path="../testfiles/maxquant/metadata.xlsx",
+    metadata_path_or_df="../testfiles/maxquant/metadata.xlsx",
     sample_column="sample"
 )
 ```
@@ -126,7 +126,7 @@ maxquant_data = alphastats.MaxQuantLoader(

 dataset = alphastats.DataSet(
     loader = maxquant_data,
-    metadata_path="../testfiles/maxquant/metadata.xlsx",
+    metadata_path_or_df="../testfiles/maxquant/metadata.xlsx",
     sample_column="sample"
 )
 ```
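The docs snippets above only show the file-path form. A companion sketch for the DataFrame form enabled by the same keyword, assuming `maxquant_data` is the loader created earlier in the docs example:

```python
import pandas as pd
import alphastats

metadata_df = pd.read_excel("../testfiles/maxquant/metadata.xlsx")

dataset = alphastats.DataSet(
    loader=maxquant_data,  # the MaxQuantLoader created in the docs example above
    metadata_path_or_df=metadata_df,
    sample_column="sample",
)
```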
2 changes: 1 addition & 1 deletion docs/workflow_mq.html
@@ -781,7 +781,7 @@ <h2>2. Create a DataSet<a class="headerlink" href="#2.-Create-a-DataSet" title="
 </div>
 <div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">ds</span> <span class="o">=</span> <span class="n">alphastats</span><span class="o">.</span><span class="n">DataSet</span><span class="p">(</span>
 <span class="n">loader</span> <span class="o">=</span> <span class="n">maxquant_data</span><span class="p">,</span>
-<span class="n">metadata_path</span> <span class="o">=</span> <span class="s2">&quot;../testfiles/maxquant/metadata.xlsx&quot;</span><span class="p">,</span>
+<span class="n">metadata_path_or_df</span> <span class="o">=</span> <span class="s2">&quot;../testfiles/maxquant/metadata.xlsx&quot;</span><span class="p">,</span>
 <span class="n">sample_column</span> <span class="o">=</span> <span class="s2">&quot;sample&quot;</span> <span class="c1"># specify the column that corresponds to the sample names in proteinGroups</span>
 <span class="p">)</span>
 </pre></div>
2 changes: 1 addition & 1 deletion nbs/getting_started.ipynb
@@ -723,7 +723,7 @@
 "source": [
 "ds = DataSet(\n",
 " loader=maxquant_data,\n",
-" metadata_path=\"../testfiles/maxquant/metadata.xlsx\",\n",
+" metadata_path_or_df=\"../testfiles/maxquant/metadata.xlsx\",\n",
 " sample_column=\"sample\", # specify the column that corresponds to the sample names in proteinGroups\n",
 ")"
 ]
2 changes: 1 addition & 1 deletion nbs/liu_2019.ipynb
@@ -92,7 +92,7 @@
 ")\n",
 "dataset = DataSet(\n",
 " loader=loader,\n",
-" metadata_path=\"../testfiles/maxquant/metadata.xlsx\",\n",
+" metadata_path_or_df=\"../testfiles/maxquant/metadata.xlsx\",\n",
 " sample_column=\"sample\",\n",
 ")"
 ]
18 changes: 9 additions & 9 deletions nbs/ramus_2016.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/gui/conftest.py
@@ -30,7 +30,7 @@ def create_dataset_alphapept():
     metadata_path = TEST_INPUT_FILES_PATH / "alphapept/metadata.csv"
     return DataSet(
         loader=loader,
-        metadata_path=str(metadata_path),
+        metadata_path_or_df=str(metadata_path),
         sample_column="sample",
     )

22 changes: 11 additions & 11 deletions tests/test_DataSet.py
@@ -205,7 +205,7 @@ def setUp(self):
         self.metadata_path = "testfiles/alphapept/metadata.csv"
         self.obj = DataSet(
             loader=self.loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="sample",
         )
         # expected dimensions of matrix
@@ -239,7 +239,7 @@ def test_remove_misc_samples_in_metadata(self, mock):
         )
         obj = DataSet(
             loader=self.loader,
-            metadata_path=df,
+            metadata_path_or_df=df,
             sample_column="sample",
         )
         #  is sample C removed
@@ -253,7 +253,7 @@ def test_load_metadata_df(self):
         df = pd.read_excel(self.metadata_path)
         obj = DataSet(
             loader=self.loader,
-            metadata_path=df,
+            metadata_path_or_df=df,
             sample_column="sample",
         )
         self.assertIsInstance(obj.metadata, pd.DataFrame)
@@ -403,7 +403,7 @@ def setUp(self):
         self.metadata_path = "testfiles/maxquant/metadata.xlsx"
         self.obj = DataSet(
             loader=self.loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="sample",
         )
         # expected dimensions of matrix
@@ -419,7 +419,7 @@ def test_load_evidence_wrong_sample_names(self):
         )
         DataSet(
             loader=loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="sample",
         )

@@ -468,7 +468,7 @@ def test_plot_volcano_with_grouplist_wrong_names(self):
     def test_plot_volcano_compare_preprocessing_modes_no_randomforest(self):
         obj_ut = DataSet(
             loader=self.loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="sample",
         )

@@ -487,7 +487,7 @@ def test_plot_volcano_compare_preprocessing_modes_no_randomforest(self):
     def test_plot_volcano_compare_preprocessing_modes_randomforest(self):
         obj_ut = DataSet(
             loader=self.loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="sample",
         )

@@ -801,7 +801,7 @@ def setUp(self):
         self.metadata_path = "testfiles/diann/metadata.xlsx"
         self.obj = DataSet(
             loader=self.loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="analytical_sample external_id",
         )
         # expected dimensions of matrix
@@ -916,7 +916,7 @@ def setUp(self):
         self.metadata_path = "testfiles/fragpipe/metadata.xlsx"
         self.obj = DataSet(
             loader=self.loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="analytical_sample external_id",
         )
         # expected dimensions of matrix
@@ -937,7 +937,7 @@ def setUpClass(cls):
         cls.cls_metadata_path = "testfiles/spectronaut/metadata.xlsx"
         cls.cls_obj = DataSet(
             loader=cls.cls_loader,
-            metadata_path=cls.cls_metadata_path,
+            metadata_path_or_df=cls.cls_metadata_path,
             sample_column="sample",
         )

@@ -983,7 +983,7 @@ def setUpClass(cls):
         cls.cls_metadata_path = "testfiles/fragpipe/metadata2.xlsx"
         cls.cls_obj = DataSet(
             loader=cls.cls_loader,
-            metadata_path=cls.cls_metadata_path,
+            metadata_path_or_df=cls.cls_metadata_path,
             sample_column="analytical_sample external_id",
         )

2 changes: 1 addition & 1 deletion tests/test_DataSet_Pathway.py
@@ -111,7 +111,7 @@ def setUp(self):

         self.obj = DataSet(
             loader=self.loader,
-            metadata_path=metadata,
+            metadata_path_or_df=metadata,
             sample_column="sample",
         )
         self.fg_sample = "AC399"
2 changes: 1 addition & 1 deletion tests/test_gpt.py
@@ -22,7 +22,7 @@ def setUp(self):
         self.metadata_path = "testfiles/maxquant/metadata.xlsx"
         self.obj = DataSet(
             loader=self.loader,
-            metadata_path=self.metadata_path,
+            metadata_path_or_df=self.metadata_path,
             sample_column="sample",
         )
         # expected dimensions of matrix