From aba502ef4dd0c908fcd36a201b0ae3e295697177 Mon Sep 17 00:00:00 2001
From: Colin Wood
Date: Fri, 13 Sep 2024 15:06:24 -0700
Subject: [PATCH] subset and filter method

---
 q2_qsip2/plugin_setup.py | 60 ++++++++++++++++++++++++++++++++++++++--
 q2_qsip2/workflow.py     | 55 ++++++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/q2_qsip2/plugin_setup.py b/q2_qsip2/plugin_setup.py
index 627d045..050a19e 100644
--- a/q2_qsip2/plugin_setup.py
+++ b/q2_qsip2/plugin_setup.py
@@ -8,12 +8,14 @@
 
 import importlib
 
-from qiime2.plugin import Citations, List, Metadata, Plugin, Str
+from qiime2.plugin import Citations, Int, List, Metadata, Plugin, Str
 from q2_types.feature_table import FeatureTable, Frequency
 
 from q2_qsip2 import __version__
-from q2_qsip2.workflow import standard_workflow, create_qsip_data
 from q2_qsip2.types import QSIP2Data, Unfiltered, Filtered, EAF
+from q2_qsip2.workflow import (
+    standard_workflow, create_qsip_data, subset_and_filter
+)
 from q2_qsip2.visualizers._visualizers import (
     plot_weighted_average_densities, plot_sample_curves,
     plot_density_outliers, show_comparison_groups
@@ -78,7 +80,9 @@
     outputs=[
         ('qsip_data', QSIP2Data[Unfiltered])
     ],
-    input_descriptions={},
+    input_descriptions={
+        'table': 'The qSIP feature table.'
+    },
     parameter_descriptions={
         'sample_metadata': 'The sample-level metadata.',
         'source_metadata': 'The source-level metadata.',
@@ -98,6 +102,56 @@
     )
 )
 
+plugin.methods.register_function(
+    function=subset_and_filter,
+    inputs={
+        'qsip_data': QSIP2Data[Unfiltered]
+    },
+    parameters={
+        'unlabeled_sources': List[Str],
+        'labeled_sources': List[Str],
+        'min_unlabeled_sources': Int,
+        'min_labeled_sources': Int,
+        'min_unlabeled_fractions': Int,
+        'min_labeled_fractions': Int
+    },
+    outputs=[
+        ('filtered_qsip_data', QSIP2Data[Filtered])
+    ],
+    input_descriptions={
+        'qsip_data': 'Your unfiltered qSIP2 data.'
+    },
+    parameter_descriptions={
+        'unlabeled_sources': 'The IDs of the unlabeled sources to retain.',
+        'labeled_sources': 'The IDs of the labeled sources to retain.',
+        'min_unlabeled_sources': (
+            'The minimum number of unlabeled sources a feature must be '
+            'present in to be retained.'
+        ),
+        'min_labeled_sources': (
+            'The minimum number of labeled sources a feature must be present '
+            'in to be retained.'
+        ),
+        'min_unlabeled_fractions': (
+            'The minimum number of fractions a feature must be present in '
+            'to be considered present in an unlabeled source.'
+        ),
+        'min_labeled_fractions': (
+            'The minimum number of fractions a feature must be present in '
+            'to be considered present in a labeled source.'
+        )
+    },
+    output_descriptions={
+        'filtered_qsip_data': 'Your subsetted and filtered qSIP2 data.'
+    },
+    name='Subset sources and filter features to prepare for comparison.',
+    description=(
+        'Subsets the qSIP2 data to include only the sources listed in '
+        '`unlabeled_sources` and `labeled_sources`, and retains only '
+        'those features that pass the minimum prevalence parameters.'
+    )
+)
+
 plugin.visualizers.register_function(
     function=plot_weighted_average_densities,
     inputs={
diff --git a/q2_qsip2/workflow.py b/q2_qsip2/workflow.py
index b66111f..7388bf9 100644
--- a/q2_qsip2/workflow.py
+++ b/q2_qsip2/workflow.py
@@ -122,3 +122,58 @@ def create_qsip_data(
     )
 
     return R_qsip_obj
+
+
+def subset_and_filter(
+    qsip_data: RS4,
+    unlabeled_sources: list[str],
+    labeled_sources: list[str],
+    min_unlabeled_sources: int,
+    min_labeled_sources: int,
+    min_unlabeled_fractions: int,
+    min_labeled_fractions: int
+) -> RS4:
+    '''
+    Subsets the qsip data object to include only those sources listed in
+    `unlabeled_sources` and `labeled_sources`, and to include only those
+    features that pass the minimum prevalence parameters.
+
+    Parameters
+    ----------
+    qsip_data : RS4
+        The "qsip_data" object.
+    unlabeled_sources : list[str]
+        The IDs of the unlabeled sources to retain.
+    labeled_sources : list[str]
+        The IDs of the labeled sources to retain.
+    min_unlabeled_sources : int
+        The minimum number of unlabeled sources a feature must be present in
+        to be retained.
+    min_labeled_sources : int
+        The minimum number of labeled sources a feature must be present in
+        to be retained.
+    min_unlabeled_fractions : int
+        The minimum number of fractions a feature must be present in
+        to be considered present in an unlabeled source.
+    min_labeled_fractions : int
+        The minimum number of fractions a feature must be present in
+        to be considered present in a labeled source.
+
+    Returns
+    -------
+    RS4
+        The object returned by `qsip2.run_feature_filter`.
+    '''
+    # The source-ID lists map onto differently named keyword arguments
+    # (`*_source_mat_ids`); the remaining parameters pass through by name.
+    filtered_qsip_data = qsip2.run_feature_filter(
+        qsip_data,
+        unlabeled_source_mat_ids=unlabeled_sources,
+        labeled_source_mat_ids=labeled_sources,
+        min_unlabeled_sources=min_unlabeled_sources,
+        min_labeled_sources=min_labeled_sources,
+        min_unlabeled_fractions=min_unlabeled_fractions,
+        min_labeled_fractions=min_labeled_fractions
+    )
+
+    return filtered_qsip_data