From 49ede057c1677eab2925fb2e5ea39114991d47ae Mon Sep 17 00:00:00 2001 From: Colin Wood Date: Fri, 13 Sep 2024 17:06:10 -0700 Subject: [PATCH] eaf method and visualizer --- q2_qsip2/plugin_setup.py | 100 ++++++++++++------ .../types/_deferred_setup/_transformers.py | 2 +- q2_qsip2/visualizers/_visualizers.py | 31 ++++++ q2_qsip2/workflow.py | 43 ++++++-- 4 files changed, 139 insertions(+), 37 deletions(-) diff --git a/q2_qsip2/plugin_setup.py b/q2_qsip2/plugin_setup.py index 3dc0665..7231a07 100644 --- a/q2_qsip2/plugin_setup.py +++ b/q2_qsip2/plugin_setup.py @@ -8,17 +8,18 @@ import importlib -from qiime2.plugin import Citations, Int, List, Metadata, Plugin, Str +from qiime2.plugin import Citations, Float, Int, List, Metadata, Plugin, Str from q2_types.feature_table import FeatureTable, Frequency from q2_qsip2 import __version__ from q2_qsip2.types import QSIP2Data, Unfiltered, Filtered, EAF from q2_qsip2.workflow import ( - standard_workflow, create_qsip_data, subset_and_filter + standard_workflow, create_qsip_data, subset_and_filter, + resample_and_calculate_EAF ) from q2_qsip2.visualizers._visualizers import ( plot_weighted_average_densities, plot_sample_curves, plot_density_outliers, - show_comparison_groups, plot_filtered_features + show_comparison_groups, plot_filtered_features, plot_excess_atom_fractions ) @@ -34,34 +35,9 @@ "data." ), short_description="Analyze qSIP data.", - # TODO citations=[citations['Caporaso-Bolyen-2024']] ) -plugin.methods.register_function( - function=standard_workflow, - inputs={ - 'table': FeatureTable[Frequency], - 'qsip_metadata': QSIP2Data[Unfiltered], - }, - parameters={}, - outputs=[ - ('output_table', FeatureTable[Frequency]) - ], - input_descriptions={ - 'table': 'The feature table.', - 'qsip_metadata': 'The qSIP metadata.', - }, - parameter_descriptions={}, - output_descriptions={ - 'output_table': 'Placeholder.' - }, - name='Run the standard qSIP2 workflow.', - description=( - 'Placeholder.' 
- ) -) - plugin.methods.register_function( function=create_qsip_data, inputs={ @@ -99,7 +75,8 @@ name='Bundle your qSIP metadata and feature table.', description=( 'Placeholder.' - ) + ), + citations=[] ) plugin.methods.register_function( @@ -147,7 +124,40 @@ name='Subset sources and filter features to prepare for comparison.', description=( 'Placeholder.' - ) + ), + citations=[] +) + +plugin.methods.register_function( + function=resample_and_calculate_EAF, + inputs={ + 'filtered_qsip_data': QSIP2Data[Filtered] + }, + parameters={ + 'resamples': Int, + 'random_seed': Int, + }, + outputs=[ + ('eaf_qsip_data', QSIP2Data[EAF]) + ], + input_descriptions={ + 'filtered_qsip_data': 'Your filtered qSIP2 data.' + }, + parameter_descriptions={ + 'resamples': 'The number of bootstrap resamplings to perform.', + 'random_seed': 'The random seed to use during resampling.', + }, + output_descriptions={ + 'eaf_qsip_data': ( + 'Your qSIP2 data with excess atom fraction (EAF) values ' + 'calculated on a per-taxon basis.' + ) + }, + name='Calculate excess atom fraction (EAF).', + description=( + 'Placeholder.' + ), + citations=[] ) plugin.visualizers.register_function( @@ -249,4 +259,34 @@ citations=[], ) +plugin.visualizers.register_function( + function=plot_excess_atom_fractions, + inputs={ + 'eaf_qsip_data': QSIP2Data[EAF], + }, + parameters={ + 'num_top': Int, + 'confidence_interval': Float + }, + input_descriptions={ + 'eaf_qsip_data': 'Your EAF-calculated qSIP2 data.', + }, + parameter_descriptions={ + 'num_top': ( + 'The number of taxa displayed, selected in order of decreasing ' + 'excess atom fraction.' + ), + 'confidence_interval': ( + 'The confidence interval to display from the bootstrapped excess ' + 'atom fractions.' + ) + }, + name='Visualize per-taxon excess atom fractions.', + description=( + 'Plots per-taxon excess atom fractions with bootstrapped confidence ' + 'intervals.' 
+ ), + citations=[] +) + importlib.import_module('q2_qsip2.types._deferred_setup') diff --git a/q2_qsip2/types/_deferred_setup/_transformers.py b/q2_qsip2/types/_deferred_setup/_transformers.py index ee22925..8ed5591 100644 --- a/q2_qsip2/types/_deferred_setup/_transformers.py +++ b/q2_qsip2/types/_deferred_setup/_transformers.py @@ -54,7 +54,7 @@ def _4(ff: QSIP2DataFilteredFormat) -> RS4: @plugin.register_transformer def _5(qsip_object: RS4) -> QSIP2DataEAFFormat: - ff = QSIP2DataFilteredFormat() + ff = QSIP2DataEAFFormat() return _qsip_object_to_format(qsip_object, ff) diff --git a/q2_qsip2/visualizers/_visualizers.py b/q2_qsip2/visualizers/_visualizers.py index ab2e761..e981774 100644 --- a/q2_qsip2/visualizers/_visualizers.py +++ b/q2_qsip2/visualizers/_visualizers.py @@ -124,3 +124,34 @@ def plot_filtered_features(output_dir: str, filtered_qsip_data: RS4) -> None: _ggplot2_object_to_visualization( plot, Path(output_dir), width=10, height=10 ) + + +def plot_excess_atom_fractions( + output_dir: str, + eaf_qsip_data: RS4, + num_top: int, + confidence_interval: float = 0.9 +) -> None: + ''' + Plots per-taxon excess atom fraction values. + + Parameters + ---------- + output_dir : str + The root directory of the visualization loaded into the browser. + eaf_qsip_data : RS4 + The EAF-calculated "qsip_data" object. + num_top : int + The number of taxa displayed, taken in order of decreasing excess + atom fraction. + confidence_interval : float + The confidence interval to display from the bootstrapped excess atom + fraction values. 
+ ''' + plot = qsip2.plot_EAF_values( + eaf_qsip_data, top=num_top, confidence=confidence_interval, error='bar' + ) + + _ggplot2_object_to_visualization( + plot, Path(output_dir), width=10, height=10 + ) diff --git a/q2_qsip2/workflow.py b/q2_qsip2/workflow.py index 7388bf9..30bba68 100644 --- a/q2_qsip2/workflow.py +++ b/q2_qsip2/workflow.py @@ -127,10 +127,10 @@ def subset_and_filter( qsip_data: RS4, unlabeled_sources: list[str], labeled_sources: list[str], - min_unlabeled_sources: int, - min_labeled_sources: int, - min_unlabeled_fractions: int, - min_labeled_fractions: int + min_unlabeled_sources: int = 1, + min_labeled_sources: int = 1, + min_unlabeled_fractions: int = 1, + min_labeled_fractions: int = 1 ) -> RS4: ''' Subsets the qsip data object to include only those sources listed in @@ -158,10 +158,13 @@ def subset_and_filter( The minimum number of fractions a feature must be present in to be considered present in a labeled source. ''' + unlabeled_sources_vector = ro.vectors.StrVector(unlabeled_sources) + labeled_sources_vector = ro.vectors.StrVector(labeled_sources) + filtered_qsip_data = qsip2.run_feature_filter( qsip_data, - unlabeled_source_mat_ids=unlabeled_sources, - labeled_source_mat_ids=labeled_sources, + unlabeled_source_mat_ids=unlabeled_sources_vector, + labeled_source_mat_ids=labeled_sources_vector, min_unlabeled_sources=min_unlabeled_sources, min_labeled_sources=min_labeled_sources, min_unlabeled_fractions=min_unlabeled_fractions, @@ -169,3 +172,31 @@ def subset_and_filter( ) return filtered_qsip_data + + +def resample_and_calculate_EAF( + filtered_qsip_data: RS4, + resamples: int = 1000, + random_seed: int = 1, +) -> RS4: + ''' + Resample and calculate excess atom fraction (EAF) for each feature. + + Parameters + ---------- + filtered_qsip_data : RS4 + The filtered "qsip_data" object. + resamples : int + The number of bootstrap resamplings to perform. + random_seed : int + The random seed to use during resampling. Exposed for reproducibility. 
+ ''' + resampled_qsip_data = qsip2.run_resampling( + filtered_qsip_data, + resamples=resamples, + with_seed=random_seed + ) + + eaf_qsip_data = qsip2.run_EAF_calculations(resampled_qsip_data) + + return eaf_qsip_data