Merge branch 'development'

introducing object-oriented approach from development branch.
opencobra · Jun 21, 2018 · 4f7219e · 4f7219e
2 parents 8fe47f5 + e48060a
commit 4f7219e
Show file tree

Hide file tree

Showing 38 changed files with 665,150 additions and 314 deletions.
diff --git a/README.rst b/README.rst
@@ -1,6 +1,8 @@
 Medusa - Analysis of ensembles of metabolic network reconstructions
 ===================================================================
 
+|Build Status| |PyPI|
+
 Medusa is a tool for constraint-based reconstruction and analysis (COBRA) of ensembles. It builds on the cobrapy package (https://github.com/opencobra/cobrapy) by extending most single-model functionality to efficient ensemble-scale analysis. Additionally, Medusa provides novel functions for the analysis of ensembles.
 
 Medusa is developed openly. We are currently porting code from previous/in-progress projects to generalize beyond the specific use cases that project code was originally developed for. Development plans can be found in the root of the repository in "version_release_plan.txt".
@@ -45,3 +47,7 @@ Feel free to post issues via github or contact us directly via email with any qu
 Authors:
 Greg Medlock (gmedlo [at] gmail [dot] com)
 
+.. |Build Status| image:: https://api.travis-ci.org/gregmedlock/Medusa.svg?branch=master
+   :target: https://travis-ci.org/gregmedlock/Medusa/
+.. |PyPI| image:: https://badge.fury.io/py/medusa-cobra.svg
+   :target: https://pypi.python.org/pypi/medusa-cobra
diff --git a/medusa/core/ensemble.py b/medusa/core/ensemble.py
diff --git a/medusa/core/feature.py b/medusa/core/feature.py
@@ -0,0 +1,48 @@
+
+from __future__ import absolute_import
+
+from cobra.core.object import Object
+
+class Feature(Object):
+    """
+    Feature describing a network component that varies across an ensemble.
+
+    Parameters
+    ----------
+    identifier : string
+        The identifier to associate with the feature. Convention is to append the
+        component_attribute to the base_component's id.
+
+    ensemble : medusa.core.ensemble.Ensemble object
+        The ensemble that the feature is associated with.
+
+    base_component : cobra.core.reaction.Reaction
+        Reference to the Reaction object that the feature describes.
+
+    component_attribute : string
+        string indicating the attribute of base_component that the feature
+        describes the modification of (e.g. "lb", "ub")
+
+    states : dictionary of string:component_attribute value
+        dictionary of model ids mapping to the value of the Feature's
+        component_attribute (value type depends on component_attribute type,
+        e.g. float for "lb", string for "_gene_reaction_rule")
+
+    Attributes
+    ----------
+
+    """
+
+    def __init__(self, identifier=None, name=None,ensemble=None,\
+                base_component=None, component_attribute=None, states=None):
+        Object.__init__(self,identifier,name)
+        self.ensemble = ensemble
+        self.base_component = base_component
+        self.component_attribute = component_attribute
+        self.states = states
+
+
+    def get_model_state(self,member_id):
+        """Get the state of the feature for a particular member
+        """
+        return self.states[member_id]
diff --git a/medusa/core/member.py b/medusa/core/member.py
@@ -0,0 +1,31 @@
+
+from __future__ import absolute_import
+
+from cobra.core.object import Object
+
+class Member(Object):
+    """
+    Object representing an individual member (i.e. model) in an ensemble
+
+    Parameters
+    ----------
+    identifier : string
+        The identifier to associate with the member.
+
+    ensemble : medusa.core.ensemble.Ensemble object
+        The ensemble that the member belongs to.
+
+    states : dictionary of medusa.core.feature.Feature:component_attribute value
+        dictionary of Features mapping to the value of the Feature's
+        component_attribute (value type depends on component_attribute type,
+        e.g. float for "lb", string for "_gene_reaction_rule") for the member.
+
+    Attributes
+    ----------
+
+    """
+
+    def __init__(self,ensemble=None, identifier=None, name=None, states=None):
+        Object.__init__(self,identifier,name)
+        self.ensemble = ensemble
+        self.states = states
diff --git a/medusa/flux_analysis/deletion.py b/medusa/flux_analysis/deletion.py
@@ -0,0 +1,29 @@
+
+
+def ensemble_single_reaction_deletion(ensemble,optimal_only=True,num_models=[]):
+    '''
+    IN PROGRESS, not functional. Need to refactor for features/states.
+
+    Performs single reaction deletions for num_models in the ensemble. Reports
+    absolute objective value, and can be modified to return only optimal solutions.
+
+    Currently does not return the solver status for all members of an ensemble,
+    so infeasible and 0 flux values can't be discriminated unless optimal_only=True
+    is passed (in which case feasible solutions with flux through the objective
+    of 0 are returned, but infeasible solutions are not)
+    '''
+    if not num_models:
+        # if not specified, use all models
+        num_models = len(ensemble.reaction_diffs.keys())
+
+    # for each model, perform all the deletions, then advance to the next model.
+    for model in random.sample(list(ensemble.reaction_diffs.keys()),num_models):
+        # set the correct bounds for the model
+        ensemble._apply_diffs(model)
+        # perform single reaction deletion for all reactions in the model
+        # TODO: make exception for biomass and the demand reaction.
+        print('performing deletions for ' + model)
+        model_deletion_results = cobra.single_reaction_deletion(self.base_model,self.base_model.reactions)
+
+    # if optimal_only, filter the output dataframe for each model to exclude infeasibles,
+    # then append to a master dataframe
diff --git a/medusa/flux_analysis/flux_balance.py b/medusa/flux_analysis/flux_balance.py
@@ -37,7 +37,7 @@ def optimize_ensemble(ensemble,return_flux=None,num_models=None,specific_models=
     '''
 
     if not num_models:
-        num_models = len(ensemble.reaction_diffs.keys())
+        num_models = len(ensemble.members)
 
     if isinstance(return_flux,str):
         return_flux = [return_flux]
@@ -47,14 +47,10 @@ def optimize_ensemble(ensemble,return_flux=None,num_models=None,specific_models=
     if specific_models:
         model_list = specific_models
     else:
-        model_list = sample(list(ensemble.reaction_diffs.keys()),num_models)
+        model_list = sample(ensemble.members,num_models)
     with ensemble.base_model:
         for model in model_list:
-            diffs = ensemble.reaction_diffs[model]
-            for reaction in diffs.keys():
-                rxn = ensemble.base_model.reactions.get_by_id(reaction)
-                rxn.lower_bound = ensemble.reaction_diffs[model][reaction]['lb']
-                rxn.upper_bound = ensemble.reaction_diffs[model][reaction]['ub']
+            ensemble.set_state(model)
             ensemble.base_model.optimize(**kwargs)
             flux_dict[model] = {rxn.id:rxn.flux for rxn in ensemble.base_model.reactions}
     if return_flux:

diff --git a/medusa/flux_analysis/variability.py b/medusa/flux_analysis/variability.py
@@ -53,7 +53,7 @@ def ensemble_fva(ensemble, reaction_list, num_models=[],specific_models=None,
     '''
     if not num_models:
         # if not specified, use all models
-        num_models = len(ensemble.reaction_diffs.keys())
+        num_models = len(ensemble.members)
 
     if isinstance(reaction_list,str):
         reaction_list = [reaction_list]
@@ -65,14 +65,11 @@ def ensemble_fva(ensemble, reaction_list, num_models=[],specific_models=None,
     if specific_models:
         model_list = specific_models
     else:
-        model_list = sample(list(ensemble.reaction_diffs.keys()),num_models)
+        model_list = sample(ensemble.members,num_models)
+        model_list = [model.id for model in model_list]
     with ensemble.base_model:
         for model in model_list:
-            diffs = ensemble.reaction_diffs[model]
-            for reaction in diffs.keys():
-                rxn = ensemble.base_model.reactions.get_by_id(reaction)
-                rxn.lower_bound = ensemble.reaction_diffs[model][reaction]['lb']
-                rxn.upper_bound = ensemble.reaction_diffs[model][reaction]['ub']
+            ensemble.set_state(model)
             fva_result = flux_variability_analysis(
                     ensemble.base_model,reaction_list=reaction_list,
                     fraction_of_optimum=fraction_of_optimum,

diff --git a/medusa/quality/mass_balance.py b/medusa/quality/mass_balance.py
@@ -0,0 +1,50 @@
+
+
+def leak_test(ensemble,metabolites_to_test=[],\
+             exchange_prefix='EX_',verbose=False,num_models=[],**kwargs):
+    '''
+    Checks for leaky metabolites in every member of the ensemble by opening
+    and optimizing a demand reaction while all exchange reactions are closed.
+
+    By default, checks for leaks for every metabolite for all models.
+    '''
+
+    if not num_models:
+        # if the number of models wasn't specified, test all
+        num_models = len(self.reaction_diffs.keys())
+
+    if not metabolites_to_test:
+        metabolites_to_test = [met for met in self.base_model.metabolites]
+
+    old_objective = ensemble.base_model.objective
+    dm_rxns = []
+    for met in metabolites_to_test:
+        rxn = cobra.Reaction(id='leak_DM_' + met.id)
+        rxn.lower_bound = 0.0
+        rxn.upper_bound = 0.0
+        rxn.add_metabolites({met:-1})
+        dm_rxns.append(rxn)
+    ensemble.base_model.add_reactions(dm_rxns)
+    ensemble.base_model.repair()
+
+    leaks = {}
+
+    for rxn in dm_rxns:
+        rxn.upper_bound = 1000.0
+        ensemble.base_model.objective = ensemble.base_model.reactions.get_by_id(rxn.id)
+
+        if verbose:
+            print('checking leak for ' + rxn.id)
+        solutions = ensemble.optimize_ensemble(return_flux=[rxn.id],num_models=num_models,**kwargs)
+        leaks[rxn.id.split('_DM_')[1]] = {}
+        for model in solutions.keys():
+            leaks[rxn.id.split('_DM_')[1]][model] = solutions[model][rxn.id] > 0.0001
+        #rxn.objective_coefficient = 0.0
+        rxn.upper_bound = 0.0
+
+    # remove the demand reactions and restore the original objective
+    ensemble.base_model.remove_reactions(dm_rxns,remove_orphans=True)
+    ensemble.base_model.repair()
+    ensemble.base_model.objective = old_objective
+
+    return leaks
diff --git a/medusa/reconstruct/degrade/degrade.py b/medusa/reconstruct/degrade/degrade.py
@@ -0,0 +1,32 @@
+
+from __future__ import absolute_import
+
+
+
+
+
+
+
+# functions for degrading networks to construct ensembles
+
+def degrade_reactions(base_model,num_reactions,num_models=10):
+    """
+    Removes reactions from an existing COBRA model to generate an ensemble.
+
+    Parameters
+    ----------
+    base_model: cobra.Model
+        Model from which reactions will be removed to create an ensemble.
+    num_reactions: int
+        The number of reactions to remove to generate each ensemble member.
+        Must be smaller than the total number of reactions in the model
+    num_models: int
+        The number of models to generate by randomly removing num_reactions from
+        the base_model. Reactions are removed with replacement.
+
+    Returns
+    -------
+    Medusa.core.ensemble
+        An ensemble
+    """
+    filler=1
diff --git a/medusa/test/__init__.py b/medusa/test/__init__.py
@@ -0,0 +1,37 @@
+from __future__ import absolute_import
+
+import cobra
+import json
+
+from medusa.core.ensemble import Ensemble
+
+
+from os.path import abspath, dirname, join
+
+medusa_directory = abspath(join(dirname(abspath(__file__)), ".."))
+data_dir = join(medusa_directory,"test","data","")
+
+def create_test_model(model_name="textbook"):
+    """Returns an ensemble of models for testing
+    model_name: str
+        One of 'ASF356' or 'ASF519'
+    """
+    if model_name == "ASF356":
+        base_model = cobra.io.load_json_model(join(data_dir, 'ASF356_base_model.json'))
+        with open(join(data_dir, 'ASF356_base_model_reaction_diffs.json'),'r') as infile:
+            reaction_diffs = json.load(infile)
+
+    elif model_name == "ASF519":
+        base_model = cobra.io.load_json_model(join(data_dir, 'ASF519_base_model.json'))
+        with open(join(data_dir, 'ASF519_base_model_reaction_diffs.json'),'r') as infile:
+            reaction_diffs = json.load(infile)
+
+    else:
+        base_model = cobra.create_test_model(model_name)
+
+
+
+    test_model = medusa.Ensemble(base_id=base_model.id)
+    test_model.base_model = base_model
+    test_model.reaction_diffs = reaction_diffs
+    return test_model