feat: separated task preprocessing from simulation

biosimulators · Sep 16, 2021 · 14c2931 · 14c2931
1 parent 0f410f4
commit 14c2931
Show file tree

Hide file tree

Showing 8 changed files with 2,398 additions and 70 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,7 +1,7 @@
 # Base OS
 FROM ghcr.io/biosimulators/biosimulators_boolnet/boolnet_base:latest
 
-ARG VERSION="0.1.15"
+ARG VERSION="0.1.16"
 ARG SIMULATOR_VERSION=2.1.5
 
 # metadata

diff --git a/biosimulators_boolnet/_version.py b/biosimulators_boolnet/_version.py
@@ -1 +1 @@
-__version__ = '0.1.15'
+__version__ = '0.1.16'
diff --git a/biosimulators_boolnet/core.py b/biosimulators_boolnet/core.py
@@ -16,15 +16,21 @@
 from biosimulators_utils.report.data_model import ReportFormat, VariableResults, SedDocumentResults  # noqa: F401
 from biosimulators_utils.sedml import validation
 from biosimulators_utils.sedml.data_model import (Task, ModelLanguage, ModelAttributeChange,  # noqa: F401
+                                                  AddElementModelChange, RemoveElementModelChange, ReplaceElementModelChange,
                                                   UniformTimeCourseSimulation, Variable)
 from biosimulators_utils.sedml.exec import exec_sed_doc as base_exec_sed_doc
+from biosimulators_utils.sedml.utils import apply_changes_to_xml_model
 from biosimulators_utils.simulator.utils import get_algorithm_substitution_policy
 from biosimulators_utils.utils.core import raise_errors_warnings
 from biosimulators_utils.warnings import warn, BioSimulatorsWarning
 from kisao.data_model import AlgorithmSubstitutionPolicy, ALGORITHM_SUBSTITUTION_POLICY_LEVELS
 from kisao.utils import get_preferred_substitute_algorithm_by_ids
 from rpy2.robjects.vectors import StrVector
+import copy
+import lxml.etree
 import numpy
+import os
+import tempfile
 
 __all__ = ['exec_sedml_docs_in_combine_archive', 'exec_sed_doc', 'exec_sed_task', 'preprocess_sed_task']
 
@@ -122,54 +128,129 @@ def exec_sed_task(task, variables, preprocessed_task=None, log=None, config=None
           * Task requires an algorithm that BoolNet doesn't support
     """
     config = config or get_config()
+
     if config.LOG and not log:
         log = TaskLog()
 
     if preprocessed_task is None:
         preprocessed_task = preprocess_sed_task(task, variables, config=config)
 
+    model = task.model
+    sim = task.simulation
+
+    # read model
+    boolnet_model = preprocessed_task['model']['model']
+
+    # modify model
+    if model.changes:
+        raise_errors_warnings(validation.validate_model_change_types(
+            model.changes,
+            (ModelAttributeChange, AddElementModelChange, RemoveElementModelChange, ReplaceElementModelChange)),
+            error_summary='Changes for model `{}` are not supported.'.format(model.id))
+
+        model_etree = preprocessed_task['model']['model_etree']
+
+        model = copy.deepcopy(model)
+        for change in model.changes:
+            if isinstance(change, ModelAttributeChange):
+                change.new_value = str(change.new_value)
+
+        apply_changes_to_xml_model(model, model_etree, sed_doc=None, working_dir=None)
+
+        model_file, model_filename = tempfile.mkstemp(suffix='.xml')
+        os.close(model_file)
+
+        model_etree.write(model_filename,
+                          xml_declaration=True,
+                          encoding="utf-8",
+                          standalone=False,
+                          pretty_print=False)
+
+        boolnet_model = preprocessed_task['boolnet'].loadSBML(StrVector([model_filename]))
+
+        os.remove(model_filename)
+
+    # initialize arguments for BoolNet's time course simulation method
+    simulation_method_args = preprocessed_task['simulation']['method_args']
+    simulation_method_args['numMeasurements'] = int(sim.number_of_points) + 1
+
+    # execute simulation
+    species_results_matrix = preprocessed_task['boolnet'].generateTimeSeries(boolnet_model, **simulation_method_args)[0]
+    species_results_dict = {}
+    for i_species, species_id in enumerate(species_results_matrix.rownames):
+        species_results_dict[species_id] = numpy.array(species_results_matrix.rx(i_species + 1, True))
+
+    # get the results in BioSimulators' format
+    variable_target_sbml_id_map = preprocessed_task['model']['variable_target_sbml_id_map']
+    variable_results = get_variable_results(sim, variables, variable_target_sbml_id_map, species_results_dict)
+    for variable in variables:
+        variable_results[variable.id] = variable_results[variable.id][-(int(sim.number_of_points) + 1):]
+
+    # log action
+    if config.LOG:
+        log.algorithm = preprocessed_task['simulation']['algorithm_kisao_id']
+        log.simulator_details = {
+            'method': 'BoolNet::generateTimeSeries',
+            'arguments': copy.copy(simulation_method_args),
+        }
+        log.simulator_details['arguments']['type'] = preprocessed_task['simulation']['algorithm_type']
+
+    # return the result of each variable and log
+    return variable_results, log
+
+
+def preprocess_sed_task(task, variables, config=None):
+    """ Preprocess a SED task, including its possible model changes and variables. This is useful for avoiding
+    repeatedly initializing tasks on repeated calls of :obj:`exec_sed_task`.
+
+    Args:
+        task (:obj:`Task`): task
+        variables (:obj:`list` of :obj:`Variable`): variables that should be recorded
+        config (:obj:`Config`, optional): BioSimulators common configuration
+
+    Returns:
+        :obj:`dict`: preprocessed information about the task
+    """
+    config = config or get_config()
+
     # validate task
     model = task.model
     sim = task.simulation
 
     if config.VALIDATE_SEDML:
         raise_errors_warnings(validation.validate_task(task),
                               error_summary='Task `{}` is invalid.'.format(task.id))
-        raise_errors_warnings(validation.validate_model_language(task.model.language, ModelLanguage.SBML),
+        raise_errors_warnings(validation.validate_model_language(model.language, ModelLanguage.SBML),
                               error_summary='Language for model `{}` is not supported.'.format(model.id))
-        raise_errors_warnings(validation.validate_model_change_types(task.model.changes, ()),
-                              error_summary='Changes for model `{}` are not supported.'.format(model.id))
-        raise_errors_warnings(*validation.validate_model_changes(task.model),
+        raise_errors_warnings(validation.validate_model_change_types(
+            model.changes,
+            (ModelAttributeChange, AddElementModelChange, RemoveElementModelChange, ReplaceElementModelChange)),
+            error_summary='Changes for model `{}` are not supported.'.format(model.id))
+        raise_errors_warnings(*validation.validate_model_changes(model),
                               error_summary='Changes for model `{}` are invalid.'.format(model.id))
-        raise_errors_warnings(validation.validate_simulation_type(task.simulation, (UniformTimeCourseSimulation, )),
+        raise_errors_warnings(validation.validate_simulation_type(sim, (UniformTimeCourseSimulation, )),
                               error_summary='{} `{}` is not supported.'.format(sim.__class__.__name__, sim.id))
-        raise_errors_warnings(*validation.validate_simulation(task.simulation),
+        raise_errors_warnings(*validation.validate_simulation(sim),
                               error_summary='Simulation `{}` is invalid.'.format(sim.id))
-        raise_errors_warnings(validate_time_course(task.simulation),
+        raise_errors_warnings(validate_time_course(sim),
                               error_summary='Simulation `{}` is invalid.'.format(sim.id))
         raise_errors_warnings(*validation.validate_data_generator_variables(variables),
                               error_summary='Data generator variables for task `{}` are invalid.'.format(task.id))
-    target_x_paths_keys = get_variable_target_x_path_keys(variables, task.model.source)
+
+    model_etree = lxml.etree.parse(model.source)
+    variable_target_sbml_id_map = get_variable_target_x_path_keys(variables, model_etree)
 
     # validate model
     if config.VALIDATE_SEDML_MODELS:
-        raise_errors_warnings(*validation.validate_model(task.model, [], working_dir='.'),
+        raise_errors_warnings(*validation.validate_model(model, [], working_dir='.'),
                               error_summary='Model `{}` is invalid.'.format(model.id),
                               warning_summary='Model `{}` may be invalid.'.format(model.id))
 
     # get BoolNet
     boolnet = get_boolnet()
 
     # read model
-    model = boolnet.loadSBML(StrVector([task.model.source]))
-
-    # initialize arguments for BoolNet's time course simulation method
-    sim = task.simulation
-    simulation_method_args = {
-        'numMeasurements': int(sim.number_of_points) + 1,
-        'numSeries': 1,
-        'perturbations': 0,
-    }
+    boolnet_model = boolnet.loadSBML(StrVector([model.source]))
 
     # Load the algorithm specified by :obj:`task.simulation.algorithm.kisao_id`
     alg_kisao_id = sim.algorithm.kisao_id
@@ -178,13 +259,20 @@ def exec_sed_task(task, variables, preprocessed_task=None, log=None, config=None
         alg_kisao_id, KISAO_METHOD_ARGUMENTS_MAP.keys(),
         substitution_policy=algorithm_substitution_policy)
     alg = KISAO_METHOD_ARGUMENTS_MAP[exec_kisao_id]
-    simulation_method_args['type'] = StrVector([alg['type']])
+    alg_type = alg['type']
+
+    simulation_method_args = {
+        'numMeasurements': int(sim.number_of_points) + 1,
+        'numSeries': 1,
+        'perturbations': 0,
+        'type': StrVector([alg_type]),
+    }
 
     # Apply the algorithm parameter changes specified by `simulation.algorithm.parameter_changes`
     if exec_kisao_id == alg_kisao_id:
         for change in sim.algorithm.changes:
             try:
-                set_simulation_method_arg(model, exec_kisao_id, change, simulation_method_args)
+                set_simulation_method_arg(boolnet_model, exec_kisao_id, change, simulation_method_args)
             except NotImplementedError as exception:
                 if (
                     ALGORITHM_SUBSTITUTION_POLICY_LEVELS[algorithm_substitution_policy]
@@ -209,40 +297,17 @@ def exec_sed_task(task, variables, preprocessed_task=None, log=None, config=None
     # validate that BoolNet can produce the desired variables of the desired data generators
     validate_data_generator_variables(variables, exec_kisao_id)
 
-    # execute simulation
-    species_results_matrix = boolnet.generateTimeSeries(model, **simulation_method_args)[0]
-    species_results_dict = {}
-    for i_species, species_id in enumerate(species_results_matrix.rownames):
-        species_results_dict[species_id] = numpy.array(species_results_matrix.rx(i_species + 1, True))
-
-    # get the results in BioSimulator's format
-    variable_results = get_variable_results(sim, variables, target_x_paths_keys, species_results_dict)
-    for variable in variables:
-        variable_results[variable.id] = variable_results[variable.id][-(int(sim.number_of_points) + 1):]
-
-    # log action
-    if config.LOG:
-        log.algorithm = exec_kisao_id
-        log.simulator_details = {
-            'method': 'BoolNet::generateTimeSeries',
-            'arguments': simulation_method_args,
+    # return preprocessed information
+    return {
+        'boolnet': boolnet,
+        'model': {
+            'model': boolnet_model,
+            'model_etree': model_etree,
+            'variable_target_sbml_id_map': variable_target_sbml_id_map,
+        },
+        'simulation': {
+            'method_args': simulation_method_args,
+            'algorithm_kisao_id': exec_kisao_id,
+            'algorithm_type': alg_type,
         }
-        simulation_method_args['type'] = alg['type']
-
-    # return the result of each variable and log
-    return variable_results, log
-
-
-def preprocess_sed_task(task, variables, config=None):
-    """ Preprocess a SED task, including its possible model changes and variables. This is useful for avoiding
-    repeatedly initializing tasks on repeated calls of :obj:`exec_sed_task`.
-
-    Args:
-        task (:obj:`Task`): task
-        variables (:obj:`list` of :obj:`Variable`): variables that should be recorded
-        config (:obj:`Config`, optional): BioSimulators common configuration
-
-    Returns:
-        :obj:`object`: preprocessed information about the task
-    """
-    pass
+    }
diff --git a/biosimulators_boolnet/utils.py b/biosimulators_boolnet/utils.py
@@ -15,7 +15,7 @@
 from rpy2.robjects.vectors import StrVector, ListVector  # noqa: F401
 import biosimulators_utils.sedml.validation
 import biosimulators_utils.xml.utils
-import lxml
+import lxml.etree  # noqa: F401
 import numpy
 import re
 
@@ -69,7 +69,7 @@ def get_boolnet():
     """ Get the BoolNet R package
 
     Returns:
-        :obj:`rpy2.robjects.packages.InstalledSTPackage`: BoolNet R package
+        :obj:`InstalledSTPackage`: BoolNet R package
     """
     return importr('BoolNet')
 
@@ -147,18 +147,17 @@ def validate_data_generator_variables(variables, algorithm_kisao_id):
         raise ValueError(msg)
 
 
-def get_variable_target_x_path_keys(variables, model_source):
+def get_variable_target_x_path_keys(variables, model_etree):
     """ Get the BoolNet key for each XML XPath target of a SED-ML variable
 
     Args:
         variables (:obj:`list` of :obj:`Variable`): variables of data generators
-        model_source (:obj:`str`): path to model
+        model_etree (:obj:`lxml.etree._ElementTree`): element tree for model
 
     Returns:
         :obj:`dict`: dictionary that maps each variable target to the BoolNet key
             of the corresponding qualitative species
     """
-    model_etree = lxml.etree.parse(model_source)
     namespaces = biosimulators_utils.xml.utils.get_namespaces_for_xml_doc(model_etree)
 
     target_x_paths_ids = biosimulators_utils.sedml.validation.validate_target_xpaths(