Merge pull request #319 from MannLabs/refactor_parameter_proposal

Refactor parameter proposal
MannLabs · Aug 23, 2024 · 3214bfa · 3214bfa
2 parents 2098ba4 + f63de4f
commit 3214bfa
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 126 deletions.
diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
@@ -241,27 +241,27 @@ optimization:
   order_of_optimization: null
 
   # Parameters for the update rule for each parameter:
-  #   - update_interval: the percentile interval to use (as a decimal)
+  #   - update_percentile_range: the percentile interval to use (as a decimal)
   #   - update_factor: the factor by which to multiply the result from the percentile interval to get the new parameter value for the next round of search
   ms2_error:
-      targeted_update_interval: 0.95
+      targeted_update_percentile_range: 0.95
       targeted_update_factor: 1.0
-      automatic_update_interval: 0.99
+      automatic_update_percentile_range: 0.99
       automatic_update_factor: 1.1
   ms1_error:
-      targeted_update_interval: 0.95
+      targeted_update_percentile_range: 0.95
       targeted_update_factor: 1.0
-      automatic_update_interval: 0.99
+      automatic_update_percentile_range: 0.99
       automatic_update_factor: 1.1
   mobility_error:
-      targeted_update_interval: 0.95
+      targeted_update_percentile_range: 0.95
       targeted_update_factor: 1.0
-      automatic_update_interval: 0.99
+      automatic_update_percentile_range: 0.99
       automatic_update_factor: 1.1
   rt_error:
-      targeted_update_interval: 0.95
+      targeted_update_percentile_range: 0.95
       targeted_update_factor: 1.0
-      automatic_update_interval: 0.99
+      automatic_update_percentile_range: 0.99
       automatic_update_factor: 1.1
 
 # configuration for the optimization manager

diff --git a/alphadia/workflow/optimization.py b/alphadia/workflow/optimization.py
@@ -91,9 +91,9 @@ def __init__(
         self.update_factor = workflow.config["optimization"][self.parameter_name][
             "automatic_update_factor"
         ]
-        self.update_interval = workflow.config["optimization"][self.parameter_name][
-            "automatic_update_interval"
-        ]
+        self.update_percentile_range = workflow.config["optimization"][
+            self.parameter_name
+        ]["automatic_update_percentile_range"]
 
     def step(
         self,
@@ -211,9 +211,13 @@ def plot(self):
 
         plt.show()
 
-    @abstractmethod
-    def _propose_new_parameter(self, df):
-        """This method specifies the rule according to which the search parameter is updated between rounds of optimization. The rule is specific to the parameter being optimized.
+    def _propose_new_parameter(self, df: pd.DataFrame):
+        """This method specifies the rule according to which the search parameter is updated between rounds of optimization. The update rule is
+            1) calculate the deviation of the predicted values from the observed values for the property being optimized (e.g. mz, rt or mobility),
+            2) take the mean of the endpoints of the central interval
+                (determined by the self.update_percentile_range attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
+            3) multiply this value by self.update_factor.
+        This is implemented by the ci method for the estimator.
 
         Parameters
         ----------
@@ -226,9 +230,10 @@ def _propose_new_parameter(self, df):
         float
             The proposed new value for the search parameter.
 
-
         """
-        pass
+        return self.update_factor * self.workflow.calibration_manager.get_estimator(
+            self.estimator_group_name, self.estimator_name
+        ).ci(df, self.update_percentile_range)
 
     def _check_convergence(self):
         """Optimization should stop if continued narrowing of the parameter is not improving the feature value.
@@ -303,9 +308,9 @@ def __init__(
         self.update_factor = workflow.config["optimization"][self.parameter_name][
             "targeted_update_factor"
         ]
-        self.update_interval = workflow.config["optimization"][self.parameter_name][
-            "targeted_update_interval"
-        ]
+        self.update_percentile_range = workflow.config["optimization"][
+            self.parameter_name
+        ]["targeted_update_percentile_range"]
         self.has_converged = False
 
     def _check_convergence(self, proposed_parameter: float):
@@ -339,7 +344,7 @@ def _propose_new_parameter(self, df: pd.DataFrame):
         return self.update_factor * max(
             self.workflow.calibration_manager.get_estimator(
                 self.estimator_group_name, self.estimator_name
-            ).ci(df, self.update_interval),
+            ).ci(df, self.update_percentile_range),
             self.target_parameter,
         )
 
@@ -397,24 +402,6 @@ def __init__(
         self.feature_name = "precursor_proportion_detected"
         super().__init__(initial_parameter, workflow, reporter)
 
-    def _propose_new_parameter(self, df: pd.DataFrame):
-        """See base class. The update rule is
-            1) calculate the deviation of the predicted mz values from the observed mz values,
-            2) take the mean of the endpoints of the central interval
-                (determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
-            3) multiply this value by self.update_factor.
-        This is implemented by the ci method for the estimator.
-
-        Returns
-        -------
-        float
-            The proposed new value for the search parameter.
-
-        """
-        return self.update_factor * self.workflow.calibration_manager.get_estimator(
-            self.estimator_group_name, self.estimator_name
-        ).ci(df, self.update_interval)
-
     def _get_feature_value(
         self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
     ):
@@ -435,24 +422,6 @@ def __init__(
         self.feature_name = "precursor_proportion_detected"
         super().__init__(initial_parameter, workflow, reporter)
 
-    def _propose_new_parameter(self, df: pd.DataFrame):
-        """See base class. The update rule is
-            1) calculate the deviation of the predicted mz values from the observed mz values,
-            2) take the mean of the endpoints of the central interval
-                (determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
-            3) multiply this value by self.update_factor.
-        This is implemented by the ci method for the estimator.
-
-        Returns
-        -------
-        float
-            The proposed new value for the search parameter.
-
-        """
-        return self.update_factor * self.workflow.calibration_manager.get_estimator(
-            self.estimator_group_name, self.estimator_name
-        ).ci(df, self.update_interval)
-
     def _get_feature_value(
         self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
     ):
@@ -473,24 +442,6 @@ def __init__(
         self.feature_name = "mean_isotope_intensity_correlation"
         super().__init__(initial_parameter, workflow, reporter)
 
-    def _propose_new_parameter(self, df: pd.DataFrame):
-        """See base class. The update rule is
-            1) calculate the deviation of the predicted mz values from the observed mz values,
-            2) take the mean of the endpoints of the central interval
-                (determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
-            3) multiply this value by self.update_factor.
-        This is implemented by the ci method for the estimator.
-
-        Returns
-        -------
-        float
-            The proposed new value for the search parameter.
-
-        """
-        return self.update_factor * self.workflow.calibration_manager.get_estimator(
-            self.estimator_group_name, self.estimator_name
-        ).ci(df, self.update_interval)
-
     def _get_feature_value(
         self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
     ):
@@ -511,25 +462,6 @@ def __init__(
         self.feature_name = "precursor_proportion_detected"
         super().__init__(initial_parameter, workflow, reporter)
 
-    def _propose_new_parameter(self, df: pd.DataFrame):
-        """See base class. The update rule is
-            1) calculate the deviation of the predicted mz values from the observed mz values,
-            2) take the mean of the endpoints of the central interval
-                (determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
-            3) multiply this value by self.update_factor.
-        This is implemented by the ci method for the estimator.
-
-        Returns
-        -------
-        float
-            The proposed new value for the search parameter.
-
-        """
-
-        return self.update_factor * self.workflow.calibration_manager.get_estimator(
-            self.estimator_group_name, self.estimator_name
-        ).ci(df, self.update_interval)
-
     def _get_feature_value(
         self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
     ):

diff --git a/tests/unit_tests/test_workflow.py b/tests/unit_tests/test_workflow.py
@@ -921,6 +921,32 @@ def test_optlock_batch_idx():
     assert optlock.stop_idx == 2000
 
 
+def test_optlock_reindex():
+    library = create_test_library_for_indexing()
+    optlock = optimization.OptimizationLock(library, TEST_OPTLOCK_CONFIG)
+    optlock.batch_plan = [[0, 100], [100, 200]]
+    optlock.set_batch_dfs(
+        optlock.elution_group_order[optlock.start_idx : optlock.stop_idx]
+    )
+
+    assert (
+        (
+            optlock.batch_library._precursor_df["flat_frag_stop_idx"].iloc[100]
+            - optlock.batch_library._precursor_df["flat_frag_start_idx"].iloc[100]
+        )
+        == (
+            (optlock.batch_library._precursor_df["precursor_idx"].iloc[100] + 1) ** 2
+            - optlock.batch_library._precursor_df["precursor_idx"].iloc[100] ** 2
+        )
+    )  # Since each precursor's fragments were set up (based on its original ID) to span from ID ** 2 to (ID + 1) ** 2, the difference between the stop and start index should be equal to (ID + 1) ** 2 - ID ** 2 (even if the start and stop index have been changed to different values)
+    assert (
+        optlock.batch_library._fragment_df.iloc[
+            optlock.batch_library._precursor_df.iloc[50]["flat_frag_start_idx"]
+        ]["precursor_idx"]
+        == optlock.batch_library._precursor_df.iloc[50]["precursor_idx"] ** 2
+    )  # The original start index of any precursor should be equal to the square of its original ID
+
+
 def test_configurability():
     workflow = create_workflow_instance()
     workflow.config["optimization"].update(
@@ -931,12 +957,12 @@ def test_configurability():
                 ["mobility_error"],
             ],
             "rt_error": {
-                "automatic_update_interval": 0.99,
+                "automatic_update_percentile_range": 0.99,
                 "automatic_update_factor": 1.3,
             },
             "ms2_error": {
-                "automatic_update_interval": 0.80,
-                "targeted_update_interval": 0.995,
+                "automatic_update_percentile_range": 0.80,
+                "targeted_update_percentile_range": 0.995,
                 "targeted_update_factor": 1.2,
             },
         }
@@ -953,42 +979,16 @@ def test_configurability():
 
     assert ordered_optimizers[0][0].parameter_name == "rt_error"
     assert isinstance(ordered_optimizers[0][0], optimization.AutomaticRTOptimizer)
-    assert ordered_optimizers[0][0].update_interval == 0.99
+    assert ordered_optimizers[0][0].update_percentile_range == 0.99
     assert ordered_optimizers[0][0].update_factor == 1.3
 
     assert ordered_optimizers[1][0].parameter_name == "ms1_error"
-    assert ordered_optimizers[1][0].update_interval == 0.95
+    assert ordered_optimizers[1][0].update_percentile_range == 0.95
     assert isinstance(ordered_optimizers[1][0], optimization.TargetedMS1Optimizer)
 
     assert ordered_optimizers[1][1].parameter_name == "ms2_error"
     assert isinstance(ordered_optimizers[1][1], optimization.TargetedMS2Optimizer)
-    assert ordered_optimizers[1][1].update_interval == 0.995
+    assert ordered_optimizers[1][1].update_percentile_range == 0.995
     assert ordered_optimizers[1][1].update_factor == 1.2
 
     assert ordered_optimizers[2][0].parameter_name == "mobility_error"
-
-
-def test_optlock_reindex():
-    library = create_test_library_for_indexing()
-    optlock = optimization.OptimizationLock(library, TEST_OPTLOCK_CONFIG)
-    optlock.batch_plan = [[0, 100], [100, 200]]
-    optlock.set_batch_dfs(
-        optlock.elution_group_order[optlock.start_idx : optlock.stop_idx]
-    )
-
-    assert (
-        (
-            optlock.batch_library._precursor_df["flat_frag_stop_idx"].iloc[100]
-            - optlock.batch_library._precursor_df["flat_frag_start_idx"].iloc[100]
-        )
-        == (
-            (optlock.batch_library._precursor_df["precursor_idx"].iloc[100] + 1) ** 2
-            - optlock.batch_library._precursor_df["precursor_idx"].iloc[100] ** 2
-        )
-    )  # Since each precursor was set (based on its original ID) to have a number of fragments equal to its original ID squared, the difference between the start and stop index should be equal to the original ID squared (even if the start and stop index have been changed to different values)
-    assert (
-        optlock.batch_library._fragment_df.iloc[
-            optlock.batch_library._precursor_df.iloc[50]["flat_frag_start_idx"]
-        ]["precursor_idx"]
-        == optlock.batch_library._precursor_df.iloc[50]["precursor_idx"] ** 2
-    )  # The original start index of any precursor should be equal to the square of the its original ID