Skip to content

Commit

Permalink
Merge pull request #319 from MannLabs/refactor_parameter_proposal
Browse files Browse the repository at this point in the history
Refactor parameter proposal
  • Loading branch information
odespard authored Aug 23, 2024
2 parents 2098ba4 + f63de4f commit 3214bfa
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 126 deletions.
18 changes: 9 additions & 9 deletions alphadia/constants/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -241,27 +241,27 @@ optimization:
order_of_optimization: null

# Parameters for the update rule for each parameter:
# - update_interval: the percentile interval to use (as a decimal)
# - update_percentile_range: the percentile interval to use (as a decimal)
# - update_factor: the factor by which to multiply the result from the percentile interval to get the new parameter value for the next round of search
ms2_error:
targeted_update_interval: 0.95
targeted_update_percentile_range: 0.95
targeted_update_factor: 1.0
automatic_update_interval: 0.99
automatic_update_percentile_range: 0.99
automatic_update_factor: 1.1
ms1_error:
targeted_update_interval: 0.95
targeted_update_percentile_range: 0.95
targeted_update_factor: 1.0
automatic_update_interval: 0.99
automatic_update_percentile_range: 0.99
automatic_update_factor: 1.1
mobility_error:
targeted_update_interval: 0.95
targeted_update_percentile_range: 0.95
targeted_update_factor: 1.0
automatic_update_interval: 0.99
automatic_update_percentile_range: 0.99
automatic_update_factor: 1.1
rt_error:
targeted_update_interval: 0.95
targeted_update_percentile_range: 0.95
targeted_update_factor: 1.0
automatic_update_interval: 0.99
automatic_update_percentile_range: 0.99
automatic_update_factor: 1.1

# configuration for the optimization manager
Expand Down
102 changes: 17 additions & 85 deletions alphadia/workflow/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ def __init__(
self.update_factor = workflow.config["optimization"][self.parameter_name][
"automatic_update_factor"
]
self.update_interval = workflow.config["optimization"][self.parameter_name][
"automatic_update_interval"
]
self.update_percentile_range = workflow.config["optimization"][
self.parameter_name
]["automatic_update_percentile_range"]

def step(
self,
Expand Down Expand Up @@ -211,9 +211,13 @@ def plot(self):

plt.show()

@abstractmethod
def _propose_new_parameter(self, df):
"""This method specifies the rule according to which the search parameter is updated between rounds of optimization. The rule is specific to the parameter being optimized.
def _propose_new_parameter(self, df: pd.DataFrame):
"""This method specifies the rule according to which the search parameter is updated between rounds of optimization. The update rule is
1) calculate the deviation of the predicted mz values from the observed mz values,
2) take the mean of the endpoints of the central interval
(determined by the self.update_percentile_range attribute, which gives the percentile range of the interval as a decimal) of these deviations, and
3) multiply this value by self.update_factor.
This is implemented by the ci method for the estimator.
Parameters
----------
Expand All @@ -226,9 +230,10 @@ def _propose_new_parameter(self, df):
float
The proposed new value for the search parameter.
"""
pass
return self.update_factor * self.workflow.calibration_manager.get_estimator(
self.estimator_group_name, self.estimator_name
).ci(df, self.update_percentile_range)

def _check_convergence(self):
"""Optimization should stop if continued narrowing of the parameter is not improving the feature value.
Expand Down Expand Up @@ -303,9 +308,9 @@ def __init__(
self.update_factor = workflow.config["optimization"][self.parameter_name][
"targeted_update_factor"
]
self.update_interval = workflow.config["optimization"][self.parameter_name][
"targeted_update_interval"
]
self.update_percentile_range = workflow.config["optimization"][
self.parameter_name
]["targeted_update_percentile_range"]
self.has_converged = False

def _check_convergence(self, proposed_parameter: float):
Expand Down Expand Up @@ -339,7 +344,7 @@ def _propose_new_parameter(self, df: pd.DataFrame):
return self.update_factor * max(
self.workflow.calibration_manager.get_estimator(
self.estimator_group_name, self.estimator_name
).ci(df, self.update_interval),
).ci(df, self.update_percentile_range),
self.target_parameter,
)

Expand Down Expand Up @@ -397,24 +402,6 @@ def __init__(
self.feature_name = "precursor_proportion_detected"
super().__init__(initial_parameter, workflow, reporter)

def _propose_new_parameter(self, df: pd.DataFrame):
"""See base class. The update rule is
1) calculate the deviation of the predicted mz values from the observed mz values,
2) take the mean of the endpoints of the central interval
(determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
3) multiply this value by self.update_factor.
This is implemented by the ci method for the estimator.
Returns
-------
float
The proposed new value for the search parameter.
"""
return self.update_factor * self.workflow.calibration_manager.get_estimator(
self.estimator_group_name, self.estimator_name
).ci(df, self.update_interval)

def _get_feature_value(
self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
):
Expand All @@ -435,24 +422,6 @@ def __init__(
self.feature_name = "precursor_proportion_detected"
super().__init__(initial_parameter, workflow, reporter)

def _propose_new_parameter(self, df: pd.DataFrame):
"""See base class. The update rule is
1) calculate the deviation of the predicted mz values from the observed mz values,
2) take the mean of the endpoints of the central interval
(determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
3) multiply this value by self.update_factor.
This is implemented by the ci method for the estimator.
Returns
-------
float
The proposed new value for the search parameter.
"""
return self.update_factor * self.workflow.calibration_manager.get_estimator(
self.estimator_group_name, self.estimator_name
).ci(df, self.update_interval)

def _get_feature_value(
self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
):
Expand All @@ -473,24 +442,6 @@ def __init__(
self.feature_name = "mean_isotope_intensity_correlation"
super().__init__(initial_parameter, workflow, reporter)

def _propose_new_parameter(self, df: pd.DataFrame):
"""See base class. The update rule is
1) calculate the deviation of the predicted mz values from the observed mz values,
2) take the mean of the endpoints of the central interval
(determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
3) multiply this value by self.update_factor.
This is implemented by the ci method for the estimator.
Returns
-------
float
The proposed new value for the search parameter.
"""
return self.update_factor * self.workflow.calibration_manager.get_estimator(
self.estimator_group_name, self.estimator_name
).ci(df, self.update_interval)

def _get_feature_value(
self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
):
Expand All @@ -511,25 +462,6 @@ def __init__(
self.feature_name = "precursor_proportion_detected"
super().__init__(initial_parameter, workflow, reporter)

def _propose_new_parameter(self, df: pd.DataFrame):
"""See base class. The update rule is
1) calculate the deviation of the predicted mz values from the observed mz values,
2) take the mean of the endpoints of the central interval
(determined by the self.update_interval attribute, which determines the percentile taken expressed as a decimal) of these deviations, and
3) multiply this value by self.update_factor.
This is implemented by the ci method for the estimator.
Returns
-------
float
The proposed new value for the search parameter.
"""

return self.update_factor * self.workflow.calibration_manager.get_estimator(
self.estimator_group_name, self.estimator_name
).ci(df, self.update_interval)

def _get_feature_value(
self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
):
Expand Down
64 changes: 32 additions & 32 deletions tests/unit_tests/test_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,32 @@ def test_optlock_batch_idx():
assert optlock.stop_idx == 2000


def test_optlock_reindex():
library = create_test_library_for_indexing()
optlock = optimization.OptimizationLock(library, TEST_OPTLOCK_CONFIG)
optlock.batch_plan = [[0, 100], [100, 200]]
optlock.set_batch_dfs(
optlock.elution_group_order[optlock.start_idx : optlock.stop_idx]
)

assert (
(
optlock.batch_library._precursor_df["flat_frag_stop_idx"].iloc[100]
- optlock.batch_library._precursor_df["flat_frag_start_idx"].iloc[100]
)
== (
(optlock.batch_library._precursor_df["precursor_idx"].iloc[100] + 1) ** 2
- optlock.batch_library._precursor_df["precursor_idx"].iloc[100] ** 2
)
) # Each precursor's fragment span (stop index minus start index) should equal (ID + 1)**2 - ID**2 for its original ID, even if reindexing has changed the start and stop index to different values (this presumably mirrors how create_test_library_for_indexing assigns fragments; confirm there)
assert (
optlock.batch_library._fragment_df.iloc[
optlock.batch_library._precursor_df.iloc[50]["flat_frag_start_idx"]
]["precursor_idx"]
== optlock.batch_library._precursor_df.iloc[50]["precursor_idx"] ** 2
) # The original start index of any precursor should be equal to the square of its original ID


def test_configurability():
workflow = create_workflow_instance()
workflow.config["optimization"].update(
Expand All @@ -931,12 +957,12 @@ def test_configurability():
["mobility_error"],
],
"rt_error": {
"automatic_update_interval": 0.99,
"automatic_update_percentile_range": 0.99,
"automatic_update_factor": 1.3,
},
"ms2_error": {
"automatic_update_interval": 0.80,
"targeted_update_interval": 0.995,
"automatic_update_percentile_range": 0.80,
"targeted_update_percentile_range": 0.995,
"targeted_update_factor": 1.2,
},
}
Expand All @@ -953,42 +979,16 @@ def test_configurability():

assert ordered_optimizers[0][0].parameter_name == "rt_error"
assert isinstance(ordered_optimizers[0][0], optimization.AutomaticRTOptimizer)
assert ordered_optimizers[0][0].update_interval == 0.99
assert ordered_optimizers[0][0].update_percentile_range == 0.99
assert ordered_optimizers[0][0].update_factor == 1.3

assert ordered_optimizers[1][0].parameter_name == "ms1_error"
assert ordered_optimizers[1][0].update_interval == 0.95
assert ordered_optimizers[1][0].update_percentile_range == 0.95
assert isinstance(ordered_optimizers[1][0], optimization.TargetedMS1Optimizer)

assert ordered_optimizers[1][1].parameter_name == "ms2_error"
assert isinstance(ordered_optimizers[1][1], optimization.TargetedMS2Optimizer)
assert ordered_optimizers[1][1].update_interval == 0.995
assert ordered_optimizers[1][1].update_percentile_range == 0.995
assert ordered_optimizers[1][1].update_factor == 1.2

assert ordered_optimizers[2][0].parameter_name == "mobility_error"


def test_optlock_reindex():
library = create_test_library_for_indexing()
optlock = optimization.OptimizationLock(library, TEST_OPTLOCK_CONFIG)
optlock.batch_plan = [[0, 100], [100, 200]]
optlock.set_batch_dfs(
optlock.elution_group_order[optlock.start_idx : optlock.stop_idx]
)

assert (
(
optlock.batch_library._precursor_df["flat_frag_stop_idx"].iloc[100]
- optlock.batch_library._precursor_df["flat_frag_start_idx"].iloc[100]
)
== (
(optlock.batch_library._precursor_df["precursor_idx"].iloc[100] + 1) ** 2
- optlock.batch_library._precursor_df["precursor_idx"].iloc[100] ** 2
)
) # Each precursor's fragment span (stop index minus start index) should equal (ID + 1)**2 - ID**2 for its original ID, even if reindexing has changed the start and stop index to different values (this presumably mirrors how create_test_library_for_indexing assigns fragments; confirm there)
assert (
optlock.batch_library._fragment_df.iloc[
optlock.batch_library._precursor_df.iloc[50]["flat_frag_start_idx"]
]["precursor_idx"]
== optlock.batch_library._precursor_df.iloc[50]["precursor_idx"] ** 2
) # The original start index of any precursor should be equal to the square of its original ID

0 comments on commit 3214bfa

Please sign in to comment.