From 3b0cefd0c245f1a4b6a890ce6b0e0bfefdc4b78d Mon Sep 17 00:00:00 2001
From: "Teodor V. Marinov"
Date: Mon, 9 Sep 2024 16:55:01 +0000
Subject: [PATCH] Resolved _distribution and common.gin comments.

---
 compiler_opt/tools/combine_tfa_policies.py    |  2 -
 .../tools/combine_tfa_policies_lib.py         | 37 +------------------
 2 files changed, 2 insertions(+), 37 deletions(-)

diff --git a/compiler_opt/tools/combine_tfa_policies.py b/compiler_opt/tools/combine_tfa_policies.py
index 0e758a8f..0aad21b9 100755
--- a/compiler_opt/tools/combine_tfa_policies.py
+++ b/compiler_opt/tools/combine_tfa_policies.py
@@ -55,8 +55,6 @@ def main(_):
         'Length of policies_names: %d must equal length of policies_paths: %d.',
         len(_COMBINE_POLICIES_NAMES.value), len(_COMBINE_POLICIES_PATHS.value))
     sys.exit(1)
-  gin.add_config_file_search_path(
-      'compiler_opt/rl/inlining/gin_configs/common.gin')
   gin.parse_config_files_and_bindings(
       _GIN_FILES.value, bindings=_GIN_BINDINGS.value, skip_unknown=False)
 
diff --git a/compiler_opt/tools/combine_tfa_policies_lib.py b/compiler_opt/tools/combine_tfa_policies_lib.py
index c8d09b6a..4a53ba19 100644
--- a/compiler_opt/tools/combine_tfa_policies_lib.py
+++ b/compiler_opt/tools/combine_tfa_policies_lib.py
@@ -81,17 +81,6 @@ def _process_observation(
 
     return observation, high_low_tensor
 
-  def _create_distribution(self, inlining_prediction):
-    """Ensures that even deterministic policies return a distribution.
-
-    This will not change the behavior of the action function which is
-    what is used at inference time. The change for the distribution
-    function is so that we can always support sampling even for
-    deterministic policies."""
-    probs = [inlining_prediction, 1.0 - inlining_prediction]
-    logits = [[0.0, tf.math.log(probs[1] / (1.0 - probs[1]))]]
-    return tfp.distributions.Categorical(logits=logits)
-
   def _action(self,
               time_step: ts.TimeStep,
               policy_state: types.NestedTensorSpec,
@@ -122,28 +111,6 @@ def f1():
   def _distribution(
       self, time_step: ts.TimeStep,
       policy_state: types.NestedTensorSpec) -> policy_step.PolicyStep:
-    new_observation = time_step.observation
-    new_observation, switch_tensor = self._process_observation(new_observation)
-    updated_step = ts.TimeStep(
-        step_type=time_step.step_type,
-        reward=time_step.reward,
-        discount=time_step.discount,
-        observation=new_observation)
-
-    # TODO(359): We only support combining two policies.Generalize this to
-    # handle multiple policies.
-    def f0():
-      return tf.cast(
-          self.tf_policies[0].distribution(updated_step).action.cdf(0)[0],
-          dtype=tf.float32)
-
-    def f1():
-      return tf.cast(
-          self.tf_policies[1].distribution(updated_step).action.cdf(0)[0],
-          dtype=tf.float32)
-
-    distribution = tf.cond(
-        tf.math.reduce_all(tf.equal(switch_tensor, self.high_low_tensor)), f0,
-        f1)
+    """Placeholder for distribution as every TFPolicy requires it."""
     return policy_step.PolicyStep(
-        action=self._create_distribution(distribution), state=policy_state)
+        action=tfp.distributions.Deterministic(2.), state=policy_state)