From 8f59e8ee351597757ef1ae8327a026ca52f5f8a0 Mon Sep 17 00:00:00 2001 From: Antonin Raffin Date: Mon, 25 Nov 2024 18:25:30 +0100 Subject: [PATCH] Fix reset condition in TQC.train (reset once num_timesteps >= resets[reset_idx]) --- sbx/tqc/tqc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbx/tqc/tqc.py b/sbx/tqc/tqc.py index 49c4475..1dd92a3 100644 --- a/sbx/tqc/tqc.py +++ b/sbx/tqc/tqc.py @@ -199,7 +199,7 @@ def train(self, gradient_steps: int, batch_size: int) -> None: data = self.replay_buffer.sample(batch_size * gradient_steps, env=self._vec_normalize_env) # Maybe reset the parameters - if self.resets and self.reset_idx < len(self.resets) and self.resets[self.reset_idx] >= self.num_timesteps: + if self.resets and self.reset_idx < len(self.resets) and self.num_timesteps >= self.resets[self.reset_idx]: # Note: we are not resetting the entropy coeff assert isinstance(self.qf_learning_rate, float) self.key = self.policy.build(self.key, self.lr_schedule, self.qf_learning_rate)