From a7a96d46208e33ce6094b9c388f1e1ddc3ea254a Mon Sep 17 00:00:00 2001 From: npit Date: Thu, 11 Jan 2024 01:04:17 +0200 Subject: [PATCH 1/7] Fix missing seed / options args in dummyvecenv env resets due to episode termination --- docs/guide/vec_envs.rst | 4 +++- docs/misc/changelog.rst | 2 ++ stable_baselines3/common/vec_env/dummy_vec_env.py | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/guide/vec_envs.rst b/docs/guide/vec_envs.rst index c04001c7c..598402569 100644 --- a/docs/guide/vec_envs.rst +++ b/docs/guide/vec_envs.rst @@ -90,7 +90,9 @@ SB3 VecEnv API is actually close to Gym 0.21 API but differs to Gym 0.26+ API: Note that if ``render_mode != "rgb_array"``, you can only call ``vec_env.render()`` (without argument or with ``mode=env.render_mode``). - the ``reset()`` method doesn't take any parameter. If you want to seed the pseudo-random generator or pass options, - you should call ``vec_env.seed(seed=seed)``/``vec_env.set_options(options)`` and ``obs = vec_env.reset()`` afterward (seed and options are discarded after each call to ``reset()``). + you should call ``vec_env.seed(seed=seed)``/``vec_env.set_options(options)``. + Seed and options parameters will be passed to the next call to ``obs = vec_env.reset()`` and any implicit environment reset invoked by episode termination / truncation. + The provided seed and options will be discarded after each call to ``vec_env.reset()``. - methods and attributes of the underlying Gym envs can be accessed, called and set using ``vec_env.get_attr("attribute_name")``, ``vec_env.env_method("method_name", args1, args2, kwargs1=kwargs1)`` and ``vec_env.set_attr("attribute_name", new_value)``. 
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index cbfe41f9d..e482f22ce 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -15,6 +15,7 @@ New Features: Bug Fixes: ^^^^^^^^^^ +- Fixed seed / options not being passed to the implicit environment resets triggered by episode termination / truncation in ``DummyVecEnv`` and ``SubprocVecEnv`` `SB3-Contrib`_ ^^^^^^^^^^^^^^ @@ -35,6 +36,7 @@ Documentation: ^^^^^^^^^^^^^^ - Added a paragraph on modifying vectorized environment parameters via setters (@fracapuano) - Updated callback code example +- Expanded the description for vec_env.reset seed and options passing Release 2.2.1 (2023-11-17) -------------------------- diff --git a/stable_baselines3/common/vec_env/dummy_vec_env.py b/stable_baselines3/common/vec_env/dummy_vec_env.py index 15ecfb681..0b5e19835 100644 --- a/stable_baselines3/common/vec_env/dummy_vec_env.py +++ b/stable_baselines3/common/vec_env/dummy_vec_env.py @@ -67,7 +67,10 @@ def step_wait(self) -> VecEnvStepReturn: if self.buf_dones[env_idx]: # save final observation where user can get it, then reset self.buf_infos[env_idx]["terminal_observation"] = obs - obs, self.reset_infos[env_idx] = self.envs[env_idx].reset() + # reset the environment, supplying seed and options + seed = self._seeds[env_idx] + options = self._options[env_idx] + obs, self.reset_infos[env_idx] = self.envs[env_idx].reset(seed=seed, options=options) self._save_obs(env_idx, obs) return (self._obs_from_buf(), np.copy(self.buf_rews), np.copy(self.buf_dones), deepcopy(self.buf_infos)) From 7996d3dce5e2ca99d1bf5186b10a6fc76392f94a Mon Sep 17 00:00:00 2001 From: npit Date: Thu, 11 Jan 2024 01:05:06 +0200 Subject: [PATCH 2/7] Update TimeDelay and CustomSubClassedSpaceEnv test envs with options argument --- tests/test_logger.py | 2 +- tests/test_predict.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_logger.py b/tests/test_logger.py index 05bf196a3..21c413145 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -353,7 +353,7 @@ def 
__init__(self, delay: float = 0.01): self.observation_space = spaces.Box(low=-20.0, high=20.0, shape=(4,), dtype=np.float32) self.action_space = spaces.Discrete(2) - def reset(self, seed=None): + def reset(self, seed=None, options=None): return self.observation_space.sample(), {} def step(self, action): diff --git a/tests/test_predict.py b/tests/test_predict.py index 9a845232f..0ccd79a7c 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -30,7 +30,7 @@ def __init__(self): self.observation_space = SubClassedBox(-1, 1, shape=(2,), dtype=np.float32) self.action_space = SubClassedBox(-1, 1, shape=(2,), dtype=np.float32) - def reset(self, seed=None): + def reset(self, seed=None, options=None): return self.observation_space.sample(), {} def step(self, action): From 3e09f4d406fad811201c46bc2bfb0b9ce3eaecc0 Mon Sep 17 00:00:00 2001 From: npit Date: Thu, 11 Jan 2024 19:12:46 +0200 Subject: [PATCH 3/7] Fix missing seed / options args in subprocvecenv env resets due to episode termination --- stable_baselines3/common/vec_env/subproc_vec_env.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/stable_baselines3/common/vec_env/subproc_vec_env.py b/stable_baselines3/common/vec_env/subproc_vec_env.py index c598c735a..2c2b5f886 100644 --- a/stable_baselines3/common/vec_env/subproc_vec_env.py +++ b/stable_baselines3/common/vec_env/subproc_vec_env.py @@ -32,14 +32,15 @@ def _worker( try: cmd, data = remote.recv() if cmd == "step": - observation, reward, terminated, truncated, info = env.step(data) + action, seed, options = data + observation, reward, terminated, truncated, info = env.step(action) # convert to SB3 VecEnv api done = terminated or truncated info["TimeLimit.truncated"] = truncated and not terminated if done: # save final observation where user can get it, then reset info["terminal_observation"] = observation - observation, reset_info = env.reset() + observation, reset_info = env.reset(seed=seed, options=options) 
remote.send((observation, reward, done, info, reset_info)) elif cmd == "reset": maybe_options = {"options": data[1]} if data[1] else {} @@ -121,8 +122,8 @@ def __init__(self, env_fns: List[Callable[[], gym.Env]], start_method: Optional[ super().__init__(len(env_fns), observation_space, action_space) def step_async(self, actions: np.ndarray) -> None: - for remote, action in zip(self.remotes, actions): - remote.send(("step", action)) + for remote, action, seed, option in zip(self.remotes, actions, self._seeds, self._options): + remote.send(("step", (action, seed, option))) self.waiting = True def step_wait(self) -> VecEnvStepReturn: From 4d0c6a6e7b21b2e7c04ea34c1d525eac0250f115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:49:06 +0200 Subject: [PATCH 4/7] rm blank line --- docs/misc/changelog.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 19637ea52..e331d3ff3 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -83,10 +83,8 @@ New Features: Bug Fixes: ^^^^^^^^^^ - - Fixed ``monitor_wrapper`` argument that was not passed to the parent class, and dones argument that wasn't passed to ``_update_into_buffer`` (@corentinlger) - `SB3-Contrib`_ ^^^^^^^^^^^^^^ - Added ``rollout_buffer_class`` and ``rollout_buffer_kwargs`` arguments to MaskablePPO From 7b99ae013cd93d031c09a24e111d7d7fc54b4f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:49:38 +0200 Subject: [PATCH 5/7] rm blank line --- docs/misc/changelog.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index e331d3ff3..1d743d5dd 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -130,7 +130,6 @@ Documentation: - Fixed docstring signature for sum_independent_dims (@stagoverflow) - 
Updated docstring description for ``log_interval`` in the base class (@rushitnshah). - Release 2.2.1 (2023-11-17) -------------------------- **Support for options at reset, bug fixes and better error messages** From 2090268d1af7159edc6b00af8192915115d3f65c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:50:14 +0200 Subject: [PATCH 6/7] rm blank line --- docs/misc/changelog.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 1d743d5dd..8cf4428a3 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -36,9 +36,6 @@ Documentation: ^^^^^^^^^^^^^^ - Expanded the description for vec_env.reset seed and options passing - - - Release 2.3.0 (2024-03-31) -------------------------- From dd7c83b08aa80fbe9002baff36731617c2221072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:58:42 +0200 Subject: [PATCH 7/7] comment step async about seed and option --- stable_baselines3/common/vec_env/subproc_vec_env.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stable_baselines3/common/vec_env/subproc_vec_env.py b/stable_baselines3/common/vec_env/subproc_vec_env.py index 2c2b5f886..b0ef59f09 100644 --- a/stable_baselines3/common/vec_env/subproc_vec_env.py +++ b/stable_baselines3/common/vec_env/subproc_vec_env.py @@ -123,6 +123,7 @@ def __init__(self, env_fns: List[Callable[[], gym.Env]], start_method: Optional[ def step_async(self, actions: np.ndarray) -> None: for remote, action, seed, option in zip(self.remotes, actions, self._seeds, self._options): + # seed and option are used if step triggers a reset remote.send(("step", (action, seed, option))) self.waiting = True