Fixes MARL workflows for recording videos during training/inferencing (#1596)

# Description

Fixes a bug so that running the training workflow on a MARL environment populates
`videos/train`.
See #1595
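
For reference, a minimal sketch of the corrected wrapper ordering is shown below. It is illustrative only: the task name, log folder, and recording trigger are placeholders, and the actual workflow scripts also pass the task's `cfg` and derive the video settings from the `--video` CLI options. The key point is that the MARL environment is converted to a single-agent view *before* `gym.wrappers.RecordVideo` is applied, so the recorder sees an env with a valid `render_mode` and a frame-returning `render()`.

```python
# Illustrative sketch only -- task name, folder, and trigger values are placeholders.
import os

import gymnasium as gym

from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent

# create the Isaac environment with offscreen rendering enabled
env = gym.make("Isaac-Some-MARL-Task-v0", render_mode="rgb_array")

# convert to a single-agent instance *before* wrapping for video recording,
# so RecordVideo sees a Gym-style env that reports render_mode == "rgb_array"
if isinstance(env.unwrapped, DirectMARLEnv):
    env = multi_agent_to_single_agent(env)

# wrap for video recording; clips land in the videos/train folder
env = gym.wrappers.RecordVideo(
    env,
    video_folder=os.path.join("logs", "videos", "train"),
    step_trigger=lambda step: step % 2000 == 0,
    video_length=200,
)
```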

## Type of change
- Bug fix (non-breaking change which fixes an issue)
## Screenshots

![before_and_after](https://github.com/user-attachments/assets/5b662a88-dedd-4220-a0c4-8e7d09ceb51f)
The first run was without the changes, where videos/train stays empty.
The second run is after the changes, with videos/train successfully
populated.

## Checklist

- [x] I have run the [`pre-commit` checks](https://pre-commit.com/) with
`./isaaclab.sh --format`
- [N/A] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [Sort of] I have added tests that prove my fix is effective or that my
feature works; I have verified that it works with train.py for skrl and
rl_games. I have not verified rsl_rl or sb3, nor have I verified
play.py for any of the four. However, I have applied the changes to
all of them since they all follow the exact same structure.
- [ ] I have updated the changelog and the corresponding version in the
extension's `config/extension.toml` file
- [ ] I have added my name to the `CONTRIBUTORS.md` or my name already
exists there; I am unsure whether this fix warrants being listed as a
contributor, but if so I would be happy to be added to `CONTRIBUTORS.md`
(full name: Rishi Veerapaneni).
Rishi-V authored Jan 3, 2025
1 parent e8ea185 commit 7ea72c4
Showing 12 changed files with 45 additions and 32 deletions.
@@ -58,6 +58,7 @@ def __init__(self, env: DirectMARLEnv) -> None:
self.cfg = self.env.cfg
self.sim = self.env.sim
self.scene = self.env.scene
+ self.render_mode = self.env.render_mode

self.single_observation_space = gym.spaces.Dict()
if self._state_as_observation:
@@ -126,7 +127,7 @@ def step(self, action: torch.Tensor) -> VecEnvStepReturn:
return obs, rewards, terminated, time_outs, extras

def render(self, recompute: bool = False) -> np.ndarray | None:
- self.env.render(recompute)
+ return self.env.render(recompute)

def close(self) -> None:
self.env.close()
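
For context, `gym.wrappers.RecordVideo` only records when the wrapped environment reports `render_mode == "rgb_array"`, and it saves the frames that `render()` returns, which is why the wrapper above now exposes `render_mode` and returns the result of `self.env.render(recompute)`. A rough, hypothetical illustration of that contract (not Gym's actual implementation):

```python
# Hypothetical, simplified recorder used only to illustrate the contract the
# wrapper must satisfy; gym.wrappers.RecordVideo is the real implementation.
import numpy as np


class NaiveFrameRecorder:
    def __init__(self, env):
        # recording only makes sense if the env advertises rgb_array rendering
        if getattr(env, "render_mode", None) != "rgb_array":
            raise ValueError("environment must report render_mode='rgb_array'")
        self.env = env
        self.frames: list[np.ndarray] = []

    def capture(self) -> None:
        frame = self.env.render()
        # if render() returns None (as it did before this fix), no frame is stored
        if frame is not None:
            self.frames.append(frame)
```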
@@ -76,5 +76,5 @@ agent:
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
- timesteps: 1600
+ timesteps: 4800
environment_info: log
@@ -78,5 +78,5 @@ agent:
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
- timesteps: 1600
+ timesteps: 4800
environment_info: log
@@ -76,5 +76,5 @@ agent:
# https://skrl.readthedocs.io/en/latest/api/trainers/sequential.html
trainer:
class: SequentialTrainer
- timesteps: 1600
+ timesteps: 4800
environment_info: log
9 changes: 5 additions & 4 deletions source/standalone/workflows/rl_games/play.py
@@ -94,6 +94,11 @@ def main():

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv):
+     env = multi_agent_to_single_agent(env)

# wrap for video recording
if args_cli.video:
video_kwargs = {
@@ -106,10 +111,6 @@ def main():
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)

- # convert to single-agent instance if required by the RL algorithm
- if isinstance(env.unwrapped, DirectMARLEnv):
-     env = multi_agent_to_single_agent(env)

# wrap around environment for rl-games
env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)

9 changes: 5 additions & 4 deletions source/standalone/workflows/rl_games/train.py
@@ -129,6 +129,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv):
+     env = multi_agent_to_single_agent(env)

# wrap for video recording
if args_cli.video:
video_kwargs = {
@@ -141,10 +146,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)

- # convert to single-agent instance if required by the RL algorithm
- if isinstance(env.unwrapped, DirectMARLEnv):
-     env = multi_agent_to_single_agent(env)

# wrap around environment for rl-games
env = RlGamesVecEnvWrapper(env, rl_device, clip_obs, clip_actions)

9 changes: 5 additions & 4 deletions source/standalone/workflows/rsl_rl/play.py
@@ -74,6 +74,11 @@ def main():

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv):
+     env = multi_agent_to_single_agent(env)

# wrap for video recording
if args_cli.video:
video_kwargs = {
@@ -86,10 +91,6 @@ def main():
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)

- # convert to single-agent instance if required by the RL algorithm
- if isinstance(env.unwrapped, DirectMARLEnv):
-     env = multi_agent_to_single_agent(env)

# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env)

8 changes: 4 additions & 4 deletions source/standalone/workflows/rsl_rl/train.py
@@ -100,6 +100,10 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv):
+     env = multi_agent_to_single_agent(env)

# save resume path before creating a new log_dir
if agent_cfg.resume:
resume_path = get_checkpoint_path(log_root_path, agent_cfg.load_run, agent_cfg.load_checkpoint)
@@ -116,10 +120,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)

- # convert to single-agent instance if required by the RL algorithm
- if isinstance(env.unwrapped, DirectMARLEnv):
-     env = multi_agent_to_single_agent(env)

# wrap around environment for rsl-rl
env = RslRlVecEnvWrapper(env)

6 changes: 6 additions & 0 deletions source/standalone/workflows/sb3/play.py
@@ -48,6 +48,7 @@
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize

+ from omni.isaac.lab.envs import DirectMARLEnv, multi_agent_to_single_agent
from omni.isaac.lab.utils.dict import print_dict

import omni.isaac.lab_tasks # noqa: F401
@@ -82,6 +83,11 @@ def main():

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv):
+     env = multi_agent_to_single_agent(env)

# wrap for video recording
if args_cli.video:
video_kwargs = {
9 changes: 5 additions & 4 deletions source/standalone/workflows/sb3/train.py
@@ -104,6 +104,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv):
+     env = multi_agent_to_single_agent(env)

# wrap for video recording
if args_cli.video:
video_kwargs = {
@@ -116,10 +121,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)

- # convert to single-agent instance if required by the RL algorithm
- if isinstance(env.unwrapped, DirectMARLEnv):
-     env = multi_agent_to_single_agent(env)

# wrap around environment for stable baselines
env = Sb3VecEnvWrapper(env)

9 changes: 5 additions & 4 deletions source/standalone/workflows/skrl/play.py
@@ -116,6 +116,11 @@ def main():

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
+     env = multi_agent_to_single_agent(env)

# wrap for video recording
if args_cli.video:
video_kwargs = {
@@ -128,10 +133,6 @@ def main():
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)

- # convert to single-agent instance if required by the RL algorithm
- if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
-     env = multi_agent_to_single_agent(env)

# wrap around environment for skrl
env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")`

9 changes: 5 additions & 4 deletions source/standalone/workflows/skrl/train.py
@@ -151,6 +151,11 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen

# create isaac environment
env = gym.make(args_cli.task, cfg=env_cfg, render_mode="rgb_array" if args_cli.video else None)

+ # convert to single-agent instance if required by the RL algorithm
+ if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
+     env = multi_agent_to_single_agent(env)

# wrap for video recording
if args_cli.video:
video_kwargs = {
@@ -163,10 +168,6 @@ def main(env_cfg: ManagerBasedRLEnvCfg | DirectRLEnvCfg | DirectMARLEnvCfg, agen
print_dict(video_kwargs, nesting=4)
env = gym.wrappers.RecordVideo(env, **video_kwargs)

- # convert to single-agent instance if required by the RL algorithm
- if isinstance(env.unwrapped, DirectMARLEnv) and algorithm in ["ppo"]:
-     env = multi_agent_to_single_agent(env)

# wrap around environment for skrl
env = SkrlVecEnvWrapper(env, ml_framework=args_cli.ml_framework) # same as: `wrap_env(env, wrapper="auto")`

