Migrate to gymnasium (#143)

Co-authored-by: William Blum <william.blum@microsoft.com>
microsoft · Aug 7, 2024 · 09622b8
1 parent 399c327
commit 09622b8
Show file tree

Hide file tree

Showing 32 changed files with 1,389,677 additions and 1,389,677 deletions.
diff --git a/cyberbattle/__init__.py b/cyberbattle/__init__.py
@@ -3,8 +3,8 @@
 
 """Initialize CyberBattleSim module"""
 
-from gym.envs.registration import registry, EnvSpec
-from gym.error import Error
+from gymnasium.envs.registration import registry, EnvSpec
+from gymnasium.error import Error
 
 from . import simulation
 from . import agents

diff --git a/cyberbattle/_env/cyberbattle_env.py b/cyberbattle/_env/cyberbattle_env.py
@@ -8,10 +8,10 @@
 import logging
 import networkx
 from networkx import convert_matrix
-from typing import NamedTuple, Optional, Tuple, List, Dict, TypeVar, TypedDict
+from typing import NamedTuple, Optional, Tuple, List, Dict, TypeVar, TypedDict, cast
 
-from gym import spaces, Env
-from gym.utils import seeding
+from gymnasium import spaces, Env
+from gymnasium.utils import seeding
 
 import numpy
 
@@ -146,7 +146,7 @@ def inverse_dict(self: Dict[Key, Value]) -> Dict[Value, Key]:
 
 
 class DummySpace(spaces.Space):
-    """This class ensures that the values in the gym.spaces.Dict space are derived from gym.Space"""
+    """This class ensures that the values in the gym.spaces.Dict space are derived from gymnasium.Space"""
 
     def __init__(self, sample: object):
         self._sample = sample
@@ -526,7 +526,7 @@ def __init__(
         maximum_node_count = self.__bounds.maximum_node_count
         port_count = self.__bounds.port_count
 
-        action_spaces_dict: dict[str, spaces.Space] = {
+        action_spaces = {
             "local_vulnerability": spaces.MultiDiscrete(
                 # source_node_id, vulnerability_id
                 [maximum_node_count, local_vulnerabilities_count]
@@ -547,7 +547,7 @@ def __init__(
             ),
         }
 
-        self.action_space = DiscriminatedUnion[Action](spaces=action_spaces_dict)
+        self.action_space = DiscriminatedUnion[Action](cast(dict, action_spaces))  # type: ignore
 
         self.observation_space = ObservationSpaceType(self.__bounds)
 

diff --git a/cyberbattle/_env/cyberbattle_env_test.py b/cyberbattle/_env/cyberbattle_env_test.py
@@ -6,7 +6,7 @@
 from cyberbattle._env.option_wrapper import ContextWrapper, random_options
 from cyberbattle._env.cyberbattle_env import AttackerGoal, CyberBattleEnv
 import pytest
-import gym
+import gymnasium as gym
 import numpy as np
 from typing import cast
 

diff --git a/cyberbattle/_env/discriminatedunion.py b/cyberbattle/_env/discriminatedunion.py
@@ -4,12 +4,12 @@
 """A discriminated union space for Gym"""
 
 from collections import OrderedDict
-from typing import Mapping, Optional, TypeVar, Union
+from typing import Any, Mapping, Optional, Sequence, TypeVar, Union
 from typing import Dict as TypingDict, Generic, cast
 import numpy as np
 
-from gym import spaces
-from gym.utils import seeding
+from gymnasium import spaces
+from gymnasium.utils import seeding
 
 T_cov = TypeVar("T_cov", covariant=True)
 
@@ -80,10 +80,10 @@ def __getitem__(self, key: str) -> spaces.Space:
     def __repr__(self) -> str:
         return self.__class__.__name__ + "(" + ", ".join([str(k) + ":" + str(s) for k, s in self.spaces.items()]) + ")"
 
-    def to_jsonable(self, sample_n: list) -> dict:
+    def to_jsonable(self, sample_n: Sequence[dict[str, Any]]) -> dict[str, list[Any]]:
         return super().to_jsonable(sample_n)
 
-    def from_jsonable(self, sample_n: TypingDict[str, list]) -> list[dict]:
+    def from_jsonable(self, sample_n: TypingDict[str, list]) ->  list[OrderedDict[str, Any]]:
         ret = super().from_jsonable(sample_n)
         assert len(ret) == 1
         return ret

diff --git a/cyberbattle/_env/flatten_wrapper.py b/cyberbattle/_env/flatten_wrapper.py
@@ -4,10 +4,10 @@
 
 from collections import OrderedDict
 from sqlite3 import NotSupportedError
-from gym import spaces
+from gymnasium import spaces
 import numpy as np
 from cyberbattle._env.cyberbattle_env import DummySpace, CyberBattleEnv, Action, CyberBattleSpaceKind
-from gym.core import ObservationWrapper, ActionWrapper
+from gymnasium.core import ObservationWrapper, ActionWrapper
 
 
 class FlattenObservationWrapper(ObservationWrapper):

diff --git a/cyberbattle/_env/graph_spaces.py b/cyberbattle/_env/graph_spaces.py
@@ -4,7 +4,7 @@
 from typing import Optional
 
 import networkx as nx
-from gym.spaces import Space, Dict
+from gymnasium.spaces import Space, Dict
 
 
 class BaseGraph(Space):
@@ -82,7 +82,7 @@ class MultiDiGraph(BaseGraph):
 
 
 if __name__ == "__main__":
-    from gym.spaces import Box, Discrete
+    from gymnasium.spaces import Box, Discrete
     import matplotlib.pyplot as plt  # type:ignore
 
     space = DiGraph(

diff --git a/cyberbattle/_env/graph_wrapper.py b/cyberbattle/_env/graph_wrapper.py
@@ -3,7 +3,7 @@
 
 from typing import Union, Tuple
 
-import gym
+import gymnasium as gym
 import numpy as onp
 import networkx as nx
 

diff --git a/cyberbattle/_env/option_wrapper.py b/cyberbattle/_env/option_wrapper.py
@@ -3,8 +3,8 @@
 
 from typing import NamedTuple
 
-import gym
-from gym.spaces import Space, Discrete, Tuple
+import gymnasium as gym
+from gymnasium.spaces import Space, Discrete, Tuple
 import numpy as onp
 from cyberbattle._env.cyberbattle_env import Action, CyberBattleEnv
 

diff --git a/cyberbattle/agents/baseline/agent_wrapper.py b/cyberbattle/agents/baseline/agent_wrapper.py
@@ -9,7 +9,7 @@
 from typing import Optional, List, Tuple, overload
 import enum
 import numpy as np
-from gym import spaces, Wrapper
+from gymnasium import spaces, Wrapper
 from numpy import ndarray
 import cyberbattle._env.cyberbattle_env as cyberbattle_env
 import logging

diff --git a/cyberbattle/agents/baseline/baseline_test.py b/cyberbattle/agents/baseline/baseline_test.py
@@ -5,7 +5,7 @@
 """Test training of baseline agents."""
 
 import torch
-import gym
+import gymnasium as gym
 import logging
 import sys
 import cyberbattle._env.cyberbattle_env as cyberbattle_env

diff --git a/cyberbattle/agents/baseline/notebooks/notebook_benchmark.py b/cyberbattle/agents/baseline/notebooks/notebook_benchmark.py
@@ -15,7 +15,7 @@
 # %%
 import sys
 import logging
-import gym
+import gymnasium as gym
 import cyberbattle.agents.baseline.learner as learner
 import cyberbattle.agents.baseline.plotting as p
 import cyberbattle.agents.baseline.agent_wrapper as w

diff --git a/cyberbattle/agents/baseline/notebooks/notebook_dql_debug.py b/cyberbattle/agents/baseline/notebooks/notebook_dql_debug.py
@@ -23,7 +23,7 @@
 # %%
 import sys
 import logging
-import gym
+import gymnasium as gym
 import cyberbattle.agents.baseline.learner as learner
 import cyberbattle.agents.baseline.agent_wrapper as w
 import cyberbattle.agents.baseline.agent_dql as dqla

diff --git a/cyberbattle/agents/baseline/notebooks/notebook_dql_transfer.py b/cyberbattle/agents/baseline/notebooks/notebook_dql_transfer.py
@@ -16,7 +16,7 @@
 import os
 import sys
 import logging
-import gym
+import gymnasium as gym
 import torch
 
 import cyberbattle.agents.baseline.learner as learner

diff --git a/cyberbattle/agents/baseline/notebooks/notebook_randlookups.py b/cyberbattle/agents/baseline/notebooks/notebook_randlookups.py
@@ -14,7 +14,7 @@
 from cyberbattle._env.cyberbattle_env import AttackerGoal
 from cyberbattle.agents.baseline.agent_randomcredlookup import CredentialCacheExploiter
 import cyberbattle.agents.baseline.learner as learner
-import gym
+import gymnasium as gym
 import logging
 import sys
 import cyberbattle.agents.baseline.plotting as p

diff --git a/cyberbattle/agents/baseline/notebooks/notebook_tabularq.py b/cyberbattle/agents/baseline/notebooks/notebook_tabularq.py
@@ -14,7 +14,7 @@
 import sys
 import logging
 from typing import cast
-import gym
+import gymnasium as gym
 import numpy as np
 import matplotlib.pyplot as plt  # type:ignore
 from cyberbattle.agents.baseline.learner import TrainedLearner

diff --git a/cyberbattle/agents/baseline/notebooks/notebook_withdefender.py b/cyberbattle/agents/baseline/notebooks/notebook_withdefender.py
@@ -11,7 +11,7 @@
 # %%
 import sys
 import logging
-import gym
+import gymnasium as gym
 import importlib
 
 import cyberbattle.agents.baseline.learner as learner

diff --git a/cyberbattle/agents/baseline/run.py b/cyberbattle/agents/baseline/run.py
@@ -15,7 +15,7 @@
 """
 
 import torch
-import gym
+import gymnasium as gym
 import logging
 import sys
 import asciichartpy

diff --git a/notebooks/chainnetwork-optionwrapper.ipynb b/notebooks/chainnetwork-optionwrapper.ipynb