From 57588ad536b429e973f0b1ed9232803de83b9532 Mon Sep 17 00:00:00 2001
From: Darshan Patil
Date: Wed, 7 Jun 2023 18:29:49 -0400
Subject: [PATCH] Deleted outdated marlgrid

---
 hive/envs/__init__.py                  |   6 -
 hive/envs/marlgrid/__init__.py         |   2 -
 hive/envs/marlgrid/ma_envs/__init__.py |  33 ----
 hive/envs/marlgrid/ma_envs/base.py     | 200 --------------------
 hive/envs/marlgrid/ma_envs/checkers.py | 222 ----------------------
 hive/envs/marlgrid/ma_envs/pursuit.py  | 215 ---------------------
 hive/envs/marlgrid/ma_envs/switch.py   | 235 -----------------------
 hive/envs/marlgrid/marlgrid.py         |  81 ---------
 hive/envs/marlgrid/requirements.txt    |   3 -
 9 files changed, 997 deletions(-)
 delete mode 100644 hive/envs/marlgrid/__init__.py
 delete mode 100644 hive/envs/marlgrid/ma_envs/__init__.py
 delete mode 100644 hive/envs/marlgrid/ma_envs/base.py
 delete mode 100644 hive/envs/marlgrid/ma_envs/checkers.py
 delete mode 100644 hive/envs/marlgrid/ma_envs/pursuit.py
 delete mode 100644 hive/envs/marlgrid/ma_envs/switch.py
 delete mode 100644 hive/envs/marlgrid/marlgrid.py
 delete mode 100644 hive/envs/marlgrid/requirements.txt

diff --git a/hive/envs/__init__.py b/hive/envs/__init__.py
index 4c7e8f83..50e820fa 100644
--- a/hive/envs/__init__.py
+++ b/hive/envs/__init__.py
@@ -2,11 +2,6 @@
 from hive.envs.env_spec import EnvSpec
 from hive.envs.gym.gym_env import GymEnv
 
-try:
-    from hive.envs.marlgrid import MarlGridEnv
-except ImportError:
-    MarlGridEnv = None
-
 try:
     from hive.envs.pettingzoo import PettingZooEnv
 except ImportError:
@@ -18,7 +13,6 @@
     BaseEnv,
     {
         "GymEnv": GymEnv,
-        "MarlGridEnv": MarlGridEnv,
         "PettingZooEnv": PettingZooEnv,
     },
 )
diff --git a/hive/envs/marlgrid/__init__.py b/hive/envs/marlgrid/__init__.py
deleted file mode 100644
index 91b9a3ae..00000000
--- a/hive/envs/marlgrid/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from hive.envs.marlgrid import ma_envs
-from hive.envs.marlgrid.marlgrid import MarlGridEnv
diff --git a/hive/envs/marlgrid/ma_envs/__init__.py b/hive/envs/marlgrid/ma_envs/__init__.py
deleted file mode 100644
index 6d637f4f..00000000
--- a/hive/envs/marlgrid/ma_envs/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from gym.envs.registration import register
-from marlgrid.envs import register_marl_env
-
-from hive.envs.marlgrid.ma_envs.checkers import CheckersMultiGrid
-from hive.envs.marlgrid.ma_envs.pursuit import PursuitMultiGrid
-from hive.envs.marlgrid.ma_envs.switch import SwitchMultiGrid
-
-register_marl_env(
-    "MarlGrid-2AgentCheckers8x8-v0",
-    CheckersMultiGrid,
-    n_agents=2,
-    grid_size=8,
-    view_size=5,  # Needs to be the same as grid_size if full_obs = True.
-    env_kwargs={"max_steps": 100, "full_obs": False},
-)
-
-register_marl_env(
-    "MarlGrid-2Agent1RandomPursuit8x8-v0",
-    PursuitMultiGrid,
-    n_agents=3,
-    grid_size=8,
-    view_size=5,  # Needs to be the same as grid_size if full_obs = True.
-    env_kwargs={"max_steps": 500, "full_obs": False},
-)
-
-register_marl_env(
-    "MarlGrid-2AgentSwitch8x8-v0",
-    SwitchMultiGrid,
-    n_agents=2,
-    grid_size=12,
-    view_size=12,  # Needs to be the same as grid_size if full_obs = True.
-    env_kwargs={"max_steps": 500, "full_obs": True},
-)
diff --git a/hive/envs/marlgrid/ma_envs/base.py b/hive/envs/marlgrid/ma_envs/base.py
deleted file mode 100644
index 2f1833bd..00000000
--- a/hive/envs/marlgrid/ma_envs/base.py
+++ /dev/null
@@ -1,200 +0,0 @@
-import gymnasium as gym
-import numpy as np
-from marlgrid.base import MultiGrid, MultiGridEnv, rotate_grid
-from marlgrid.rendering import SimpleImageViewer
-
-TILE_PIXELS = 32
-
-
-class MultiGridEnvHive(MultiGridEnv):
-    def __init__(
-        self,
-        agents,
-        grid_size=None,
-        width=None,
-        height=None,
-        max_steps=100,
-        reward_decay=True,
-        seed=1337,
-        respawn=False,
-        ghost_mode=True,
-        full_obs=False,
-        agent_spawn_kwargs={},
-    ):
-        self._full_obs = full_obs
-        super().__init__(
-            agents,
-            grid_size,
-            width,
-            height,
-            max_steps,
-            reward_decay,
-            seed,
-            respawn,
-            ghost_mode,
-            agent_spawn_kwargs,
-        )
-
-    def gen_obs_grid(self, agent):
-        # If the agent is inactive, return an empty grid and a visibility mask that hides everything.
-        if not agent.active:
-            # Below: not sure the orientation is correct, but as of 6/27/2020 that doesn't matter
-            # because agent views are usually square and this grid won't be used for anything.
-            grid = MultiGrid(
-                (agent.view_size, agent.view_size), orientation=agent.dir + 1
-            )
-            vis_mask = np.zeros((agent.view_size, agent.view_size), dtype=bool)
-            return grid, vis_mask
-
-        if self._full_obs:
-            topX, topY, botX, botY = 0, 0, self.width, self.height
-            grid = self.grid.slice(topX, topY, self.width, self.height, rot_k=0)
-            vis_mask = np.ones((self.width, self.height), dtype=bool)
-        else:
-            topX, topY, botX, botY = agent.get_view_exts()
-            grid = self.grid.slice(
-                topX, topY, agent.view_size, agent.view_size, rot_k=agent.dir + 1
-            )
-            # Process occluders and visibility.
-            # Note that this incurs some slight performance cost.
-            vis_mask = agent.process_vis(grid.opacity)
-
-        # Warning about the rest of the function:
-        # it allows masking away objects that the agent isn't supposed to see,
-        # but it breaks consistency between the states of the grid objects in the
-        # partial views and the grid objects overall.
-        if len(getattr(agent, "hide_item_types", [])) > 0:
-            for i in range(grid.width):
-                for j in range(grid.height):
-                    item = grid.get(i, j)
-                    if (
-                        (item is not None)
-                        and (item is not agent)
-                        and (item.type in agent.hide_item_types)
-                    ):
-                        if len(item.agents) > 0:
-                            grid.set(i, j, item.agents[0])
-                        else:
-                            grid.set(i, j, None)
-
-        return grid, vis_mask
-
-    def render(
-        self,
-        mode="human",
-        close=False,
-        highlight=True,
-        tile_size=TILE_PIXELS,
-        show_agent_views=True,
-        max_agents_per_col=3,
-        agent_col_width_frac=0.3,
-        agent_col_padding_px=2,
-        pad_grey=100,
-    ):
-        """Render the whole-grid human view."""
-
-        if close:
-            if self.window:
-                self.window.close()
-            return
-
-        if mode == "human" and not self.window:
-            self.window = SimpleImageViewer(caption="Marlgrid")
-
-        # Compute which cells are visible to the agents.
-        highlight_mask = np.full((self.width, self.height), False, dtype=bool)
-        for agent in self.agents:
-            if agent.active:
-                if self._full_obs:
-                    xlow, ylow, xhigh, yhigh = 0, 0, self.width, self.height
-                else:
-                    xlow, ylow, xhigh, yhigh = agent.get_view_exts()
-
-                dxlow, dylow = max(0, 0 - xlow), max(0, 0 - ylow)
-                dxhigh, dyhigh = max(0, xhigh - self.grid.width), max(
-                    0, yhigh - self.grid.height
-                )
-                if agent.see_through_walls:
-                    highlight_mask[
-                        xlow + dxlow : xhigh - dxhigh, ylow + dylow : yhigh - dyhigh
-                    ] = True
-                else:
-                    a, b = self.gen_obs_grid(agent)
-                    highlight_mask[
-                        xlow + dxlow : xhigh - dxhigh, ylow + dylow : yhigh - dyhigh
-                    ] |= rotate_grid(b, a.orientation)[
-                        dxlow : (xhigh - xlow) - dxhigh, dylow : (yhigh - ylow) - dyhigh
-                    ]
-
-        # Render the whole grid.
-        img = self.grid.render(
-            tile_size, highlight_mask=highlight_mask if highlight else None
-        )
-        rescale = lambda X, rescale_factor=2: np.kron(
-            X, np.ones((int(rescale_factor), int(rescale_factor), 1))
-        )
-
-        if show_agent_views:
-            target_partial_width = int(
-                img.shape[0] * agent_col_width_frac - 2 * agent_col_padding_px
-            )
-            target_partial_height = (
-                img.shape[1] - 2 * agent_col_padding_px
-            ) // max_agents_per_col
-
-            agent_views = [self.gen_agent_obs(agent) for agent in self.agents]
-            agent_views = [
-                view["pov"] if isinstance(view, dict) else view for view in agent_views
-            ]
-            agent_views = [
-                rescale(
-                    view,
-                    min(
-                        target_partial_width / view.shape[0],
-                        target_partial_height / view.shape[1],
-                    ),
-                )
-                for view in agent_views
-            ]
-            agent_views = [
-                agent_views[pos : pos + max_agents_per_col]
-                for pos in range(0, len(agent_views), max_agents_per_col)
-            ]
-
-            f_offset = (
-                lambda view: np.array(
-                    [
-                        target_partial_height - view.shape[1],
-                        target_partial_width - view.shape[0],
-                    ]
-                )
-                // 2
-            )
-
-            cols = []
-            for col_views in agent_views:
-                col = np.full(
-                    (img.shape[0], target_partial_width + 2 * agent_col_padding_px, 3),
-                    pad_grey,
-                    dtype=np.uint8,
-                )
-                for k, view in enumerate(col_views):
-                    offset = f_offset(view) + agent_col_padding_px
-                    offset[0] += k * target_partial_height
-                    col[
-                        offset[0] : offset[0] + view.shape[0],
-                        offset[1] : offset[1] + view.shape[1],
-                        :,
-                    ] = view
-                cols.append(col)
-
-            img = np.concatenate((img, *cols), axis=1)
-
-        if mode == "human":
-            if not self.window.isopen:
-                self.window.imshow(img)
-                self.window.window.set_caption("Marlgrid")
-            else:
-                self.window.imshow(img)
-
-        return img
diff --git a/hive/envs/marlgrid/ma_envs/checkers.py b/hive/envs/marlgrid/ma_envs/checkers.py
deleted file mode 100644
index 62065565..00000000
--- a/hive/envs/marlgrid/ma_envs/checkers.py
+++ /dev/null
@@ -1,222 +0,0 @@
-import numpy as np
-from marlgrid.base import MultiGrid
-from marlgrid.objects import Goal, GridAgent
-
-from hive.envs.marlgrid.ma_envs.base import MultiGridEnvHive
-
-
-class CheckersMultiGrid(MultiGridEnvHive):
-    """
-    Checkers environment based on Sunehag et al. 2017
-
-    "... The map contains apples and lemons. The first player is very sensitive and scores 10 for
-    the team for an apple (green square) and −10 for a lemon (orange square).
-    The second, less sensitive player scores 1 for the team for an apple and −1 for a lemon.
-    There is a wall of lemons between the players and the apples.
-    Apples and lemons disappear when collected.
-    The environment resets when all apples are eaten or the maximum number of steps is reached."
-    """
-
-    def _gen_grid(self, width, height):
-        self.num_rows = 3
-        self.grid = MultiGrid((width, height))
-        self.grid.wall_rect(0, 0, width, height)
-        apple = Goal(color="green", reward=10)
-        orange = Goal(color="red", reward=-10)
-        self.num_remained_apples = 0
-        for j in range(self.num_rows):
-            oranges_loc = [2 * i + 1 + j % 2 for i in range(width // 2 - 1)]
-            apples_loc = [2 * i + 1 + (j + 1) % 2 for i in range(width // 2 - 1)]
-            for orange_loc in oranges_loc:
-                self.put_obj(orange, orange_loc, j + 1)
-
-            for apple_loc in apples_loc:
-                self.put_obj(apple, apple_loc, j + 1)
-                self.num_remained_apples += 1
-
-        self.agent_spawn_kwargs = {}
-        self.ghost_mode = False
-
-    def reset(self, **kwargs):
-        for agent in self.agents:
-            agent.agents = []
-            agent.reset(new_episode=True)
-
-        self._gen_grid(self.width, self.height)
-
-        for agent in self.agents:
-            if agent.spawn_delay == 0:
-                self.place_obj(
-                    agent,
-                    top=(0, self.num_rows + 1),
-                    size=(self.width, self.height - self.num_rows - 1),
-                    **self.agent_spawn_kwargs,
-                )
-                agent.activate()
-
-        self.step_count = 0
-        obs = self.gen_obs()
-        for ag_idx, _ in enumerate(obs):
-            obs[ag_idx] = np.array(obs[ag_idx], dtype=np.uint8)
-        return obs
-
-    def step(self, actions):
-        # Spawn agents if it's time.
-        for agent in self.agents:
-            if (
-                not agent.active
-                and not agent.done
-                and self.step_count >= agent.spawn_delay
-            ):
-                self.place_obj(agent, **self.agent_spawn_kwargs)
-                agent.activate()
-
-        assert len(actions) == len(self.agents)
-
-        step_rewards = np.zeros((len(self.agents)), dtype=float)
-
-        self.step_count += 1
-
-        iter_agents = list(enumerate(zip(self.agents, actions)))
-        iter_order = np.arange(len(iter_agents))
-        self.np_random.shuffle(iter_order)
-        for shuffled_ix in iter_order:
-            agent_no, (agent, action) = iter_agents[shuffled_ix]
-            agent.step_reward = 0
-
-            if agent.active:
-                cur_pos = agent.pos[:]
-                cur_cell = self.grid.get(*cur_pos)
-                fwd_pos = agent.front_pos[:]
-                fwd_cell = self.grid.get(*fwd_pos)
-                agent_moved = False
-
-                # Rotate left
-                if action == agent.actions.left:
-                    agent.dir = (agent.dir - 1) % 4
-
-                # Rotate right
-                elif action == agent.actions.right:
-                    agent.dir = (agent.dir + 1) % 4
-
-                # Move forward
-                elif action == agent.actions.forward:
-                    # Under the following conditions, the agent can move forward.
-                    can_move = fwd_cell is None or fwd_cell.can_overlap()
-                    if self.ghost_mode is False and isinstance(fwd_cell, GridAgent):
-                        can_move = False
-
-                    if can_move:
-                        agent_moved = True
-                        # Add agent to new cell
-                        if fwd_cell is None or isinstance(fwd_cell, Goal):
-                            self.grid.set(*fwd_pos, agent)
-                            agent.pos = fwd_pos
-                        else:
-                            fwd_cell.agents.append(agent)
-                            agent.pos = fwd_pos
-
-                        # Remove agent from old cell
-                        if cur_cell == agent:
-                            self.grid.set(*cur_pos, None)
-                        else:
-                            assert cur_cell.can_overlap()
-                            cur_cell.agents.remove(agent)
-
-                        # Add agent's agents to old cell
-                        for left_behind in agent.agents:
-                            cur_obj = self.grid.get(*cur_pos)
-                            if cur_obj is None:
-                                self.grid.set(*cur_pos, left_behind)
-                            elif cur_obj.can_overlap():
-                                cur_obj.agents.append(left_behind)
-                            else:
-                                raise ValueError(
-                                    "How was agent there in the first place?"
-                                )
-
-                        # After moving, the agent shouldn't contain any other agents.
-                        agent.agents = []
-
-                        # Rewards are only obtained if fwd_cell has a "get_reward" method.
-                        if hasattr(fwd_cell, "get_reward"):
-                            rwd = fwd_cell.get_reward(agent)
-
-                            # Scale down the reward for the less sensitive agent.
-                            if agent_no == 0:
-                                rwd /= 10
-                            if bool(self.reward_decay):
-                                rwd *= 1.0 - 0.9 * (self.step_count / self.max_steps)
-                            step_rewards[agent_no] += rwd
-                            agent.reward(rwd)
-                            if rwd > 0:
-                                self.num_remained_apples -= 1
-
-                # Pick up an object
-                elif action == agent.actions.pickup:
-                    if fwd_cell and fwd_cell.can_pickup() and agent.carrying is None:
-                        agent.carrying = fwd_cell
-                        agent.carrying.cur_pos = np.array([-1, -1])
-                        self.grid.set(*fwd_pos, None)
-
-                # Drop an object
-                elif action == agent.actions.drop:
-                    if not fwd_cell and agent.carrying:
-                        self.grid.set(*fwd_pos, agent.carrying)
-                        agent.carrying.cur_pos = fwd_pos
-                        agent.carrying = None
-
-                # Toggle/activate an object
-                elif action == agent.actions.toggle:
-                    if fwd_cell:
-                        wasted = bool(fwd_cell.toggle(agent, fwd_pos))
-
-                # Done action (not used by default)
-                elif action == agent.actions.done:
-                    pass
-
-                else:
-                    raise ValueError(f"Environment can't handle action {action}.")
-
-                agent.on_step(fwd_cell if agent_moved else None)
-
-        # If any of the agents individually are "done" (hit lava or, in some cases,
-        # reached a goal) but the env requires respawning, then respawn those agents.
-        for agent in self.agents:
-            if agent.done:
-                if self.respawn:
-                    resting_place_obj = self.grid.get(*agent.pos)
-                    if resting_place_obj == agent:
-                        if agent.agents:
-                            self.grid.set(*agent.pos, agent.agents[0])
-                            agent.agents[0].agents += agent.agents[1:]
-                        else:
-                            self.grid.set(*agent.pos, None)
-                    else:
-                        resting_place_obj.agents.remove(agent)
-                        resting_place_obj.agents += agent.agents[:]
-                        agent.agents = []
-
-                    agent.reset(new_episode=False)
-                    self.place_obj(agent, **self.agent_spawn_kwargs)
-                    agent.activate()
-                else:  # If the agent shouldn't be respawned, deactivate it.
-                    agent.deactivate()
-
-        # The episode overall is done if all the agents are done,
-        # if it exceeds the step limit, or if all the apples are collected.
-        done = (
-            (self.step_count >= self.max_steps)
-            or all([agent.done for agent in self.agents])
-            or self.num_remained_apples == 0
-        )
-
-        obs = [
-            np.asarray(self.gen_agent_obs(agent), dtype=np.uint8)
-            for agent in self.agents
-        ]
-
-        # Team reward
-        step_rewards = np.array([np.sum(step_rewards) for _ in self.agents])
-
-        return obs, step_rewards, done, {}
diff --git a/hive/envs/marlgrid/ma_envs/pursuit.py b/hive/envs/marlgrid/ma_envs/pursuit.py
deleted file mode 100644
index 8137b994..00000000
--- a/hive/envs/marlgrid/ma_envs/pursuit.py
+++ /dev/null
@@ -1,215 +0,0 @@
-import numpy as np
-from marlgrid.base import MultiGrid
-from marlgrid.objects import Goal, GridAgent, Lava, Wall
-
-from hive.envs.marlgrid.ma_envs.base import MultiGridEnvHive
-
-
-class PursuitMultiGrid(MultiGridEnvHive):
-    """
-    Pursuit-evasion environment based on Gupta et al. 2017
-
-    "The pursuit-evasion domain consists of two sets of agents: evaders and pursuers.
-    The evaders are trying to avoid pursuers, while the pursuers are
-    trying to catch the evaders. The pursuers receive a reward of 5.0 when
-    they surround an evader or corner the agent."
-    """
-
-    metadata = {}
-
-    def _gen_grid(self, width, height):
-        self.grid = MultiGrid((width, height))
-        self.grid.wall_rect(0, 0, width, height)
-        self.ghost_mode = False
-
-    def reset(self, **kwargs):
-        obs = super().reset()
-        for ag_idx, _ in enumerate(obs):
-            obs[ag_idx] = np.array(obs[ag_idx], dtype=np.uint8)
-        return obs
-
-    def step(self, actions):
-        # Spawn agents if it's time.
-        for agent in self.agents:
-            if (
-                not agent.active
-                and not agent.done
-                and self.step_count >= agent.spawn_delay
-            ):
-                self.place_obj(agent, **self.agent_spawn_kwargs)
-                agent.activate()
-
-        num_learning_agents = len(actions)
-        num_rand_agents = self.num_agents - len(actions)
-
-        step_rewards = np.zeros((num_learning_agents), dtype=float)
-
-        self.step_count += 1
-        for i in range(num_learning_agents, self.num_agents):
-            actions.append(self.action_space[i].sample())
-        iter_agents = list(enumerate(zip(self.agents, actions)))
-        iter_order = np.arange(len(iter_agents))
-        for shuffled_ix in iter_order:
-            agent_no, (agent, action) = iter_agents[shuffled_ix]
-            agent.step_reward = 0
-
-            if agent.active:
-                cur_pos = agent.pos[:]
-                cur_cell = self.grid.get(*cur_pos)
-                fwd_pos = agent.front_pos[:]
-                fwd_cell = self.grid.get(*fwd_pos)
-                agent_moved = False
-                bot_pos = agent.pos + np.array([0, -1])
-                bot_cell = self.grid.get(*bot_pos)
-                abov_pos = agent.pos + np.array([0, +1])
-                abov_cell = self.grid.get(*abov_pos)
-                left_pos = agent.pos + np.array([-1, 0])
-                left_cell = self.grid.get(*left_pos)
-                right_pos = agent.pos + np.array([+1, 0])
-                right_cell = self.grid.get(*right_pos)
-
-                w = 0
-                a = 0
-                surrounding_cells = [bot_cell, abov_cell, left_cell, right_cell]
-                if agent_no == len(self.agents) - num_rand_agents:
-                    for cell in surrounding_cells:
-                        if isinstance(cell, GridAgent):
-                            a += 1
-                        if isinstance(cell, Wall):
-                            w += 1
-
-                    if a == len(self.agents) - num_rand_agents or (w == 2 and a == 2):
-                        step_rewards[:] += np.array(
-                            [5] * (len(self.agents) - num_rand_agents)
-                        )
-                        for agent in self.agents:
-                            agent.done = True
-
-                # Rotate left
-                if action == agent.actions.left:
-                    agent.dir = (agent.dir - 1) % 4
-
-                # Rotate right
-                elif action == agent.actions.right:
-                    agent.dir = (agent.dir + 1) % 4
-
-                # Move forward
-                elif action == agent.actions.forward:
-                    # Under the following conditions, the agent can move forward.
-                    can_move = fwd_cell is None or fwd_cell.can_overlap()
-                    if self.ghost_mode is False and isinstance(fwd_cell, GridAgent):
-                        can_move = False
-
-                    if can_move:
-                        agent_moved = True
-                        # Add agent to new cell
-                        if fwd_cell is None:
-                            self.grid.set(*fwd_pos, agent)
-                            agent.pos = fwd_pos
-                        else:
-                            fwd_cell.agents.append(agent)
-                            agent.pos = fwd_pos
-
-                        # Remove agent from old cell
-                        if cur_cell == agent:
-                            self.grid.set(*cur_pos, None)
-                        else:
-                            assert cur_cell.can_overlap()
-                            cur_cell.agents.remove(agent)
-
-                        # Add agent's agents to old cell
-                        for left_behind in agent.agents:
-                            cur_obj = self.grid.get(*cur_pos)
-                            if cur_obj is None:
-                                self.grid.set(*cur_pos, left_behind)
-                            elif cur_obj.can_overlap():
-                                cur_obj.agents.append(left_behind)
-                            else:
-                                raise ValueError(
-                                    "How was agent there in the first place?"
-                                )
-
-                        # After moving, the agent shouldn't contain any other agents.
-                        agent.agents = []
-
-                        # Rewards are only obtained if fwd_cell has a "get_reward" method.
-                        if hasattr(fwd_cell, "get_reward"):
-                            rwd = fwd_cell.get_reward(agent)
-                            if bool(self.reward_decay):
-                                rwd *= 1.0 - 0.9 * (self.step_count / self.max_steps)
-                            step_rewards[agent_no] += rwd
-                            agent.reward(rwd)
-
-                        if isinstance(fwd_cell, (Lava, Goal)):
-                            agent.done = True
-
-                # Pick up an object
-                elif action == agent.actions.pickup:
-                    if fwd_cell and fwd_cell.can_pickup():
-                        if agent.carrying is None:
-                            agent.carrying = fwd_cell
-                            agent.carrying.cur_pos = np.array([-1, -1])
-                            self.grid.set(*fwd_pos, None)
-                    else:
-                        pass
-
-                # Drop an object
-                elif action == agent.actions.drop:
-                    if not fwd_cell and agent.carrying:
-                        self.grid.set(*fwd_pos, agent.carrying)
-                        agent.carrying.cur_pos = fwd_pos
-                        agent.carrying = None
-                    else:
-                        pass
-
-                # Toggle/activate an object
-                elif action == agent.actions.toggle:
-                    if fwd_cell:
-                        wasted = bool(fwd_cell.toggle(agent, fwd_pos))
-                    else:
-                        pass
-
-                # Done action (not used by default)
-                elif action == agent.actions.done:
-                    pass
-
-                else:
-                    raise ValueError(f"Environment can't handle action {action}.")
-
-                agent.on_step(fwd_cell if agent_moved else None)
-
-        # If any of the agents individually are "done" (hit lava or, in some cases,
-        # reached a goal) but the env requires respawning, then respawn those agents.
-        for agent in self.agents:
-            if agent.done:
-                if self.respawn:
-                    resting_place_obj = self.grid.get(*agent.pos)
-                    if resting_place_obj == agent:
-                        if agent.agents:
-                            self.grid.set(*agent.pos, agent.agents[0])
-                            agent.agents[0].agents += agent.agents[1:]
-                        else:
-                            self.grid.set(*agent.pos, None)
-                    else:
-                        resting_place_obj.agents.remove(agent)
-                        resting_place_obj.agents += agent.agents[:]
-                        agent.agents = []
-
-                    agent.reset(new_episode=False)
-                    self.place_obj(agent, **self.agent_spawn_kwargs)
-                    agent.activate()
-                else:  # If the agent shouldn't be respawned, deactivate it.
-                    agent.deactivate()
-
-        # The episode overall is done if all the agents are done
-        # or if it exceeds the step limit.
-        done = (self.step_count >= self.max_steps) or all(
-            [agent.done for agent in self.agents[:num_learning_agents]]
-        )
-
-        obs = [
-            np.asarray(self.gen_agent_obs(agent), dtype=np.uint8)
-            for agent in self.agents[:num_learning_agents]
-        ]
-
-        return obs, step_rewards, done, {}
diff --git a/hive/envs/marlgrid/ma_envs/switch.py b/hive/envs/marlgrid/ma_envs/switch.py
deleted file mode 100644
index ca13e09f..00000000
--- a/hive/envs/marlgrid/ma_envs/switch.py
+++ /dev/null
@@ -1,235 +0,0 @@
-import numpy as np
-from gym_minigrid.rendering import fill_coords, point_in_rect
-from marlgrid.base import MultiGrid
-from marlgrid.objects import Floor, Goal, GridAgent
-
-from hive.envs.marlgrid.ma_envs.base import MultiGridEnvHive
-
-
-class SwitchMultiGrid(MultiGridEnvHive):
-    """
-    Switch environment based on Sunehag et al. 2017
-
-    Two agents start on opposite sides of a map that is divided by walls, with a
-    single one-cell-wide corridor connecting the two sides. Each agent must cross
-    to the far side and step onto the floor tile matching its own color, earning
-    a reward of 5 when it does. The environment resets when all agents reach
-    their goal tiles or the maximum number of steps is reached.
-    """
-
-    def _gen_grid(self, width, height):
-        self.grid = MultiGrid((width, height))
-        self.grid.wall_rect(0, 0, width, height)
-        for row in range(height - 2):
-            if row != (height - 2) // 2:
-                self.grid.horz_wall(3, row + 1, width - 6)
-
-        self.put_obj(SimpleFloor(color="blue"), 1, 1)
-        self.put_obj(SimpleFloor(color="red"), self.width - 2, self.height - 2)
-        self.agent_spawn_kwargs = {}
-        self.ghost_mode = False
-
-    def reset(self, **kwargs):
-        for agent in self.agents:
-            agent.agents = []
-            agent.reset(new_episode=True)
-
-        self._gen_grid(self.width, self.height)
-
-        for id, agent in enumerate(self.agents):
-            if id == 0:
-                top = (0, 0)
-            else:
-                top = (self.width - 3, 0)
-            if agent.spawn_delay == 0:
-                self.place_obj(
-                    agent,
-                    top=top,
-                    size=(2, self.height - 1),
-                    **self.agent_spawn_kwargs,
-                )
-                agent.activate()
-
-        self.step_count = 0
-        obs = self.gen_obs()
-        for ag_idx, _ in enumerate(obs):
-            obs[ag_idx] = np.array(obs[ag_idx], dtype=np.uint8)
-        return obs
-
-    def step(self, actions):
-        # Spawn agents if it's time.
-        for agent in self.agents:
-            if (
-                not agent.active
-                and not agent.done
-                and self.step_count >= agent.spawn_delay
-            ):
-                self.place_obj(agent, **self.agent_spawn_kwargs)
-                agent.activate()
-
-        if len(actions) != len(self.agents):
-            raise ValueError(
-                f"Number of actions is not equal to the number of agents: {len(actions)} != {len(self.agents)}"
-            )
-
-        step_rewards = np.zeros((len(self.agents)), dtype=float)
-
-        self.step_count += 1
-
-        iter_agents = list(enumerate(zip(self.agents, actions)))
-        iter_order = np.arange(len(iter_agents))
-        self.np_random.shuffle(iter_order)
-        for shuffled_ix in iter_order:
-            agent_no, (agent, action) = iter_agents[shuffled_ix]
-            agent.step_reward = 0
-
-            if agent.active:
-                cur_pos = agent.pos[:]
-                cur_cell = self.grid.get(*cur_pos)
-                fwd_pos = agent.front_pos[:]
-                fwd_cell = self.grid.get(*fwd_pos)
-                agent_moved = False
-
-                # Rotate left
-                if action == agent.actions.left:
-                    agent.dir = (agent.dir - 1) % 4
-
-                # Rotate right
-                elif action == agent.actions.right:
-                    agent.dir = (agent.dir + 1) % 4
-
-                # Move forward
-                elif action == agent.actions.forward:
-                    # Under the following conditions, the agent can move forward.
-                    can_move = fwd_cell is None or fwd_cell.can_overlap()
-                    if self.ghost_mode is False and isinstance(fwd_cell, GridAgent):
-                        can_move = False
-
-                    if can_move:
-                        agent_moved = True
-                        # Add agent to new cell
-                        if fwd_cell is None or isinstance(fwd_cell, Goal):
-                            self.grid.set(*fwd_pos, agent)
-                            agent.pos = fwd_pos
-                        else:
-                            fwd_cell.agents.append(agent)
-                            agent.pos = fwd_pos
-                            if (
-                                isinstance(fwd_cell, Floor)
-                                and agent.color == fwd_cell.color
-                            ):
-                                step_rewards[agent_no] += 5
-                                agent.reward(5)
-                                agent.done = True
-
-                        # Remove agent from old cell
-                        if cur_cell == agent:
-                            self.grid.set(*cur_pos, None)
-                        else:
-                            assert cur_cell.can_overlap()
-                            cur_cell.agents.remove(agent)
-
-                        # Add agent's agents to old cell
-                        for left_behind in agent.agents:
-                            cur_obj = self.grid.get(*cur_pos)
-                            if cur_obj is None:
-                                self.grid.set(*cur_pos, left_behind)
-                            elif cur_obj.can_overlap():
-                                cur_obj.agents.append(left_behind)
-                            else:
-                                raise ValueError(
-                                    "How was agent there in the first place?"
-                                )
-
-                        # After moving, the agent shouldn't contain any other agents.
-                        agent.agents = []
-
-                        # Rewards are only obtained if fwd_cell has a "get_reward" method.
-                        if hasattr(fwd_cell, "get_reward"):
-                            rwd = fwd_cell.get_reward(agent)
-
-                            # Apply reward decay.
-                            if bool(self.reward_decay):
-                                rwd *= 1.0 - 0.9 * (self.step_count / self.max_steps)
-                            step_rewards[agent_no] += rwd
-                            agent.reward(rwd)
-
-                # Pick up an object
-                elif action == agent.actions.pickup:
-                    if fwd_cell and fwd_cell.can_pickup() and agent.carrying is None:
-                        agent.carrying = fwd_cell
-                        agent.carrying.cur_pos = np.array([-1, -1])
-                        self.grid.set(*fwd_pos, None)
-
-                # Drop an object
-                elif action == agent.actions.drop:
-                    if not fwd_cell and agent.carrying:
-                        self.grid.set(*fwd_pos, agent.carrying)
-                        agent.carrying.cur_pos = fwd_pos
-                        agent.carrying = None
-
-                # Toggle/activate an object
-                elif action == agent.actions.toggle:
-                    if fwd_cell:
-                        wasted = bool(fwd_cell.toggle(agent, fwd_pos))
-
-                # Done action (not used by default)
-                elif action == agent.actions.done:
-                    pass
-
-                else:
-                    raise ValueError(f"Environment can't handle action {action}.")
-
-                agent.on_step(fwd_cell if agent_moved else None)
-
-        # If any of the agents individually are "done" (hit lava or, in some cases,
-        # reached a goal) but the env requires respawning, then respawn those agents.
-        for agent in self.agents:
-            if agent.done:
-                if self.respawn:
-                    resting_place_obj = self.grid.get(*agent.pos)
-                    if resting_place_obj == agent:
-                        if agent.agents:
-                            self.grid.set(*agent.pos, agent.agents[0])
-                            agent.agents[0].agents += agent.agents[1:]
-                        else:
-                            self.grid.set(*agent.pos, None)
-                    else:
-                        resting_place_obj.agents.remove(agent)
-                        resting_place_obj.agents += agent.agents[:]
-                        agent.agents = []
-
-                    agent.reset(new_episode=False)
-                    self.place_obj(agent, **self.agent_spawn_kwargs)
-                    agent.activate()
-                else:  # If the agent shouldn't be respawned, deactivate it.
-                    agent.deactivate()
-
-        # The episode overall is done if all the agents are done
-        # or if it exceeds the step limit.
-        done = (self.step_count >= self.max_steps) or all(
-            [agent.done for agent in self.agents]
-        )
-
-        obs = [
-            np.asarray(self.gen_agent_obs(agent), dtype=np.uint8)
-            for agent in self.agents
-        ]
-
-        # Team reward
-        step_rewards = np.array([np.sum(step_rewards) for _ in self.agents])
-
-        return obs, step_rewards, done, {}
-
-
-# Map of color names to RGB values
-COLORS = {
-    "red": np.array([255, 0, 0]),
-    "blue": np.array([0, 0, 255]),
-}
-
-
-class SimpleFloor(Floor):
-    def render(self, img):
-        fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color])
diff --git a/hive/envs/marlgrid/marlgrid.py b/hive/envs/marlgrid/marlgrid.py
deleted file mode 100644
index b65055aa..00000000
--- a/hive/envs/marlgrid/marlgrid.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import gym
-import numpy as np
-from hive.envs import GymEnv, ParallelEnv
-from hive.envs.gym.gym_wrappers import FlattenWrapper, PermuteImageWrapper
-from marlgrid import envs
-from gym.wrappers.compatibility import EnvCompatibility
-
-from numpy.random._generator import Generator
-
-
-class MyGenerator(Generator):
-    def randint(
-        self: Generator,
-        low: int,
-        high: int,
-        size=None,
-        dtype="l",
-        endpoint: bool = False,
-    ):
-        """Provides the legacy `randint` method by delegating to
-        `Generator.integers`, since the deprecated `randint` API is
-        absent from `numpy.random.Generator`."""
-        return self.integers(low, high, size=size, dtype=dtype, endpoint=endpoint)
-
-
-def _patched_np_random(seed: int = None):
-    """Replacement for `gym.utils.seeding.np_random` that uses the
-    `MyGenerator` class instead of `numpy.random.Generator`.
-    `MyGenerator` has a `.randint` method, so the old code from marlgrid
-    can still work."""
-    from gym import error
-
-    if seed is not None and not (isinstance(seed, int) and 0 <= seed):
-        raise error.Error(f"Seed must be a non-negative integer or omitted, not {seed}")
-    seed_seq = np.random.SeedSequence(seed)
-    np_seed = seed_seq.entropy
-    rng = MyGenerator(np.random.PCG64(seed_seq))
-    return rng, np_seed
-
-
-gym.utils.seeding.np_random = _patched_np_random
-
-
-class MarlGridEnv(ParallelEnv, GymEnv):
-    """MarlGrid environment from https://github.com/kandouss/marlgrid/.
-
-    The environment can either be initialized with the name of a preregistered
-    environment from
-    https://github.com/kandouss/marlgrid/blob/master/marlgrid/envs/__init__.py,
-    or can be created using a config. See the original repo for details.
-    """
-
-    def create_env(self, env_name, randomize_seed=True, flatten=False, **kwargs):
-        """
-        Args:
-            env_name: The name of the environment.
-            randomize_seed: Whether to use a random seed for the environment.
-            flatten: Whether to flatten the observations.
-        """
-        if env_name is None:
-            self._env = envs.env_from_config(kwargs, randomize_seed=randomize_seed)
-            self._env = EnvCompatibility(self._env)
-        else:
-            super().create_env(
-                "GymV22Environment-v0",
-                env_id=env_name,
-                **kwargs,
-            )
-
-        self._env = PermuteImageWrapper(self._env)
-        if flatten:
-            self._env = FlattenWrapper(self._env)
-
-    def create_env_spec(self, name, **kwargs):
-        return super().create_env_spec(
-            name if name is not None else f"Marlgrid_{str(kwargs)}", **kwargs
-        )
-
-    def reset(self):
-        obs = super().reset()
-        return obs
diff --git a/hive/envs/marlgrid/requirements.txt b/hive/envs/marlgrid/requirements.txt
deleted file mode 100644
index eec48136..00000000
--- a/hive/envs/marlgrid/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-marlgrid @ https://github.com/kandouss/marlgrid/archive/refs/heads/master.zip
-pyglet==2.0.3
-gym
\ No newline at end of file