diff --git a/hive/envs/__init__.py b/hive/envs/__init__.py index 4c7e8f83..50e820fa 100644 --- a/hive/envs/__init__.py +++ b/hive/envs/__init__.py @@ -2,11 +2,6 @@ from hive.envs.env_spec import EnvSpec from hive.envs.gym.gym_env import GymEnv -try: - from hive.envs.marlgrid import MarlGridEnv -except ImportError: - MarlGridEnv = None - try: from hive.envs.pettingzoo import PettingZooEnv except ImportError: @@ -18,7 +13,6 @@ BaseEnv, { "GymEnv": GymEnv, - "MarlGridEnv": MarlGridEnv, "PettingZooEnv": PettingZooEnv, }, ) diff --git a/hive/envs/marlgrid/__init__.py b/hive/envs/marlgrid/__init__.py deleted file mode 100644 index 91b9a3ae..00000000 --- a/hive/envs/marlgrid/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from hive.envs.marlgrid import ma_envs -from hive.envs.marlgrid.marlgrid import MarlGridEnv diff --git a/hive/envs/marlgrid/ma_envs/__init__.py b/hive/envs/marlgrid/ma_envs/__init__.py deleted file mode 100644 index 6d637f4f..00000000 --- a/hive/envs/marlgrid/ma_envs/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -from gym.envs.registration import register -from marlgrid.envs import register_marl_env - -from hive.envs.marlgrid.ma_envs.checkers import CheckersMultiGrid -from hive.envs.marlgrid.ma_envs.pursuit import PursuitMultiGrid -from hive.envs.marlgrid.ma_envs.switch import SwitchMultiGrid - -register_marl_env( - "MarlGrid-2AgentCheckers8x8-v0", - CheckersMultiGrid, - n_agents=2, - grid_size=8, - view_size=5, # Needs to be same as grid_size if full_obs = True. - env_kwargs={"max_steps": 100, "full_obs": False}, -) - -register_marl_env( - "MarlGrid-2Agent1RandomPursuit8x8-v0", - PursuitMultiGrid, - n_agents=3, - grid_size=8, - view_size=5, # Needs to be same as grid_size if full_obs = True. - env_kwargs={"max_steps": 500, "full_obs": False}, -) - -register_marl_env( - "MarlGrid-2AgentSwitch8x8-v0", - SwitchMultiGrid, - n_agents=2, - grid_size=12, - view_size=12, # Needs to be same as grid_size if full_obs = True. - env_kwargs={"max_steps": 500, "full_obs": True}, -) diff --git a/hive/envs/marlgrid/ma_envs/base.py b/hive/envs/marlgrid/ma_envs/base.py deleted file mode 100644 index 2f1833bd..00000000 --- a/hive/envs/marlgrid/ma_envs/base.py +++ /dev/null @@ -1,200 +0,0 @@ -import gymnasium as gym -import numpy as np -from marlgrid.base import MultiGrid, MultiGridEnv, rotate_grid -from marlgrid.rendering import SimpleImageViewer - -TILE_PIXELS = 32 - - -class MultiGridEnvHive(MultiGridEnv): - def __init__( - self, - agents, - grid_size=None, - width=None, - height=None, - max_steps=100, - reward_decay=True, - seed=1337, - respawn=False, - ghost_mode=True, - full_obs=False, - agent_spawn_kwargs={}, - ): - self._full_obs = full_obs - super().__init__( - agents, - grid_size, - width, - height, - max_steps, - reward_decay, - seed, - respawn, - ghost_mode, - agent_spawn_kwargs, - ) - - def gen_obs_grid(self, agent): - # If the agent is inactive, return an empty grid and a visibility mask that hides everything. - if not agent.active: - # below, not sure orientation is correct but as of 6/27/2020 that doesn't matter because - # agent views are usually square and this grid won't be used for anything. - grid = MultiGrid( - (agent.view_size, agent.view_size), orientation=agent.dir + 1 - ) - vis_mask = np.zeros((agent.view_size, agent.view_size), dtype=np.bool) - return grid, vis_mask - - if self._full_obs: - topX, topY, botX, botY = 0, 0, self.width, self.height - grid = self.grid.slice(topX, topY, self.width, self.height, rot_k=0) - vis_mask = np.ones((self.width, self.height), dtype=bool) - else: - topX, topY, botX, botY = agent.get_view_exts() - grid = self.grid.slice( - topX, topY, agent.view_size, agent.view_size, rot_k=agent.dir + 1 - ) - # Process occluders and visibility - # Note that this incurs some slight performance cost - vis_mask = agent.process_vis(grid.opacity) - - # Warning about the rest of the function: - # Allows masking away objects that the agent isn't supposed to see. - # But breaks consistency between the states of the grid objects in the parial views - # and the grid objects overall. - if len(getattr(agent, "hide_item_types", [])) > 0: - for i in range(grid.width): - for j in range(grid.height): - item = grid.get(i, j) - if ( - (item is not None) - and (item is not agent) - and (item.type in agent.hide_item_types) - ): - if len(item.agents) > 0: - grid.set(i, j, item.agents[0]) - else: - grid.set(i, j, None) - - return grid, vis_mask - - def render( - self, - mode="human", - close=False, - highlight=True, - tile_size=TILE_PIXELS, - show_agent_views=True, - max_agents_per_col=3, - agent_col_width_frac=0.3, - agent_col_padding_px=2, - pad_grey=100, - ): - """Render the whole-grid human view""" - - if close: - if self.window: - self.window.close() - return - - if mode == "human" and not self.window: - self.window = SimpleImageViewer(caption="Marlgrid") - - # Compute which cells are visible to the agent - highlight_mask = np.full((self.width, self.height), False, dtype=np.bool) - for agent in self.agents: - if agent.active: - if self._full_obs: - xlow, ylow, xhigh, yhigh = 0, 0, self.width, self.height - else: - xlow, ylow, xhigh, yhigh = agent.get_view_exts() - - dxlow, dylow = max(0, 0 - xlow), max(0, 0 - ylow) - dxhigh, dyhigh = max(0, xhigh - self.grid.width), max( - 0, yhigh - self.grid.height - ) - if agent.see_through_walls: - highlight_mask[ - xlow + dxlow : xhigh - dxhigh, ylow + dylow : yhigh - dyhigh - ] = True - else: - a, b = self.gen_obs_grid(agent) - highlight_mask[ - xlow + dxlow : xhigh - dxhigh, ylow + dylow : yhigh - dyhigh - ] |= rotate_grid(b, a.orientation)[ - dxlow : (xhigh - xlow) - dxhigh, dylow : (yhigh - ylow) - dyhigh - ] - - # Render the whole grid - img = self.grid.render( - tile_size, highlight_mask=highlight_mask if highlight else None - ) - rescale = lambda X, rescale_factor=2: np.kron( - X, np.ones((int(rescale_factor), int(rescale_factor), 1)) - ) - - if show_agent_views: - target_partial_width = int( - img.shape[0] * agent_col_width_frac - 2 * agent_col_padding_px - ) - target_partial_height = ( - img.shape[1] - 2 * agent_col_padding_px - ) // max_agents_per_col - - agent_views = [self.gen_agent_obs(agent) for agent in self.agents] - agent_views = [ - view["pov"] if isinstance(view, dict) else view for view in agent_views - ] - agent_views = [ - rescale( - view, - min( - target_partial_width / view.shape[0], - target_partial_height / view.shape[1], - ), - ) - for view in agent_views - ] - agent_views = [ - agent_views[pos : pos + max_agents_per_col] - for pos in range(0, len(agent_views), max_agents_per_col) - ] - - f_offset = ( - lambda view: np.array( - [ - target_partial_height - view.shape[1], - target_partial_width - view.shape[0], - ] - ) - // 2 - ) - - cols = [] - for col_views in agent_views: - col = np.full( - (img.shape[0], target_partial_width + 2 * agent_col_padding_px, 3), - pad_grey, - dtype=np.uint8, - ) - for k, view in enumerate(col_views): - offset = f_offset(view) + agent_col_padding_px - offset[0] += k * target_partial_height - col[ - offset[0] : offset[0] + view.shape[0], - offset[1] : offset[1] + view.shape[1], - :, - ] = view - cols.append(col) - - img = np.concatenate((img, *cols), axis=1) - - if mode == "human": - if not self.window.isopen: - self.window.imshow(img) - self.window.window.set_caption("Marlgrid") - else: - self.window.imshow(img) - - return img diff --git a/hive/envs/marlgrid/ma_envs/checkers.py b/hive/envs/marlgrid/ma_envs/checkers.py deleted file mode 100644 index 62065565..00000000 --- a/hive/envs/marlgrid/ma_envs/checkers.py +++ /dev/null @@ -1,222 +0,0 @@ -import numpy as np -from marlgrid.base import MultiGrid -from marlgrid.objects import Goal, GridAgent - -from hive.envs.marlgrid.ma_envs.base import MultiGridEnvHive - - -class CheckersMultiGrid(MultiGridEnvHive): - """ - Checkers environment based on sunehag et al. 2017 - - "... The map contains apples and lemons. The first player is very sensitive and scores 10 for - the team for an apple (green square) and −10 for a lemon (orange square). - The second, less sensitive player scores 1 for the team for an apple and −1 for a lemon. - There is a wall of lemons between the players and the apples. - Apples and lemons disappear when collected. - The environment resets when all apples are eaten or maximum number of steps is reached. - """ - - def _gen_grid(self, width, height): - self.num_rows = 3 - self.grid = MultiGrid((width, height)) - self.grid.wall_rect(0, 0, width, height) - apple = Goal(color="green", reward=10) - orange = Goal(color="red", reward=-10) - self.num_remained_apples = 0 - for j in range(self.num_rows): - oranges_loc = [2 * i + 1 + j % 2 for i in range(width // 2 - 1)] - apples_loc = [2 * i + 1 + (j + 1) % 2 for i in range(width // 2 - 1)] - for orange_loc in oranges_loc: - self.put_obj(orange, orange_loc, j + 1) - - for apple_loc in apples_loc: - self.put_obj(apple, apple_loc, j + 1) - self.num_remained_apples += 1 - - self.agent_spawn_kwargs = {} - self.ghost_mode = False - - def reset(self, **kwargs): - for agent in self.agents: - agent.agents = [] - agent.reset(new_episode=True) - - self._gen_grid(self.width, self.height) - - for agent in self.agents: - if agent.spawn_delay == 0: - self.place_obj( - agent, - top=(0, self.num_rows + 1), - size=(self.width, self.height - self.num_rows - 1), - **self.agent_spawn_kwargs, - ) - agent.activate() - - self.step_count = 0 - obs = self.gen_obs() - for ag_idx, _ in enumerate(obs): - obs[ag_idx] = np.array(obs[ag_idx], dtype=np.uint8) - return obs - - def step(self, actions): - # Spawn agents if it's time. - for agent in self.agents: - if ( - not agent.active - and not agent.done - and self.step_count >= agent.spawn_delay - ): - self.place_obj(agent, **self.agent_spawn_kwargs) - agent.activate() - - assert len(actions) == len(self.agents) - - step_rewards = np.zeros((len(self.agents)), dtype=np.float) - - self.step_count += 1 - - iter_agents = list(enumerate(zip(self.agents, actions))) - iter_order = np.arange(len(iter_agents)) - self.np_random.shuffle(iter_order) - for shuffled_ix in iter_order: - agent_no, (agent, action) = iter_agents[shuffled_ix] - agent.step_reward = 0 - - if agent.active: - cur_pos = agent.pos[:] - cur_cell = self.grid.get(*cur_pos) - fwd_pos = agent.front_pos[:] - fwd_cell = self.grid.get(*fwd_pos) - agent_moved = False - - # Rotate left - if action == agent.actions.left: - agent.dir = (agent.dir - 1) % 4 - - # Rotate right - elif action == agent.actions.right: - agent.dir = (agent.dir + 1) % 4 - - # Move forward - elif action == agent.actions.forward: - # Under the follow conditions, the agent can move forward. - can_move = fwd_cell is None or fwd_cell.can_overlap() - if self.ghost_mode is False and isinstance(fwd_cell, GridAgent): - can_move = False - - if can_move: - agent_moved = True - # Add agent to new cell - if fwd_cell is None or isinstance(fwd_cell, Goal): - self.grid.set(*fwd_pos, agent) - agent.pos = fwd_pos - else: - fwd_cell.agents.append(agent) - agent.pos = fwd_pos - - # Remove agent from old cell - if cur_cell == agent: - self.grid.set(*cur_pos, None) - else: - assert cur_cell.can_overlap() - cur_cell.agents.remove(agent) - - # Add agent's agents to old cell - for left_behind in agent.agents: - cur_obj = self.grid.get(*cur_pos) - if cur_obj is None: - self.grid.set(*cur_pos, left_behind) - elif cur_obj.can_overlap(): - cur_obj.agents.append(left_behind) - else: - raise ValueError( - "How was agent there in the first place?" - ) - - # After moving, the agent shouldn't contain any other agents. - agent.agents = [] - - # Rewards can be got iff. fwd_cell has a "get_reward" method - if hasattr(fwd_cell, "get_reward"): - rwd = fwd_cell.get_reward(agent) - - # Modify the reward for less sensitive agent - if agent_no == 0: - rwd /= 10 - if bool(self.reward_decay): - rwd *= 1.0 - 0.9 * (self.step_count / self.max_steps) - step_rewards[agent_no] += rwd - agent.reward(rwd) - if rwd > 0: - self.num_remained_apples -= 1 - - # Pick up an object - elif action == agent.actions.pickup: - if fwd_cell and fwd_cell.can_pickup() and agent.carrying is None: - agent.carrying = fwd_cell - agent.carrying.cur_pos = np.array([-1, -1]) - self.grid.set(*fwd_pos, None) - - # Drop an object - elif action == agent.actions.drop: - if not fwd_cell and agent.carrying: - self.grid.set(*fwd_pos, agent.carrying) - agent.carrying.cur_pos = fwd_pos - agent.carrying = None - - # Toggle/activate an object - elif action == agent.actions.toggle: - if fwd_cell: - wasted = bool(fwd_cell.toggle(agent, fwd_pos)) - - # Done action (not used by default) - elif action == agent.actions.done: - pass - - else: - raise ValueError(f"Environment can't handle action {action}.") - - agent.on_step(fwd_cell if agent_moved else None) - - # If any of the agents individually are "done" (hit lava or in some cases a goal) - # but the env requires respawning, then respawn those agents. - for agent in self.agents: - if agent.done: - if self.respawn: - resting_place_obj = self.grid.get(*agent.pos) - if resting_place_obj == agent: - if agent.agents: - self.grid.set(*agent.pos, agent.agents[0]) - agent.agents[0].agents += agent.agents[1:] - else: - self.grid.set(*agent.pos, None) - else: - resting_place_obj.agents.remove(agent) - resting_place_obj.agents += agent.agents[:] - agent.agents = [] - - agent.reset(new_episode=False) - self.place_obj(agent, **self.agent_spawn_kwargs) - agent.activate() - else: # if the agent shouldn't be respawned, then deactivate it. - agent.deactivate() - - # The episode overall is done if all the agents are done, - # or if it exceeds the step limit or all the apples are collected. - done = ( - (self.step_count >= self.max_steps) - or all([agent.done for agent in self.agents]) - or self.num_remained_apples == 0 - ) - - obs = [ - np.asarray(self.gen_agent_obs(agent), dtype=np.uint8) - for agent in self.agents - ] - - # Team reward - step_rewards = np.array([np.sum(step_rewards) for _ in self.agents]) - - return obs, step_rewards, done, {} diff --git a/hive/envs/marlgrid/ma_envs/pursuit.py b/hive/envs/marlgrid/ma_envs/pursuit.py deleted file mode 100644 index 8137b994..00000000 --- a/hive/envs/marlgrid/ma_envs/pursuit.py +++ /dev/null @@ -1,215 +0,0 @@ -import numpy as np -from marlgrid.base import MultiGrid -from marlgrid.objects import Goal, GridAgent, Lava, Wall - -from hive.envs.marlgrid.ma_envs.base import MultiGridEnvHive - - -class PursuitMultiGrid(MultiGridEnvHive): - """ - Pursuit–Evasion environment based on Gupta et al. 2017 - - "The pursuit-evasion domain consists of two sets of agents: evaders and pursuers. - The evaders are trying to avoid pursuers, while the pursuers are - trying to catch the evaders. The pursuers receive a reward of 5.0 when - they surround an evader or corner the agent" - """ - - metadata = {} - - def _gen_grid(self, width, height): - self.grid = MultiGrid((width, height)) - self.grid.wall_rect(0, 0, width, height) - self.ghost_mode = False - - def reset(self, **kwargs): - obs = super().reset() - for ag_idx, _ in enumerate(obs): - obs[ag_idx] = np.array(obs[ag_idx], dtype=np.uint8) - return obs - - def step(self, actions): - # Spawn agents if it's time. - for agent in self.agents: - if ( - not agent.active - and not agent.done - and self.step_count >= agent.spawn_delay - ): - self.place_obj(agent, **self.agent_spawn_kwargs) - agent.activate() - - num_learning_agents = len(actions) - num_rand_agents = self.num_agents - len(actions) - - step_rewards = np.zeros((num_learning_agents), dtype=np.float) - - self.step_count += 1 - for i in range(num_learning_agents, self.num_agents): - actions.append(self.action_space[i].sample()) - iter_agents = list(enumerate(zip(self.agents, actions))) - iter_order = np.arange(len(iter_agents)) - for shuffled_ix in iter_order: - agent_no, (agent, action) = iter_agents[shuffled_ix] - agent.step_reward = 0 - - if agent.active: - cur_pos = agent.pos[:] - cur_cell = self.grid.get(*cur_pos) - fwd_pos = agent.front_pos[:] - fwd_cell = self.grid.get(*fwd_pos) - agent_moved = False - bot_pos = agent.pos + np.array([0, -1]) - bot_cell = self.grid.get(*bot_pos) - abov_pos = agent.pos + np.array([0, +1]) - abov_cell = self.grid.get(*abov_pos) - left_pos = agent.pos + np.array([-1, 0]) - left_cell = self.grid.get(*left_pos) - right_pos = agent.pos + np.array([+1, 0]) - right_cell = self.grid.get(*right_pos) - - w = 0 - a = 0 - surrounding_cells = [bot_cell, abov_cell, left_cell, right_cell] - if agent_no == len(self.agents) - num_rand_agents: - for cell in surrounding_cells: - if isinstance(cell, GridAgent): - a += 1 - if isinstance(cell, Wall): - w += 1 - - if a == len(self.agents) - num_rand_agents or (w == 2 and a == 2): - step_rewards[:] += np.array( - [5] * (len(self.agents) - num_rand_agents) - ) - for agent in self.agents: - agent.done = True - - # Rotate left - if action == agent.actions.left: - agent.dir = (agent.dir - 1) % 4 - - # Rotate right - elif action == agent.actions.right: - agent.dir = (agent.dir + 1) % 4 - - # Move forward - elif action == agent.actions.forward: - # Under the follow conditions, the agent can move forward. - can_move = fwd_cell is None or fwd_cell.can_overlap() - if self.ghost_mode is False and isinstance(fwd_cell, GridAgent): - can_move = False - - if can_move: - agent_moved = True - # Add agent to new cell - if fwd_cell is None: - self.grid.set(*fwd_pos, agent) - agent.pos = fwd_pos - else: - fwd_cell.agents.append(agent) - agent.pos = fwd_pos - - # Remove agent from old cell - if cur_cell == agent: - self.grid.set(*cur_pos, None) - else: - assert cur_cell.can_overlap() - cur_cell.agents.remove(agent) - - # Add agent's agents to old cell - for left_behind in agent.agents: - cur_obj = self.grid.get(*cur_pos) - if cur_obj is None: - self.grid.set(*cur_pos, left_behind) - elif cur_obj.can_overlap(): - cur_obj.agents.append(left_behind) - else: - raise ValueError( - "How was agent there in teh first place?" - ) - - # After moving, the agent shouldn't contain any other agents. - agent.agents = [] - - # Rewards can be got iff. fwd_cell has a "get_reward" method - if hasattr(fwd_cell, "get_reward"): - rwd = fwd_cell.get_reward(agent) - if bool(self.reward_decay): - rwd *= 1.0 - 0.9 * (self.step_count / self.max_steps) - step_rewards[agent_no] += rwd - agent.reward(rwd) - - if isinstance(fwd_cell, (Lava, Goal)): - agent.done = True - - # Pick up an object - elif action == agent.actions.pickup: - if fwd_cell and fwd_cell.can_pickup(): - if agent.carrying is None: - agent.carrying = fwd_cell - agent.carrying.cur_pos = np.array([-1, -1]) - self.grid.set(*fwd_pos, None) - else: - pass - - # Drop an object - elif action == agent.actions.drop: - if not fwd_cell and agent.carrying: - self.grid.set(*fwd_pos, agent.carrying) - agent.carrying.cur_pos = fwd_pos - agent.carrying = None - else: - pass - - # Toggle/activate an object - elif action == agent.actions.toggle: - if fwd_cell: - wasted = bool(fwd_cell.toggle(agent, fwd_pos)) - else: - pass - - # Done action (not used by default) - elif action == agent.actions.done: - pass - - else: - raise ValueError(f"Environment can't handle action {action}.") - - agent.on_step(fwd_cell if agent_moved else None) - - # If any of the agents individually are "done" (hit lava or in some cases a goal) - # but the env requires respawning, then respawn those agents. - for agent in self.agents: - if agent.done: - if self.respawn: - resting_place_obj = self.grid.get(*agent.pos) - if resting_place_obj == agent: - if agent.agents: - self.grid.set(*agent.pos, agent.agents[0]) - agent.agents[0].agents += agent.agents[1:] - else: - self.grid.set(*agent.pos, None) - else: - resting_place_obj.agents.remove(agent) - resting_place_obj.agents += agent.agents[:] - agent.agents = [] - - agent.reset(new_episode=False) - self.place_obj(agent, **self.agent_spawn_kwargs) - agent.activate() - else: # if the agent shouldn't be respawned, then deactivate it. - agent.deactivate() - - # The episode overall is done if all the agents are done, - # or if it exceeds the step limit. - done = (self.step_count >= self.max_steps) or all( - [agent.done for agent in self.agents[:num_learning_agents]] - ) - - obs = [ - np.asarray(self.gen_agent_obs(agent), dtype=np.uint8) - for agent in self.agents[:num_learning_agents] - ] - - return obs, step_rewards, done, {} diff --git a/hive/envs/marlgrid/ma_envs/switch.py b/hive/envs/marlgrid/ma_envs/switch.py deleted file mode 100644 index ca13e09f..00000000 --- a/hive/envs/marlgrid/ma_envs/switch.py +++ /dev/null @@ -1,235 +0,0 @@ -import numpy as np -from gym_minigrid.rendering import fill_coords, point_in_rect -from marlgrid.base import MultiGrid -from marlgrid.objects import Floor, Goal, GridAgent - -from hive.envs.marlgrid.ma_envs.base import MultiGridEnvHive - - -class SwitchMultiGrid(MultiGridEnvHive): - """ - Checkers environment based on sunehag et al. 2017 - - "... The map contains apples and lemons. The first player is very sensitive and scores 10 for - the team for an apple (green square) and −10 for a lemon (orange square). - The second, less sensitive player scores 1 for the team for an apple and −1 for a lemon. - There is a wall of lemons between the players and the apples. - Apples and lemons disappear when collected. - The environment resets when all apples are eaten or maximum number of steps is reached. - """ - - def _gen_grid(self, width, height): - self.grid = MultiGrid((width, height)) - self.grid.wall_rect(0, 0, width, height) - for row in range(height - 2): - if row != (height - 2) // 2: - self.grid.horz_wall(3, row + 1, width - 6) - - self.put_obj(SimpleFloor(color="blue"), 1, 1) - self.put_obj(SimpleFloor(color="red"), self.width - 2, self.height - 2) - self.agent_spawn_kwargs = {} - self.ghost_mode = False - - def reset(self, **kwargs): - for agent in self.agents: - agent.agents = [] - agent.reset(new_episode=True) - - self._gen_grid(self.width, self.height) - - for id, agent in enumerate(self.agents): - if id == 0: - top = (0, 0) - else: - top = (self.width - 3, 0) - if agent.spawn_delay == 0: - self.place_obj( - agent, - top=top, - size=(2, self.height - 1), - **self.agent_spawn_kwargs, - ) - agent.activate() - - self.step_count = 0 - obs = self.gen_obs() - for ag_idx, _ in enumerate(obs): - obs[ag_idx] = np.array(obs[ag_idx], dtype=np.uint8) - return obs - - def step(self, actions): - # Spawn agents if it's time. - for agent in self.agents: - if ( - not agent.active - and not agent.done - and self.step_count >= agent.spawn_delay - ): - self.place_obj(agent, **self.agent_spawn_kwargs) - agent.activate() - - if len(actions) != len(self.agents): - raise ValueError( - f"Number of actions is not equal to the number of agents {len(actions)} != {len(self.agents)}" - ) - - step_rewards = np.zeros((len(self.agents)), dtype=np.float) - - self.step_count += 1 - - iter_agents = list(enumerate(zip(self.agents, actions))) - iter_order = np.arange(len(iter_agents)) - self.np_random.shuffle(iter_order) - for shuffled_ix in iter_order: - agent_no, (agent, action) = iter_agents[shuffled_ix] - agent.step_reward = 0 - - if agent.active: - cur_pos = agent.pos[:] - cur_cell = self.grid.get(*cur_pos) - fwd_pos = agent.front_pos[:] - fwd_cell = self.grid.get(*fwd_pos) - agent_moved = False - - # Rotate left - if action == agent.actions.left: - agent.dir = (agent.dir - 1) % 4 - - # Rotate right - elif action == agent.actions.right: - agent.dir = (agent.dir + 1) % 4 - - # Move forward - elif action == agent.actions.forward: - # Under the follow conditions, the agent can move forward. - can_move = fwd_cell is None or fwd_cell.can_overlap() - if self.ghost_mode is False and isinstance(fwd_cell, GridAgent): - can_move = False - - if can_move: - agent_moved = True - # Add agent to new cell - if fwd_cell is None or isinstance(fwd_cell, Goal): - self.grid.set(*fwd_pos, agent) - agent.pos = fwd_pos - else: - fwd_cell.agents.append(agent) - agent.pos = fwd_pos - if ( - isinstance(fwd_cell, Floor) - and agent.color == fwd_cell.color - ): - step_rewards[agent_no] += 5 - agent.reward(5) - agent.done = True - - # Remove agent from old cell - if cur_cell == agent: - self.grid.set(*cur_pos, None) - else: - assert cur_cell.can_overlap() - cur_cell.agents.remove(agent) - - # Add agent's agents to old cell - for left_behind in agent.agents: - cur_obj = self.grid.get(*cur_pos) - if cur_obj is None: - self.grid.set(*cur_pos, left_behind) - elif cur_obj.can_overlap(): - cur_obj.agents.append(left_behind) - else: - raise ValueError( - "How was agent there in the first place?" - ) - - # After moving, the agent shouldn't contain any other agents. - agent.agents = [] - - # Rewards can be got iff. fwd_cell has a "get_reward" method - if hasattr(fwd_cell, "get_reward"): - rwd = fwd_cell.get_reward(agent) - - # Modify the reward for less sensitive agent - if bool(self.reward_decay): - rwd *= 1.0 - 0.9 * (self.step_count / self.max_steps) - step_rewards[agent_no] += rwd - agent.reward(rwd) - - # Pick up an object - elif action == agent.actions.pickup: - if fwd_cell and fwd_cell.can_pickup() and agent.carrying is None: - agent.carrying = fwd_cell - agent.carrying.cur_pos = np.array([-1, -1]) - self.grid.set(*fwd_pos, None) - - # Drop an object - elif action == agent.actions.drop: - if not fwd_cell and agent.carrying: - self.grid.set(*fwd_pos, agent.carrying) - agent.carrying.cur_pos = fwd_pos - agent.carrying = None - - # Toggle/activate an object - elif action == agent.actions.toggle: - if fwd_cell: - wasted = bool(fwd_cell.toggle(agent, fwd_pos)) - - # Done action (not used by default) - elif action == agent.actions.done: - pass - - else: - raise ValueError(f"Environment can't handle action {action}.") - - agent.on_step(fwd_cell if agent_moved else None) - - # If any of the agents individually are "done" (hit lava or in some cases a goal) - # but the env requires respawning, then respawn those agents. - for agent in self.agents: - if agent.done: - if self.respawn: - resting_place_obj = self.grid.get(*agent.pos) - if resting_place_obj == agent: - if agent.agents: - self.grid.set(*agent.pos, agent.agents[0]) - agent.agents[0].agents += agent.agents[1:] - else: - self.grid.set(*agent.pos, None) - else: - resting_place_obj.agents.remove(agent) - resting_place_obj.agents += agent.agents[:] - agent.agents = [] - - agent.reset(new_episode=False) - self.place_obj(agent, **self.agent_spawn_kwargs) - agent.activate() - else: # if the agent shouldn't be respawned, then deactivate it. - agent.deactivate() - - # The episode overall is done if all the agents are done, - # or if it exceeds the step limit or all the apples are collected. - done = (self.step_count >= self.max_steps) or all( - [agent.done for agent in self.agents] - ) - - obs = [ - np.asarray(self.gen_agent_obs(agent), dtype=np.uint8) - for agent in self.agents - ] - - # Team reward - step_rewards = np.array([np.sum(step_rewards) for _ in self.agents]) - - return obs, step_rewards, done, {} - - -# Map of color names to RGB values -COLORS = { - "red": np.array([255, 0, 0]), - "blue": np.array([0, 0, 255]), -} - - -class SimpleFloor(Floor): - def render(self, img): - fill_coords(img, point_in_rect(0, 1, 0, 1), COLORS[self.color]) diff --git a/hive/envs/marlgrid/marlgrid.py b/hive/envs/marlgrid/marlgrid.py deleted file mode 100644 index b65055aa..00000000 --- a/hive/envs/marlgrid/marlgrid.py +++ /dev/null @@ -1,81 +0,0 @@ -import gym -import numpy as np -from hive.envs import GymEnv, ParallelEnv -from hive.envs.gym.gym_wrappers import FlattenWrapper, PermuteImageWrapper -from marlgrid import envs -from gym.wrappers.compatibility import EnvCompatibility - -from numpy.random._generator import Generator - - -class MyGenerator(Generator): - def randint( - self: Generator, - low: int, - high: int, - size=None, - dtype="l", - endpoint: bool = False, - ): - """Replacement for `numpy.random.Generator.randint` that uses the - `Generator.integers` method instead of `Generator.random_integers` - which is deprecated.""" - return self.integers(low, high, size=size, dtype=dtype, endpoint=endpoint) - - -def _patched_np_random(seed: int = None): - """Replacement for `gym.utils.seeding.np_random` that uses the - `MyGenerator` class instead of `numpy.random.Generator`. - MyGenerator has a `.randint` method so the old code from marlgrid - can still work.""" - from gym import error - - if seed is not None and not (isinstance(seed, int) and 0 <= seed): - raise error.Error(f"Seed must be a non-negative integer or omitted, not {seed}") - seed_seq = np.random.SeedSequence(seed) - np_seed = seed_seq.entropy - rng = MyGenerator(np.random.PCG64(seed_seq)) - return rng, np_seed - - -gym.utils.seeding.np_random = _patched_np_random - - -class MarlGridEnv(ParallelEnv, GymEnv): - """MarlGrid environment from https://github.com/kandouss/marlgrid/. - - The environment can either be initialized with the name of a preregistered - environment from - https://github.com/kandouss/marlgrid/blob/master/marlgrid/envs/__init__.py, - or can be created using a config. See the original repo for details. - """ - - def create_env(self, env_name, randomize_seed=True, flatten=False, **kwargs): - """ - Args: - env_name: The name of the environment. - randomize_seed: Whether to use a random random seed for the environment. - flatten: Whether to flatten the observations. - """ - if env_name is None: - self._env = envs.env_from_config(kwargs, randomize_seed=randomize_seed) - self._env = EnvCompatibility(self._env) - else: - super().create_env( - "GymV22Environment-v0", - env_id=env_name, - **kwargs, - ) - - self._env = PermuteImageWrapper(self._env) - if flatten: - self._env = FlattenWrapper(self._env) - - def create_env_spec(self, name, **kwargs): - return super().create_env_spec( - name if name is not None else f"Marlgrid_{str(kwargs)}", **kwargs - ) - - def reset(self): - obs = super().reset() - return obs diff --git a/hive/envs/marlgrid/requirements.txt b/hive/envs/marlgrid/requirements.txt deleted file mode 100644 index eec48136..00000000 --- a/hive/envs/marlgrid/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -marlgrid @ https://github.com/kandouss/marlgrid/archive/refs/heads/master.zip -pyglet==2.0.3 -gym \ No newline at end of file