Feat: sebulba ff_ippo #1088

Status: Open
Wants to merge 134 commits into base: develop

Changes from 86 commits

Commits (134)
adc2114
feat: gym wrapper
Louay-Ben-nessir Jun 10, 2024
ce86d09
chore: pre-commit hooks
Louay-Ben-nessir Jun 10, 2024
d5edf45
fix: merged the observations and action mask
Louay-Ben-nessir Jun 14, 2024
f891be5
fix: Create the gym wrappers directly
Louay-Ben-nessir Jun 14, 2024
15f4867
chore: pre-commit
Louay-Ben-nessir Jun 14, 2024
82ea827
fix: fixed the async env creation
Louay-Ben-nessir Jun 14, 2024
4e94df5
fix: gymV26 compatibility wrapper
Louay-Ben-nessir Jun 14, 2024
8a86be9
fix: various minor fixes
Louay-Ben-nessir Jun 15, 2024
1da5c15
fix: handling rware reset function
Louay-Ben-nessir Jun 15, 2024
4466044
feat: async env wrapper, changed the gym wrapper to rware wrapper
Louay-Ben-nessir Jun 16, 2024
24d8aae
fix: fixed the async env wrapper
Louay-Ben-nessir Jun 16, 2024
a6deae2
fix: info only contains the action_mask and reformatted (n_agents, n_e…
Louay-Ben-nessir Jun 18, 2024
1475bd0
chore: removed async gym wrapper
Louay-Ben-nessir Jun 22, 2024
9fce9c6
feat: gym metric tracker wrapper
Louay-Ben-nessir Jun 22, 2024
055a326
feat: init sebulba ippo
Louay-Ben-nessir Jun 10, 2024
a435a0a
feat: initial learner / training loop
Louay-Ben-nessir Jun 13, 2024
7e80d7b
fix: changes the env creation
Louay-Ben-nessir Jun 14, 2024
b961336
fix: fixed function calls
Louay-Ben-nessir Jun 15, 2024
502730d
fix: fixed the training and added training logger
Louay-Ben-nessir Jun 22, 2024
1985729
fix: changed the anakin ppo type import
Louay-Ben-nessir Jun 22, 2024
89ed246
feat: full sebulba functional
Louay-Ben-nessir Jun 25, 2024
7f43a33
fix: logging and added LBF
Louay-Ben-nessir Jul 2, 2024
8a87258
fix: batch size calc for multiple devices
Louay-Ben-nessir Jul 4, 2024
7f0acd9
fix: num_updates and code refactoring
Louay-Ben-nessir Jul 5, 2024
3e352cf
chore: code cleanup + comments + added checkpoint save
Louay-Ben-nessir Jul 8, 2024
bcdaa38
feat: mappo + removed sebulba specific types and made the rware wra…
Louay-Ben-nessir Jul 8, 2024
7044fbe
fix: removed the sebulba specific types
Louay-Ben-nessir Jul 8, 2024
9433f2e
feat: ff_mappo and rec_ippo in sebulba
Louay-Ben-nessir Jul 10, 2024
627215d
fix: removed the lbf import/wrapper
Louay-Ben-nessir Jul 10, 2024
c3b405d
chore: clean up & updated the code to match the sebulba-ff-ippo branch
Louay-Ben-nessir Jul 10, 2024
e40c5d4
chore: pre-commits and some comments
Louay-Ben-nessir Jul 10, 2024
4b17c15
chore: removed unused config file
Louay-Ben-nessir Jul 10, 2024
9ec6b16
feat: sebulba ff_ippo
Louay-Ben-nessir Jul 10, 2024
e5dd71b
chore: pre-commits
Louay-Ben-nessir Jul 10, 2024
af24082
fix: fix the num_updates_in_eval in the last eval
Louay-Ben-nessir Jul 13, 2024
32ac389
fix: fixed the num evals calcs
Louay-Ben-nessir Jul 16, 2024
45ca587
chore: pre-commit
Louay-Ben-nessir Jul 16, 2024
d694498
chore: created the anakin and sebulba folders
Louay-Ben-nessir Jul 16, 2024
cb8111f
fix: imports and config paths in systems
Louay-Ben-nessir Jul 16, 2024
d842375
fix: allow for reproducibility
Louay-Ben-nessir Jul 16, 2024
0a1ffd0
chore: pre-commits
Louay-Ben-nessir Jul 16, 2024
f1adc31
chore: pre-commits
Louay-Ben-nessir Jul 16, 2024
3850591
feat: LBF and reproducibility
Louay-Ben-nessir Jul 16, 2024
0a2ee08
feat: lbf
Louay-Ben-nessir Jul 16, 2024
dc92065
fix: sync neptune logging for sebulba to avoid stalling
Louay-Ben-nessir Jul 17, 2024
133a250
fix: added missing lbf import
Louay-Ben-nessir Jul 17, 2024
b938c83
fix: seeds need to be python arrays, not np arrays
Louay-Ben-nessir Jul 17, 2024
a368476
fix: config and imports for anakin q_learning and sac
Louay-Ben-nessir Jul 17, 2024
32433ff
chore: arch_name for anakin
Louay-Ben-nessir Jul 17, 2024
a68c8e9
fix: sum the rewards when using a shared reward
Louay-Ben-nessir Jul 17, 2024
8cee7ac
fix: configs revamp
Louay-Ben-nessir Jul 17, 2024
e199f3a
chore: pre-commits
Louay-Ben-nessir Jul 17, 2024
2b71d3b
fix: more config changes
Louay-Ben-nessir Jul 17, 2024
e87ad28
chore: pre-commits
Louay-Ben-nessir Jul 17, 2024
2b587c0
chore: renamed arch_name to architecture_name
Louay-Ben-nessir Jul 18, 2024
5ad4d2f
chore: config files rename
Louay-Ben-nessir Jul 18, 2024
432071e
fix: moved from gym to gymnasium
Louay-Ben-nessir Jul 18, 2024
77e6e12
feat: generic gym wrapper
Louay-Ben-nessir Jul 18, 2024
43511fd
feat: using gymnasium async worker
Louay-Ben-nessir Jul 18, 2024
eaf9a1c
chore: pre-commits and annotations
Louay-Ben-nessir Jul 18, 2024
16c0ac3
fix: config file fixes
Louay-Ben-nessir Jul 18, 2024
18b928d
fix: rware import
Louay-Ben-nessir Jul 18, 2024
19a7765
fix: better agent ids wrapper?
Louay-Ben-nessir Jul 18, 2024
c4a05d6
chore: bunch of minor changes
Louay-Ben-nessir Jul 18, 2024
5595818
chore: annotation
Louay-Ben-nessir Jul 18, 2024
29b1303
chore: comments
Louay-Ben-nessir Jul 19, 2024
669dfbd
feat: restructured the folders
Louay-Ben-nessir Jul 19, 2024
d1f8364
update the gym wrappers
Louay-Ben-nessir Jul 19, 2024
dc641c6
folder re-structuring
Louay-Ben-nessir Jul 19, 2024
0881d2f
fix: removed deprecated jax call
Louay-Ben-nessir Jul 19, 2024
b60cefe
fix: env wrappers fix
Louay-Ben-nessir Jul 19, 2024
21aafbf
fix: config changes
Louay-Ben-nessir Jul 19, 2024
e09fd60
chore: pre-commits
Louay-Ben-nessir Jul 19, 2024
2a6452d
fix: config file fixes
Louay-Ben-nessir Jul 19, 2024
e2f36f9
fix: LBF import
Louay-Ben-nessir Jul 19, 2024
29396c9
fix: Async worker auto-resetting
Louay-Ben-nessir Jul 19, 2024
6de0b1e
chore: minor changes
Louay-Ben-nessir Jul 19, 2024
7584ce5
fix: annotations and added agent id spaces
Louay-Ben-nessir Jul 22, 2024
e638e9f
fix: fixed the logging deadlock for sebulba
Louay-Ben-nessir Jul 22, 2024
81b0a89
Merge pull request #4 from Louay-Ben-nessir/feat-sebulba-gym-wrapper
Louay-Ben-nessir Jul 22, 2024
0860518
Merge pull request #1090 from Louay-Ben-nessir/chore--anakin-and-sebu…
sash-a Jul 23, 2024
4c0acdc
Merge remote-tracking branch 'upstream/develop' into chore--sebulba-a…
Louay-Ben-nessir Jul 23, 2024
a85aa2f
chore: pre-commits
Louay-Ben-nessir Jul 23, 2024
e504b47
pre-commit
Louay-Ben-nessir Jul 23, 2024
6a1fad4
Merge pull request #1094 from Louay-Ben-nessir/chore--sebulba-arch-up…
OmaymaMahjoub Jul 23, 2024
0cae539
Merge remote-tracking branch 'upstream/feat/sebulba_arch' into seb-ff…
Louay-Ben-nessir Jul 23, 2024
a19056b
feat: major code restructure, non-blocking evaluators
Louay-Ben-nessir Jul 25, 2024
fc80b91
chore: code cleanup and sps calcs and learner threads
Louay-Ben-nessir Jul 26, 2024
18ec08f
feat: shared time steps checker
Louay-Ben-nessir Jul 29, 2024
38e7229
chore: removed unused eval type
Louay-Ben-nessir Jul 29, 2024
5a5e542
chore: config file changes
Louay-Ben-nessir Jul 29, 2024
dcff2a1
fix: fixed stalling at the end of training
Louay-Ben-nessir Jul 29, 2024
d926c54
chore: code cleanup
Louay-Ben-nessir Jul 29, 2024
7e4698a
chore: various changes
Louay-Ben-nessir Jul 29, 2024
6dac8c3
fix: prevent the pipeline from stalling and a lot of cleanup
Louay-Ben-nessir Jul 30, 2024
23b582c
chore: better error messages
Louay-Ben-nessir Jul 30, 2024
c71dad8
fix: changed the timestep discount
Louay-Ben-nessir Jul 30, 2024
bfea3aa
chore: very nitpicky clean ups
sash-a Jul 30, 2024
de92f5a
feat: pass timestep instead of obs and done and fix potential race co…
sash-a Jul 30, 2024
1465133
fix: deadlock in pipeline
sash-a Jul 30, 2024
6689c49
fix: wasting samples
Louay-Ben-nessir Aug 11, 2024
c506da3
chore: loss unpacking
Louay-Ben-nessir Aug 11, 2024
b24ac34
fix: updated to work with the latest gymnasium
Louay-Ben-nessir Oct 10, 2024
1dfb241
fix: jumanji
Louay-Ben-nessir Oct 10, 2024
fd8aece
fix: removed deprecated gymnasium import
Louay-Ben-nessir Oct 10, 2024
ae53415
feat: minor refactor to sebulba utils
sash-a Oct 10, 2024
724d2dc
chore: a few minor changes to code style
sash-a Oct 10, 2024
fa8a996
Merge branch 'develop' into feat/sebulba_arch
sash-a Oct 11, 2024
0a36fdf
Merge branch 'feat/sebulba_arch' into seb-ff-ippo-only
sash-a Oct 11, 2024
47b8e03
fix: update configs to match latest mava
sash-a Oct 11, 2024
8be8037
fix: reshape with multiple learners and system name
sash-a Oct 11, 2024
4748636
fix: safer pipeline.clear()
sash-a Oct 11, 2024
5593bde
feat: avoid unnecessary host-device transfers
sash-a Oct 14, 2024
133ea1a
chore: remove some more device transfers
sash-a Oct 14, 2024
9260e9b
chore: better graceful exit
sash-a Oct 14, 2024
d61dcfb
fix: create envs in main thread to avoid deadlocks
sash-a Oct 15, 2024
105d796
chore: use original rware and lbf
Louay-Ben-nessir Oct 15, 2024
f292bf3
fix: possible off by one fix
sash-a Oct 16, 2024
d42d732
fix: change to using gym.make to create envs and fix StepType
sash-a Oct 16, 2024
d4359c1
feat: learner env accumulation
Louay-Ben-nessir Oct 17, 2024
7c78478
feat: jit evaluation on cpu
sash-a Oct 17, 2024
aa49c6f
Merge branch 'seb-ff-ippo-only' of github.com:Louay-Ben-nessir/Mava i…
sash-a Oct 17, 2024
c252ffe
fix: timestep calculation with accumulation
Louay-Ben-nessir Oct 17, 2024
fd7a025
feat: shardmap almost working
sash-a Oct 17, 2024
4013a22
feat: shard_map working
sash-a Oct 18, 2024
0e559d9
fix: key use in actor loss
sash-a Oct 19, 2024
0a6bd49
fix: align gym config with other configs
sash-a Oct 19, 2024
641a548
feat: better env creation and safer sharding
sash-a Oct 19, 2024
c0c88bc
chore: minor env typing fixes
sash-a Oct 19, 2024
354159a
Merge branch 'develop' into seb-ff-ippo-only
sash-a Oct 19, 2024
6b2d01c
fix: start actors simultaneously to avoid deadlocks
Louay-Ben-nessir Oct 21, 2024
a13ab65
feat: support for smac
Louay-Ben-nessir Oct 23, 2024
bc55375
chore: pre-commits
Louay-Ben-nessir Oct 23, 2024
c6d460f
fix: random segfault
Louay-Ben-nessir Oct 27, 2024
1 change: 1 addition & 0 deletions mava/configs/arch/anakin.yaml
@@ -1,4 +1,5 @@
# --- Anakin config ---
architecture_name: anakin

# --- Training ---
num_envs: 16 # Number of vectorised environments per device.
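The new architecture_name key is what lets a single entry point choose between the Anakin and Sebulba code paths. As a minimal illustrative sketch of such a dispatch (the function names below are hypothetical, not this PR's actual entry points):

def run_anakin(cfg: dict) -> None:  # hypothetical Anakin entry point
    print("launching anakin system")

def run_sebulba(cfg: dict) -> None:  # hypothetical Sebulba entry point
    print("launching sebulba system")

def run_experiment(cfg: dict) -> None:
    # Dispatch on the architecture_name key added by this config change.
    arch = cfg["arch"]["architecture_name"]
    if arch == "anakin":
        run_anakin(cfg)
    elif arch == "sebulba":
        run_sebulba(cfg)
    else:
        raise ValueError(f"Unknown architecture: {arch}")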
19 changes: 19 additions & 0 deletions mava/configs/arch/sebulba.yaml
@@ -0,0 +1,19 @@
# --- Sebulba config ---
architecture_name: sebulba

# --- Training ---
num_envs: 32 # number of environments per thread.

# --- Evaluation ---
evaluation_greedy: False # Evaluate the policy greedily. If True, the policy will select
# the action with the greatest logit. If False, the policy will sample
# from the logits.
num_eval_episodes: 32 # Number of episodes to evaluate per evaluation.
num_evaluation: 200 # Number of evenly spaced evaluations to perform during training.
absolute_metric: True # Whether the absolute metric should be computed. For more details
# on the absolute metric please see: https://arxiv.org/abs/2209.10485

# --- Sebulba devices config ---
n_threads_per_executor: 1 # Number of different threads/env batches per actor.
executor_device_ids: [0] # ids of actor devices
learner_device_ids: [0] # ids of learner devices
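For concreteness, the device-id lists above could be resolved to JAX devices roughly as follows. This is a hedged sketch of the wiring, not the code in this PR:

import jax

def resolve_devices(executor_device_ids: list, learner_device_ids: list) -> tuple:
    """Map the config's device-id lists onto local JAX devices."""
    devices = jax.local_devices()
    actor_devices = [devices[i] for i in executor_device_ids]
    learner_devices = [devices[i] for i in learner_device_ids]
    return actor_devices, learner_devices

# With the defaults above, actors and the learner share device 0.
actor_devices, learner_devices = resolve_devices([0], [0])

With n_threads_per_executor set to 1, each actor device would then run a single environment-stepping thread feeding the learner devices.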
2 changes: 1 addition & 1 deletion mava/configs/default_ff_ippo.yaml
@@ -3,5 +3,5 @@ defaults:
- arch: anakin
- system: ppo/ff_ippo
- network: mlp
- env: rware
- env: rware_gym
- _self_
7 changes: 7 additions & 0 deletions mava/configs/default_ff_ippo_seb.yaml
Contributor: Can we call it sebulba instead of seb 🙏

Contributor: I'm fine with both, but wouldn't it be better to rename all the files as system_name_arch.yaml, e.g. default_ff_ippo*.yaml becomes ff_ippo_anakin.yaml and ff_ippo_sebulba.yaml? 🙄 Adding the default_ is optional, but it makes the file name too long.

Contributor: Agreed with removing default_; maybe we should change this in another PR though?

Contributor: Sure, agree 🙌
@@ -0,0 +1,7 @@
defaults:
- logger: ff_ippo
- arch: sebulba
- system: ppo/ff_ippo
- network: mlp
- env: rware_gym
- _self_
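Because these are ordinary Hydra config groups, the composed config can be sanity-checked without launching training. A small sketch, assuming the script lives at the repository root so that mava/configs resolves as a relative path:

from hydra import compose, initialize

with initialize(version_base=None, config_path="mava/configs"):
    cfg = compose(config_name="default_ff_ippo_seb")

print(cfg.arch.architecture_name)  # sebulba
print(cfg.arch.num_envs)           # 32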
19 changes: 19 additions & 0 deletions mava/configs/env/lbf_gym.yaml
@@ -0,0 +1,19 @@
# ---Environment Configs---
defaults:
- _self_
- scenario: gym-lbf-2s-8x8-2p-2f-coop # [gym-lbf-2s-8x8-2p-2f-coop, gym-lbf-8x8-2p-2f-coop, gym-lbf-2s-10x10-3p-3f, gym-lbf-10x10-3p-3f, gym-lbf-15x15-3p-5f, gym-lbf-15x15-4p-3f, gym-lbf-15x15-4p-5f]

env_name: LevelBasedForaging # Used for logging purposes.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether to add agent IDs to the observations returned by the environment.
add_agent_id : False

# Whether or not to log the winrate of this environment.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True
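The use_shared_rewards flag lines up with the earlier "sum the rewards when using a shared reward" commit. Its intended semantics, shown as an illustrative sketch rather than the PR's actual wrapper:

import numpy as np

def share_rewards(per_agent_rewards: np.ndarray) -> np.ndarray:
    """Give every agent the summed team reward."""
    team_reward = per_agent_rewards.sum()
    return np.full_like(per_agent_rewards, team_reward)

share_rewards(np.array([0.0, 1.0, 0.5]))  # -> array([1.5, 1.5, 1.5])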
19 changes: 19 additions & 0 deletions mava/configs/env/rware_gym.yaml
@@ -0,0 +1,19 @@
# ---Environment Configs---
defaults:
- _self_
- scenario: gym-rware-tiny-2ag # [gym-rware-tiny-2ag, gym-rware-tiny-4ag, gym-rware-tiny-4ag-easy, gym-rware-small-4ag]

env_name: RobotWarehouse # Used for logging purposes.

# Defines the metric that will be used to evaluate the performance of the agent.
# This metric is returned at the end of an experiment and can be used for hyperparameter tuning.
eval_metric: episode_return

# Whether to add agent IDs to the observations returned by the environment.
add_agent_id : False

# Whether or not to log the winrate of this environment.
log_win_rate: False

# Whether or not to sum the returned rewards over all of the agents.
use_shared_rewards: True
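Per the "change to using gym.make to create envs" commit, a scenario such as tiny-2ag presumably maps onto a registered Gymnasium id. A hedged sketch; the exact id string and version suffix here are assumptions:

import gymnasium as gym
import rware  # noqa: F401  # registers the RobotWarehouse ids (import name assumed)

env = gym.make("rware-tiny-2ag-v2")  # id/version suffix assumed from task_name
obs, info = env.reset(seed=0)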
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-10x10-3p-3f.yaml
@@ -0,0 +1,18 @@
# The config of the 10x10-3p-3f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 10x10-3p-3f

task_config:
field_size: [10,10]
sight: 10
players: 3
max_num_food: 3
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-15x15-3p-5f.yaml
@@ -0,0 +1,18 @@
# The config of the 15x15-3p-5f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 15x15-3p-5f

task_config:
field_size: [15, 15]
sight: 15
players: 3
max_num_food: 5
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-15x15-4p-3f.yaml
@@ -0,0 +1,18 @@
# The config of the 15x15-4p-3f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 15x15-4p-3f

task_config:
field_size: [15, 15]
sight: 15
players: 4
max_num_food: 3
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-15x15-4p-5f.yaml
@@ -0,0 +1,18 @@
# The config of the 15x15-4p-5f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 15x15-4p-5f

task_config:
field_size: [15, 15]
sight: 15
players: 4
max_num_food: 5
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-2s-10x10-3p-3f.yaml
@@ -0,0 +1,18 @@
# The config of the 2s-10x10-3p-3f scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 2s-10x10-3p-3f

task_config:
field_size: [10, 10]
sight: 2
players: 3
max_num_food: 3
max_player_level: 2
force_coop: False
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-2s-8x8-2p-2f-coop.yaml
@@ -0,0 +1,18 @@
# The config of the 2s-8x8-2p-2f-coop scenario with the VectorObserver set as default.
name: LevelBasedForaging
task_name: 2s-8x8-2p-2f-coop

task_config:
field_size: [8, 8] # size of the grid to generate.
sight: 2 # field of view of an agent.
players: 2 # number of agents on the grid.
max_num_food: 2 # number of food in the environment.
max_player_level: 2 # maximum level of the agents (inclusive).
force_coop: True # force cooperation between agents.
max_episode_steps: 100 # max number of steps per episode.
min_player_level : 1 # minimum level of the agents (inclusive).
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
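How these fields reach the environment constructor is a build-time detail not shown in this diff. One plausible sketch is merging task_config and env_kwargs into keyword arguments, assuming the target environment's constructor accepts these field names:

import gymnasium as gym

def make_scenario_env(env_id: str, task_config: dict, env_kwargs: dict) -> gym.Env:
    """Build an env from a scenario file's task_config plus its env_kwargs."""
    return gym.make(env_id, **{**task_config, **env_kwargs})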
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-lbf-8x8-2p-2f-coop.yaml
@@ -0,0 +1,18 @@
# The config of the 8x8-2p-2f-coop scenario with the VectorObserver set as default
name: LevelBasedForaging
task_name: 8x8-2p-2f-coop

task_config:
field_size: [8, 8]
sight: 8
players: 2
max_num_food: 2
max_player_level: 2
force_coop: True
max_episode_steps: 100
min_player_level : 1
min_food_level : null
max_food_level : null

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-small-4ag.yaml
@@ -0,0 +1,18 @@
# The config of the small-4ag environment
name: RobotWarehouse
task_name: small-4ag

task_config:
column_height: 8
shelf_rows: 2
shelf_columns: 3
n_agents: 4
sensor_range: 1
request_queue_size: 4
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-tiny-2ag.yaml
@@ -0,0 +1,18 @@
# The config of the tiny-2ag environment
name: RobotWarehouse
task_name: tiny-2ag

task_config:
column_height: 8
shelf_rows: 1
shelf_columns: 3
n_agents: 2
sensor_range: 1
request_queue_size: 2
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-tiny-4ag-easy.yaml
@@ -0,0 +1,18 @@
# The config of the tiny-4ag-easy environment
name: RobotWarehouse
task_name: tiny-4ag-easy

task_config:
column_height: 8
shelf_rows: 1
shelf_columns: 3
n_agents: 4
sensor_range: 1
request_queue_size: 8
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env
18 changes: 18 additions & 0 deletions mava/configs/env/scenario/gym-rware-tiny-4ag.yaml
@@ -0,0 +1,18 @@
# The config of the tiny-4ag environment
name: RobotWarehouse
task_name: tiny-4ag

task_config:
column_height: 8
shelf_rows: 1
shelf_columns: 3
n_agents: 4
sensor_range: 1
request_queue_size: 4
msg_bits : 0
max_inactivity_steps : null
max_steps : 500
reward_type : 0

env_kwargs:
{} # there are no scenario specific env_kwargs for this env