Skip to content

Commit

Permalink
chore: polish params names
Browse files Browse the repository at this point in the history
chore: polish params names
  • Loading branch information
Gaiejj authored Oct 5, 2023
2 parents c1f290f + 784992d commit 7cc7c4a
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
4 changes: 2 additions & 2 deletions safepo/multi_agent/macpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,11 @@ def trpo_update(self, sample):

g_step_dir = self.conjugate_gradient(
self.policy.actor, obs_batch, rnn_states_batch, actions_batch, masks_batch,\
- available_actions_batch, active_masks_batch, reward_loss_grad.data, nsteps=10
+ available_actions_batch, active_masks_batch, reward_loss_grad.data, nsteps=self.config["conjugate_gradient_iters"]
)
b_step_dir = self.conjugate_gradient(
self.policy.actor, obs_batch, rnn_states_batch, actions_batch, masks_batch,\
- available_actions_batch, active_masks_batch, B_cost_loss_grad.data, nsteps=10
+ available_actions_batch, active_masks_batch, B_cost_loss_grad.data, nsteps=self.config["conjugate_gradient_iters"]
)

q_coef = (reward_loss_grad * g_step_dir).sum(0, keepdim=True)
Expand Down
3 changes: 2 additions & 1 deletion safepo/multi_agent/marl_cfg/macpo/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use_proper_time_limits: False

target_kl: 0.016
searching_steps: 10
+ conjugate_gradient_iters: 10
accept_ratio: 0.5
clip_param: 0.2
learning_iters: 5
Expand Down Expand Up @@ -78,5 +79,5 @@ mamujoco:
gamma: 0.99
safety_gamma: 0.2
target_kl: 0.01
- learning_iters: 15 # Conjugate Gradient Iterations
+ learning_iters: 15 # Number of SGD Iterations
entropy_coef: 0.01

0 comments on commit 7cc7c4a

Please sign in to comment.