Commit
chore: cleaned up base reward functions
AugustDG committed Dec 14, 2024
1 parent abf04d6 commit 08af3ce
Showing 7 changed files with 98 additions and 72 deletions.
1 change: 0 additions & 1 deletion .idea/misc.xml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion configs/tasks/newton_idle_task.yaml
@@ -5,7 +5,7 @@ seed: 14321

n_envs: 96
timesteps_per_env: 100_000
base_lr: 3e-5
base_lr: 6e-7
episode_length: 500

newton:
2 changes: 1 addition & 1 deletion configs/world.yaml
@@ -18,7 +18,7 @@ sim_params:
enable_scene_query_support: False # true to interact with the scene with the mouse (doesn't work with GPU dynamics)
enable_enhanced_determinism: False # true for deterministic simulation, more computationally expensive

use_fabric: False # true for fabric, NVIDIA's interop layer between Hydra & PhysX
use_fabric: True # true for fabric, NVIDIA's interop layer between Hydra & PhysX
enable_ccd: False # true for continuous collision detection, needs to be enabled on each rigid body & pair filter

# Solver parameters
2 changes: 1 addition & 1 deletion core/tasks/newton_base_task.py
@@ -76,7 +76,7 @@ def _on_step(self) -> bool:
)
self.logger.record(
"observations/projected_gravities",
np.linalg.norm(agent_observations["projected_gravities"], axis=1).mean(),
agent_observations["projected_gravities"][:, -1].mean(),
)

return True
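The logging change above swaps the norm of the projected gravity for its z-component. As a rough sketch of why (the quaternion convention and the helper below are illustrative assumptions, not code from this repo): the projected gravity is the unit world gravity expressed in the base frame, so its norm is always about 1 and tells you nothing, while its z-component sits near -1 when the robot is upright and climbs toward 0 (and past it) as the base tips over.

    import numpy as np


    def projected_gravity(quat_wxyz: np.ndarray) -> np.ndarray:
        """Unit world gravity (0, 0, -1) expressed in the base frame, i.e. R^T @ g."""
        w, x, y, z = quat_wxyz
        # R^T @ (0, 0, -1) is the negated third row of the base-to-world rotation matrix.
        return -np.array([2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x * x + y * y)])


    upright = projected_gravity(np.array([1.0, 0.0, 0.0, 0.0]))       # identity pose
    tipped = projected_gravity(np.array([0.7071, 0.7071, 0.0, 0.0]))  # ~90 degree roll

    print(np.linalg.norm(upright), upright[-1])  # ~1.0, ~-1.0
    print(np.linalg.norm(tipped), tipped[-1])    # ~1.0,  ~0.0 -> the norm is uninformative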
98 changes: 50 additions & 48 deletions core/tasks/newton_idle_task.py
@@ -108,10 +108,11 @@ def step_wait(self) -> VecEnvStepReturn:
self.last_actions_buf[1, :, :] = self.last_actions_buf[0, :, :]
self.last_actions_buf[0, :, :] = self.actions_buf.copy()

heights = self.env.get_observations()["positions"][:, 2]
gravity_on_z = self.env.get_observations()["projected_gravities"][:, 2]
# terminated
self.dones_buf = np.where(heights <= self.reset_height, True, False)
self.dones_buf = np.where(heights >= 1.0, True, self.dones_buf)
self.dones_buf = np.where(
gravity_on_z >= 0.0, True, False
) # when we're not upright

self.dones_buf = np.where(
self.progress_buf > self.max_episode_length,
@@ -182,9 +183,9 @@ def _calculate_rewards(self) -> None:
# The current reward function is missing many features, all in comments below

obs = self.env.get_observations()
positions = obs["positions"]
angular_velocities = obs["angular_velocities"]
linear_velocities = obs["linear_velocities"]
projected_gravities = obs["projected_gravities"]

joints_order = self.agent.joints_controller.art_view.joint_names

@@ -239,59 +240,61 @@ def _calculate_rewards(self) -> None:
.numpy()
) # [-1, 1] unitless

from core.utils.math import magnitude_sqr_n
from core.utils.rl import (
exp_squared_norm,
fd_first_order_squared_norm,
fd_second_order_squared_norm,
)

# base position reward
# base orientation reward
base_linear_velocity_xy_reward = np.exp(
magnitude_sqr_n(
base_linear_velocity_xy - np.zeros_like(base_linear_velocity_xy)
)
* -8
base_linear_velocity_xy_reward = exp_squared_norm(
base_linear_velocity_xy,
mult=-8.0,
weight=1.0,
)
base_linear_velocity_z_reward = np.exp(
np.square(base_linear_velocity_z - np.zeros_like(base_linear_velocity_z))
* -8
base_linear_velocity_z_reward = exp_squared_norm(
base_linear_velocity_z,
mult=-8.0,
weight=1.0,
)
base_angular_velocity_xy_reward = (
np.exp(
magnitude_sqr_n(
base_angular_velocity_xy - np.zeros_like(base_angular_velocity_xy)
)
* -2
)
* 0.5
base_angular_velocity_xy_reward = exp_squared_norm(
base_angular_velocity_xy,
mult=-2,
weight=0.5,
)
base_angular_velocity_z_reward = (
np.exp(
np.square(
base_angular_velocity_z - np.zeros_like(base_angular_velocity_z)
)
* -2
)
* 0.5
base_angular_velocity_z_reward = exp_squared_norm(
base_angular_velocity_z,
mult=-2,
weight=0.5,
)
joint_positions_reward = (
-magnitude_sqr_n(joint_positions - animation_joint_positions) * 15.0
joint_positions_reward = fd_first_order_squared_norm(
joint_positions,
animation_joint_positions,
weight=15.0,
)
joint_velocities_reward = (
-magnitude_sqr_n(joint_velocities - animation_joint_velocities) * 0.1
joint_velocities_reward = fd_first_order_squared_norm(
joint_velocities,
animation_joint_velocities,
weight=0.01,
)
joint_efforts_reward = (
-magnitude_sqr_n(joint_efforts - np.zeros_like(joint_efforts)) * 0.001
joint_efforts_reward = fd_first_order_squared_norm(
joint_efforts,
np.zeros_like(joint_efforts),
weight=0.001,
)
# joint accelerations reward
joint_action_rate_reward = (
-magnitude_sqr_n(self.actions_buf - self.last_actions_buf[0]) * 1.5
joint_action_rate_reward = fd_first_order_squared_norm(
self.actions_buf,
self.last_actions_buf[0],
weight=1.0,
)
joint_action_acceleration_reward = fd_second_order_squared_norm(
self.actions_buf,
self.last_actions_buf[0],
self.last_actions_buf[1],
weight=0.45,
)
joint_action_acceleration_reward = (
-magnitude_sqr_n(
self.actions_buf
- 2 * self.last_actions_buf[0]
+ self.last_actions_buf[1]
)
* 0.45
) # not sure why this is the formula, but it comes from Disney

self.rewards_buf = (
base_linear_velocity_xy_reward
@@ -305,11 +308,10 @@ def _calculate_rewards(self) -> None:
+ joint_action_acceleration_reward
)

heights = positions[:, 2]
gravity_on_z = projected_gravities[:, 2]
self.rewards_buf = np.where(
heights <= self.reset_height, -20.0, self.rewards_buf
gravity_on_z >= 0.0, self.rewards_buf - 10.0, self.rewards_buf
)
self.rewards_buf = np.where(heights >= 1.0, -2.0, self.rewards_buf)

# TODO: should this be here?
self.rewards_buf = np.clip(
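For context on the reward refactor above: each helper pulled in from core.utils.rl reduces a term to a per-environment squared L2 norm, optionally pushed through an exponential. A minimal sketch of the pattern with stand-in arrays (n_envs matches the config's 96; the joint count of 12 and the random values are purely illustrative):

    import numpy as np

    rng = np.random.default_rng(0)
    n_envs, n_joints = 96, 12


    def squared_norm(a: np.ndarray, weight: float = 1.0) -> np.ndarray:
        # Per-env squared L2 norm over the last axis, scaled by a weight.
        return np.sum(a ** 2, axis=-1) * weight


    # Exponential shaping (exp_squared_norm): exp(mult * ||v||^2) is largest (1.0)
    # when the xy velocity is zero, mirroring the old np.exp(magnitude_sqr_n(v) * -8).
    xy_velocity = rng.normal(size=(n_envs, 2))
    xy_term = np.exp(-8.0 * squared_norm(xy_velocity))

    # First-order finite difference (fd_first_order_squared_norm): distance of the
    # joints from the animation targets, one value per environment.
    joint_positions = rng.normal(size=(n_envs, n_joints))
    animation_targets = rng.normal(size=(n_envs, n_joints))
    tracking_term = squared_norm(joint_positions - animation_targets, weight=15.0)

    # Second-order finite difference (fd_second_order_squared_norm): a - 2*b + c is
    # a discrete acceleration of the action history, which is what the removed
    # "comes from Disney" formula computed.
    a, b, c = rng.normal(size=(3, n_envs, n_joints))
    accel_term = squared_norm(a - 2 * b + c, weight=0.45)

    print(xy_term.shape, tracking_term.shape, accel_term.shape)  # all (96,)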
20 changes: 0 additions & 20 deletions core/utils/math.py
@@ -69,23 +69,3 @@ def quat_slerp_n(a: np.ndarray, b: np.ndarray, t: float) -> np.ndarray:
qc[3] = a[3] * ratio_a + b[3] * ratio_b

return qc


def difference_length_squared_n(a: np.ndarray, b: np.ndarray) -> np.ndarray:
diff = a - b

return np.sum(diff * diff, axis=1)


def difference_length_n(a: np.ndarray, b: np.ndarray) -> np.ndarray:
diff = a - b

return np.linalg.norm(diff)


def magnitude_sqr_n(a: np.ndarray, axis: int = 1) -> np.ndarray:
return np.sum(a * a, axis=axis)


def length_n(a: np.ndarray) -> np.ndarray:
return np.linalg.norm(a)
45 changes: 45 additions & 0 deletions core/utils/rl.py
@@ -0,0 +1,45 @@
import numpy as np


def squared_norm(a: np.ndarray, weight: float = 1.0) -> np.ndarray:
    # Row-wise squared L2 norm over the last axis, scaled by `weight`.
    return np.sum(a ** 2, axis=-1) * weight


def fd_first_order_squared_norm(
    a: np.ndarray,
    b: np.ndarray,
    weight: float = 1.0,
) -> np.ndarray:
    # Squared norm of the first-order difference a - b.
    return squared_norm(a - b, weight)


def fd_second_order_squared_norm(
    a: np.ndarray,
    b: np.ndarray,
    c: np.ndarray,
    weight: float = 1.0,
) -> np.ndarray:
    # Squared norm of the second-order finite difference a - 2b + c
    # (a discrete acceleration when a, b, c are consecutive samples).
    return squared_norm(a - 2 * b + c, weight)


def exp_squared_norm(
    a: np.ndarray, mult: float = 1.0, weight: float = 1.0
) -> np.ndarray:
    # exp(mult * ||a||^2) * weight; with a negative `mult` this peaks at `weight`
    # when a is zero and decays with the squared norm.
    return np.exp(mult * squared_norm(a)) * weight


def exp_fd_first_order_squared_norm(
    a: np.ndarray,
    b: np.ndarray,
    mult: float = 1.0,
    weight: float = 1.0,
) -> np.ndarray:
    return np.exp(mult * fd_first_order_squared_norm(a, b)) * weight


def exp_fd_second_order_squared_norm(
    a: np.ndarray,
    b: np.ndarray,
    c: np.ndarray,
    mult: float = 1.0,
    weight: float = 1.0,
) -> np.ndarray:
    return np.exp(mult * fd_second_order_squared_norm(a, b, c)) * weight
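A quick usage sketch for the module above (assuming the repo root is on PYTHONPATH; the shapes and values are illustrative): every helper reduces over the last axis, so batched (n_envs, dim) inputs yield one reward value per environment.

    import numpy as np

    from core.utils.rl import exp_squared_norm, fd_second_order_squared_norm

    a = np.zeros((96, 12))  # e.g. current actions, one row per environment
    b = np.ones((96, 12))   # previous actions
    c = np.ones((96, 12))   # actions two steps back

    print(fd_second_order_squared_norm(a, b, c, weight=0.45).shape)  # (96,)
    print(exp_squared_norm(a[:, :2], mult=-8.0, weight=1.0).mean())  # 1.0, i.e. exp(0) * weight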
