feat: adjusted rewards & observations for newton idle task
AugustDG committed Dec 11, 2024
1 parent 1b68377 commit b85ecea
Showing 8 changed files with 3,843 additions and 3,686 deletions.
Binary file modified assets/newton/.thumbs/256x256/newton.usd.png
7,252 changes: 3,693 additions & 3,559 deletions assets/newton/animations/newton_idle.keyframes.yaml

Large diffs are not rendered by default.

Binary file modified assets/newton/newton.usd
Binary file not shown.
63 changes: 53 additions & 10 deletions core/controllers/joints_controller.py
@@ -66,11 +66,11 @@ def construct(self, path_expr: str) -> None:
 
         self._is_constructed = True
 
-    def step(self, joint_positions: Tensor) -> None:
+    def step(self, joint_actions: Tensor) -> None:
         assert self._is_constructed, "Joints controller not constructed: tried to step!"
 
-        self._target_joint_positions = self._process_joint_positions(
-            joint_positions,
+        self._target_joint_positions = self._process_joint_actions(
+            joint_actions,
             self.box_joint_constraints,
             self._noise_function,
         )
@@ -93,7 +93,7 @@ def reset(self, joint_positions: Tensor, indices: Indices = None) -> None:
 
         joint_positions = joint_positions.to(device=self.universe.physics_device)
 
-        self._target_joint_positions = self._process_joint_positions(
+        self._target_joint_positions = self._process_joint_actions(
             joint_positions,
             self.box_joint_constraints,
         )
@@ -103,6 +103,31 @@ def reset(self, joint_positions: Tensor, indices: Indices = None) -> None:
             indices=indices,
         )
 
+    def get_normalized_joint_positions(self) -> Tensor:
+        """
+        Returns:
+            The joint positions normalized to the joint constraints [-1, 1].
+        """
+        joint_positions = self.get_joint_positions_deg()
+
+        from core.utils.math import map_range
+
+        joint_positions_normalized = map_range(
+            joint_positions.cpu(),
+            torch.from_numpy(self.box_joint_constraints.low),
+            torch.from_numpy(self.box_joint_constraints.high),
+            -1.0,
+            1.0,
+        )
+
+        return joint_positions_normalized
+
+    def get_joint_positions_deg(self) -> Tensor:
+        return torch.rad2deg(self._articulation_view.get_joint_positions())
+
+    def get_joint_velocities_deg(self) -> Tensor:
+        return torch.rad2deg(self._articulation_view.get_joint_velocities())
+
     def _dict_to_box_constraints(self, joint_constraints: JointsConstraints) -> Box:
         joint_names = list(joint_constraints.keys())
@@ -150,29 +175,47 @@ def _apply_joint_constraints(
                 box_joint_constraints.high[j].item()
             )
 
-    def _process_joint_positions(
+    def _process_joint_actions(
         self,
-        joint_positions: Tensor,
+        joint_actions: Tensor,
         box_joint_constraints: Box,
         noise_function: Optional[NoiseFunction] = None,
     ) -> Tensor:
-        if noise_function is not None:
-            joint_positions = noise_function(joint_positions)
-
+        """
+        Joint actions are processed by mapping them to the joint constraints (degrees) and applying noise.
+        Args:
+            joint_actions: The joint actions to be processed [-1, 1].
+            box_joint_constraints: The joint constraints, as a gymnasium.Box object.
+            noise_function: The noise function to be applied to the computed joint positions.
+        Returns:
+            The processed joint positions (in radians).
+        """
         low_constraints_t = torch.from_numpy(box_joint_constraints.low).to(
             device=self.universe.physics_device
         )
         high_constraints_t = torch.from_numpy(box_joint_constraints.high).to(
             device=self.universe.physics_device
        )
 
-        for i, _ in enumerate(joint_positions):
+        joint_positions = torch.zeros_like(joint_actions)
+
+        for i, _ in enumerate(joint_actions):
+            joint_positions[i] = torch.lerp(
+                low_constraints_t,
+                high_constraints_t,
+                (joint_actions[i] + 1) / 2,
+            )
+
             joint_positions[i] = torch.clamp(
                 joint_positions[i],
                 min=low_constraints_t,
                 max=high_constraints_t,
             )
 
+            if noise_function is not None:
+                joint_positions[i] = noise_function(joint_positions[i])
+
             joint_positions[i] = torch.deg2rad(joint_positions[i])
 
         return joint_positions
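
The loop lerps each environment's action vector against the full low/high constraint tensors one row at a time. A vectorized equivalent, as a sketch only (hypothetical function name, noise omitted; assumes batch-first tensors and a PyTorch version where torch.lerp and torch.clamp broadcast tensor arguments):

    import torch
    from torch import Tensor

    def process_joint_actions_batched(
        joint_actions: Tensor,  # (num_envs, num_joints), values in [-1, 1]
        low_deg: Tensor,        # (num_joints,) lower joint limits in degrees
        high_deg: Tensor,       # (num_joints,) upper joint limits in degrees
    ) -> Tensor:
        # Map [-1, 1] actions to [0, 1] interpolation weights.
        weights = (joint_actions + 1.0) / 2.0
        # Interpolate between the joint limits; broadcasts over the batch dim.
        positions_deg = torch.lerp(low_deg, high_deg, weights)
        # Clamp guards against actions slightly outside [-1, 1].
        positions_deg = torch.clamp(positions_deg, min=low_deg, max=high_deg)
        return torch.deg2rad(positions_deg)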
29 changes: 12 additions & 17 deletions core/domain_randomizer/newton_base_domain_randomizer.py
@@ -49,46 +49,41 @@ def on_reset(self, indices: Indices = None) -> None:
         indices_n = indices
 
         if indices is None:
-            indices = torch.arange(self._agent.num_agents)
+            indices_t = torch.arange(self._agent.num_agents)
         else:
-            indices = torch.from_numpy(indices).to(device=self._universe.physics_device)
+            indices_t = torch.from_numpy(indices).to(
+                device=self._universe.physics_device
+            )
 
-        num_to_reset = indices.shape[0]
+        num_to_reset = indices_t.shape[0]
 
         # TODO: decide where the reset positions and rotations should come from
         self._newton_art_view.set_world_poses(
             positions=self.initial_positions[indices],
             orientations=self.initial_orientations[indices],
-            indices=indices,
+            indices=indices_t,
         )
 
         # using set_velocities instead of individual methods (lin & ang),
         # because it's the only method supported in the GPU pipeline (default pipeline)
         self._newton_art_view.set_velocities(
             torch.zeros((num_to_reset, 6), dtype=torch.float32),
-            indices,
+            indices_t,
         )
 
         self._newton_art_view.set_joint_efforts(
             torch.zeros((num_to_reset, 12), dtype=torch.float32),
-            indices,
+            indices_t,
         )
 
         self._newton_art_view.set_joint_velocities(
             torch.zeros((num_to_reset, 12), dtype=torch.float32),
-            indices,
+            indices_t,
        )
 
-        import numpy as np
-
-        joint_positions = torch.from_numpy(
-            np.array(
-                [
-                    self._agent.joints_controller.box_joint_constraints.sample()
-                    for _ in range(num_to_reset)
-                ]
-            )
-        )
+        joint_positions = (
+            torch.rand((num_to_reset, 12), dtype=torch.float32) * 2.0 - 1.0
+        )  # [-1, 1]
 
         self._agent.joints_controller.reset(
             joint_positions,
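
Rather than drawing per-environment samples from the gymnasium Box via numpy on the CPU, the randomizer now draws uniform actions in [-1, 1] and lets the controller's lerp map them onto the per-joint limits, which also implies joints_controller.reset now expects normalized actions. For a uniform Box the two are equivalent in distribution; a minimal check, with shapes taken from the diff:

    import torch

    num_to_reset, num_joints = 4, 12

    # Uniform in [-1, 1]^12; after the controller's lerp this reproduces a
    # uniform sample within each joint's [low, high] range, matching Box.sample().
    joint_actions = torch.rand((num_to_reset, num_joints), dtype=torch.float32) * 2.0 - 1.0
    assert joint_actions.min() >= -1.0 and joint_actions.max() <= 1.0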
9 changes: 2 additions & 7 deletions core/tasks/newton_base_task.py
@@ -56,11 +56,6 @@ def _on_step(self) -> bool:
             f"{self.logger.dir}/{self.save_path}_rew_{self.best_mean_reward:.2f}"
         )
 
-        self.logger.record(
-            "rewards/best_cumulative",
-            self.cumulative_reward.max().item(),
-        )
-
         # TODO: better metrics about the agent's state & the animation engine
         agent_observations = task.agent.get_observations()
         self.logger.record(
@@ -124,8 +119,8 @@ def __init__(
 
         self.animation_engine: AnimationEngine = animation_engine
         self.last_actions_buf: Actions = np.zeros(
-            (self.num_envs, self.num_actions), dtype=np.float32
-        )
+            (2, self.num_envs, self.num_actions), dtype=np.float32
+        )  # 2 sets of past actions, 0: t - 1, 1: t - 2
 
     @abstractmethod
     def construct(self, universe: Universe) -> None:
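
The action history buffer gains a leading dimension of 2: per the inline comment, index 0 holds the actions from t - 1 and index 1 those from t - 2. How the task rolls the buffer each step is not shown in this diff; a plausible sketch, with push_actions as a hypothetical helper:

    import numpy as np

    num_envs, num_actions = 8, 12
    last_actions_buf = np.zeros((2, num_envs, num_actions), dtype=np.float32)

    def push_actions(buf: np.ndarray, actions: np.ndarray) -> None:
        # Shift history one step back: t - 1 becomes t - 2, newest becomes t - 1.
        buf[1] = buf[0]
        buf[0] = actions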