feat: adjusted rewards & observations for newton idle task
AugustDG committed Dec 11, 2024
1 parent 1b68377 commit b85ecea
Showing 8 changed files with 3,843 additions and 3,686 deletions.
Binary file modified assets/newton/.thumbs/256x256/newton.usd.png
7,252 changes: 3,693 additions & 3,559 deletions assets/newton/animations/newton_idle.keyframes.yaml

Large diffs are not rendered by default.

Binary file modified assets/newton/newton.usd
Binary file not shown.
63 changes: 53 additions & 10 deletions core/controllers/joints_controller.py
@@ -66,11 +66,11 @@ def construct(self, path_expr: str) -> None:
 
         self._is_constructed = True
 
-    def step(self, joint_positions: Tensor) -> None:
+    def step(self, joint_actions: Tensor) -> None:
         assert self._is_constructed, "Joints controller not constructed: tried to step!"
 
-        self._target_joint_positions = self._process_joint_positions(
-            joint_positions,
+        self._target_joint_positions = self._process_joint_actions(
+            joint_actions,
             self.box_joint_constraints,
             self._noise_function,
         )
@@ -93,7 +93,7 @@ def reset(self, joint_positions: Tensor, indices: Indices = None) -> None:
 
         joint_positions = joint_positions.to(device=self.universe.physics_device)
 
-        self._target_joint_positions = self._process_joint_positions(
+        self._target_joint_positions = self._process_joint_actions(
             joint_positions,
             self.box_joint_constraints,
         )
@@ -103,6 +103,31 @@ def reset(self, joint_positions: Tensor, indices: Indices = None) -> None:
             indices=indices,
         )
 
+    def get_normalized_joint_positions(self) -> Tensor:
+        """
+        Returns:
+            The joint positions normalized to the joint constraints [-1, 1].
+        """
+        joint_positions = self.get_joint_positions_deg()
+
+        from core.utils.math import map_range
+
+        joint_positions_normalized = map_range(
+            joint_positions.cpu(),
+            torch.from_numpy(self.box_joint_constraints.low),
+            torch.from_numpy(self.box_joint_constraints.high),
+            -1.0,
+            1.0,
+        )
+
+        return joint_positions_normalized
+
+    def get_joint_positions_deg(self) -> Tensor:
+        return torch.rad2deg(self._articulation_view.get_joint_positions())
+
+    def get_joint_velocities_deg(self) -> Tensor:
+        return torch.rad2deg(self._articulation_view.get_joint_velocities())
+
     def _dict_to_box_constraints(self, joint_constraints: JointsConstraints) -> Box:
         joint_names = list(joint_constraints.keys())
@@ -150,29 +175,47 @@ def _apply_joint_constraints(
                 box_joint_constraints.high[j].item()
             )
 
-    def _process_joint_positions(
+    def _process_joint_actions(
         self,
-        joint_positions: Tensor,
+        joint_actions: Tensor,
         box_joint_constraints: Box,
         noise_function: Optional[NoiseFunction] = None,
     ) -> Tensor:
-        if noise_function is not None:
-            joint_positions = noise_function(joint_positions)
-
+        """
+        Joint actions are processed by mapping them to the joint constraints (degrees) and applying noise.
+        Args:
+            joint_actions: The joint actions to be processed [-1, 1].
+            box_joint_constraints: The joint constraints, as a gymnasium.Box object.
+            noise_function: The noise function to be applied to the computed joint positions.
+        Returns:
+            The processed joint positions (in radians).
+        """
         low_constraints_t = torch.from_numpy(box_joint_constraints.low).to(
             device=self.universe.physics_device
         )
         high_constraints_t = torch.from_numpy(box_joint_constraints.high).to(
             device=self.universe.physics_device
        )
 
-        for i, _ in enumerate(joint_positions):
+        joint_positions = torch.zeros_like(joint_actions)
+
+        for i, _ in enumerate(joint_actions):
+            joint_positions[i] = torch.lerp(
+                low_constraints_t,
+                high_constraints_t,
+                (joint_actions[i] + 1) / 2,
+            )
+
             joint_positions[i] = torch.clamp(
                 joint_positions[i],
                 min=low_constraints_t,
                 max=high_constraints_t,
             )
 
+            if noise_function is not None:
+                joint_positions[i] = noise_function(joint_positions[i])
+
             joint_positions[i] = torch.deg2rad(joint_positions[i])
 
         return joint_positions
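
The loop lerps each environment's action vector against the full low/high constraint tensors one row at a time. A vectorized equivalent, as a sketch only (hypothetical function name, noise omitted; assumes batch-first tensors and a PyTorch version where torch.lerp and torch.clamp broadcast tensor arguments):

    import torch
    from torch import Tensor

    def process_joint_actions_batched(
        joint_actions: Tensor,  # (num_envs, num_joints), values in [-1, 1]
        low_deg: Tensor,        # (num_joints,) lower joint limits in degrees
        high_deg: Tensor,       # (num_joints,) upper joint limits in degrees
    ) -> Tensor:
        # Map [-1, 1] actions to [0, 1] interpolation weights.
        weights = (joint_actions + 1.0) / 2.0
        # Interpolate between the joint limits; broadcasts over the batch dim.
        positions_deg = torch.lerp(low_deg, high_deg, weights)
        # Clamp guards against actions slightly outside [-1, 1].
        positions_deg = torch.clamp(positions_deg, min=low_deg, max=high_deg)
        return torch.deg2rad(positions_deg)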
29 changes: 12 additions & 17 deletions core/domain_randomizer/newton_base_domain_randomizer.py
@@ -49,46 +49,41 @@ def on_reset(self, indices: Indices = None) -> None:
         indices_n = indices
 
         if indices is None:
-            indices = torch.arange(self._agent.num_agents)
+            indices_t = torch.arange(self._agent.num_agents)
         else:
-            indices = torch.from_numpy(indices).to(device=self._universe.physics_device)
+            indices_t = torch.from_numpy(indices).to(
+                device=self._universe.physics_device
+            )
 
-        num_to_reset = indices.shape[0]
+        num_to_reset = indices_t.shape[0]
 
         # TODO: decide where the reset positions and rotations should come from
         self._newton_art_view.set_world_poses(
             positions=self.initial_positions[indices],
             orientations=self.initial_orientations[indices],
-            indices=indices,
+            indices=indices_t,
         )
 
         # using set_velocities instead of individual methods (lin & ang),
         # because it's the only method supported in the GPU pipeline (default pipeline)
         self._newton_art_view.set_velocities(
             torch.zeros((num_to_reset, 6), dtype=torch.float32),
-            indices,
+            indices_t,
         )
 
         self._newton_art_view.set_joint_efforts(
             torch.zeros((num_to_reset, 12), dtype=torch.float32),
-            indices,
+            indices_t,
         )
 
         self._newton_art_view.set_joint_velocities(
             torch.zeros((num_to_reset, 12), dtype=torch.float32),
-            indices,
+            indices_t,
        )
 
-        import numpy as np
-
-        joint_positions = torch.from_numpy(
-            np.array(
-                [
-                    self._agent.joints_controller.box_joint_constraints.sample()
-                    for _ in range(num_to_reset)
-                ]
-            )
-        )
+        joint_positions = (
+            torch.rand((num_to_reset, 12), dtype=torch.float32) * 2.0 - 1.0
+        )  # [-1, 1]
 
         self._agent.joints_controller.reset(
             joint_positions,
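
Rather than drawing per-environment samples from the gymnasium Box via numpy on the CPU, the randomizer now draws uniform actions in [-1, 1] and lets the controller's lerp map them onto the per-joint limits, which also implies joints_controller.reset now expects normalized actions. For a uniform Box the two are equivalent in distribution; a minimal check, with shapes taken from the diff:

    import torch

    num_to_reset, num_joints = 4, 12

    # Uniform in [-1, 1]^12; after the controller's lerp this reproduces a
    # uniform sample within each joint's [low, high] range, matching Box.sample().
    joint_actions = torch.rand((num_to_reset, num_joints), dtype=torch.float32) * 2.0 - 1.0
    assert joint_actions.min() >= -1.0 and joint_actions.max() <= 1.0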
9 changes: 2 additions & 7 deletions core/tasks/newton_base_task.py
@@ -56,11 +56,6 @@ def _on_step(self) -> bool:
             f"{self.logger.dir}/{self.save_path}_rew_{self.best_mean_reward:.2f}"
         )
 
-        self.logger.record(
-            "rewards/best_cumulative",
-            self.cumulative_reward.max().item(),
-        )
-
         # TODO: better metrics about the agent's state & the animation engine
         agent_observations = task.agent.get_observations()
         self.logger.record(
@@ -124,8 +119,8 @@ def __init__(
 
         self.animation_engine: AnimationEngine = animation_engine
         self.last_actions_buf: Actions = np.zeros(
-            (self.num_envs, self.num_actions), dtype=np.float32
-        )
+            (2, self.num_envs, self.num_actions), dtype=np.float32
+        )  # 2 sets of past actions, 0: t - 1, 1: t - 2
 
     @abstractmethod
     def construct(self, universe: Universe) -> None:
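
The action history buffer gains a leading dimension of 2: per the inline comment, index 0 holds the actions from t - 1 and index 1 those from t - 2. How the task rolls the buffer each step is not shown in this diff; a plausible sketch, with push_actions as a hypothetical helper:

    import numpy as np

    num_envs, num_actions = 8, 12
    last_actions_buf = np.zeros((2, num_envs, num_actions), dtype=np.float32)

    def push_actions(buf: np.ndarray, actions: np.ndarray) -> None:
        # Shift history one step back: t - 1 becomes t - 2, newest becomes t - 1.
        buf[1] = buf[0]
        buf[0] = actions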