fix: animation frames are correctly sampled

Previously, animation frames were erroneously sampled based on episode progress. Time within the clip needs to advance at the correct pace, so frames are now sampled by elapsed time instead.
TheNewtonCapstone · Dec 9, 2024 · 711d9e0 · 711d9e0
1 parent 6a5d645
commit 711d9e0
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 17 deletions.
diff --git a/configs/tasks/newton_idle_task.yaml b/configs/tasks/newton_idle_task.yaml
@@ -4,9 +4,9 @@ policy: "MlpPolicy"
 seed: 14321
 
 n_envs: 96
-timesteps_per_env: 500_000
+timesteps_per_env: 100_000
 base_lr: 3e-4
-episode_length: 5000
+episode_length: 500
 
 newton:
   inverse_control_frequency: 4  # we're aiming for a total compute budget of 2 times the physics timestep (0.005s * 4 = 0.02s)
@@ -33,8 +33,8 @@ ppo:
   ent_coef: 0.0
   vf_coef: 0.5
 
-  max_grad_norm: 0.5
+  max_grad_norm: 1.0
   use_sde: False
   sde_sample_freq: -1
 
-  target_kl: None
+  target_kl: 0.01
diff --git a/core/animation/animation_engine.py b/core/animation/animation_engine.py
@@ -30,19 +30,22 @@ class AnimationClip:
     framerate: int
     start_frame: int
     duration: int
+    duration_in_seconds: float
     keyframes: List[Keyframe]
 
 
 class AnimationEngine:
     def __init__(
         self,
         clips: Dict[str, Settings],
+        step_dt: float,
     ):
         self.current_clip_name: Optional[str] = None
 
         self.clip_configs: Dict[str, Settings] = clips
         self.clips: Dict[str, AnimationClip] = {}
 
+        self._step_dt: float = step_dt
         self._is_constructed: bool = False
 
     @property
@@ -88,11 +91,14 @@ def construct(self, current_clip: str) -> None:
 
                 keyframes.append(keyframe)
 
+            duration_in_seconds = clip_settings["duration"] / clip_settings["framerate"]
+
             self.clips[clip_name] = AnimationClip(
                 name=clip_name,
                 framerate=clip_settings["framerate"],
                 start_frame=clip_settings["start_frame"],
                 duration=clip_settings["duration"],
+                duration_in_seconds=duration_in_seconds,
                 keyframes=keyframes,
             )
 
@@ -107,7 +113,7 @@ def get_current_clip_datas_ordered(
         """
         Get the armature data for the current clip at the given progress. Optionally interpolates between keyframes.
         Args:
-            progress: The progress of the current episode, in the range [0, 1] for every vectorized agent.
+            progress: The progress of the current episode, in the range [0, max_episode_length] for every vectorized agent.
             joints_order: List of joint names in the order they should be returned.
             interpolate: Whether to interpolate between keyframes (continuous result, assuming animation is continuous).
 
@@ -151,7 +157,7 @@ def get_current_clip_datas(
         """
         Get the armature data for the current clip at the given progress. Optionally interpolates between keyframes.
         Args:
-            progress: The progress of the current episode, in the range [0, 1] for every vectorized agent.
+            progress: The progress of the current episode, in number of steps done, for every vectorized agent.
             interpolate: Whether to interpolate between keyframes (continuous result, assuming animation is continuous).
 
         Returns:
@@ -160,20 +166,24 @@ def get_current_clip_datas(
         return self.get_clip_datas(self.current_clip_name, progress, interpolate)
 
     def get_clip_datas(
-        self, clip_name: str, progress: np.ndarray, interpolate: bool = True
+        self,
+        clip_name: str,
+        progress: np.ndarray,
+        interpolate: bool = True,
     ) -> List[ArmatureData]:
         """
         Get the armature data for the given clip at the given progress. Optionally interpolates between keyframes.
         Args:
             clip_name: The name of the clip to get data from.
-            progress: The progress of the current episode, in the range [0, 1] for every vectorized agent.
+            progress: The progress of the current episode, in number of steps done, for every vectorized agent.
             interpolate: Whether to interpolate between keyframes (continuous result, assuming animation is continuous).
 
         Returns:
             A list of armature data for each agent.
         """
         clip: AnimationClip = self.clips[clip_name]
-        frames = progress * clip.duration
+        progress_in_seconds = progress * self._step_dt
+        frames = progress_in_seconds * clip.framerate
 
         data = []
 
@@ -183,7 +193,9 @@ def get_clip_datas(
         return data
 
     def get_current_clip_data(
-        self, frame: float, interpolate: bool = True
+        self,
+        frame: float,
+        interpolate: bool = True,
     ) -> ArmatureData:
         """
         Get the armature data for the current clip at the given frame. Optionally interpolates between keyframes.
@@ -197,7 +209,10 @@ def get_current_clip_data(
         return self.get_clip_data(self.current_clip_name, frame, interpolate)
 
     def get_clip_data(
-        self, clip_name: str, frame: float, interpolate: bool = True
+        self,
+        clip_name: str,
+        frame: float,
+        interpolate: bool = True,
     ) -> ArmatureData:
         """
         Get the armature data for the given clip at the given frame. Optionally interpolates between keyframes.

diff --git a/core/tasks/newton_idle_task.py b/core/tasks/newton_idle_task.py
@@ -176,6 +176,8 @@ def reset(self) -> VecEnvObs:
         obs_buf[:, 34:46] = (
             self.agent.joints_controller.art_view.get_joint_velocities().cpu().numpy()
         )
+        obs_buf[:, 46] = np.cos(2 * np.pi * 0)
+        obs_buf[:, 47] = np.sin(2 * np.pi * 0)
 
         self.env.reset()
 
@@ -191,8 +193,6 @@ def _get_observations(self) -> VecEnvObs:
     def _calculate_rewards(self) -> None:
         # TODO: rework rewards for Newton*Tasks
 
-        phase_signal = self.progress_buf / self.max_episode_length
-
         obs = self._get_observations()
         positions = obs["positions"]
         angular_velocities = obs["angular_velocities"]
@@ -228,7 +228,7 @@ def _calculate_rewards(self) -> None:
             .numpy()
         )  # in Nm
         animation_joint_data = self.animation_engine.get_current_clip_datas_ordered(
-            phase_signal, joints_order
+            self.progress_buf, joints_order
         )
         animation_joint_angles = animation_joint_data[:, :, 7] / self.reward_space.high
         # animation joint velocities

diff --git a/newton.py b/newton.py
@@ -233,8 +233,17 @@ def main():
         joints_controller=joints_controller,
     )
 
+    step_dt = (
+        rl_config["newton"]["inverse_control_frequency"] * world_config["physics_dt"]
+    )
+
+    # TODO: Add a separate animation only mode in simulation
+    #   This will allow us to test animations without the need for training/testing
+    #   labels=enhancement
+
     animation_engine = AnimationEngine(
         clips=animation_clips_settings,
+        step_dt=step_dt,
     )
 
     domain_randomizer = NewtonBaseDomainRandomizer(
@@ -243,9 +252,9 @@ def main():
         randomizer_settings=randomization_config,
     )
 
-    # ---------- #
-    # SIMULATION #
-    # ---------- #
+    # ---------------- #
+    #   PHYSICS ONLY   #
+    # ---------------- #
 
     if physics_only:
         env = NewtonMultiTerrainEnv(