Update: comment out debug print statements in _compute_n_step_rewards

markub3327 · Nov 18, 2023 · ddb8492 · ddb8492
1 parent 60066d7
commit ddb8492
Showing 1 changed file with 6 additions and 6 deletions.
diff --git a/rl_toolkit/networks/models/dueling.py b/rl_toolkit/networks/models/dueling.py
@@ -202,11 +202,11 @@ def _compute_n_step_rewards(
 
         # Create a discount factor tensor
         discounts = discount_factor ** tf.range(n + 1, dtype=rewards.dtype)
-        print(f"discounts: {discounts}")
+        # print(f"discounts: {discounts}")
 
         # Pad the rewards tensor to ensure proper handling of the last elements in each sequence
         padded_rewards = tf.pad(rewards, [[0, 0], [0, n - 1]])
-        print(f"padded_rewards: {padded_rewards}")
+        # print(f"padded_rewards: {padded_rewards}")
 
         # Create a sliding window view of the padded_rewards
         windows = tf.TensorArray(
@@ -217,20 +217,20 @@ def _compute_n_step_rewards(
 
         for i in tf.range(n):
             value = tf.slice(padded_rewards, [0, i], [-1, tf.shape(rewards)[1]])
-            print(f"value: {value}")
+            # print(f"value: {value}")
             windows = windows.write(i, value)
 
         # Stack the windows into a single tensor
         rewards_windows = tf.transpose(windows.stack(), [1, 0, 2])
-        print(f"rewards_windows: {rewards_windows}")
+        # print(f"rewards_windows: {rewards_windows}")
 
         # Multiply each window by the corresponding discount factor
         discounted_windows = rewards_windows * discounts[:-1]
-        print(f"discounted_windows: {discounted_windows}")
+        # print(f"discounted_windows: {discounted_windows}")
 
         # Sum along the time axis to get the n-step rewards
         n_step_rewards = tf.reduce_sum(discounted_windows, axis=-1)
-        print(f"n_step_rewards: {n_step_rewards}")
+        # print(f"n_step_rewards: {n_step_rewards}")
 
         # Add the next state value with discount
         n_step_rewards += (