Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
markub3327 committed Nov 18, 2023
1 parent 60066d7 commit ddb8492
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions rl_toolkit/networks/models/dueling.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,11 @@ def _compute_n_step_rewards(

# Create a discount factor tensor
discounts = discount_factor ** tf.range(n + 1, dtype=rewards.dtype)
print(f"discounts: {discounts}")
# print(f"discounts: {discounts}")

# Pad the rewards tensor to ensure proper handling of the last elements in each sequence
padded_rewards = tf.pad(rewards, [[0, 0], [0, n - 1]])
print(f"padded_rewards: {padded_rewards}")
# print(f"padded_rewards: {padded_rewards}")

# Create a sliding window view of the padded_rewards
windows = tf.TensorArray(
Expand All @@ -217,20 +217,20 @@ def _compute_n_step_rewards(

for i in tf.range(n):
value = tf.slice(padded_rewards, [0, i], [-1, tf.shape(rewards)[1]])
print(f"value: {value}")
# print(f"value: {value}")
windows = windows.write(i, value)

# Stack the windows into a single tensor
rewards_windows = tf.transpose(windows.stack(), [1, 0, 2])
print(f"rewards_windows: {rewards_windows}")
# print(f"rewards_windows: {rewards_windows}")

# Multiply each window by the corresponding discount factor
discounted_windows = rewards_windows * discounts[:-1]
print(f"discounted_windows: {discounted_windows}")
# print(f"discounted_windows: {discounted_windows}")

# Sum along the time axis to get the n-step rewards
n_step_rewards = tf.reduce_sum(discounted_windows, axis=-1)
print(f"n_step_rewards: {n_step_rewards}")
# print(f"n_step_rewards: {n_step_rewards}")

# Add the next state value with discount
n_step_rewards += (
Expand Down

0 comments on commit ddb8492

Please sign in to comment.