Commit

update
markub3327 committed Dec 22, 2023
1 parent 86f1d5f commit a6709da
Showing 2 changed files with 16 additions and 9 deletions.
10 changes: 5 additions & 5 deletions config/dqn.yaml
@@ -8,17 +8,17 @@ Server:
 # Agent process
 Agent:
   temp_init: 0.5
-  temp_min: 0.05
-  temp_decay: 0.99999
+  temp_min: 0.01
+  temp_decay: 0.999999
   warmup_steps: 1000
 
 # Learner process
 Learner:
   train_steps: 1000000
   batch_size: 256
-  warmup_steps: 500
+  warmup_steps: 1000 # for learning rate scheduler
   gamma: 0.99
-  tau: 0.01
+  tau: 0.005
 
 # Model definition
 Model:

@@ -31,7 +31,7 @@ Model:
   learning_rate: !!float 3e-4
   global_clipnorm: 1.0
   weight_decay: !!float 1e-4
-  frame_stack: 16
+  frame_stack: 16 # 12
 
 # Paths
 save_path: "./save/model"
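
For context on the values above: temp_min / temp_decay typically drive an exploration-temperature schedule in the Agent process, and tau is the Polyak coefficient for soft target-network updates in the Learner. A minimal sketch of how such values are commonly applied follows; the actual update rules live elsewhere in rl_toolkit and are not shown in this diff, so treat the functions below as assumptions:

# A minimal sketch (assumption, not code from rl_toolkit) of how the values
# above are typically used: multiplicative temperature decay with a floor,
# and Polyak soft updates of the target network controlled by tau.
temp_init, temp_min, temp_decay = 0.5, 0.01, 0.999999
tau = 0.005

def decay_temperature(temp: float) -> float:
    # Per agent step: shrink the exploration temperature, never below temp_min.
    return max(temp_min, temp * temp_decay)

def soft_update(target_weights, online_weights):
    # Per learner step: move each target weight a small fraction (tau)
    # toward the corresponding online weight.
    return [tau * o + (1.0 - tau) * t for t, o in zip(target_weights, online_weights)]

# Usage example: after 1,000,000 decay steps the temperature is still far above temp_min.
temp = temp_init
for _ in range(1_000_000):
    temp = decay_temperature(temp)
print(temp)  # ~0.5 * 0.999999**1e6, roughly 0.18

With temp_decay raised from 0.99999 to 0.999999 the temperature anneals roughly ten times more slowly per step, and halving tau from 0.01 to 0.005 makes the target network track the online network half as fast; the new comment on warmup_steps only notes that the Learner value feeds a learning-rate scheduler.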
15 changes: 11 additions & 4 deletions rl_toolkit/networks/models/dueling.py
Expand Up @@ -8,6 +8,9 @@
Layer,
LayerNormalization,
MultiHeadAttention,
GlobalAveragePooling1D,
GlobalMaxPooling1D,
Lambda,
)


Expand Down Expand Up @@ -139,8 +142,12 @@ def __init__(
for _ in range(num_layers)
]

# Reduce
# self.flatten = Lambda(lambda x: x[:, -1])
# self.flatten = GlobalMaxPooling1D()
self.flatten = GlobalAveragePooling1D()

# Output
self.norm = LayerNormalization(epsilon=1e-6)
self.V = Dense(
1,
activation=None,
Expand All @@ -158,10 +165,10 @@ def call(self, inputs, training=None):
for layer in self.e_layers:
x = layer(x, training=training)

x = self.norm(x, training=training)

# select last timestep for prediction a_t
x = x[:, -1]
# Reduce block
x = self.flatten(x, training=training)
# x = self.drop_out(x, training=training)

# compute value & advantage
V = self.V(x, training=training)
Expand Down
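
The functional change in call() is the reduction over the time axis: instead of keeping only the last timestep (x[:, -1]), the encoder output is now averaged over all timesteps with GlobalAveragePooling1D, the commented-out Lambda and GlobalMaxPooling1D lines keep the alternatives on hand, and the LayerNormalization applied before the heads is removed. Below is a small standalone check of how the three reductions behave; the layer names are standard TensorFlow/Keras, while the (batch, frame_stack, features) input shape is an assumption based on the config above:

import tensorflow as tf
from tensorflow.keras.layers import (
    GlobalAveragePooling1D,
    GlobalMaxPooling1D,
    Lambda,
)

# Dummy encoder output: (batch, timesteps, features); frame_stack = 16 as in dqn.yaml.
x = tf.random.normal((2, 16, 8))

last_step = Lambda(lambda t: t[:, -1])(x)  # old behaviour: keep only the last timestep
max_pool = GlobalMaxPooling1D()(x)         # elementwise max over the time axis
avg_pool = GlobalAveragePooling1D()(x)     # mean over the time axis (the new default)

# All three collapse the time dimension: (2, 16, 8) -> (2, 8).
print(last_step.shape, max_pool.shape, avg_pool.shape)

# Without a mask, GlobalAveragePooling1D is just a mean over axis 1.
tf.debugging.assert_near(avg_pool, tf.reduce_mean(x, axis=1))

Mean pooling lets every frame in the stack contribute to the value and advantage heads, whereas the previous slice depended entirely on what the encoder wrote into the final timestep.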
