Skip to content

Commit

Permalink
update: refactor configs and codes
Browse files Browse the repository at this point in the history
  • Loading branch information
typoverflow committed Mar 8, 2024
1 parent 05dd904 commit 1febe6f
Show file tree
Hide file tree
Showing 30 changed files with 418 additions and 621 deletions.
45 changes: 19 additions & 26 deletions scripts/configs/bt_awac/metaworld/state_dense.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
algorithm: BTAWAC
algorithm_kwargs:
algorithm:
class: BTAWAC
beta: 0.3333
max_exp_clip: 100.0
reward_steps: 50000
Expand Down Expand Up @@ -28,36 +28,31 @@ env_wrapper_kwargs:
optim:
default:
class: Adam
kwargs:
lr: 0.0003
lr: 0.0003

network:
reward:
class: EnsembleMLP
kwargs:
ensemble_size: 1
hidden_dims: [512, 512]
ensemble_size: 1
hidden_dims: [512, 512]
actor:
class: SquashedDeterministicActor
kwargs:
dropout: 0.25
hidden_dims: [512, 512]
dropout: 0.25
hidden_dims: [512, 512]
critic:
class: Critic
kwargs:
ensemble_size: 2
hidden_dims: [512, 512]
ensemble_size: 2
hidden_dims: [512, 512]

dataset:
class: MetaworldComparisonOfflineDataset
kwargs:
env: mw_drawer-open-v2
label_key: rl_sum
segment_length: null
batch_size: 96
capacity: 2500
mode: dense # Choices: {sparse, dense}
discount: 0.99
env: mw_drawer-open-v2
label_key: rl_sum
segment_length: null
batch_size: 96
capacity: 2500
mode: dense # Choices: {sparse, dense}
discount: 0.99
dataloader:
num_workers: 0 # use the main thread to sample data
batch_size: null # do not merge the data along batch axis
Expand All @@ -71,14 +66,12 @@ trainer:

eval:
function: eval_offline
kwargs:
num_ep: 10
deterministic: true
num_ep: 10
deterministic: true

schedulers:
actor:
class: CosineAnnealingLR
kwargs:
T_max: 500000
T_max: 500000

processor: null
45 changes: 19 additions & 26 deletions scripts/configs/bt_awac/metaworld/state_sparse.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
algorithm: BTAWAC
algorithm_kwargs:
algorithm:
class: BTAWAC
beta: 0.3333
max_exp_clip: 100.0
reward_steps: 50000
Expand Down Expand Up @@ -28,36 +28,31 @@ env_wrapper_kwargs:
optim:
default:
class: Adam
kwargs:
lr: 0.0003
lr: 0.0003

network:
reward:
class: EnsembleMLP
kwargs:
ensemble_size: 1
hidden_dims: [512, 512]
ensemble_size: 1
hidden_dims: [512, 512]
actor:
class: SquashedDeterministicActor
kwargs:
dropout: 0.25
hidden_dims: [512, 512]
dropout: 0.25
hidden_dims: [512, 512]
critic:
class: Critic
kwargs:
ensemble_size: 2
hidden_dims: [512, 512]
ensemble_size: 2
hidden_dims: [512, 512]

dataset:
class: MetaworldComparisonOfflineDataset
kwargs:
env: mw_drawer-open-v2
label_key: rl_sum
segment_length: null
batch_size: 96
capacity: 10000
mode: sparse # Choices: {sparse, dense}
discount: 0.99
env: mw_drawer-open-v2
label_key: rl_sum
segment_length: null
batch_size: 96
capacity: 10000
mode: sparse # Choices: {sparse, dense}
discount: 0.99
dataloader:
num_workers: 0 # use the main thread to sample data
batch_size: null # do not merge the data along batch axis
Expand All @@ -71,14 +66,12 @@ trainer:

eval:
function: eval_offline
kwargs:
num_ep: 10
deterministic: true
num_ep: 10
deterministic: true

schedulers:
actor:
class: CosineAnnealingLR
kwargs:
T_max: 500000
T_max: 500000

processor: null
57 changes: 24 additions & 33 deletions scripts/configs/bt_iql/gym/default.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
algorithm: BTIQL
algorithm_kwargs:
algorithm:
class: BTIQL
beta: 0.3333
expectile: 0.7
max_exp_clip: 100.0
Expand All @@ -24,46 +24,39 @@ env_wrapper_kwargs:
optim:
default:
class: Adam
kwargs:
lr: 0.0003
lr: 0.0003

network:
reward:
class: EnsembleMLP
kwargs:
ensemble_size: 1
hidden_dims: [256, 256]
ensemble_size: 1
hidden_dims: [256, 256]
actor:
class: SquashedGaussianActor
kwargs:
hidden_dims: [256, 256]
reparameterize: false
conditioned_logstd: false
logstd_min: -5
logstd_max: 2
hidden_dims: [256, 256]
reparameterize: false
conditioned_logstd: false
logstd_min: -5
logstd_max: 2
critic:
class: Critic
kwargs:
ensemble_size: 2
hidden_dims: [256, 256]
ensemble_size: 2
hidden_dims: [256, 256]
value:
class: Critic
kwargs:
ensemble_size: 1
hidden_dims: [256, 256]
ensemble_size: 1
hidden_dims: [256, 256]

dataset:
- class: IPLComparisonOfflineDataset
kwargs:
env: hopper-medium-replay-v2
batch_size: 8
segment_length: null
env: hopper-medium-replay-v2
batch_size: 8
segment_length: null
- class: D4RLOfflineDataset
kwargs:
env: hopper-medium-replay-v2
batch_size: 256
mode: transition
reward_normalize: true
env: hopper-medium-replay-v2
batch_size: 256
mode: transition
reward_normalize: true

dataloader:
num_workers: 0 # use the main thread to sample data
Expand All @@ -78,14 +71,12 @@ trainer:

eval:
function: eval_offline
kwargs:
num_ep: 10
deterministic: true
num_ep: 10
deterministic: true

schedulers:
actor:
class: CosineAnnealingLR
kwargs:
T_max: 1000000
T_max: 1000000

processor: null
50 changes: 21 additions & 29 deletions scripts/configs/bt_iql/metaworld/state_dense.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
algorithm: BTIQL
algorithm_kwargs:
algorithm:
class: BTIQL
beta: 0.3333
expectile: 0.7
max_exp_clip: 100.0
Expand Down Expand Up @@ -29,41 +29,35 @@ env_wrapper_kwargs:
optim:
default:
class: Adam
kwargs:
lr: 0.0003
lr: 0.0003

network:
reward:
class: EnsembleMLP
kwargs:
ensemble_size: 1
hidden_dims: [512, 512]
ensemble_size: 1
hidden_dims: [512, 512]
actor:
class: SquashedDeterministicActor
kwargs:
dropout: 0.25
hidden_dims: [512, 512]
dropout: 0.25
hidden_dims: [512, 512]
critic:
class: Critic
kwargs:
ensemble_size: 2
hidden_dims: [512, 512]
ensemble_size: 2
hidden_dims: [512, 512]
value:
class: Critic
kwargs:
ensemble_size: 1
hidden_dims: [512, 512]
ensemble_size: 1
hidden_dims: [512, 512]

dataset:
class: MetaworldComparisonOfflineDataset
kwargs:
env: mw_drawer-open-v2
label_key: rl_sum
segment_length: null
batch_size: 96
capacity: 2500
mode: dense # Choices: {sparse, dense}
discount: 0.99
env: mw_drawer-open-v2
label_key: rl_sum
segment_length: null
batch_size: 96
capacity: 2500
mode: dense # Choices: {sparse, dense}
discount: 0.99
dataloader:
num_workers: 0 # use the main thread to sample data
batch_size: null # do not merge the data along batch axis
Expand All @@ -77,14 +71,12 @@ trainer:

eval:
function: eval_offline
kwargs:
num_ep: 10
deterministic: true
num_ep: 10
deterministic: true

schedulers:
actor:
class: CosineAnnealingLR
kwargs:
T_max: 500000
T_max: 500000

processor: null
Loading

0 comments on commit 1febe6f

Please sign in to comment.