Step 1: collect data
Step 2: learn models on that data
Step 3: do RL, perhaps inside of a learned model
# single env
python -m research.main --mode=collect --num_envs=10 --train_barrels=100 --test_barrels=10 --env=Urchin --logdir=logs/datadump/Urchin
# all envs
python3 scripts/kicker.py collect
# single env
python -m research.main --mode=train --model=MultiStepArbiter --lr=0.0005 --bs=32 --log_n=1000 --datadir=logs/datadump/Urchin --logdir=logs/Urchin --total_itr=30000 --nfilter=64 --hidden_size=256 --window=5
# all envs
python3 scripts/kicker.py arbiter --model=MultiStepArbiter
# Should take about 70s per 1000 training iterations on my 1080Ti.
# Run for maybe 10 mins or so in total, until the error in the pstate stuff goes away (around -6 log MSE).
# single env
python -m research.main --mode=train --model=BVAE --lr=0.0005 --bs=32 --log_n=1000 --datadir=logs/Urchin --logdir=logs/encoder/BVAE/Urchin --total_itr=30000 --hidden_size=64 --vqK=64 --vqD=16 --nfilter=16 --window=5
# all envs
python3 scripts/kicker.py train --model=BVAE
# single env
python -m research.main --mode=train --model=RNLDA --lr=0.0005 --bs=32 --log_n=1000 --datadir=logs/Urchin --logdir=logs/encoder/RNLDA/Urchin --total_itr=30000 --hidden_size=64 --vqK=64 --vqD=8 --nfilter=16 --window=5
# all envs
python3 scripts/kicker.py train --model=RNLDA
# single env
python -m research.main --mode=train --model=RSSM --lr=0.0005 --bs=32 --log_n=1000 --datadir=logs/Urchin --logdir=logs/video/RSSM/Urchin --total_itr=100000 --arbiterdir=logs/arbiter/Urchin --nfilter=64 --hidden_size=300 --free_nats=0.01
# all envs
python3 scripts/kicker.py train --model=RSSM
# single env
python -m research.main --mode=train --model=FIT --lr=0.0005 --bs=32 --log_n=1000 --datadir=logs/Urchin --logdir=logs/video/FIT/Urchin --total_itr=100000 --arbiterdir=logs/arbiter/Urchin --n_layer=2 --n_head=4 --n_embed=256 --hidden_size=256
# all envs
python3 scripts/kicker.py train --model=FIT
# single env
python -m research.main --mode=train --model=FBT --lr=0.0005 --bs=32 --log_n=1000 --datadir=logs/Urchin --logdir=logs/video/FBT/Urchin --total_itr=100000 --arbiterdir=logs/arbiter/Urchin --n_layer=4 --n_head=8 --n_embed=512 --hidden_size=512 --weightdir=logs/encoder/BVAE/Urchin
# all envs
python3 scripts/kicker.py train --model=FBT
# single env
python -m research.main --mode=train --model=FRNLD --lr=0.0005 --bs=32 --log_n=1000 --datadir=logs/Urchin --logdir=logs/video/FRNLD/Urchin --total_itr=100000 --arbiterdir=logs/arbiter/Urchin --n_layer=4 --n_head=8 --n_embed=512 --hidden_size=512 --weightdir=logs/encoder/RNLDA/Urchin
# all envs
python3 scripts/kicker.py train --model=FRNLD
python -m research.main --mode=eval --env=UrchinCube --datadir=logs/datadump/UrchinCube/ --arbiterdir=logs/arbiter/UrchinCube --model=FBT --prompt_n=3 --weightdir=logs/april28/video/FBT/UrchinCube/ --logdir=logs/evals/FBT_UrchinCube --bs=500
python -m research.main --mode=eval --env=Urchin --datadir=logs/datadump/Urchin/ --arbiterdir=logs/arbiter/Urchin --model=RSSM --prompt_n=3 --weightdir=logs/video/RSSM/Urchin/ --logdir=logs/april22/eval/RSSM_Urchin --bs=1000
python rl/main.py ppo --env=Luxo --goals=1 --num_envs=12 --bs=4096 --hidden_size=256 --logdir=logs/rl/Luxo_real/ --total_steps=500000 --goal_thresh=0.05
python rl/main.py ppo --env=Urchin --goals=1 --num_envs=12 --bs=4096 --hidden_size=256 --logdir=logs/rl/Urchin_real/ --total_steps=1000000 --goal_thresh=0.05
python rl/main.py ppo --env=Luxo --model=FBT --weightdir=logs/video/FBT/Luxo/ --window=50 --goals=1 --num_envs=12 --bs=4096 --hidden_size=256 --lenv=1 --logdir=logs/rl/Luxo_lenv --lenv_temp=1.0 --total_steps=500000 --goal_thresh=0.05
python rl/main.py ppo --env=Urchin --model=FBT --weightdir=logs/video/FBT/Urchin/ --window=50 --goals=1 --num_envs=12 --bs=4096 --hidden_size=256 --lenv=1 --logdir=logs/rl/Urchin_lenv --lenv_temp=1.0 --total_steps=1000000 --goal_thresh=0.05
python rl/main.py ppo --env=UrchinCube --goals=1 --num_envs=24 --bs=4096 --hidden_size=256 --goal_thresh=0.05 --pi_lr=1e-4 --vf_lr=1e-4 --state_key=full_state --diff_delt=1