# 4m-l_mod7_500b--spec_text2im_100b.yaml
# Config for FSDP training.
# Adapts the 4M-L model to text2im; the input mix is biased towards captions.
run_name: 'auto'
# Input checkpoint: placeholder path to a trained 4M-L checkpoint.
finetune: '/path/to/trained/4m-l/checkpoint'  # Change me
# Input & Output token settings.
# NOTE(review): the min_*/num_* pairs presumably bound the per-sample token
# budget -- confirm against the training script. The target pair is pinned to
# the same value (256), while the input pair spans 64 to 256.
min_input_tokens: 64
num_input_tokens: 256
min_target_tokens: 256
num_target_tokens: 256
loss_type: 'mod'
# Architecture: model identifier, image patch/input sizes, dtype and the text
# tokenizer file.
model: 'fm_large_24e_24d_swiglu_nobias'
patch_size: 16
input_size: 224
dtype: 'bfloat16'
tokenizer_path: 'fourm/utils/tokenizer/trained/text_tokenizer_4m_wordpiece_30k.json'
# Train
# The schedule is expressed in tokens (total_tokens / warmup_tokens); the
# epoch-based counterparts are set to -1.
# NOTE(review): -1 presumably disables the epoch-based settings -- confirm
# against the training script.
epochs: -1
total_tokens: 100  # in billions
opt: adamw
# Base learning rate (1e-4); lr = base_lr * batch_size / 256.
blr: 0.0001
min_blr: 0.0
warmup_epochs: -1
warmup_tokens: 10  # in billions
# 64 x 128 = 8192.
# NOTE(review): presumably a per-device batch size across 128 devices, giving
# a total batch of 8192 -- confirm.
batch_size: 64
# With FSDP, set clip grad to a high value to obtain grad norm logs without
# impacting training dynamics.
clip_grad: 10000.0
skip_nan_grad: true
# Data
data_config: 'cfgs/default/4m/data/cc12m/specialized/mix_caption_bias_a0.2_a1.0.yaml'
s3_data_endpoint: '/path/to/endpoint'  # Change me
eval_freq: 1
fixed_eval: true
# Number of samples per "epoch" (10 million). Written without digit separators:
# `10_000_000` is an integer only under YAML 1.1 resolvers; the YAML 1.2 core
# schema would read it as a string.
epoch_size: 10000000
# Saving: checkpoint frequency and output location.
save_ckpt_freq: 1  # Change if needed
output_dir: 'output/auto'  # Change if needed
# Wandb (Weights & Biases) logging
log_wandb: false  # Set to true to log to Weights & Biases
wandb_project: '4m-train'
wandb_entity: null  # Change if needed
wandb_run_name: 'auto'  # Change if needed