diff --git a/examples/audio-classification/README.md b/examples/audio-classification/README.md index 7e91e46eac..1b7161d15e 100644 --- a/examples/audio-classification/README.md +++ b/examples/audio-classification/README.md @@ -84,7 +84,7 @@ python ../gaudi_spawn.py \ --max_length_seconds 8 \ --attention_mask False \ --warmup_ratio 0.1 \ - --num_train_epochs 10 \ + --num_train_epochs 5 \ --per_device_train_batch_size 16 \ --per_device_eval_batch_size 32 \ --seed 0 \ diff --git a/examples/image-classification/README.md b/examples/image-classification/README.md index 0f85fb0b23..0ae5a82834 100644 --- a/examples/image-classification/README.md +++ b/examples/image-classification/README.md @@ -43,7 +43,7 @@ python run_image_classification.py \ --do_eval \ --learning_rate 3e-5 \ --num_train_epochs 5 \ - --per_device_train_batch_size 64 \ + --per_device_train_batch_size 128 \ --per_device_eval_batch_size 64 \ --evaluation_strategy epoch \ --save_strategy epoch \ @@ -195,7 +195,7 @@ python ../gaudi_spawn.py \ --do_eval \ --learning_rate 2e-4 \ --num_train_epochs 5 \ - --per_device_train_batch_size 64 \ + --per_device_train_batch_size 128 \ --per_device_eval_batch_size 64 \ --evaluation_strategy epoch \ --save_strategy epoch \ @@ -235,7 +235,7 @@ python ../gaudi_spawn.py \ --do_eval \ --learning_rate 2e-4 \ --num_train_epochs 5 \ - --per_device_train_batch_size 64 \ + --per_device_train_batch_size 128 \ --per_device_eval_batch_size 64 \ --evaluation_strategy epoch \ --save_strategy epoch \ diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md index 7db24d9aab..2cc8c496b0 100644 --- a/examples/language-modeling/README.md +++ b/examples/language-modeling/README.md @@ -114,7 +114,7 @@ python ../gaudi_spawn.py \ --model_name_or_path EleutherAI/gpt-j-6b \ --dataset_name wikitext \ --dataset_config_name wikitext-2-raw-v1 \ - --per_device_train_batch_size 4 \ + --per_device_train_batch_size 16 \ --per_device_eval_batch_size 4 \ --do_train \ --do_eval \ diff --git a/examples/question-answering/README.md b/examples/question-answering/README.md index d531bd9fcd..fabb165e35 100755 --- a/examples/question-answering/README.md +++ b/examples/question-answering/README.md @@ -50,7 +50,7 @@ PT_HPU_LAZY_MODE=0 python run_qa.py \ --dataset_name squad \ --do_train \ --do_eval \ - --per_device_train_batch_size 24 \ + --per_device_train_batch_size 32 \ --per_device_eval_batch_size 8 \ --learning_rate 3e-5 \ --num_train_epochs 2 \ @@ -78,7 +78,7 @@ PT_HPU_LAZY_MODE=0 python ../gaudi_spawn.py \ --dataset_name squad \ --do_train \ --do_eval \ - --per_device_train_batch_size 24 \ + --per_device_train_batch_size 32 \ --per_device_eval_batch_size 8 \ --learning_rate 3e-5 \ --num_train_epochs 2 \ @@ -106,7 +106,7 @@ python ../gaudi_spawn.py \ --dataset_name squad \ --do_train \ --do_eval \ - --per_device_train_batch_size 24 \ + --per_device_train_batch_size 32 \ --per_device_eval_batch_size 8 \ --learning_rate 3e-5 \ --num_train_epochs 2 \ diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md index 1ac4761203..a53d6123b7 100644 --- a/examples/stable-diffusion/README.md +++ b/examples/stable-diffusion/README.md @@ -31,7 +31,7 @@ python text_to_image_generation.py \ --model_name_or_path runwayml/stable-diffusion-v1-5 \ --prompts "An image of a squirrel in Picasso style" \ --num_images_per_prompt 20 \ - --batch_size 4 \ + --batch_size 7 \ --image_save_dir /tmp/stable_diffusion_images \ --use_habana \ --use_hpu_graphs \ @@ -90,7 +90,7 @@ python text_to_image_generation.py \ --model_name_or_path stabilityai/stable-diffusion-2-1 \ --prompts "An image of a squirrel in Picasso style" \ --num_images_per_prompt 10 \ - --batch_size 2 \ + --batch_size 7 \ --height 768 \ --width 768 \ --image_save_dir /tmp/stable_diffusion_images \ @@ -116,7 +116,7 @@ python text_to_image_generation.py \ --model_name_or_path "Intel/ldm3d-4c" \ --prompts "An image of a squirrel in Picasso style" \ --num_images_per_prompt 10 \ - --batch_size 2 \ + --batch_size 7 \ --height 768 \ --width 768 \ --image_save_dir /tmp/stable_diffusion_images \ @@ -158,7 +158,7 @@ python text_to_image_generation.py \ --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ --prompts "Sailing ship painting by Van Gogh" \ --num_images_per_prompt 20 \ - --batch_size 4 \ + --batch_size 7 \ --image_save_dir /tmp/stable_diffusion_xl_images \ --scheduler euler_discrete \ --use_habana \ @@ -271,7 +271,7 @@ python text_to_image_generation.py \ --prompts "futuristic-looking woman" \ --control_image https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png \ --num_images_per_prompt 20 \ - --batch_size 4 \ + --batch_size 7 \ --image_save_dir /tmp/controlnet_images \ --use_habana \ --use_hpu_graphs \ @@ -288,7 +288,7 @@ python text_to_image_generation.py \ --prompts "futuristic-looking woman" "a rusty robot" \ --control_image https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png \ --num_images_per_prompt 10 \ - --batch_size 4 \ + --batch_size 7 \ --image_save_dir /tmp/controlnet_images \ --use_habana \ --use_hpu_graphs \ @@ -325,7 +325,7 @@ python text_to_image_generation.py \ --control_image https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/pose.png \ --control_preprocessing_type "none" \ --num_images_per_prompt 20 \ - --batch_size 4 \ + --batch_size 7 \ --image_save_dir /tmp/controlnet_images \ --use_habana \ --use_hpu_graphs \ @@ -344,7 +344,7 @@ python text_to_image_generation.py \ --prompts "bird" \ --seed 0 \ --num_images_per_prompt 10 \ - --batch_size 2 \ + --batch_size 7 \ --image_save_dir /tmp/controlnet-2-1_images \ --use_habana \ --use_hpu_graphs \ diff --git a/examples/text-classification/README.md b/examples/text-classification/README.md index f5af6bc7d3..8f8313efdb 100644 --- a/examples/text-classification/README.md +++ b/examples/text-classification/README.md @@ -51,7 +51,7 @@ python run_glue.py \ --task_name mrpc \ --do_train \ --do_eval \ - --per_device_train_batch_size 32 \ + --per_device_train_batch_size 64 \ --learning_rate 3e-5 \ --num_train_epochs 3 \ --max_seq_length 128 \ @@ -78,7 +78,7 @@ python ../gaudi_spawn.py \ --task_name mrpc \ --do_train \ --do_eval \ - --per_device_train_batch_size 32 \ + --per_device_train_batch_size 64 \ --per_device_eval_batch_size 8 \ --learning_rate 3e-5 \ --num_train_epochs 3 \ @@ -106,7 +106,7 @@ python ../gaudi_spawn.py \ --task_name mrpc \ --do_train \ --do_eval \ - --per_device_train_batch_size 32 \ + --per_device_train_batch_size 64 \ --per_device_eval_batch_size 8 \ --learning_rate 3e-5 \ --num_train_epochs 3 \ @@ -156,6 +156,7 @@ python run_glue.py \ --do_eval \ --max_seq_length 128 \ --output_dir ./output/mrpc/ \ + --per_device_eval_batch_size 8 \ --use_habana \ --use_lazy_mode \ --use_hpu_graphs_for_inference \ @@ -178,7 +179,7 @@ python ../gaudi_spawn.py \ --task_name mrpc \ --do_train \ --do_eval \ - --per_device_train_batch_size 32 \ + --per_device_train_batch_size 64 \ --per_device_eval_batch_size 8 \ --learning_rate 3e-5 \ --num_train_epochs 3 \ diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md index e020e72a79..440b18713c 100755 --- a/examples/text-generation/README.md +++ b/examples/text-generation/README.md @@ -142,7 +142,7 @@ python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py \ --bf16 \ --use_hpu_graphs \ --use_kv_cache \ ---batch_size 52 \ +--batch_size 180 \ --attn_softmax_bf16 \ --limit_hpu_graphs \ --reuse_cache \