From a1d350d88aead13a78278805d713efd669582070 Mon Sep 17 00:00:00 2001 From: Adibvafa Fallahpour <90617686+Adibvafa@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:44:50 -0400 Subject: [PATCH 1/3] Create slurm_scripts.md --- slurm_scripts.md | 204 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 slurm_scripts.md diff --git a/slurm_scripts.md b/slurm_scripts.md new file mode 100644 index 0000000..a0477a8 --- /dev/null +++ b/slurm_scripts.md @@ -0,0 +1,204 @@ +# Slurm Job Request Scripts + +## MultiBird - Pretrain +``` +#!/bin/bash +#SBATCH --job-name=multibird_pretrain +#SBATCH --gres=gpu:4 +#SBATCH --qos a100_amritk +#SBATCH -p a100 +#SBATCH -c 24 +#SBATCH --time=23:00:00 +#SBATCH --mem=200G +#SBATCH --output=/h/afallah/odyssey/multibird_a100-%j.out +#SBATCH --error=/h/afallah/odyssey/multibird_a100-%j.err +#SBATCH --no-requeue + +source /h/afallah/light/bin/activate + +cd /h/afallah/odyssey/odyssey + +export CUBLAS_WORKSPACE_CONFIG=:4096:2 +export NCCL_DEBUG=INFO +export PYTHONFAULTHANDLER=1 + +stdbuf -oL -eL srun python3 pretrain.py \ + --model-type cehr_bigbird \ + --exp-name multibird_pretrain \ + --config-dir models/configs \ + --data-dir data/bigbird_data \ + --sequence-file patient_sequences/patient_sequences_2048.parquet \ + --id-file patient_id_dict/dataset_2048_multi.pkl \ + --vocab-dir data/vocab \ + --val-size 0.1 \ + --checkpoint-dir checkpoints/multibird_pretrain +``` + + +## MultiBird - Finetune +``` +#!/bin/bash +#SBATCH --job-name=multibird_finetune +#SBATCH --gres=gpu:4 +#SBATCH --qos a100_amritk +#SBATCH -p a100 +#SBATCH -c 24 +#SBATCH --time=23:59:00 +#SBATCH --mem=200G +#SBATCH --output=/h/afallah/odyssey/multibird_finetune-%j.out +#SBATCH --error=/h/afallah/odyssey/multibird_finetune-%j.err +#SBATCH --no-requeue + +source /h/afallah/light/bin/activate + +cd /h/afallah/odyssey/odyssey + +export CUBLAS_WORKSPACE_CONFIG=:4096:2 +export NCCL_DEBUG=INFO +export PYTHONFAULTHANDLER=1 + +stdbuf -oL -eL srun python3 finetune.py \ + --model-type cehr_bigbird \ + --is-multi-model True \ + --exp-name multibird_finetune \ + --pretrained-path checkpoints/multibird_pretrain/multibird_pretrain/best.ckpt \ + --config-dir odyssey/models/configs \ + --data-dir odyssey/data/bigbird_data \ + --sequence-file patient_sequences_2048_multi.parquet \ + --id-file dataset_2048_multi.pkl \ + --vocab-dir odyssey/data/vocab \ + --val-size 0.15 \ + --valid_scheme few_shot \ + --num_finetune_patients all \ + --problem_type single_label_classification \ + --num_labels 2 \ + --checkpoint-dir checkpoints \ + --test_output_dir test_outputs \ + --tasks "mortality_1month los_1week readmission_1month c0 c1 c2" \ + --balance_guide "mortality_1month=0.5, los_1week=0.5, readmission_1month=0.5, c0=0.5, c1=0.5, c2=0.5" +``` + + +## BigBird - Finetune Mortality +``` +#!/bin/bash +#SBATCH --job-name=bigbird_finetune_mortality +#SBATCH --gres=gpu:2 +#SBATCH --qos a100_amritk +#SBATCH -p a100 +#SBATCH -c 6 +#SBATCH --time=15:00:00 +#SBATCH --mem=32G +#SBATCH --output=/h/afallah/odyssey/bigbird_finetune_mortality-%j.out +#SBATCH --error=/h/afallah/odyssey/bigbird_finetune_mortality-%j.err +#SBATCH --no-requeue + +source /h/afallah/light/bin/activate + +cd /h/afallah/odyssey/odyssey + +export CUBLAS_WORKSPACE_CONFIG=:4096:2 +export NCCL_DEBUG=INFO +export PYTHONFAULTHANDLER=1 + +stdbuf -oL -eL srun python3 finetune.py \ + --model-type cehr_bigbird \ + --exp-name mortality_1month_20000_patients \ + --pretrained-path checkpoints/bigbird_pretrain_with_conditions/pretrain_with_conditions/best-v1.ckpt \ + --label-name label_mortality_1month \ + --config-dir models/configs \ + --data-dir data/bigbird_data \ + --sequence-file patient_sequences/patient_sequences_2048_mortality.parquet \ + --id-file patient_id_dict/dataset_2048_mortality.pkl \ + --vocab-dir data/vocab \ + --val-size 0.1 \ + --valid_scheme few_shot \ + --num_finetune_patients '20000' \ + --problem_type 'single_label_classification' \ + --num_labels 2 \ + --checkpoint-dir checkpoints/bigbird_finetune_with_condition \ + --resume_checkpoint checkpoints/bigbird_finetune_with_condition/mortality_1month_20000_patients/best.ckpt +``` + + +## BigBird - Finetune Condition +``` +#!/bin/bash +#SBATCH --job-name=bigbird_finetune_condition +#SBATCH --gres=gpu:1 +#SBATCH --qos a100_amritk +#SBATCH -p a100 +#SBATCH -c 6 +#SBATCH --time=15:00:00 +#SBATCH --mem=32G +#SBATCH --output=/h/afallah/odyssey/bigbird_finetune_condition-%j.out +#SBATCH --error=/h/afallah/odyssey/bigbird_finetune_condition-%j.err +#SBATCH --no-requeue + +source /h/afallah/light/bin/activate + +cd /h/afallah/odyssey/odyssey + +export CUBLAS_WORKSPACE_CONFIG=:4096:2 +export NCCL_DEBUG=INFO +export PYTHONFAULTHANDLER=1 + +stdbuf -oL -eL srun python3 finetune.py \ + --model-type cehr_bigbird \ + --exp-name condition_50000_patients \ + --pretrained-path checkpoints/bigbird_pretrain_with_conditions/pretrain_with_conditions/best-v1.ckpt \ + --label-name all_conditions \ + --config-dir models/configs \ + --data-dir data/bigbird_data \ + --sequence-file patient_sequences/patient_sequences_2048_condition.parquet \ + --id-file patient_id_dict/dataset_2048_condition.pkl \ + --vocab-dir data/vocab \ + --val-size 0.1 \ + --valid_scheme few_shot \ + --num_finetune_patients '50000' \ + --problem_type 'multi_label_classification' \ + --num_labels 20 \ + --checkpoint-dir checkpoints/bigbird_finetune_with_condition \ + --resume_checkpoint checkpoints/bigbird_finetune_with_condition/condition_50000_patients/best.ckpt +``` + + +## BigBird - Finetune Readmission +``` +#!/bin/bash +#SBATCH --job-name=bigbird_finetune_readmission +#SBATCH --gres=gpu:2 +#SBATCH --qos a100_amritk +#SBATCH -p a100 +#SBATCH -c 6 +#SBATCH --time=15:00:00 +#SBATCH --mem=32G +#SBATCH --output=/h/afallah/odyssey/bigbird_finetune_readmission-%j.out +#SBATCH --error=/h/afallah/odyssey/bigbird_finetune_readmission-%j.err +#SBATCH --no-requeue + +source /h/afallah/light/bin/activate + +cd /h/afallah/odyssey/odyssey + +export CUBLAS_WORKSPACE_CONFIG=:4096:2 +export NCCL_DEBUG=INFO +export PYTHONFAULTHANDLER=1 + +stdbuf -oL -eL srun python3 finetune.py \ + --model-type cehr_bigbird \ + --exp-name readmission_1month_60000_patients \ + --pretrained-path checkpoints/bigbird_pretrain_with_conditions/pretrain_with_conditions/best-v1.ckpt \ + --label-name label_readmission_1month \ + --config-dir models/configs \ + --data-dir data/bigbird_data \ + --sequence-file patient_sequences/patient_sequences_2048_readmission.parquet \ + --id-file patient_id_dict/dataset_2048_readmission.pkl \ + --vocab-dir data/vocab \ + --val-size 0.1 \ + --valid_scheme few_shot \ + --num_finetune_patients '60000' \ + --problem_type 'single_label_classification' \ + --num_labels 2 \ + --checkpoint-dir checkpoints/bigbird_finetune_with_condition +``` From da3eb261bf6d7031bf0fa7abdc8553c493822d04 Mon Sep 17 00:00:00 2001 From: Adibvafa Fallahpour <90617686+Adibvafa@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:46:10 -0400 Subject: [PATCH 2/3] Update slurm_scripts.md --- slurm_scripts.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/slurm_scripts.md b/slurm_scripts.md index a0477a8..7c4d4e4 100644 --- a/slurm_scripts.md +++ b/slurm_scripts.md @@ -202,3 +202,32 @@ stdbuf -oL -eL srun python3 finetune.py \ --num_labels 2 \ --checkpoint-dir checkpoints/bigbird_finetune_with_condition ``` + + +## Bi-LSTM +``` +#!/bin/bash +#SBATCH --job-name=baseline_lstm +#SBATCH --gres=gpu:1 +#SBATCH --qos=normal +#SBATCH --time=6:00:00 +#SBATCH -c 30 +#SBATCH --mem=32G +#SBATCH --ntasks=1 +#SBATCH --output=/h/afallah/odyssey/slurm/baseline_lstm-%j.out +#SBATCH --error=/h/afallah/odyssey/slurm/baseline_lstm-%j.err + +#module --ignore_cache load cuda-11.8 +#module load anaconda/3.10 +#source activate light + +source /h/afallah/light/bin/activate + +cd /h/afallah/odyssey/slurm + +export CUBLAS_WORKSPACE_CONFIG=:4096:2 +export NCCL_DEBUG=INFO +export PYTHONFAULTHANDLER=1 + +stdbuf -oL -eL srun python3 Bi-LSTM.py +``` From 02a69746ca135a2e1a80b751fc4b7e05ea6182cf Mon Sep 17 00:00:00 2001 From: Adibvafa Fallahpour <90617686+Adibvafa@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:51:08 -0400 Subject: [PATCH 3/3] Update slurm_scripts.md --- slurm_scripts.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/slurm_scripts.md b/slurm_scripts.md index 7c4d4e4..ab48416 100644 --- a/slurm_scripts.md +++ b/slurm_scripts.md @@ -79,6 +79,41 @@ stdbuf -oL -eL srun python3 finetune.py \ ``` +## BigBird - Pretrain +``` +#!/bin/bash +#SBATCH --job-name=bigbird_pretrain +#SBATCH --gres=gpu:4 +#SBATCH --qos a100_amritk +#SBATCH -p a100 +#SBATCH -c 24 +#SBATCH --time=23:00:00 +#SBATCH --mem=200G +#SBATCH --output=/h/afallah/odyssey/multibird_a100-%j.out +#SBATCH --error=/h/afallah/odyssey/multibird_a100-%j.err +#SBATCH --no-requeue + +source /h/afallah/light/bin/activate + +cd /h/afallah/odyssey/odyssey + +export CUBLAS_WORKSPACE_CONFIG=:4096:2 +export NCCL_DEBUG=INFO +export PYTHONFAULTHANDLER=1 + +stdbuf -oL -eL srun python3 pretrain.py \ + --model-type cehr_bigbird \ + --exp-name bigbird_pretrain \ + --config-dir models/configs \ + --data-dir data/bigbird_data \ + --sequence-file patient_sequences/patient_sequences_2048.parquet \ + --id-file patient_id_dict/dataset_2048_pretrain.pkl \ + --vocab-dir data/vocab \ + --val-size 0.1 \ + --checkpoint-dir checkpoints/bigbird_pretrain +``` + + ## BigBird - Finetune Mortality ``` #!/bin/bash