forked from sabeenlohawala/tissue_labeling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
submit_requeue.sh
169 lines (146 loc) · 12.5 KB
/
submit_requeue.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/bin/bash
# Slurm batch script: starts a training run, or resumes it from a checkpoint
# found in $LOGDIR. Meant to be submitted with `sbatch`.
#
# The #SBATCH lines below are scheduler directives, not ordinary comments.
# sbatch only honors them while they appear before the first executable
# command, so keep them at the top of the file.
#   --requeue             allow the scheduler to put the job back in the queue
#   -t 2-00:00:00         wall-clock limit of 2 days
#   -N 1                  one node
#   -c 4                  4 CPUs per task
#   --ntasks-per-node=4   4 tasks on the node
#   --gres=gpu:a100:4     4 GPUs of type a100
#   --mem=40G             memory per node
#   -p gablab             partition to run in
#   -o / -e               files receiving the job's stdout / stderr
#   --mail-user/-type     e-mail this address when the job fails
#SBATCH --requeue
#SBATCH -t 2-00:00:00
#SBATCH -N 1
#SBATCH -c 4
#SBATCH --ntasks-per-node=4
#SBATCH --gres=gpu:a100:4
#SBATCH --mem=40G # per node memory
#SBATCH -p gablab
#SBATCH -o ./logs/final/matth_aug.out
#SBATCH -e ./logs/final/matth_aug.err
#SBATCH --mail-user=sabeen@mit.edu
#SBATCH --mail-type=FAIL
# Record the job id in the stdout log (SLURM_JOB_ID is provided by Slurm).
echo "Submitted Job: $SLURM_JOB_ID"
# Put the miniconda bin directory first on PATH so `conda` and `python`
# resolve to that installation for the rest of the script.
export PATH="/om2/user/sabeen/miniconda/bin:$PATH"
# NOTE(review): `conda init bash` configures shell startup files; it does not
# activate an environment in this already-running shell. Confirm that running
# it on every job is intended.
conda init bash
# ---------------------------------------------------------------------------
# Run configuration.
# Unless noted otherwise, each value is handed to the lower-case flag of the
# same name on the `main.py train` command line further down (that command is
# only used when no checkpoint exists yet). Most values also appear in the
# LOGDIR name.
# ---------------------------------------------------------------------------

# Model and optimisation
MODEL_NAME='segformer'
PRETRAINED=0
LOSS_FN='dice'
NR_OF_CLASSES=50
BATCH_SIZE=288
LR=0.001
NUM_EPOCHS=300

# Logging, debugging and checkpointing
DEBUG=0
LOG_IMAGES=0
CLASS_SPECIFIC_SCORES=0
CHECKPOINT_FREQ=2

# Dataset selection
DATA_SIZE='med'
NEW_KWYK_DATA=2
BACKGROUND_PERCENT_CUTOFF=0
# ROTATE_VOL only shows up in the LOGDIR name; this script passes no
# corresponding flag to main.py.
ROTATE_VOL=0

# Data augmentation: master switch and general settings
AUGMENT=1
AUG_PERCENT=0.8
INTENSITY_SCALE=1
AUG_ELASTIC=0
AUG_PIECEWISE_AFFINE=0

# Data augmentation: cutout
AUG_CUTOUT=0
CUTOUT_N_HOLES=1
CUTOUT_LENGTH=32

# Data augmentation: mask
AUG_MASK=0
MASK_N_HOLES=1
MASK_LENGTH=64

# Data augmentation: nulling
AUG_NULL_HALF=0
AUG_NULL_CEREBELLUM_BRAIN_STEM=0

# Data augmentation: background
AUG_BACKGROUND_MANIPULATION=0
AUG_SHAPES_BACKGROUND=0
AUG_GRID_BACKGROUND=0
AUG_NOISE_BACKGROUND=0
# pre 202404 logdirs
# LOGDIR="/om2/scratch/tmp/sabeen/20240215-grid-M$MODEL_NAME\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\A0"
# LOGDIR="/om2/scratch/tmp/sabeen/20240305-grid-M$MODEL_NAME\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240330-grid-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240227-aug-M$MODEL_NAME\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240305-cut-$CUTOUT_LENGTH-$CUTOUT_N_HOLES-M$MODEL_NAME\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240227-mask-$MASK_LENGTH-$MASK_N_HOLES-M$MODEL_NAME\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240313-mask-$MASK_LENGTH-$MASK_N_HOLES-M$MODEL_NAME\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240314-intensity-0.2-0.2-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240325-mask-$MASK_LENGTH-$MASK_N_HOLES-intensity-0.2-0.2-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/20240330-null-$AUG_NULL_HALF-intensity-0.2-0.2-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# 202404__ logdirs
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240425-grid-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240425-mask-$MASK_LENGTH-$MASK_N_HOLES-intensity-0.2-0.2-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240425-null-intensity-0.2-0.2-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240425-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240424-old-grid-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="test-new-kwyk-volume-dataset"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240426-old-dice2loss-2metrics-segformer-50class-512-1e-3"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240427-grid-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
## SATRA DATA EXPERIMENTS
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240504-grid-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240504-aff-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240504-aff-intensity-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240504-aff-intensity-elastic-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240508-aff-intensity-piecewiseAff-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240504-mask-$MASK_LENGTH-$MASK_N_HOLES-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240504-cut-$CUTOUT_LENGTH-$CUTOUT_N_HOLES-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240508-null-CBS$AUG_NULL_CEREBELLUM_BRAIN_STEM-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240504-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240508-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-noise-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240505-50-aff-intensity-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240505-50-cut-$CUTOUT_LENGTH-$CUTOUT_N_HOLES-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240505-50-null-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240506-50-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240508-shard-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240505-50-mask-$MASK_LENGTH-$MASK_N_HOLES-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240508-mask-$MASK_LENGTH-$MASK_N_HOLES-bkgd-$AUG_SHAPES_BACKGROUND-$AUG_GRID_BACKGROUND-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240508-cut-$CUTOUT_LENGTH-$CUTOUT_N_HOLES-bkgd-$AUG_SHAPES_BACKGROUND-$AUG_GRID_BACKGROUND-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om2/scratch/tmp/sabeen/results/20240508-null-CBS$AUG_NULL_CEREBELLUM_BRAIN_STEM-bkgd-$AUG_SHAPES_BACKGROUND-$AUG_GRID_BACKGROUND-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240504-aff-intensity-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240508-aff-intensity-piecewiseAff-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
## FINAL EXPERIMENTS
# Active run directory. The settings are joined by literal backslash
# characters (written "\\" here), so the result is ONE directory name that
# contains backslashes, not a nested path. Keep the exact spelling: the
# checkpoint lookup below searches this directory, and changing the name
# would stop existing runs from being resumed.
LOGDIR="/om/scratch/tmp/sabeen/results/20240514-aug"
LOGDIR+="-M${MODEL_NAME}\\L${LOSS_FN}\\S${DATA_SIZE}"
LOGDIR+="\\RV${ROTATE_VOL}\\BC${BACKGROUND_PERCENT_CUTOFF}\\C${NR_OF_CLASSES}"
LOGDIR+="\\B${BATCH_SIZE}\\LR${LR}\\PT${PRETRAINED}\\A${AUGMENT}"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240513-grid-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240511-mask-$MASK_LENGTH-$MASK_N_HOLES-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240511-cut-$CUTOUT_LENGTH-$CUTOUT_N_HOLES-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240511-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-noise-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240511-null-CBS$AUG_NULL_CEREBELLUM_BRAIN_STEM-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240511-mask-$MASK_LENGTH-$MASK_N_HOLES-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-noise-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240511-cut-$CUTOUT_LENGTH-$CUTOUT_N_HOLES-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-noise-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# LOGDIR="/om/scratch/tmp/sabeen/results/20240511-null-CBS$AUG_NULL_CEREBELLUM_BRAIN_STEM-bkgd-shapes-$AUG_SHAPES_BACKGROUND-grid-$AUG_GRID_BACKGROUND-noise-$AUG_NOISE_BACKGROUND-M$MODEL_NAME\L$LOSS_FN\S$DATA_SIZE\RV$ROTATE_VOL\BC$BACKGROUND_PERCENT_CUTOFF\C$NR_OF_CLASSES\B$BATCH_SIZE\LR$LR\PT$PRETRAINED\A$AUGMENT"
# Resume-or-start dispatch.
#
# If $LOGDIR already holds at least one *.ckpt file (for example because this
# job ran before and was requeued), continue that run with `resume-train`,
# which only needs the log directory. Otherwise launch a fresh `train` run
# with the settings defined above.
#
# Every expansion is double-quoted: $LOGDIR contains literal backslashes, and
# unquoted words are subject to word splitting and filename expansion.
ckpt_files=("$LOGDIR"/*.ckpt) # an unmatched glob stays as the literal pattern
if [[ -e "${ckpt_files[0]}" ]]; then
  echo "Checkpoint file found. Resuming training..."
  printf '%s\n' "$LOGDIR"
  srun python -u scripts/commands/main.py resume-train \
    --logdir "$LOGDIR"
else
  echo "No checkpoint file found. Starting training..."
  printf '%s\n' "$LOGDIR"
  srun python -u scripts/commands/main.py train \
    --model_name "$MODEL_NAME" \
    --loss_fn "$LOSS_FN" \
    --nr_of_classes "$NR_OF_CLASSES" \
    --logdir "$LOGDIR" \
    --num_epochs "$NUM_EPOCHS" \
    --batch_size "$BATCH_SIZE" \
    --lr "$LR" \
    --debug "$DEBUG" \
    --log_images "$LOG_IMAGES" \
    --data_size "$DATA_SIZE" \
    --pretrained "$PRETRAINED" \
    --augment "$AUGMENT" \
    --aug_percent "$AUG_PERCENT" \
    --aug_cutout "$AUG_CUTOUT" \
    --aug_mask "$AUG_MASK" \
    --cutout_n_holes "$CUTOUT_N_HOLES" \
    --cutout_length "$CUTOUT_LENGTH" \
    --mask_n_holes "$MASK_N_HOLES" \
    --mask_length "$MASK_LENGTH" \
    --intensity_scale "$INTENSITY_SCALE" \
    --aug_elastic "$AUG_ELASTIC" \
    --aug_piecewise_affine "$AUG_PIECEWISE_AFFINE" \
    --aug_null_half "$AUG_NULL_HALF" \
    --aug_null_cerebellum_brain_stem "$AUG_NULL_CEREBELLUM_BRAIN_STEM" \
    --new_kwyk_data "$NEW_KWYK_DATA" \
    --background_percent_cutoff "$BACKGROUND_PERCENT_CUTOFF" \
    --class_specific_scores "$CLASS_SPECIFIC_SCORES" \
    --checkpoint_freq "$CHECKPOINT_FREQ" \
    --aug_background_manipulation "$AUG_BACKGROUND_MANIPULATION" \
    --aug_shapes_background "$AUG_SHAPES_BACKGROUND" \
    --aug_grid_background "$AUG_GRID_BACKGROUND" \
    --aug_noise_background "$AUG_NOISE_BACKGROUND"
fi