-
Notifications
You must be signed in to change notification settings - Fork 100
/
Copy pathstart-tr11-176B-ml
executable file
·63 lines (53 loc) · 1.97 KB
/
start-tr11-176B-ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# This script sets up the python production environment for JZ / tr11-176B-ml training
#
# Activate with:
#
# source ./start-tr11-176B-ml
#
#
# # If this session isn't run via a login shell, which is the case when running a
# # command other than a shell via ssh, the bash function `module` will be missing.
# # So work around it by emulating the part of the login shell that loads the modules environment.
# if [ -z $(type -t module) ]
# then
# . /etc/profile.d/z_modules.sh
# fi
# Reset the modules environment, then load the CUDA toolkit and the
# command-line tools used during training sessions.
# NOTE: `module` is a bash function that is only defined when the modules
# environment has been loaded (normally by the login shell).
module purge
module load cuda/11.4.3
module load nvtop git git-lfs github-cli mc
#module load pytorch-gpu/py3/1.8.1
#module load gcc/9.3.0

# use the env for v100 and a100 w/o needing to rebuild the kernels for each run
export TORCH_CUDA_ARCH_LIST="7.0 8.0"

# git prompt: configuration variables must be exported before the prompt
# script is sourced.
export GIT_PROMPT_ONLY_IN_REPO=0
export GIT_PROMPT_THEME="JZPRod"
# The path is quoted so that a value of $six_ALL_CCFRWORK containing spaces or
# glob characters is passed to `source` as a single argument.
source "$six_ALL_CCFRWORK/envs/.bash-git-prompt/gitprompt.sh"
# We are using common disk spaces for datasets, caches, and experiment dumps:
#
#- Code, cache and datasets -> `$six_ALL_CCFRWORK/cache_dir` and `$six_ALL_CCFRWORK/datasets`
#- Experiment dumps -> `$six_ALL_CCFRWORK/experiments`
# Cache and dataset locations, one sub-directory of $six_ALL_CCFRWORK per
# variable. Values are assigned first and exported together below.
TRANSFORMERS_CACHE="${six_ALL_CCFRWORK}/models"
HF_DATASETS_CACHE="${six_ALL_CCFRWORK}/datasets"
HF_MODULES_CACHE="${six_ALL_CCFRWORK}/modules"
HF_METRICS_CACHE="${six_ALL_CCFRWORK}/metrics"
DATASETS_CUSTOM="${six_ALL_CCFRWORK}/datasets-custom"
export TRANSFORMERS_CACHE HF_DATASETS_CACHE HF_MODULES_CACHE \
  HF_METRICS_CACHE DATASETS_CUSTOM
### CONDA ###
# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
# Capture the output of conda's bash hook; its exit status decides which of
# the three initialisation paths below is taken.
__conda_setup="$('/gpfslocalsup/pub/anaconda-py3/2020.02/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
    # Hook succeeded: evaluate what it printed.
    eval "$__conda_setup"
elif [ -f "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/profile.d/conda.sh" ]; then
    # Hook failed: fall back to sourcing conda.sh.
    . "/gpfslocalsup/pub/anaconda-py3/2020.02/etc/profile.d/conda.sh"
else
    # Last resort: put conda's bin directory at the front of PATH.
    export PATH="/gpfslocalsup/pub/anaconda-py3/2020.02/bin:$PATH"
fi
unset __conda_setup
# <<< conda initialize <<<

# Point CONDA_ENVS_PATH at the conda directory under $six_ALL_CCFRWORK, then
# activate base followed by the tr11-176B-ml environment.
CONDA_ENVS_PATH="${six_ALL_CCFRWORK}/conda"
export CONDA_ENVS_PATH
conda activate base
conda activate tr11-176B-ml