forked from CERC-AAI/multimodal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
job.sh
34 lines (24 loc) · 1010 Bytes
/
job.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
#BSUB -nnodes 1
#BSUB -W 2:00
#BSUB -q batch
#BSUB -o gpt_neox_out.%J
#BSUB -e gpt_neox_err.%J
#BSUB -J gpt_neox
#BSUB -alloc_flags gpudefault
#BSUB -P CSC499
#
# LSF batch job: activates the "robin" conda environment and launches
# train.py through deepy.py with the magma_pythia_410M.yml and
# magma_setup.yml configs.
#
# Everything is looked up under /gpfs/alpine/csc499/scratch/<user>/:
#   setup.sh, miniconda3/, write_hostfile.sh, hostfiles/, latest_install/cache
#   and the repository checkout in robin/.
#
# Required environment: LSB_JOBID and LSB_DJOB_HOSTFILE.
# NOTE(review): both are presumably provided by LSF inside a job - confirm.
#
# Strict mode (set -euo pipefail) is deliberately not enabled: setup.sh and
# conda.sh are sourced into this shell and are not guaranteed to be clean
# under -e / -u.

# Fail fast when the job variables are missing. Without this guard an unset
# LSB_DJOB_HOSTFILE made the host lookup read stdin, and an unset LSB_JOBID
# silently produced the hostfile path ".../hostfiles/-hosts".
: "${LSB_JOBID:?LSB_JOBID is not set}"
: "${LSB_DJOB_HOSTFILE:?LSB_DJOB_HOSTFILE is not set}"

# Per-user scratch root, computed once instead of calling whoami on every line.
job_scratch_root=/gpfs/alpine/csc499/scratch/$(whoami)

source "$job_scratch_root/setup.sh"
#conda init bash
source "$job_scratch_root/miniconda3/etc/profile.d/conda.sh"
conda activate robin

# Keep the extensions cache under /gpfs.
export TORCH_EXTENSIONS_DIR="$job_scratch_root/latest_install/cache"

# Change this to the location of your checkout of the repo.
TRAIN_PATH="$job_scratch_root/robin"
cd "$TRAIN_PATH" || {
  printf 'job.sh: cannot cd to %s\n' "$TRAIN_PATH" >&2
  exit 1
}

# write_hostfile.sh is not part of this file; the original note says it can be
# found at "GPT-NeoX on Summit". It is expected to create the hostfile that
# DLTS_HOSTFILE points to below - TODO confirm.
bash "$job_scratch_root/write_hostfile.sh"
export DLTS_HOSTFILE="$job_scratch_root/hostfiles/$LSB_JOBID-hosts"

# First entry (in sort order) of the job host list that contains neither
# "batch" nor "login". Assignment and export are separate so the result can be
# checked before it is exported.
MASTER_ADDR=$(sort -u -- "$LSB_DJOB_HOSTFILE" \
  | grep -v -e batch -e login \
  | head -n 1)
if [[ -z "$MASTER_ADDR" ]]; then
  printf 'job.sh: no usable host found in %s\n' "$LSB_DJOB_HOSTFILE" >&2
  exit 1
fi
export MASTER_ADDR

python3 "$TRAIN_PATH/deepy.py" "$TRAIN_PATH/train.py" \
  --conf_dir "$TRAIN_PATH/configs" magma_pythia_410M.yml magma_setup.yml