Skip to content

Commit 3d9f4eb

Browse files
committed
Add run-larnd-sim
1 parent 8b092b2 commit 3d9f4eb

File tree

12 files changed

+163
-0
lines changed

12 files changed

+163
-0
lines changed

run-larnd-sim/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
# Local working directories used by the scripts in run-larnd-sim/:
# the larnd-sim and liblockfile clones and the venv made by
# install_larnd_sim.sh, plus the logs/ and input/ trees filled in by
# do_submit.sh and prep_job.sh.
1+
/larnd-sim
2+
/venv
3+
/liblockfile
4+
/logs
5+
/input

run-larnd-sim/do_submit.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
#
# Submit slurm_job.sh to Slurm for the production named by $ARCUBE_OUT_NAME.
#
# Usage:   ARCUBE_OUT_NAME=<name> ./do_submit.sh [extra sbatch options...]
# Env:     ARCUBE_OUT_NAME  production name; selects the logs/<name>/ directory
# Outputs: sbatch is told to write the job log to logs/<name>/slurm-%j.out

# Abort early when the name is missing; otherwise the log directory would
# silently collapse to logs/ itself.
: "${ARCUBE_OUT_NAME:?must be set to the production name}"

log_dir=logs/"$ARCUBE_OUT_NAME"
mkdir -p "$log_dir" || exit

# Caller-supplied arguments go before the script name so that sbatch parses
# them as its own options.
sbatch -o "$log_dir"/slurm-%j.out "$@" slurm_job.sh

run-larnd-sim/install_larnd_sim.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# One-time setup, run from the directory that should hold the installation:
#   1. creates a fresh Python virtual environment in venv/
#   2. clones larnd-sim and pip-installs it into that environment
#   3. clones and builds liblockfile (the job scripts use its dotlockfile)
#
# Any previous larnd-sim/, venv/ and liblockfile/ directories are deleted.

# Print a message to stderr and stop the installation.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

module load cudatoolkit # 11.7
module load python # 3.9-anaconda-2021.11

# liblockfile is removed too, so that the clone further down does not collide
# with a checkout left behind by an earlier run.
rm -rf larnd-sim venv liblockfile

python -m venv venv/ || die "could not create the virtual environment"
# Without this check a failed activation would let the pip commands below
# modify whichever Python environment happens to be active instead.
source venv/bin/activate || die "could not activate venv/"

pip install -U pip wheel setuptools || die "could not upgrade pip/wheel/setuptools"

# https://docs.nersc.gov/development/languages/python/using-python-perlmutter/#installing-with-pip
(
  git clone https://github.com/DUNE/larnd-sim.git || exit
  cd larnd-sim || exit
  # HACK: Replace cupy with cupy-cuda11x
  mv setup.py setup.py.orig || exit
  sed 's/cupy/cupy-cuda11x/' setup.py.orig > setup.py && pip install .
  status=$?
  # Put the pristine setup.py back even when the install failed.
  mv setup.py.orig setup.py
  exit "$status"
) || die "larnd-sim installation failed"

# The job needs dotlockfile
(
  git clone https://github.com/miquels/liblockfile.git || exit
  cd liblockfile || exit
  ./configure || exit
  make -j4
) || die "liblockfile build failed"

run-larnd-sim/load_larnd_sim.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Meant to be sourced, not executed, so that the loaded modules and the
# activated virtual environment stay in the calling shell.

# Versions noted alongside these modules: cudatoolkit 11.7,
# python 3.9-anaconda-2021.11.
module load cudatoolkit
module load python

# venv/ is looked up relative to the current working directory.
. venv/bin/activate

run-larnd-sim/monitor.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
#
# Print a snapshot of process activity (top) and GPU activity (nvidia-smi)
# to stdout once every 60 seconds. The loop never ends on its own; the script
# runs until it is killed.

while true; do
  # Spelled-out form of the dash-less option cluster "ibn1" (-i: skip idle
  # tasks, -b: batch mode, -n 1: a single iteration). The explicit switches
  # are used because not every top build accepts options without the dash.
  top -i -b -n 1
  # top -u $UID -b -n 1
  nvidia-smi
  # Separator between consecutive snapshots.
  printf "\n-oOo-\n"
  sleep 60
done
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Submit the NuMI_RHC_128tasks production. Every command-line argument is
# handed to do_submit.sh unchanged.

ARCUBE_OUT_NAME=NuMI_RHC_128tasks ./do_submit.sh "$@"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Submit the NuMI_RHC_withRock_128tasks production. Every command-line
# argument is handed to do_submit.sh unchanged.

ARCUBE_OUT_NAME=NuMI_RHC_withRock_128tasks ./do_submit.sh "$@"

run-larnd-sim/prep_job.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
#
# Write the list of edep-sim HDF5 files for one production.
#
# Usage:  ./prep_job.sh OUT_NAME
# Reads:  ../run-edep-sim/output/OUT_NAME/EDEPSIM_H5/*.h5
# Writes: input/OUT_NAME/input.list, one absolute path per line
# Exit:   2 on a missing argument, non-zero if no input files are found

#######################################
# Print every *.h5 entry of a directory, one per line, each prefixed with
# the directory's resolved absolute path.
# Arguments: $1 - directory to scan
# Outputs:   paths on stdout, diagnostics on stderr
# Returns:   non-zero if the directory cannot be resolved or holds no *.h5
#######################################
list_h5_files() {
  local dir
  # Stop here on failure: an empty result would otherwise turn the glob
  # below into "/*.h5".
  dir=$(realpath "$1") || return
  local files=("$dir"/*.h5)
  # An unmatched glob is left as the literal pattern, so check that the
  # first entry really exists (or is at least a symlink).
  if [[ ! -e "${files[0]}" && ! -L "${files[0]}" ]]; then
    printf 'no .h5 files found in %s\n' "$dir" >&2
    return 1
  fi
  printf '%s\n' "${files[@]}"
}

#######################################
# Entry point: build input/OUT_NAME/input.list for the given production.
# Arguments: $1 - production name (OUT_NAME)
# Returns:   0 on success, 2 on usage error, non-zero on any other failure
#######################################
main() {
  if [[ -z "${1:-}" ]]; then
    printf 'Usage: %s OUT_NAME\n' "${0##*/}" >&2
    return 2
  fi
  local outName=$1
  local outDir=../run-edep-sim/output/"$outName"
  local inDir=input/"$outName"
  local list

  # Collect the paths first so that a failure leaves an existing input.list
  # untouched instead of truncating it.
  list=$(list_h5_files "$outDir"/EDEPSIM_H5) || return
  mkdir -p "$inDir" || return
  printf '%s\n' "$list" > "$inDir"/input.list
}

main "$@"

run-larnd-sim/slurm_job.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
#SBATCH --account=dune_g
#SBATCH --qos=regular
#SBATCH --constraint=gpu
#SBATCH --time=2:00:00
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --gpus-per-task=1
#SBATCH --cpus-per-task=32
#
# Batch script handed to sbatch by do_submit.sh. It launches ./slurm_task.sh
# through srun and finishes with srun's exit status.
#
# Env: ARCUBE_OUT_NAME  production name; srun output is written to
#                       logs/<name>/<jobid>/output-%j.%t.txt

# Fail right away instead of running the tasks with an empty production name.
: "${ARCUBE_OUT_NAME:?must be set to the production name}"

# https://docs.nersc.gov/systems/perlmutter/running-jobs/#1-node-4-tasks-4-gpus-1-gpu-visible-to-each-task
export SLURM_CPU_BIND="cores"

# NOTE: Beginning with 22.05, srun will not inherit the --cpus-per-task value
# requested by salloc or sbatch. It must be requested again with the call to
# srun or set with the SRUN_CPUS_PER_TASK environment variable if desired for
# the task(s).

export SRUN_CPUS_PER_TASK="$SLURM_CPUS_PER_TASK"

logDir=logs/"$ARCUBE_OUT_NAME"/"$SLURM_JOBID"
mkdir -p "$logDir" || exit

echo JOB-START "$(date)"

srun -o "$logDir"/output-%j.%t.txt ./slurm_task.sh
status=$?

echo JOB-END "$(date)"

# Hand srun's status back to Slurm; otherwise the script would always end
# with the status of the echo above and a failed srun would go unnoticed.
exit "$status"

run-larnd-sim/slurm_task.sh

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
#
# Per-task worker launched by srun from slurm_job.sh. Walks through
# input/<name>/input.list and runs simulate_pixels.py on every entry that no
# other task has claimed, writing to ../run-edep-sim/output/<name>/LARNDSIM.
#
# Claiming works through two marker files next to the output:
#   <file>.lock  taken with dotlockfile while a task works on the entry
#   <file>.time  written by /usr/bin/time; its presence marks the entry as
#                already attempted
#
# Env: ARCUBE_OUT_NAME  production name (required)
#      SLURM_LOCALID, SLURM_JOBID, SLURM_NODEID  read to start the monitor

: "${ARCUBE_OUT_NAME:?must be set to the production name}"

# HACK: This will not wait for other tasks on the node to complete
if [[ "$SLURM_LOCALID" == 0 ]]; then
  monitorFile=monitor-$SLURM_JOBID.$SLURM_NODEID.txt
  ./monitor.sh >logs/"$ARCUBE_OUT_NAME"/"$SLURM_JOBID"/"$monitorFile" &
fi

# Just in case, to avoid a rush for the lockfile
sleep $((RANDOM % 20))

# Stop if the environment is unusable: carrying on would leave a .time marker
# for every input and so flag the whole list as done without simulating it.
source venv/bin/activate || exit

outDir=../run-edep-sim/output/${ARCUBE_OUT_NAME}/LARNDSIM
mkdir -p "$outDir" || exit

inFile=input/${ARCUBE_OUT_NAME}/input.list

dotlockfile=liblockfile/dotlockfile

# The list is read through file descriptor 3 so that no command started in
# the loop body can swallow the remaining entries by reading its stdin. The
# extra test keeps a final line that lacks a trailing newline.
while IFS= read -r -u 3 input || [[ -n "$input" ]]; do
  # Ignore blank lines instead of treating them as an input file.
  [[ -n "$input" ]] || continue

  outName=$(basename "$input" .EDEPSIM.h5)

  # Skip entries that are being processed or were already attempted; the
  # lock attempt with zero retries settles any race between tasks.
  [[ -e "$outDir/$outName.lock" ]] && continue
  [[ -e "$outDir/$outName.time" ]] && continue
  "$dotlockfile" -r 0 "$outDir/$outName.lock" || continue

  echo FILE-START "$(date)"

  /usr/bin/time -f "%P %M %E" -o "$outDir/$outName.time" \
    simulate_pixels.py --input_filename "$input" \
    --output_filename "$outDir/$outName.LARNDSIM.h5" \
    --detector_properties larnd-sim/larndsim/detector_properties/2x2.yaml \
    --pixel_layout larnd-sim/larndsim/pixel_layouts/multi_tile_layout-2.3.16.yaml \
    --response_file larnd-sim/larndsim/bin/response_44.npy \
    --light_lut_filename larnd-sim/larndsim/bin/lightLUT.npz \
    --light_det_noise_filename larnd-sim/larndsim/bin/light_noise-2x2-example.npy \
    || printf 'FILE-FAIL %s (exit status %d)\n' "$input" "$?" >&2

  echo FILE-END "$(date)"

  rm -f "$outDir/$outName.lock"
done 3< "$inFile"

0 commit comments

Comments
 (0)