diff --git a/containers/detection/Dockerfile b/containers/detection/Dockerfile
index 5449faa..0f2ad21 100644
--- a/containers/detection/Dockerfile
+++ b/containers/detection/Dockerfile
@@ -1,7 +1,11 @@
 # mac users wanting to test locally must change the tag to: latest-arm64
 # it will run, but will be really slow
 FROM ultralytics/ultralytics@sha256:ae02bb4835db50cd355e29e0c5564b0825b934038b20fcd6441d999831e1f669
-RUN apt-get install -y file rsync
+# Refresh the package index in the same layer as the install: the base image is pinned by digest, so its bundled apt lists go stale.
+RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates file rsync wget
+# Pre-fetch the pose and segmentation weights referenced by the (commented-out) demos in containers/detection/run.
+RUN wget -nv https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-pose.pt -O /usr/src/ultralytics/yolov8n-pose.pt
+RUN wget -nv https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt -O /usr/src/ultralytics/yolov8n-seg.pt
 RUN mkdir -p /usr/local/bin
 ENV PATH="/usr/local/bin:${PATH}"
 COPY containers/scripts/run_program.sh /usr/local/bin/run_program.sh
diff --git a/containers/detection/run b/containers/detection/run
index 84b4130..6998495 100755
--- a/containers/detection/run
+++ b/containers/detection/run
@@ -5,8 +5,19 @@
 # export DEBUG=true
 # From: https://github.com/ultralytics/ultralytics/blob/main/docs/modes/predict.md
 export VALID_EXTENSIONS=bmp,dng,jpeg,jpg,mpo,png,tif,tiff,webp,pfm,asf,avi,gif,m4v,mkv,mov,mp4,mpeg,mpg,ts,wmv,webm
+export PARALLEL=true
 
-run_program.sh 'yolo detect predict cfg=/workspace/config.yaml source="${input_file}" project="${output_dir}"' /inputs /outputs 2>> /outputs/stderr 1>> /outputs/stdout
+run_program.sh 'yolo detect predict cfg=/workspace/config.yaml model=yolov8n.pt source="${input_file}" project="${output_dir}"' /inputs /outputs 2>> /outputs/stderr 1>> /outputs/stdout
+
+# The following are demos of pose and segmentation, but they just add too much time to the run
+
+# export VALID_EXTENSIONS=bmp,dng,jpeg,jpg,mpo,png,tif,tiff,webp
+
+# run_program.sh 'yolo pose predict cfg=/workspace/config.yaml model=yolov8n-pose.pt source="${input_file}" project="${output_dir}"' /inputs /outputs "pose" 2>> /outputs/stderr 1>> /outputs/stdout
+
+# export VALID_EXTENSIONS=bmp,dng,jpeg,jpg,mpo,png,tif,tiff,webp
+
+# run_program.sh 'yolo segment predict cfg=/workspace/config.yaml model=yolov8n-seg.pt source="${input_file}" project="${output_dir}"' /inputs /outputs "segment" 2>> /outputs/stderr 1>> /outputs/stdout
 
 for src in $(find /outputs -name predict* -type d); do
     dest=${src%/predict*}
diff --git a/containers/detection/yolo_config.yaml b/containers/detection/yolo_config.yaml
index 47c4814..cfbfee0 100644
--- a/containers/detection/yolo_config.yaml
+++ b/containers/detection/yolo_config.yaml
@@ -1,30 +1,63 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 # Default training settings and hyperparameters for medium-augmentation COCO training
 
-task: detect  # YOLO task, i.e. detect, segment, classify, pose
+task:  # YOLO task, i.e. detect, segment, classify, pose
 mode: predict  # YOLO mode, i.e. train, val, predict, export, track, benchmark
-project:
 
 # Train settings -------------------------------------------------------------------------------------------------------
-model: yolov8n.pt # path to model file, i.e. yolov8n.pt, yolov8n.yaml
+model: # path to model file, i.e. yolov8n.pt, yolov8n.yaml
 data:  # path to data file, i.e. coco128.yaml
+epochs: 100  # number of epochs to train for
+patience: 50  # epochs to wait for no observable improvement for early stopping of training
+batch: 16  # number of images per batch (-1 for AutoBatch)
+imgsz: 640  # size of input images as integer or w,h
 save: True  # save train checkpoints and predict results
+save_period: -1 # Save checkpoint every x epochs (disabled if < 1)
+cache: False  # True/ram, disk or False. Use cache for data loading
 device:  # device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
+workers: 8  # number of worker threads for data loading (per RANK if DDP)
+project:  # project name
+name:  # experiment name, results saved to 'project/name' directory
+exist_ok: False  # whether to overwrite existing experiment
+pretrained: False  # whether to use a pretrained model
+optimizer: SGD  # optimizer to use, choices=['SGD', 'Adam', 'AdamW', 'RMSProp']
 verbose: True  # whether to print verbose output
 seed: 0  # random seed for reproducibility
 deterministic: True  # whether to enable deterministic mode
+single_cls: False  # train multi-class data as single-class
+image_weights: False  # use weighted image selection for training
+rect: False  # rectangular training if mode='train' or rectangular validation if mode='val'
+cos_lr: False  # use cosine learning rate scheduler
+close_mosaic: 0  # (int) disable mosaic augmentation for final epochs
+resume: False  # resume training from last checkpoint
+amp: True  # Automatic Mixed Precision (AMP) training, choices=[True, False], True runs AMP check
+# Segmentation
+overlap_mask: True  # masks should overlap during training (segment train only)
+mask_ratio: 4  # mask downsample ratio (segment train only)
+# Classification
+dropout: 0.0  # use dropout regularization (classify train only)
 
-# Prediction settings --------------------------------------------------------------------------------------------------
+# Val/Test settings ----------------------------------------------------------------------------------------------------
+val: True  # validate/test during training
+split: val  # dataset split to use for validation, i.e. 'val', 'test' or 'train'
+save_json: True  # save results to JSON file
+save_hybrid: False  # save hybrid version of labels (labels + additional predictions)
 conf: 0.32 # object confidence threshold for detection (default 0.25 predict, 0.001 val)
+iou: 0.7  # intersection over union (IoU) threshold for NMS
+max_det: 300  # maximum number of detections per image
+half: False  # use half precision (FP16)
+dnn: False  # use OpenCV DNN for ONNX inference
+plots: True  # save plots during train/val
+
+# Prediction settings --------------------------------------------------------------------------------------------------
 source:  # source directory for images or videos
 show: False  # show results if possible
 save_txt: True  # save results as .txt file
-save_json: True
 save_conf: True  # save results with confidence scores
 save_crop: True  # save cropped images with results
 show_labels: True  # show object labels in plots
 show_conf: True  # show object confidence scores in plots
-vid_stride: 8  # video frame-rate stride
+vid_stride: 24  # video frame-rate stride
 line_thickness: 3  # bounding box thickness (pixels)
 visualize: False  # visualize model features
 augment: False  # apply image augmentation to prediction sources
@@ -32,3 +65,52 @@ agnostic_nms: False  # class-agnostic NMS
 classes:  # filter results by class, i.e. class=0, or class=[0,2,3]
 retina_masks: False  # use high-resolution segmentation masks
 boxes: True  # Show boxes in segmentation predictions
+
+# Export settings ------------------------------------------------------------------------------------------------------
+format: torchscript  # format to export to
+keras: False  # use Keras
+optimize: False  # TorchScript: optimize for mobile
+int8: False  # CoreML/TF INT8 quantization
+dynamic: False  # ONNX/TF/TensorRT: dynamic axes
+simplify: False  # ONNX: simplify model
+opset:  # ONNX: opset version (optional)
+workspace: 4  # TensorRT: workspace size (GB)
+nms: False  # CoreML: add NMS
+
+# Hyperparameters ------------------------------------------------------------------------------------------------------
+lr0: 0.01  # initial learning rate (i.e. SGD=1E-2, Adam=1E-3)
+lrf: 0.01  # final learning rate (lr0 * lrf)
+momentum: 0.937  # SGD momentum/Adam beta1
+weight_decay: 0.0005  # optimizer weight decay 5e-4
+warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_momentum: 0.8  # warmup initial momentum
+warmup_bias_lr: 0.1  # warmup initial bias lr
+box: 7.5  # box loss gain
+cls: 0.5  # cls loss gain (scale with pixels)
+dfl: 1.5  # dfl loss gain
+pose: 12.0  # pose loss gain
+kobj: 1.0  # keypoint obj loss gain
+label_smoothing: 0.0  # label smoothing (fraction)
+nbs: 64  # nominal batch size
+hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # image HSV-Value augmentation (fraction)
+degrees: 0.0  # image rotation (+/- deg)
+translate: 0.1  # image translation (+/- fraction)
+scale: 0.5  # image scale (+/- gain)
+shear: 0.0  # image shear (+/- deg)
+perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # image flip up-down (probability)
+fliplr: 0.5  # image flip left-right (probability)
+mosaic: 1.0  # image mosaic (probability)
+mixup: 0.0  # image mixup (probability)
+copy_paste: 0.0  # segment copy-paste (probability)
+
+# Custom config.yaml ---------------------------------------------------------------------------------------------------
+cfg:  # for overriding defaults.yaml
+
+# Debug, do not modify -------------------------------------------------------------------------------------------------
+v5loader: False  # use legacy YOLOv5 dataloader
+
+# Tracker settings ------------------------------------------------------------------------------------------------------
+tracker: botsort.yaml  # tracker type, ['botsort.yaml', 'bytetrack.yaml']
diff --git a/containers/scripts/run_program.sh b/containers/scripts/run_program.sh
index d382bf0..3fdb68a 100755
--- a/containers/scripts/run_program.sh
+++ b/containers/scripts/run_program.sh
@@ -33,6 +33,7 @@ DEFAULT_FILENAME="${DEFAULT_FILENAME:-file}"
 DEFAULT_EXTENSION="${DEFAULT_EXTENSION:-}" # If blank, will use the `file` binary to determine the extension
 APPEND_EXTENSION="${APPEND_EXTENSION:-}" # If set, append this extension to the output file
 VALID_EXTENSIONS="${VALID_EXTENSIONS:-}" # If set, only process files with these extensions
+PARALLEL="${PARALLEL:-}" # If non-empty (any value, even "false"), run each rendered command in the background instead of serially
 
 # Check to see if input directory is actually a file. This happens when the
 # input CID is a blob.
@@ -173,7 +174,22 @@ if [ $MODE = "batch" ]; then
         rendered_command=$(eval "echo $COMMAND") # Danger! Can expose things like $USER.
         debug "rendered_command: $rendered_command"
 
-        # Run the program in a subshell 
-        bash -c "${rendered_command}"
+        # In parallel mode, launch the command in the background; it is waited on after the loop via `jobs -p`
+        if [ -n "${PARALLEL}" ]; then
+            debug "running in parallel mode"
+            nohup bash -c "${rendered_command}" &
+        else
+            debug "running in serial mode"
+            # Run the command in the foreground and block until it finishes
+            bash -c "${rendered_command}"
+        fi
     done
+
+    if [ -n "${PARALLEL}" ]; then
+        debug "waiting for all pids to finish: $(jobs -p)"
+        for job in $(jobs -p); do
+            debug "waiting for $job"
+            wait "${job}"
+        done
+    fi
 fi