2 changes: 2 additions & 0 deletions singularity/Singularity.rag
@@ -15,11 +15,13 @@ mkdir -p /app
cd /app
apt-get update && apt-get install -y --no-install-recommends git wget curl build-essential ca-certificates && rm -rf /var/lib/apt/lists/*
pip3 install --no-cache-dir chromadb "fastapi>=0.110,<0.112" "uvicorn[standard]>=0.29,<0.31" "transformers>=4.41,<4.45" "sentence-transformers>=2.6,<3" "chromadb==0.5.4" "watchdog>=3,<5" "pypdf>=4,<5" requests httpx pyyaml

HF_HOME=/root/.cache/huggingface
chmod +x singularity-entrypoint.sh

%environment
export HF_HOME=/root/.cache/huggingface
export LD_LIBRARY_PATH=/opt/conda/lib:/usr/local/lib:/usr/lib/x86_64-linux-gnu:/usr/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

%runscript
/bin/bash singularity-entrypoint.sh
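
Both definition files append to the library search path with the guarded form /usr/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}, so the exported value ends cleanly rather than with a dangling colon when LD_LIBRARY_PATH is unset. A minimal illustrative sketch of that parameter expansion, assuming a plain bash shell (not part of the change itself):

# Sketch of the ${VAR:+...} expansion used in the %environment sections above.
unset LD_LIBRARY_PATH
echo "/usr/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# -> /usr/lib                  (no trailing colon when the variable is unset)
LD_LIBRARY_PATH=/opt/conda/lib
echo "/usr/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# -> /usr/lib:/opt/conda/lib   (existing value appended after a single colon)
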
9 changes: 9 additions & 0 deletions singularity/Singularity.vllm
@@ -12,6 +12,12 @@ From: vllm/vllm-openai:latest
pkg-config cmake ninja-build
rm -rf /var/lib/apt/lists/*

# FIPS workaround: opencv-python-headless >=4.13 bundles a FIPS-enabled
# OpenSSL 1.1.1k from CentOS/RHEL that crashes on FIPS-enabled hosts.
# Pin to 4.12.0.88 which does not bundle OpenSSL.
# See: https://github.com/opencv/opencv-python/issues/1184
pip install opencv-python-headless==4.12.0.88

# Pick a Python interpreter that actually exists in the base image
if command -v python >/dev/null 2>&1; then
PY=python
@@ -37,6 +43,9 @@ print("Transformers:", transformers.__version__)
print("vLLM:", vllm.__version__)
PY

%environment
export LD_LIBRARY_PATH=/usr/local/lib:/usr/lib/x86_64-linux-gnu:/usr/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

%runscript
mkdir -p /app
cd /app
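
The opencv-python-headless pin above works around a wheel that ships its own FIPS-enabled OpenSSL. One way to confirm the pinned wheel carries no bundled libssl/libcrypto is to search the installed package directories. This is a hedged sketch, not part of the PR, and the exact location of bundled libraries can vary between wheel versions:

# Illustrative check only (assumed wheel layout; not part of this change).
SITE="$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"
find "$SITE" -path '*opencv*' \( -name 'libssl*' -o -name 'libcrypto*' \) -print
# No output suggests the installed wheel does not bundle its own OpenSSL.
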
44 changes: 41 additions & 3 deletions workflow.yaml
@@ -468,10 +468,48 @@ jobs:
REPO_URL="https://huggingface.co/$MODEL_ID"
[[ -n "$HF_TOKEN" ]] && REPO_URL="https://user:${HF_TOKEN}@huggingface.co/$MODEL_ID"

# Clone with LFS and pull large files
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 "$REPO_URL" "$TARGET_DIR"
# Clone without LFS or checkout, then sparse-checkout only safetensors
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --no-checkout "$REPO_URL" "$TARGET_DIR"
cd "$TARGET_DIR"
git lfs pull

# Sparse checkout: exclude bin/gguf/onnx/pth to only get safetensors weights
git sparse-checkout init --no-cone
git sparse-checkout set '/*' '!*.bin' '!*.gguf' '!*.onnx' '!consolidated*.pth'
git checkout

# Configure LFS for reliability with large models
git lfs install --local
git config lfs.concurrenttransfers 4
git config lfs.transfer.maxretries 10
git config lfs.transfer.maxretrydelay 30

# Fetch only safetensors LFS objects with progress and retry logic
attempt=1
max_attempts=3
while true; do
echo "LFS fetch attempt ${attempt}/${max_attempts}..."
if git lfs fetch --progress --include="*.safetensors"; then
break
fi
if [[ $attempt -ge $max_attempts ]]; then
echo "ERROR: LFS fetch failed after ${max_attempts} attempts"
exit 1
fi
sleep $((attempt * 5))
((attempt++))
done
echo "Checking out LFS files..."
git lfs checkout --include="*.safetensors"

# Fallback: if model only ships bin weights, re-include and fetch those
if ! ls *.safetensors 1>/dev/null 2>&1; then
echo "No safetensors files found, falling back to bin weights..."
git sparse-checkout set '/*'
git checkout
git lfs fetch --progress --include="*.bin"
git lfs checkout --include="*.bin"
fi

cd ..

# Verify model weights exist (not just LFS pointers)
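
The verification step referenced by the last comment is truncated in this diff. As a rough illustration of how downloaded weights can be told apart from LFS pointer stubs (Git LFS pointers are small text files that begin with a fixed version line), one possible check, not the workflow's actual code, might look like this:

# Hypothetical verification sketch; the real check in workflow.yaml is not shown here.
for f in "$TARGET_DIR"/*.safetensors "$TARGET_DIR"/*.bin; do
  [[ -e "$f" ]] || continue
  if head -c 100 "$f" | grep -q 'git-lfs.github.com/spec'; then
    echo "ERROR: $f is still an LFS pointer, not a downloaded weight file"
    exit 1
  fi
done
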
44 changes: 41 additions & 3 deletions yamls/hsp.yaml
@@ -468,10 +468,48 @@ jobs:
REPO_URL="https://huggingface.co/$MODEL_ID"
[[ -n "$HF_TOKEN" ]] && REPO_URL="https://user:${HF_TOKEN}@huggingface.co/$MODEL_ID"

# Clone with LFS and pull large files
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 "$REPO_URL" "$TARGET_DIR"
# Clone without LFS or checkout, then sparse-checkout only safetensors
GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 --no-checkout "$REPO_URL" "$TARGET_DIR"
cd "$TARGET_DIR"
git lfs pull

# Sparse checkout: exclude bin/gguf/onnx/pth to only get safetensors weights
git sparse-checkout init --no-cone
git sparse-checkout set '/*' '!*.bin' '!*.gguf' '!*.onnx' '!consolidated*.pth'
git checkout

# Configure LFS for reliability with large models
git lfs install --local
git config lfs.concurrenttransfers 4
git config lfs.transfer.maxretries 10
git config lfs.transfer.maxretrydelay 30

# Fetch only safetensors LFS objects with progress and retry logic
attempt=1
max_attempts=3
while true; do
echo "LFS fetch attempt ${attempt}/${max_attempts}..."
if git lfs fetch --progress --include="*.safetensors"; then
break
fi
if [[ $attempt -ge $max_attempts ]]; then
echo "ERROR: LFS fetch failed after ${max_attempts} attempts"
exit 1
fi
sleep $((attempt * 5))
((attempt++))
done
echo "Checking out LFS files..."
git lfs checkout --include="*.safetensors"

# Fallback: if model only ships bin weights, re-include and fetch those
if ! ls *.safetensors 1>/dev/null 2>&1; then
echo "No safetensors files found, falling back to bin weights..."
git sparse-checkout set '/*'
git checkout
git lfs fetch --progress --include="*.bin"
git lfs checkout --include="*.bin"
fi

cd ..

# Verify model weights exist (not just LFS pointers)