diff --git a/Dockerfile b/Dockerfile index 2e015f3f2..bf247a15c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,53 +39,56 @@ RUN chmod +x scripts/install_ffmpeg.sh \ && bash scripts/install_ffmpeg.sh \ && rm scripts/install_ffmpeg.sh -RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" -O /tmp/miniforge.sh \ - && bash /tmp/miniforge.sh -b -p /opt/conda \ - && rm /tmp/miniforge.sh - -# Add conda to the PATH -ENV PATH=/opt/conda/bin:$PATH - -RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - CONDA_SUBDIR=linux-aarch64; \ - else \ - CONDA_SUBDIR=linux-64; \ - fi; - -# Install Mamba, a faster alternative to conda, within the base environment -RUN --mount=type=cache,target=/opt/conda/pkgs \ - --mount=type=cache,target=/root/.cache/pip \ - conda install -y mamba conda-build==24.5.1 conda-merge -n base -c conda-forge +# Install micromamba (a faster alternative to conda) to /usr/local/bin/micromamba; curl --fail makes an HTTP error abort the build at the download step +ENV MAMBA_ROOT_PREFIX=/opt/conda +RUN set -eux; \ + arch="$(uname -m)"; \ + case "$arch" in \ + x86_64) m_arch="64" ;; \ + aarch64) m_arch="aarch64" ;; \ + *) echo "Unsupported arch: $arch" >&2; exit 1 ;; \ + esac; \ + curl -fsSL --retry 3 "https://micro.mamba.pm/api/micromamba/linux-${m_arch}/latest" -o /tmp/micromamba.tar.bz2; \ + mkdir -p /usr/local/bin; \ + tar -xvjf /tmp/micromamba.tar.bz2 -C /usr/local/bin --strip-components=1 bin/micromamba; \ + rm -f /tmp/micromamba.tar.bz2; \ + mkdir -p "$MAMBA_ROOT_PREFIX" + +# Cache mounts: +# - /opt/conda/pkgs : conda/mamba/micromamba package cache +# - /root/.cache/pip: pip cache +# Commands are run through "micromamba run" instead of "source activate".
+SHELL ["/bin/bash", "-lc"] COPY conda/environments/nv_ingest_environment.base.yml /workspace/nv_ingest_environment.base.yml COPY conda/environments/nv_ingest_environment.linux_64.yml /workspace/nv_ingest_environment.linux_64.yml COPY conda/environments/nv_ingest_environment.linux_aarch64.yml /workspace/nv_ingest_environment.linux_aarch64.yml -# Create nv_ingest base environment +# Set `extract_threads 1` for QEMU+ARM build +# https://github.com/mamba-org/mamba/issues/1611 +RUN micromamba config set extract_threads 1 + +# Install conda-merge into base so we can merge YAMLs +RUN --mount=type=cache,target=/opt/conda/pkgs \ + micromamba install -y -n base -c conda-forge conda-merge + +# Merge env files per-arch and create nv_ingest base environment RUN --mount=type=cache,target=/opt/conda/pkgs \ - --mount=type=cache,target=/root/.cache/pip \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - conda-merge /workspace/nv_ingest_environment.base.yml /workspace/nv_ingest_environment.linux_aarch64.yml > /workspace/nv_ingest_environment.yml; \ + micromamba run -n base conda-merge /workspace/nv_ingest_environment.base.yml /workspace/nv_ingest_environment.linux_aarch64.yml > /workspace/nv_ingest_environment.yml; \ rm /workspace/nv_ingest_environment.base.yml /workspace/nv_ingest_environment.linux_aarch64.yml; \ else \ - conda-merge /workspace/nv_ingest_environment.base.yml /workspace/nv_ingest_environment.linux_64.yml > /workspace/nv_ingest_environment.yml; \ + micromamba run -n base conda-merge /workspace/nv_ingest_environment.base.yml /workspace/nv_ingest_environment.linux_64.yml > /workspace/nv_ingest_environment.yml; \ rm /workspace/nv_ingest_environment.base.yml /workspace/nv_ingest_environment.linux_64.yml; \ fi; \ - mamba env create -f /workspace/nv_ingest_environment.yml - -# Set default shell to bash -SHELL ["/bin/bash", "-c"] - -# Activate the environment (make it default for subsequent commands) -RUN echo "source activate nv_ingest_runtime" >> ~/.bashrc + micromamba
create -y -n nv_ingest_runtime -f /workspace/nv_ingest_environment.yml -# Install Tini via conda from the conda-forge channel +# Install tini in the runtime env RUN --mount=type=cache,target=/opt/conda/pkgs \ - --mount=type=cache,target=/root/.cache/pip \ - source activate nv_ingest_runtime \ - && mamba install -y -c conda-forge tini + micromamba install -y -n nv_ingest_runtime -c conda-forge tini # Ensure dynamically linked libraries in the conda environment are found at runtime +ENV PATH=/opt/conda/envs/nv_ingest_runtime/bin:/usr/local/bin:$PATH ENV LD_LIBRARY_PATH=/opt/conda/envs/nv_ingest_runtime/lib:$LD_LIBRARY_PATH # Set the working directory in the container @@ -125,25 +128,22 @@ COPY src src RUN rm -rf ./src/nv_ingest/dist ./src/dist ./client/dist ./api/dist # Install python build from pip, version needed not present in conda -RUN source activate nv_ingest_runtime \ - && pip install 'build>=1.2.2' +RUN --mount=type=cache,target=/root/.cache/pip \ + micromamba run -n nv_ingest_runtime pip install 'build>=1.2.2' # Add pip cache path to match conda's package cache RUN --mount=type=cache,target=/opt/conda/pkgs \ --mount=type=cache,target=/root/.cache/pip \ chmod +x ./ci/scripts/build_pip_packages.sh \ - && source activate nv_ingest_runtime \ - && ./ci/scripts/build_pip_packages.sh --type ${RELEASE_TYPE} --lib api \ - && ./ci/scripts/build_pip_packages.sh --type ${RELEASE_TYPE} --lib client \ - && ./ci/scripts/build_pip_packages.sh --type ${RELEASE_TYPE} --lib service + && micromamba run -n nv_ingest_runtime ./ci/scripts/build_pip_packages.sh --type ${RELEASE_TYPE} --lib api \ + && micromamba run -n nv_ingest_runtime ./ci/scripts/build_pip_packages.sh --type ${RELEASE_TYPE} --lib client \ + && micromamba run -n nv_ingest_runtime ./ci/scripts/build_pip_packages.sh --type ${RELEASE_TYPE} --lib service -RUN --mount=type=cache,target=/opt/conda/pkgs\ +RUN --mount=type=cache,target=/opt/conda/pkgs \ --mount=type=cache,target=/root/.cache/pip \ - source activate
nv_ingest_runtime \ - && pip install ./src/dist/*.whl \ - && pip install ./api/dist/*.whl \ - && pip install ./client/dist/*.whl - + micromamba run -n nv_ingest_runtime pip install ./src/dist/*.whl \ + && micromamba run -n nv_ingest_runtime pip install ./api/dist/*.whl \ + && micromamba run -n nv_ingest_runtime pip install ./client/dist/*.whl RUN rm -rf src @@ -160,13 +160,11 @@ COPY ./docker/scripts/entrypoint_source_ext.sh /workspace/docker/entrypoint_sour COPY ./docker/scripts/post_build_triggers.py /workspace/docker/post_build_triggers.py RUN --mount=type=cache,target=/root/.cache/pip \ - source activate nv_ingest_runtime \ - && python3 /workspace/docker/post_build_triggers.py + micromamba run -n nv_ingest_runtime python3 /workspace/docker/post_build_triggers.py # Remove graphviz and its dependencies to reduce image size -RUN source activate nv_ingest_runtime && \ - mamba remove graphviz python-graphviz --force -y && \ - mamba uninstall gtk3 pango cairo fonts-conda-ecosystem -y +RUN micromamba remove -y -n nv_ingest_runtime graphviz python-graphviz || true && \ + micromamba remove -y -n nv_ingest_runtime gtk3 pango cairo fonts-conda-ecosystem || true RUN chmod +x /workspace/docker/entrypoint.sh @@ -175,10 +173,9 @@ ENTRYPOINT ["/opt/conda/envs/nv_ingest_runtime/bin/tini", "--", "/workspace/dock FROM nv_ingest_install AS development -RUN source activate nv_ingest_runtime && \ - --mount=type=cache,target=/opt/conda/pkgs \ +RUN --mount=type=cache,target=/opt/conda/pkgs \ --mount=type=cache,target=/root/.cache/pip \ - pip install -e ./client + micromamba run -n nv_ingest_runtime pip install -e ./client CMD ["/bin/bash"] @@ -197,8 +194,8 @@ COPY src src COPY api api COPY client client -RUN source activate nv_ingest_runtime && \ - pip install -r ./docs/requirements.txt +RUN --mount=type=cache,target=/root/.cache/pip \ + micromamba run -n nv_ingest_runtime pip install -r ./docs/requirements.txt # Default command: Run `make docs` -CMD ["bash", "-c", "cd /workspace/docs
&& source activate nv_ingest_runtime && make docs"] +CMD ["bash", "-c", "cd /workspace/docs && micromamba run -n nv_ingest_runtime make docs"] diff --git a/docker/scripts/entrypoint.sh b/docker/scripts/entrypoint.sh index abb8a389d..9187abecc 100755 --- a/docker/scripts/entrypoint.sh +++ b/docker/scripts/entrypoint.sh @@ -17,11 +17,7 @@ #!/bin/bash -# Activate the `nv_ingest_runtime` conda environment - set -e -. /opt/conda/etc/profile.d/conda.sh -conda activate nv_ingest_runtime # Source "source" file if it exists SRC_FILE="/opt/docker/bin/entrypoint_source" diff --git a/docker/scripts/entrypoint_devcontainer.sh b/docker/scripts/entrypoint_devcontainer.sh index 7625bd44a..ac9ee656e 100755 --- a/docker/scripts/entrypoint_devcontainer.sh +++ b/docker/scripts/entrypoint_devcontainer.sh @@ -17,10 +17,6 @@ #!/bin/bash -# Activate the `nv_ingest_runtime` conda environment -. /opt/conda/etc/profile.d/conda.sh -conda activate nv_ingest_runtime - # Source "source" file if it exists SRC_FILE="/opt/docker/bin/entrypoint_source" [ -f "${SRC_FILE}" ] && source "${SRC_FILE}" diff --git a/docker/scripts/entrypoint_source_ext.sh b/docker/scripts/entrypoint_source_ext.sh index dd4d62ec2..f6abc17fd 100755 --- a/docker/scripts/entrypoint_source_ext.sh +++ b/docker/scripts/entrypoint_source_ext.sh @@ -6,8 +6,16 @@ set -e # Run preparation tasks here +# Helper: truthy check (1/true/on/yes, case-insensitive) +is_truthy() { + [ -n "$1" ] || return 1 + case "$(printf "%s" "$1" | tr '[:upper:]' '[:lower:]')" in + 1|true|on|yes) return 0 ;; + *) return 1 ;; + esac +} -if [ "$INSTALL_ADOBE_SDK" = "true" ]; then +if is_truthy "${INSTALL_ADOBE_SDK}"; then echo "Checking if Adobe PDF Services SDK is installed..." # Check if pdfservices-sdk is installed @@ -20,11 +28,14 @@ fi # Install audio dependencies if ! python -c "import pkg_resources; pkg_resources.require('librosa')" 2>/dev/null; then echo "Installing librosa using conda..."
- mamba install -y -c conda-forge librosa + micromamba install -y -n nv_ingest_runtime -c conda-forge librosa fi -# If MEM_TRACE is set in the environment, use mamba to install memray -if [ -n "$MEM_TRACE" ]; then +# If MEM_TRACE or INGEST_MEM_TRACE is truthy (see is_truthy), install memray via micromamba, falling back to pip +if is_truthy "${MEM_TRACE}" || is_truthy "${INGEST_MEM_TRACE}"; then echo "MEM_TRACE is set. Installing memray via mamba..." - mamba install -y conda-forge::memray + micromamba install -y -n nv_ingest_runtime -c conda-forge memray || { + echo "Fallback: installing memray via pip..." + pip install memray + } fi