From 8142bbc3fbc5081b0027ec6765c450df7e18a051 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 11:06:23 +1100 Subject: [PATCH 1/4] Change nltk and pillow versions --- requirements.dev.txt | 4 ++-- src/marqo/s2_inference/processing/text.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/requirements.dev.txt b/requirements.dev.txt index 039473af2..c0a2b19e8 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -23,10 +23,10 @@ huggingface-hub==0.25.0 more_itertools boto3==1.25.4 botocore==1.28.4 -nltk==3.7 +nltk==3.9.1 torch==1.12.1 torchvision==0.13.1 -Pillow==9.3.0 +Pillow==10.4.0 numpy==1.23.4 validators==0.20.0 sentence-transformers==2.2.2 diff --git a/src/marqo/s2_inference/processing/text.py b/src/marqo/s2_inference/processing/text.py index 9c9cc3ef3..5029d67d1 100644 --- a/src/marqo/s2_inference/processing/text.py +++ b/src/marqo/s2_inference/processing/text.py @@ -30,6 +30,12 @@ def _splitting_functions(split_by: str, language: str='english') -> FunctionType except LookupError: nltk.download("punkt") + # Punkt_tab needs to be downloaded after NLTK 3.8 and later + try: + nltk.data.find("tokenizers/punkt_tab") + except LookupError: + nltk.download("punkt_tab") + MAPPING = { 'character':list, 'word': partial(word_tokenize, language=language), From d599de6fc3424403335fbe946193a90fe3e42c78 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 11:09:04 +1100 Subject: [PATCH 2/4] Update Marqo-base version --- Dockerfile | 2 +- src/marqo/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9ec82fa4f..c616a8e26 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ COPY vespa . RUN mvn clean package # Stage 2: Base image for Python setup -FROM marqoai/marqo-base:30 as base_image +FROM marqoai/marqo-base:33 as base_image # Allow mounting volume containing data and configs for vespa VOLUME /opt/vespa/var diff --git a/src/marqo/version.py b/src/marqo/version.py index 1b87bdac2..9d3004739 100644 --- a/src/marqo/version.py +++ b/src/marqo/version.py @@ -1,4 +1,4 @@ -__version__ = "2.12.1" +__version__ = "2.12.2" def get_version() -> str: return f"{__version__}" From 8ae7c22d3575ffbb35af6d0404a415699ab6fd8a Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 8 Oct 2024 15:08:20 +1100 Subject: [PATCH 3/4] Upgrade to base image 36 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c616a8e26..cfcb97225 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ COPY vespa . RUN mvn clean package # Stage 2: Base image for Python setup -FROM marqoai/marqo-base:33 as base_image +FROM marqoai/marqo-base:36 as base_image # Allow mounting volume containing data and configs for vespa VOLUME /opt/vespa/var From a4b630865f80cf3bfe72e34b743b08a60ceb9c2d Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 8 Oct 2024 15:12:36 +1100 Subject: [PATCH 4/4] Fix tests --- .github/workflows/cuda_docker_marqo.yml | 25 ++++++++++--------- .github/workflows/largemodel_unit_test_CI.yml | 13 ++++------ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/.github/workflows/cuda_docker_marqo.yml b/.github/workflows/cuda_docker_marqo.yml index bd28b4e20..7e41147c2 100644 --- a/.github/workflows/cuda_docker_marqo.yml +++ b/.github/workflows/cuda_docker_marqo.yml @@ -58,7 +58,7 @@ jobs: github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} # CUDA AMD64 instance - ec2-image-id: ${{ secrets.LARGEMODELTEST_AMI }} + ec2-image-id: ${{ vars.MARQO_CUDA_TESTS_INSTANCE_AMI }} ec2-instance-type: g4dn.2xlarge subnet-id: ${{ secrets.LARGEMODELTEST_SUBNET_ID }} security-group-id: ${{ secrets.LARGEMODELTEST_SECURITY_GROUP }} @@ -66,11 +66,11 @@ jobs: name: Run CUDA Docker Marqo API Tests needs: Start-Runner # required to start the main job when the runner is ready runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - - environment: marqo-test-suite - + + environment: marqo-test-suite + steps: - + - name: Checkout marqo repo uses: actions/checkout@v3 with: @@ -81,13 +81,13 @@ jobs: with: python-version: "3.8" cache: "pip" - + - name: Install Dependencies run: | #pip install -r requirements.txt pip install tox==3.26 pip install flake8 - + - name: Set MQ_PY_MARQO_BRANCH variable run: | if [[ "${{ inputs.py_marqo_branch }}" == "marqo" ]]; then @@ -97,24 +97,25 @@ jobs: else echo "MQ_PY_MARQO_BRANCH=git+https://github.com/marqo-ai/py-marqo.git@${{ inputs.py_marqo_branch }}" >> $GITHUB_ENV fi - + - name: Checkout marqo-api-tests repo uses: actions/checkout@v3 with: repository: marqo-ai/marqo-api-tests ref: ${{ github.event.inputs.api_tests_branch }} - + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - + - name: Set up Environment run: | # Set up conf file echo 'export MARQO_API_TESTS_ROOT="${{ github.workspace }}"' >> conf - + - name: Run CUDA Integration Tests - CUDA Docker Marqo run: | - export MQ_API_TEST_BRANCH=$(echo "${GITHUB_REF}" | cut -d'/' -f3-) + export MQ_API_TEST_BRANCH="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" + echo "$MQ_API_TEST_BRANCH" CUSTOM_TEST_IMG="${{ github.event.inputs.image_to_test }}" export MQ_API_TEST_IMG=${CUSTOM_TEST_IMG:-"marqo_docker_0"} tox -e py3-cuda_docker_marqo diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml index dbea79d4c..3f3d7da03 100644 --- a/.github/workflows/largemodel_unit_test_CI.yml +++ b/.github/workflows/largemodel_unit_test_CI.yml @@ -13,6 +13,10 @@ on: branches: - mainline +concurrency: + group: large-model-unit-tests-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read @@ -36,7 +40,7 @@ jobs: with: mode: start github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ secrets.LARGEMODELTEST_AMI }} + ec2-image-id: ${{ vars.MARQO_CUDA_TESTS_INSTANCE_AMI }} ec2-instance-type: g4dn.2xlarge subnet-id: ${{ secrets.LARGEMODELTEST_SUBNET_ID }} security-group-id: ${{ secrets.LARGEMODELTEST_SECURITY_GROUP }} @@ -65,13 +69,6 @@ jobs: repository: marqo-ai/marqo-base path: marqo-base - - name: Install FFmpeg and libmagic - run: | - sudo apt-get update - sudo apt-get install -y ffmpeg libmagic1 - ffmpeg -version # Verify installation - file --version # Verify libmagic installation and version - - name: Install dependencies run: | pip install -r marqo-base/requirements.txt