From 8142bbc3fbc5081b0027ec6765c450df7e18a051 Mon Sep 17 00:00:00 2001
From: Li Wan <lwan3@student.unimelb.edu.au>
Date: Mon, 7 Oct 2024 11:06:23 +1100
Subject: [PATCH 1/4] Change nltk and pillow versions

---
 requirements.dev.txt                      | 4 ++--
 src/marqo/s2_inference/processing/text.py | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/requirements.dev.txt b/requirements.dev.txt
index 039473af2..c0a2b19e8 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -23,10 +23,10 @@ huggingface-hub==0.25.0
 more_itertools
 boto3==1.25.4
 botocore==1.28.4
-nltk==3.7
+nltk==3.9.1
 torch==1.12.1
 torchvision==0.13.1
-Pillow==9.3.0
+Pillow==10.4.0
 numpy==1.23.4
 validators==0.20.0
 sentence-transformers==2.2.2
diff --git a/src/marqo/s2_inference/processing/text.py b/src/marqo/s2_inference/processing/text.py
index 9c9cc3ef3..5029d67d1 100644
--- a/src/marqo/s2_inference/processing/text.py
+++ b/src/marqo/s2_inference/processing/text.py
@@ -30,6 +30,12 @@ def _splitting_functions(split_by: str, language: str='english') -> FunctionType
     except LookupError:
         nltk.download("punkt")
 
+    # NLTK 3.9+ loads the Punkt tokenizer from the "punkt_tab" resource, so fetch it if it is missing
+    try:
+        nltk.data.find("tokenizers/punkt_tab")
+    except LookupError:
+        nltk.download("punkt_tab")
+
     MAPPING = {
         'character':list,
         'word': partial(word_tokenize, language=language),

From d599de6fc3424403335fbe946193a90fe3e42c78 Mon Sep 17 00:00:00 2001
From: Li Wan <lwan3@student.unimelb.edu.au>
Date: Mon, 7 Oct 2024 11:09:04 +1100
Subject: [PATCH 2/4] Update Marqo-base version

---
 Dockerfile           | 2 +-
 src/marqo/version.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 9ec82fa4f..c616a8e26 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,7 +6,7 @@ COPY vespa .
 RUN mvn clean package
 
 # Stage 2: Base image for Python setup
-FROM marqoai/marqo-base:30 as base_image
+FROM marqoai/marqo-base:33 as base_image
 
 # Allow mounting volume containing data and configs for vespa
 VOLUME /opt/vespa/var
diff --git a/src/marqo/version.py b/src/marqo/version.py
index 1b87bdac2..9d3004739 100644
--- a/src/marqo/version.py
+++ b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__version__ = "2.12.1"
+__version__ = "2.12.2"
 
 def get_version() -> str:
     return f"{__version__}"

From 8ae7c22d3575ffbb35af6d0404a415699ab6fd8a Mon Sep 17 00:00:00 2001
From: Li Wan <lwan3@student.unimelb.edu.au>
Date: Tue, 8 Oct 2024 15:08:20 +1100
Subject: [PATCH 3/4] Upgrade to base image 36

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index c616a8e26..cfcb97225 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,7 +6,7 @@ COPY vespa .
 RUN mvn clean package
 
 # Stage 2: Base image for Python setup
-FROM marqoai/marqo-base:33 as base_image
+FROM marqoai/marqo-base:36 as base_image
 
 # Allow mounting volume containing data and configs for vespa
 VOLUME /opt/vespa/var

From a4b630865f80cf3bfe72e34b743b08a60ceb9c2d Mon Sep 17 00:00:00 2001
From: Li Wan <lwan3@student.unimelb.edu.au>
Date: Tue, 8 Oct 2024 15:12:36 +1100
Subject: [PATCH 4/4] Fix tests

---
 .github/workflows/cuda_docker_marqo.yml       | 25 ++++++++++---------
 .github/workflows/largemodel_unit_test_CI.yml | 13 ++++------
 2 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/cuda_docker_marqo.yml b/.github/workflows/cuda_docker_marqo.yml
index bd28b4e20..7e41147c2 100644
--- a/.github/workflows/cuda_docker_marqo.yml
+++ b/.github/workflows/cuda_docker_marqo.yml
@@ -58,7 +58,7 @@ jobs:
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
 
           # CUDA AMD64 instance
-          ec2-image-id: ${{ secrets.LARGEMODELTEST_AMI }}
+          ec2-image-id: ${{ vars.MARQO_CUDA_TESTS_INSTANCE_AMI }}
           ec2-instance-type: g4dn.2xlarge
           subnet-id: ${{ secrets.LARGEMODELTEST_SUBNET_ID }}
           security-group-id: ${{ secrets.LARGEMODELTEST_SECURITY_GROUP }}
@@ -66,11 +66,11 @@ jobs:
     name: Run CUDA Docker Marqo API Tests
     needs: Start-Runner # required to start the main job when the runner is ready
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
-                
-    environment: marqo-test-suite 
-    
+
+    environment: marqo-test-suite
+
     steps:
-       
+
       - name: Checkout marqo repo
         uses: actions/checkout@v3
         with:
@@ -81,13 +81,13 @@ jobs:
         with:
           python-version: "3.8"
           cache: "pip"
-          
+
       - name: Install Dependencies
         run: |
           #pip install -r requirements.txt
           pip install tox==3.26
           pip install flake8
-      
+
       - name: Set MQ_PY_MARQO_BRANCH variable
         run: |
           if [[ "${{ inputs.py_marqo_branch }}" == "marqo" ]]; then
@@ -97,24 +97,25 @@ jobs:
           else
             echo "MQ_PY_MARQO_BRANCH=git+https://github.com/marqo-ai/py-marqo.git@${{ inputs.py_marqo_branch }}" >> $GITHUB_ENV
           fi
-  
+
       - name: Checkout marqo-api-tests repo
         uses: actions/checkout@v3
         with:
           repository: marqo-ai/marqo-api-tests
           ref: ${{ github.event.inputs.api_tests_branch }}
-          
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v2
-          
+
       - name: Set up Environment
         run: |
           # Set up conf file
           echo 'export MARQO_API_TESTS_ROOT="${{ github.workspace }}"' >> conf
-          
+
       - name: Run CUDA Integration Tests - CUDA Docker Marqo
         run: |
-          export MQ_API_TEST_BRANCH=$(echo "${GITHUB_REF}" | cut -d'/' -f3-)
+          export MQ_API_TEST_BRANCH="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
+          echo "$MQ_API_TEST_BRANCH"
           CUSTOM_TEST_IMG="${{ github.event.inputs.image_to_test }}"
           export MQ_API_TEST_IMG=${CUSTOM_TEST_IMG:-"marqo_docker_0"}
           tox -e py3-cuda_docker_marqo
diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml
index dbea79d4c..3f3d7da03 100644
--- a/.github/workflows/largemodel_unit_test_CI.yml
+++ b/.github/workflows/largemodel_unit_test_CI.yml
@@ -13,6 +13,10 @@ on:
     branches:
       - mainline
 
+concurrency:
+  group: large-model-unit-tests-${{ github.ref }}
+  cancel-in-progress: true
+
 permissions:
   contents: read
 
@@ -36,7 +40,7 @@ jobs:
         with:
           mode: start
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-          ec2-image-id: ${{ secrets.LARGEMODELTEST_AMI }}
+          ec2-image-id: ${{ vars.MARQO_CUDA_TESTS_INSTANCE_AMI }}
           ec2-instance-type: g4dn.2xlarge
           subnet-id: ${{ secrets.LARGEMODELTEST_SUBNET_ID }}
           security-group-id: ${{ secrets.LARGEMODELTEST_SECURITY_GROUP }}
@@ -65,13 +69,6 @@ jobs:
           repository: marqo-ai/marqo-base
           path: marqo-base
 
-      - name: Install FFmpeg and libmagic
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y ffmpeg libmagic1
-          ffmpeg -version  # Verify installation
-          file --version   # Verify libmagic installation and version
-
       - name: Install dependencies
         run: |
           pip install -r marqo-base/requirements.txt