uhh-lt · bigabig · Jan 25, 2025 · Nov 24, 2024 · Nov 24, 2024 · Nov 24, 2024
diff --git a/.github/workflows/frontend_checks.yml b/.github/workflows/frontend_checks.yml
@@ -19,25 +19,69 @@ jobs:
       COMPOSE_PROFILES: "weaviate,ray,background,backend,frontend"
       RAY_CONFIG: "config_gpu.yaml"
       JWT_SECRET: ${{ secrets.JWT_SECRET }}
+      BACKEND_HAS_NEW_REQUIREMENTS: false
+      RAY_HAS_NEW_REQUIREMENTS: false
     steps:
+      # Check out the repository; on pull_request events the working tree is then
+      # switched to the pull request via the GitHub CLI.
+      # NOTE(review): later steps in this job run `git merge-base` / `git diff`
+      # against the base branch or the previously pushed commit. actions/checkout
+      # fetches a single commit by default - confirm the needed history is
+      # available on this runner.
       - uses: actions/checkout@v3
       - name: Checkout PR
         if: github.event_name == 'pull_request'
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: gh pr checkout ${{ github.event.pull_request.number }}
+      # Pull requests: set BACKEND_HAS_NEW_REQUIREMENTS=true for the following
+      # steps when the PR touches one of the backend dependency manifests,
+      # measured against its merge base with the target branch.
+      - name: Check for new backend requirements
+        id: check_backend_requirements
+        if: github.event_name == 'pull_request'
+        env:
+          # Pass the branch name to the shell as data instead of splicing the
+          # expression into the script text.
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+        run: |
+          merge_base=$(git merge-base "origin/${BASE_REF}" HEAD)
+          # -F -x compares whole paths literally: '.' is not a wildcard and
+          # look-alike names such as requirements.txt.bak no longer match.
+          if git diff --name-only "$merge_base" HEAD | grep -q -F -x -e 'backend/requirements.txt' -e 'backend/environment.yml'; then
+            echo "BACKEND_HAS_NEW_REQUIREMENTS=true" >> "$GITHUB_ENV"
+          fi
+      # Pushes: set BACKEND_HAS_NEW_REQUIREMENTS=true for the following steps
+      # when the push changed one of the backend dependency manifests, comparing
+      # the previous head of the ref with the pushed commit.
+      - name: Check for new backend requirements on push
+        id: check_backend_requirements2
+        if: github.event_name == 'push'
+        env:
+          # Pass the SHA to the shell as data instead of splicing the
+          # expression into the script text.
+          BEFORE_SHA: ${{ github.event.before }}
+        run: |
+          if ! git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then
+            # The previous head is not a usable commit (e.g. the all-zero
+            # placeholder of a newly created ref, or a commit missing from the
+            # local clone). Keep the flag unset, but say so instead of letting
+            # a failing git diff pass silently as "no change".
+            echo "::warning::Cannot diff against '${BEFORE_SHA}'; assuming backend requirements are unchanged"
+          # -F -x compares whole paths literally: '.' is not a wildcard and
+          # look-alike names such as requirements.txt.bak no longer match.
+          elif git diff --name-only "$BEFORE_SHA" "$GITHUB_SHA" | grep -q -F -x -e 'backend/requirements.txt' -e 'backend/environment.yml'; then
+            echo "BACKEND_HAS_NEW_REQUIREMENTS=true" >> "$GITHUB_ENV"
+          fi
+      # Pull requests: set RAY_HAS_NEW_REQUIREMENTS=true for the following steps
+      # when the PR touches the ray model worker's requirements file, measured
+      # against its merge base with the target branch.
+      - name: Check for new ray requirements
+        id: check_ray_requirements
+        if: github.event_name == 'pull_request'
+        env:
+          # Pass the branch name to the shell as data instead of splicing the
+          # expression into the script text.
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+        run: |
+          merge_base=$(git merge-base "origin/${BASE_REF}" HEAD)
+          # -F -x compares the whole path literally: '.' is not a wildcard and
+          # look-alike names such as requirements.txt.bak no longer match.
+          if git diff --name-only "$merge_base" HEAD | grep -q -F -x -e 'backend/src/app/preprocessing/ray_model_worker/requirements.txt'; then
+            echo "RAY_HAS_NEW_REQUIREMENTS=true" >> "$GITHUB_ENV"
+          fi
+      # Pushes: set RAY_HAS_NEW_REQUIREMENTS=true for the following steps when
+      # the push changed the ray model worker's requirements file, comparing the
+      # previous head of the ref with the pushed commit.
+      - name: Check for new ray requirements on push
+        id: check_ray_requirements2
+        if: github.event_name == 'push'
+        env:
+          # Pass the SHA to the shell as data instead of splicing the
+          # expression into the script text.
+          BEFORE_SHA: ${{ github.event.before }}
+        run: |
+          if ! git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then
+            # The previous head is not a usable commit (e.g. the all-zero
+            # placeholder of a newly created ref, or a commit missing from the
+            # local clone). Keep the flag unset, but say so instead of letting
+            # a failing git diff pass silently as "no change".
+            echo "::warning::Cannot diff against '${BEFORE_SHA}'; assuming ray requirements are unchanged"
+          # -F -x compares the whole path literally: '.' is not a wildcard and
+          # look-alike names such as requirements.txt.bak no longer match.
+          elif git diff --name-only "$BEFORE_SHA" "$GITHUB_SHA" | grep -q -F -x -e 'backend/src/app/preprocessing/ray_model_worker/requirements.txt'; then
+            echo "RAY_HAS_NEW_REQUIREMENTS=true" >> "$GITHUB_ENV"
+          fi
+      # Runs the project's setup scripts, then edits docker/.env for this run:
+      # the frontend image version always gets "-<run id>" appended, the ray and
+      # backend image versions only when the matching *_HAS_NEW_REQUIREMENTS
+      # flag is "true". Finally the spaCy/model cache directories under $HOME
+      # are written to docker/.env and created if missing.
       - name: Prepare environment
         run: |
           ./bin/setup-folders.sh
           ./bin/setup-envs.sh --project_name action-runner --port_prefix 131
           sed -i 's/\(DATS_FRONTEND_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
+          if [ "${{ env.RAY_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
+            sed -i 's/\(DATS_RAY_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
+          fi
+          if [ "${{ env.BACKEND_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
+            sed -i 's/\(DATS_BACKEND_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
+          fi
           echo "SPACY_CACHE_DIR=$HOME/spacy_models" >> docker/.env
           echo "MODELS_CACHE_DIR=$HOME/models_cache" >> docker/.env
           mkdir -p $HOME/spacy_models
           mkdir -p $HOME/models_cache
+      # Builds the images this run needs and starts the stack from docker/:
+      # the ray and dats-backend-api services are rebuilt only when the matching
+      # *_HAS_NEW_REQUIREMENTS flag is "true"; dats-frontend is always built.
+      # `up --wait` returns once the services are running/healthy.
       - name: Build & Start Docker Containers
         working-directory: docker
         run: |
+          if [ "${{ env.RAY_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
+            docker compose build ray
+          fi
+          if [ "${{ env.BACKEND_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
+            docker compose build dats-backend-api
+          fi
           docker compose build dats-frontend
           docker compose up --wait --quiet-pull
       - name: Setup node

diff --git a/backend/environment.yml b/backend/environment.yml
@@ -32,7 +32,8 @@ dependencies:
   - pip=23.3.2
   - pre-commit=3.3.3
   - psycopg2-binary=2.9
-  - pydantic=2.5.3
+  - pydantic=2.10.5
+  - pydantic-core=2.27.2
   - pytest=7.4.3
   - python-jose=3.3
   - python-magic=0.4

diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -1,5 +1,5 @@
 mammoth==1.6.0
-ollama==0.3.1
+ollama==0.4.6
 pymupdf==1.23.4
 pyright==1.1.385
 pytest-order==1.2.1

diff --git a/backend/src/alembic/versions/050f9378a3e1_add_sentence_annotation.py b/backend/src/alembic/versions/050f9378a3e1_add_sentence_annotation.py
@@ -0,0 +1,157 @@
+"""add sentence annotation
+
+Revision ID: 050f9378a3e1
+Revises: f3108bb5e496
+Create Date: 2024-11-21 10:57:16.865538
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "050f9378a3e1"
+down_revision: Union[str, None] = "f3108bb5e496"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the ``sentenceannotation`` table and wire it into ``objecthandle``.
+
+    Besides the new table and its indexes, this adds a nullable
+    ``sentence_annotation_id`` column to ``objecthandle`` and rebuilds that
+    table's unique constraint, its coalesce-based unique index and its
+    "exactly one instance" check constraint so that each of them covers the
+    new column.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    # New table: a start and an end sentence id plus references to a code and
+    # an annotation document. Deleting either referenced row cascades to the
+    # annotation (ON DELETE CASCADE on both foreign keys).
+    op.create_table(
+        "sentenceannotation",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("sentence_id_start", sa.Integer(), nullable=False),
+        sa.Column("sentence_id_end", sa.Integer(), nullable=False),
+        sa.Column(
+            "created", sa.DateTime(), server_default=sa.text("now()"), nullable=False
+        ),
+        sa.Column(
+            "updated", sa.DateTime(), server_default=sa.text("now()"), nullable=False
+        ),
+        sa.Column("code_id", sa.Integer(), nullable=False),
+        sa.Column("annotation_document_id", sa.Integer(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["annotation_document_id"], ["annotationdocument.id"], ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(["code_id"], ["code.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # Non-unique single-column indexes on every column except ``updated``.
+    op.create_index(
+        op.f("ix_sentenceannotation_annotation_document_id"),
+        "sentenceannotation",
+        ["annotation_document_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_sentenceannotation_code_id"),
+        "sentenceannotation",
+        ["code_id"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_sentenceannotation_created"),
+        "sentenceannotation",
+        ["created"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_sentenceannotation_id"), "sentenceannotation", ["id"], unique=False
+    )
+    op.create_index(
+        op.f("ix_sentenceannotation_sentence_id_end"),
+        "sentenceannotation",
+        ["sentence_id_end"],
+        unique=False,
+    )
+    op.create_index(
+        op.f("ix_sentenceannotation_sentence_id_start"),
+        "sentenceannotation",
+        ["sentence_id_start"],
+        unique=False,
+    )
+    # Nullable reference from an object handle to a sentence annotation.
+    op.add_column(
+        "objecthandle", sa.Column("sentence_annotation_id", sa.Integer(), nullable=True)
+    )
+    # Recreate the unique constraint with sentence_annotation_id added to its
+    # column list.
+    op.drop_constraint(
+        "UC_only_one_object_handle_per_instance", "objecthandle", type_="unique"
+    )
+    op.create_unique_constraint(
+        "UC_only_one_object_handle_per_instance",
+        "objecthandle",
+        [
+            "user_id",
+            "project_id",
+            "code_id",
+            "memo_id",
+            "source_document_id",
+            "span_annotation_id",
+            "bbox_annotation_id",
+            "sentence_annotation_id",
+            "span_group_id",
+            "document_tag_id",
+        ],
+    )
+    # Recreate the unique expression index the same way. Every column is
+    # wrapped in coalesce(col, 0), so a NULL is indexed as 0 - presumably so
+    # that uniqueness is also enforced across NULL columns (cf. the index
+    # name); TODO confirm.
+    op.drop_index("idx_for_uc_work_with_null", table_name="objecthandle")
+    op.create_index(
+        "idx_for_uc_work_with_null",
+        "objecthandle",
+        [
+            sa.text("coalesce(user_id, 0)"),
+            sa.text("coalesce(project_id, 0)"),
+            sa.text("coalesce(code_id, 0)"),
+            sa.text("coalesce(source_document_id, 0)"),
+            sa.text("coalesce(span_annotation_id, 0)"),
+            sa.text("coalesce(bbox_annotation_id, 0)"),
+            sa.text("coalesce(sentence_annotation_id, 0)"),
+            sa.text("coalesce(span_group_id, 0)"),
+            sa.text("coalesce(document_tag_id, 0)"),
+            sa.text("coalesce(memo_id, 0)"),
+        ],
+        unique=True,
+    )
+    op.create_index(
+        op.f("ix_objecthandle_sentence_annotation_id"),
+        "objecthandle",
+        ["sentence_annotation_id"],
+        unique=False,
+    )
+    # The foreign key is created without an explicit name (None); deleting a
+    # sentence annotation cascades to the object handles that reference it.
+    op.create_foreign_key(
+        None,
+        "objecthandle",
+        "sentenceannotation",
+        ["sentence_annotation_id"],
+        ["id"],
+        ondelete="CASCADE",
+    )
+    # ### end Alembic commands ###
+    # The statements below sit outside the auto-generated section: the check
+    # constraint is dropped and recreated with sentence_annotation_id included.
+    op.drop_constraint(
+        constraint_name="CC_object_handle_refers_to_exactly_one_instance",
+        table_name="objecthandle",
+    )
+
+    # Each CASE yields 1 for a non-NULL reference column and 0 otherwise; the
+    # constraint requires the sum to be exactly 1.
+    op.create_check_constraint(
+        constraint_name="CC_object_handle_refers_to_exactly_one_instance",
+        table_name="objecthandle",
+        condition=sa.text(
+            """(
+                CASE WHEN user_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN project_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN code_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN memo_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN source_document_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN span_annotation_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN bbox_annotation_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN sentence_annotation_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN span_group_id IS NULL THEN 0 ELSE 1 END
+                + CASE WHEN document_tag_id IS NULL THEN 0 ELSE 1 END
+            ) = 1"""
+        ),
+    )
+
+
+def downgrade() -> None:
+    pass