Skip to content

Commit 5feeb92

Browse files
authored
Merge pull request #481 from uhh-lt/sentence-annotations
Sentence annotations
2 parents 841f340 + cbb60ff commit 5feeb92

File tree

249 files changed

+11800
-2520
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

249 files changed

+11800
-2520
lines changed

.github/workflows/frontend_checks.yml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,25 +19,69 @@ jobs:
1919
COMPOSE_PROFILES: "weaviate,ray,background,backend,frontend"
2020
RAY_CONFIG: "config_gpu.yaml"
2121
JWT_SECRET: ${{ secrets.JWT_SECRET }}
22+
BACKEND_HAS_NEW_REQUIREMENTS: false
23+
RAY_HAS_NEW_REQUIREMENTS: false
2224
steps:
2325
- uses: actions/checkout@v3
2426
- name: Checkout PR
2527
if: github.event_name == 'pull_request'
2628
env:
2729
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2830
run: gh pr checkout ${{ github.event.pull_request.number }}
31+
- name: Check for new backend requirements
32+
id: check_backend_requirements
33+
if: github.event_name == 'pull_request'
34+
run: |
35+
base_branch=$(git merge-base origin/${{ github.event.pull_request.base.ref }} HEAD)
36+
if git diff --name-only $base_branch HEAD | grep -q -e '^backend/requirements.txt' -e '^backend/environment.yml'; then
37+
echo "BACKEND_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
38+
fi
39+
- name: Check for new backend requirements on push
40+
id: check_backend_requirements2
41+
if: github.event_name == 'push'
42+
run: |
43+
if git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep -q -e '^backend/requirements.txt' -e '^backend/environment.yml'; then
44+
echo "BACKEND_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
45+
fi
46+
- name: Check for new ray requirements
47+
id: check_ray_requirements
48+
if: github.event_name == 'pull_request'
49+
run: |
50+
base_branch=$(git merge-base origin/${{ github.event.pull_request.base.ref }} HEAD)
51+
if git diff --name-only $base_branch HEAD | grep -q -e '^backend/src/app/preprocessing/ray_model_worker/requirements.txt'; then
52+
echo "RAY_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
53+
fi
54+
- name: Check for new ray requirements on push
55+
id: check_ray_requirements2
56+
if: github.event_name == 'push'
57+
run: |
58+
if git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep -q -e '^backend/src/app/preprocessing/ray_model_worker/requirements.txt'; then
59+
echo "RAY_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
60+
fi
2961
- name: Prepare environment
3062
run: |
3163
./bin/setup-folders.sh
3264
./bin/setup-envs.sh --project_name action-runner --port_prefix 131
3365
sed -i 's/\(DATS_FRONTEND_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
66+
if [ "${{ env.RAY_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
67+
sed -i 's/\(DATS_RAY_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
68+
fi
69+
if [ "${{ env.BACKEND_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
70+
sed -i 's/\(DATS_BACKEND_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
71+
fi
3472
echo "SPACY_CACHE_DIR=$HOME/spacy_models" >> docker/.env
3573
echo "MODELS_CACHE_DIR=$HOME/models_cache" >> docker/.env
3674
mkdir -p $HOME/spacy_models
3775
mkdir -p $HOME/models_cache
3876
- name: Build & Start Docker Containers
3977
working-directory: docker
4078
run: |
79+
if [ "${{ env.RAY_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
80+
docker compose build ray
81+
fi
82+
if [ "${{ env.BACKEND_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
83+
docker compose build dats-backend-api
84+
fi
4185
docker compose build dats-frontend
4286
docker compose up --wait --quiet-pull
4387
- name: Setup node

backend/environment.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ dependencies:
3232
- pip=23.3.2
3333
- pre-commit=3.3.3
3434
- psycopg2-binary=2.9
35-
- pydantic=2.5.3
35+
- pydantic=2.10.5
36+
- pydantic-core=2.27.2
3637
- pytest=7.4.3
3738
- python-jose=3.3
3839
- python-magic=0.4

backend/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
mammoth==1.6.0
2-
ollama==0.3.1
2+
ollama==0.4.6
33
pymupdf==1.23.4
44
pyright==1.1.385
55
pytest-order==1.2.1
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
"""add sentence annotation
2+
3+
Revision ID: 050f9378a3e1
4+
Revises: f3108bb5e496
5+
Create Date: 2024-11-21 10:57:16.865538
6+
7+
"""
8+
9+
from typing import Sequence, Union
10+
11+
import sqlalchemy as sa
12+
13+
from alembic import op
14+
15+
# revision identifiers, used by Alembic.
16+
revision: str = "050f9378a3e1"
17+
down_revision: Union[str, None] = "f3108bb5e496"
18+
branch_labels: Union[str, Sequence[str], None] = None
19+
depends_on: Union[str, Sequence[str], None] = None
20+
21+
22+
def upgrade() -> None:
    """Create the ``sentenceannotation`` table and register it on ``objecthandle``.

    Steps, in order:

    1. create ``sentenceannotation`` plus one non-unique index per indexed column;
    2. add the nullable ``objecthandle.sentence_annotation_id`` column;
    3. rebuild the unique constraint and the coalesce-based unique index on
       ``objecthandle`` so that both include the new column;
    4. index the new column and link it to ``sentenceannotation.id``
       (``ON DELETE CASCADE``);
    5. rebuild the "exactly one instance" check constraint so that it counts
       the new column as well.
    """
    annotation_table = "sentenceannotation"
    handle_table = "objecthandle"
    unique_name = "UC_only_one_object_handle_per_instance"
    null_safe_index = "idx_for_uc_work_with_null"
    check_name = "CC_object_handle_refers_to_exactly_one_instance"

    def _timestamp(column_name: str) -> sa.Column:
        # Non-null timestamp column that the database fills in with now().
        return sa.Column(
            column_name,
            sa.DateTime(),
            server_default=sa.text("now()"),
            nullable=False,
        )

    # --- 1. the new table and its single-column indexes ---------------------
    op.create_table(
        annotation_table,
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("sentence_id_start", sa.Integer(), nullable=False),
        sa.Column("sentence_id_end", sa.Integer(), nullable=False),
        _timestamp("created"),
        _timestamp("updated"),
        sa.Column("code_id", sa.Integer(), nullable=False),
        sa.Column("annotation_document_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["annotation_document_id"], ["annotationdocument.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["code_id"], ["code.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    indexed_columns = (
        "annotation_document_id",
        "code_id",
        "created",
        "id",
        "sentence_id_end",
        "sentence_id_start",
    )
    for column_name in indexed_columns:
        op.create_index(
            op.f(f"ix_{annotation_table}_{column_name}"),
            annotation_table,
            [column_name],
            unique=False,
        )

    # --- 2. new reference column on objecthandle ----------------------------
    op.add_column(
        handle_table,
        sa.Column("sentence_annotation_id", sa.Integer(), nullable=True),
    )

    # --- 3. uniqueness rules have to be rebuilt to cover the new column -----
    unique_columns = (
        "user_id",
        "project_id",
        "code_id",
        "memo_id",
        "source_document_id",
        "span_annotation_id",
        "bbox_annotation_id",
        "sentence_annotation_id",
        "span_group_id",
        "document_tag_id",
    )
    op.drop_constraint(unique_name, handle_table, type_="unique")
    op.create_unique_constraint(unique_name, handle_table, list(unique_columns))

    # The expression index uses its own column order (memo_id comes last).
    coalesced_columns = (
        "user_id",
        "project_id",
        "code_id",
        "source_document_id",
        "span_annotation_id",
        "bbox_annotation_id",
        "sentence_annotation_id",
        "span_group_id",
        "document_tag_id",
        "memo_id",
    )
    op.drop_index(null_safe_index, table_name=handle_table)
    op.create_index(
        null_safe_index,
        handle_table,
        [sa.text(f"coalesce({column_name}, 0)") for column_name in coalesced_columns],
        unique=True,
    )

    # --- 4. index + foreign key for the new column --------------------------
    op.create_index(
        op.f("ix_objecthandle_sentence_annotation_id"),
        handle_table,
        ["sentence_annotation_id"],
        unique=False,
    )
    op.create_foreign_key(
        None,  # no explicit name is given for this foreign key
        handle_table,
        annotation_table,
        ["sentence_annotation_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # --- 5. check constraint: exactly one reference column is non-null ------
    op.drop_constraint(constraint_name=check_name, table_name=handle_table)
    exactly_one_reference = sa.text(
        """(
                        CASE WHEN user_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN project_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN code_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN memo_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN source_document_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN span_annotation_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN bbox_annotation_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN sentence_annotation_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN span_group_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN document_tag_id IS NULL THEN 0 ELSE 1 END
                    ) = 1"""
    )
    op.create_check_constraint(
        constraint_name=check_name,
        table_name=handle_table,
        condition=exactly_one_reference,
    )
154+
155+
156+
def downgrade() -> None:
    """Revert ``upgrade``: detach sentence annotations and drop their table.

    The original body was ``pass``, so a downgrade rewound the Alembic version
    while leaving the table, column, indexes and constraints in place; a later
    ``upgrade`` would then fail on objects that already exist.

    This is destructive: all sentence annotations are lost, and every
    ``objecthandle`` row that points at one is deleted, because such a row
    would reference nothing once the column is gone and would violate the
    restored "exactly one instance" check.

    NOTE(review): the constraint definitions restored here are reconstructed
    by removing ``sentence_annotation_id`` from the definitions in
    ``upgrade``; the definitions that existed before this revision are not
    visible in this file - confirm them against revision ``f3108bb5e496``.
    NOTE(review): looking up the foreign key needs a live connection, so this
    presumably does not work in offline (``--sql``) mode - confirm if needed.
    """
    handle_table = "objecthandle"
    annotation_table = "sentenceannotation"
    new_column = "sentence_annotation_id"
    unique_name = "UC_only_one_object_handle_per_instance"
    null_safe_index = "idx_for_uc_work_with_null"
    check_name = "CC_object_handle_refers_to_exactly_one_instance"

    # The check constraint references the column, so it has to go first.
    op.drop_constraint(check_name, handle_table, type_="check")

    # Remove handles that only exist for sentence annotations (see docstring).
    op.execute(
        sa.text("DELETE FROM objecthandle WHERE sentence_annotation_id IS NOT NULL")
    )

    # upgrade() created the foreign key without an explicit name, so its
    # actual name is looked up instead of being guessed.
    inspector = sa.inspect(op.get_bind())
    for foreign_key in inspector.get_foreign_keys(handle_table):
        if foreign_key["constrained_columns"] != [new_column]:
            continue
        if foreign_key.get("name"):
            op.drop_constraint(foreign_key["name"], handle_table, type_="foreignkey")

    # Drop everything on objecthandle that mentions the column, then the column.
    op.drop_index(op.f("ix_objecthandle_sentence_annotation_id"), table_name=handle_table)
    op.drop_index(null_safe_index, table_name=handle_table)
    op.drop_constraint(unique_name, handle_table, type_="unique")
    op.drop_column(handle_table, new_column)

    # Restore the uniqueness rules without the sentence annotation column.
    op.create_unique_constraint(
        unique_name,
        handle_table,
        [
            "user_id",
            "project_id",
            "code_id",
            "memo_id",
            "source_document_id",
            "span_annotation_id",
            "bbox_annotation_id",
            "span_group_id",
            "document_tag_id",
        ],
    )
    op.create_index(
        null_safe_index,
        handle_table,
        [
            sa.text("coalesce(user_id, 0)"),
            sa.text("coalesce(project_id, 0)"),
            sa.text("coalesce(code_id, 0)"),
            sa.text("coalesce(source_document_id, 0)"),
            sa.text("coalesce(span_annotation_id, 0)"),
            sa.text("coalesce(bbox_annotation_id, 0)"),
            sa.text("coalesce(span_group_id, 0)"),
            sa.text("coalesce(document_tag_id, 0)"),
            sa.text("coalesce(memo_id, 0)"),
        ],
        unique=True,
    )
    op.create_check_constraint(
        constraint_name=check_name,
        table_name=handle_table,
        condition=sa.text(
            """(
                        CASE WHEN user_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN project_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN code_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN memo_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN source_document_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN span_annotation_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN bbox_annotation_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN span_group_id IS NULL THEN 0 ELSE 1 END
                        + CASE WHEN document_tag_id IS NULL THEN 0 ELSE 1 END
                    ) = 1"""
        ),
    )

    # Finally remove the sentence annotation table, indexes first
    # (reverse of the creation order in upgrade()).
    for column_name in (
        "sentence_id_start",
        "sentence_id_end",
        "id",
        "created",
        "code_id",
        "annotation_document_id",
    ):
        op.drop_index(
            op.f(f"ix_{annotation_table}_{column_name}"),
            table_name=annotation_table,
        )
    op.drop_table(annotation_table)

0 commit comments

Comments
 (0)