Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
90 commits
Select commit Hold shift + click to select a range
05d756e
added sentence annotations
bigabig Nov 24, 2024
f9c8e2b
updated api
bigabig Nov 24, 2024
b7e1725
first version of sentence annotator
bigabig Nov 24, 2024
f074f99
new icons
bigabig Nov 24, 2024
ce22eae
highlight annotations on code explorer hover
bigabig Nov 24, 2024
a5cda1d
highlight annotation during annotation menu
bigabig Nov 24, 2024
784900a
fast annotation with "mostRecentCode"
bigabig Nov 24, 2024
06bad90
fixed fast annotation issue
bigabig Nov 24, 2024
b0e5d4c
removed virtualization
bigabig Nov 24, 2024
214148e
fixed cursor
bigabig Nov 24, 2024
97b405e
add sentence annotation explorer placeholder
bigabig Nov 24, 2024
4466ba9
first version of sentence annotation comparison
bigabig Nov 25, 2024
4a4d113
changed endpoint to only accept a single user id
bigabig Nov 26, 2024
cff8e08
updated api
bigabig Nov 26, 2024
2d75633
working version of sentence comparison
bigabig Nov 26, 2024
b20947f
first version of zero-shot sentence annotation
bigabig Dec 4, 2024
29bce8a
updated api
bigabig Dec 4, 2024
1c8f92f
added SentenceAnnotation to LLM Dialog
bigabig Dec 4, 2024
afd0e9c
fixed hex/rgb color issue
bigabig Dec 4, 2024
edd56ce
fixed issue with keys
bigabig Dec 4, 2024
97bb1c6
add llm dialog button to annotator
bigabig Dec 4, 2024
5dcd89e
remove logging
bigabig Dec 4, 2024
1c09484
fix docker issue
bigabig Jan 13, 2025
f4aa612
fix linting issue
bigabig Jan 13, 2025
6bfe33c
fixed typo in cota
bigabig Jan 14, 2025
d519102
added endpoints for bulk create / delete
bigabig Jan 16, 2025
ea4e25a
added new checks to sss
bigabig Jan 16, 2025
e820692
added function to retrieve all sent embeddings by sdoc_id
bigabig Jan 16, 2025
1f66e35
new ray feature: seq sentence tagger training (first working version)
bigabig Jan 16, 2025
54abc13
new llm assistant feature: model training for sentence annotation
bigabig Jan 16, 2025
d466db1
updated api
bigabig Jan 16, 2025
f833f61
updated LLM Assistant for new feature: Train Sent Anno Model
bigabig Jan 16, 2025
04a25b6
added status descriptions to llm job
bigabig Jan 17, 2025
78ca6c2
updated status step to show new status messages
bigabig Jan 17, 2025
878962a
added missing result to ray sent anno
bigabig Jan 17, 2025
2a5e5fd
remove unnecessary log msg
bigabig Jan 17, 2025
5534ea1
updated Background Jobs View to correctly show LLM Jobs
bigabig Jan 17, 2025
e7b9740
use newer ollama container
bigabig Jan 17, 2025
316b2d1
updated ollama dependency
bigabig Jan 17, 2025
9f51860
updated llm service to use structured generation
bigabig Jan 17, 2025
0159db2
updated ollama_service to new api
bigabig Jan 17, 2025
443edf4
replace demo by system user
bigabig Jan 17, 2025
1a056da
added "finished" as final step
bigabig Jan 17, 2025
85214f1
delete existing sentence annotations before creating the suggestions
bigabig Jan 20, 2025
3cf6a3a
removed wrong nextStep call
bigabig Jan 20, 2025
654e81e
automatic reload of updated sentence annotations
bigabig Jan 20, 2025
9768e39
refactor: split code into multiple files
bigabig Jan 20, 2025
b553d9f
implemented SentenceAnnotationExplorer
bigabig Jan 20, 2025
50595c1
removed unused endpoint /annotation_occurrences
bigabig Jan 21, 2025
e01ead6
updated api
bigabig Jan 21, 2025
aa87a0f
refactored analysis service into new folder structure
bigabig Jan 21, 2025
712bfdc
updated code frequency view to support sentence annotations
bigabig Jan 21, 2025
e4fdf09
added sentence annotation search
bigabig Jan 21, 2025
e33a84d
updated api
bigabig Jan 21, 2025
8f4f64d
added SentenceAnnotationTable
bigabig Jan 21, 2025
8d622ac
added whiteboard support for sentence annotations
bigabig Jan 21, 2025
1eb6fb0
refactored span and bbox anno search to be more similar to new sent a…
bigabig Jan 21, 2025
9f07964
renaming according to refactor
bigabig Jan 21, 2025
b988312
fixed bug
bigabig Jan 21, 2025
204d496
refactored annotated segments -> span annotation analysis
bigabig Jan 22, 2025
b452c7d
added sentence annotation export
bigabig Jan 22, 2025
bb92a18
updated api
bigabig Jan 22, 2025
316afa4
created Sentence Annotation Analysis feature
bigabig Jan 22, 2025
5fe59e8
updated annotation searches to return all users' annotations
bigabig Jan 22, 2025
55b64e5
updated api
bigabig Jan 22, 2025
d7a9982
implemented api change: no user id for searches
bigabig Jan 22, 2025
9a7e50b
fixed bug: all annotations were deleted
bigabig Jan 22, 2025
b92e488
virtualization for sentence annotator
bigabig Jan 22, 2025
eb99fad
virtualization for sentence annotation comparison
bigabig Jan 22, 2025
f4dd8d4
change logdir to temporary_files
bigabig Jan 23, 2025
0298b1c
added few shot prompt template generation
bigabig Jan 23, 2025
43e9aaf
fixed warning in pipeline step
bigabig Jan 23, 2025
5cfda59
updated api to support few shot prompt generation
bigabig Jan 23, 2025
0290dcc
disabled training parameter editor for now
bigabig Jan 23, 2025
f48749e
added ollama model customization
bigabig Jan 24, 2025
112d2bf
added ollama model customization
bigabig Jan 24, 2025
d820d96
customize ollama context size (this time correctly)
bigabig Jan 24, 2025
78f2360
added assistant users
bigabig Jan 24, 2025
ec6f31f
use assistant ids for llm assistance
bigabig Jan 24, 2025
a9c83b3
changed assistant user ids
bigabig Jan 24, 2025
1ba8113
fixed bug: missing @
bigabig Jan 24, 2025
e98bbc4
added log message
bigabig Jan 24, 2025
c463d80
updated api
bigabig Jan 24, 2025
bb7de7c
fixed type issue
bigabig Jan 24, 2025
2015f62
fixed tests
bigabig Jan 24, 2025
8168fa0
build backend, ray container in frontend workflow, if there are depen…
bigabig Jan 25, 2025
bf558aa
small fix
bigabig Jan 25, 2025
19f2574
fixed checks for pull requests
bigabig Jan 25, 2025
6f3292b
fixed typo
bigabig Jan 25, 2025
cbb60ff
add a requirements check also for push requests
bigabig Jan 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .github/workflows/frontend_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,69 @@ jobs:
COMPOSE_PROFILES: "weaviate,ray,background,backend,frontend"
RAY_CONFIG: "config_gpu.yaml"
JWT_SECRET: ${{ secrets.JWT_SECRET }}
BACKEND_HAS_NEW_REQUIREMENTS: false
RAY_HAS_NEW_REQUIREMENTS: false
steps:
- uses: actions/checkout@v3
- name: Checkout PR
if: github.event_name == 'pull_request'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: gh pr checkout ${{ github.event.pull_request.number }}
- name: Check for new backend requirements
id: check_backend_requirements
if: github.event_name == 'pull_request'
run: |
base_branch=$(git merge-base origin/${{ github.event.pull_request.base.ref }} HEAD)
if git diff --name-only $base_branch HEAD | grep -q -e '^backend/requirements.txt' -e '^backend/environment.yml'; then
echo "BACKEND_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
fi
- name: Check for new backend requirements on push
id: check_backend_requirements2
if: github.event_name == 'push'
run: |
if git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep -q -e '^backend/requirements.txt' -e '^backend/environment.yml'; then
echo "BACKEND_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
fi
- name: Check for new ray requirements
id: check_ray_requirements
if: github.event_name == 'pull_request'
run: |
base_branch=$(git merge-base origin/${{ github.event.pull_request.base.ref }} HEAD)
if git diff --name-only $base_branch HEAD | grep -q -e '^backend/src/app/preprocessing/ray_model_worker/requirements.txt'; then
echo "RAY_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
fi
- name: Check for new ray requirements on push
id: check_ray_requirements2
if: github.event_name == 'push'
run: |
if git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep -q -e '^backend/src/app/preprocessing/ray_model_worker/requirements.txt'; then
echo "RAY_HAS_NEW_REQUIREMENTS=true" >> $GITHUB_ENV
fi
- name: Prepare environment
run: |
./bin/setup-folders.sh
./bin/setup-envs.sh --project_name action-runner --port_prefix 131
sed -i 's/\(DATS_FRONTEND_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
if [ "${{ env.RAY_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
sed -i 's/\(DATS_RAY_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
fi
if [ "${{ env.BACKEND_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
sed -i 's/\(DATS_BACKEND_DOCKER_VERSION=[0-9.]*\)/\1-${{ github.run_id }}/' docker/.env
fi
echo "SPACY_CACHE_DIR=$HOME/spacy_models" >> docker/.env
echo "MODELS_CACHE_DIR=$HOME/models_cache" >> docker/.env
mkdir -p $HOME/spacy_models
mkdir -p $HOME/models_cache
- name: Build & Start Docker Containers
working-directory: docker
run: |
if [ "${{ env.RAY_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
docker compose build ray
fi
if [ "${{ env.BACKEND_HAS_NEW_REQUIREMENTS }}" = "true" ]; then
docker compose build dats-backend-api
fi
docker compose build dats-frontend
docker compose up --wait --quiet-pull
- name: Setup node
Expand Down
3 changes: 2 additions & 1 deletion backend/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ dependencies:
- pip=23.3.2
- pre-commit=3.3.3
- psycopg2-binary=2.9
- pydantic=2.5.3
- pydantic=2.10.5
- pydantic-core=2.27.2
- pytest=7.4.3
- python-jose=3.3
- python-magic=0.4
Expand Down
2 changes: 1 addition & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
mammoth==1.6.0
ollama==0.3.1
ollama==0.4.6
pymupdf==1.23.4
pyright==1.1.385
pytest-order==1.2.1
Expand Down
157 changes: 157 additions & 0 deletions backend/src/alembic/versions/050f9378a3e1_add_sentence_annotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""add sentence annotation

Revision ID: 050f9378a3e1
Revises: f3108bb5e496
Create Date: 2024-11-21 10:57:16.865538

"""

from typing import Sequence, Union

import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "050f9378a3e1"
down_revision: Union[str, None] = "f3108bb5e496"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the ``sentenceannotation`` table and make ``objecthandle`` aware of it.

    Steps, in execution order:

    1. Create ``sentenceannotation`` plus one non-unique index per indexed column.
    2. Add the nullable ``objecthandle.sentence_annotation_id`` column.
    3. Drop and re-create the unique constraint and the coalesce-based unique
       index on ``objecthandle`` so both include the new column.
    4. Index the new column and add a cascading foreign key to
       ``sentenceannotation.id``.
    5. Drop and re-create the check constraint so the new column takes part
       in the "exactly one reference is set" rule.
    """
    anno_table = "sentenceannotation"
    handle_table = "objecthandle"
    unique_name = "UC_only_one_object_handle_per_instance"
    null_safe_index = "idx_for_uc_work_with_null"
    check_name = "CC_object_handle_refers_to_exactly_one_instance"

    # SQL for the check constraint: counts the non-NULL reference columns.
    exactly_one_reference = """(
CASE WHEN user_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN project_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN code_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN memo_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN source_document_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN span_annotation_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN bbox_annotation_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN sentence_annotation_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN span_group_id IS NULL THEN 0 ELSE 1 END
+ CASE WHEN document_tag_id IS NULL THEN 0 ELSE 1 END
) = 1"""

    # --- 1. the new table and its single-column indexes ---------------------
    op.create_table(
        anno_table,
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("sentence_id_start", sa.Integer(), nullable=False),
        sa.Column("sentence_id_end", sa.Integer(), nullable=False),
        sa.Column(
            "created", sa.DateTime(), server_default=sa.text("now()"), nullable=False
        ),
        sa.Column(
            "updated", sa.DateTime(), server_default=sa.text("now()"), nullable=False
        ),
        sa.Column("code_id", sa.Integer(), nullable=False),
        sa.Column("annotation_document_id", sa.Integer(), nullable=False),
        sa.ForeignKeyConstraint(
            ["annotation_document_id"], ["annotationdocument.id"], ondelete="CASCADE"
        ),
        sa.ForeignKeyConstraint(["code_id"], ["code.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    indexed_columns = (
        "annotation_document_id",
        "code_id",
        "created",
        "id",
        "sentence_id_end",
        "sentence_id_start",
    )
    for column_name in indexed_columns:
        op.create_index(
            op.f(f"ix_sentenceannotation_{column_name}"),
            anno_table,
            [column_name],
            unique=False,
        )

    # --- 2. new reference column on objecthandle ----------------------------
    op.add_column(
        handle_table,
        sa.Column("sentence_annotation_id", sa.Integer(), nullable=True),
    )

    # --- 3. widen the uniqueness rules to cover the new column --------------
    op.drop_constraint(unique_name, handle_table, type_="unique")
    op.create_unique_constraint(
        unique_name,
        handle_table,
        [
            "user_id",
            "project_id",
            "code_id",
            "memo_id",
            "source_document_id",
            "span_annotation_id",
            "bbox_annotation_id",
            "sentence_annotation_id",
            "span_group_id",
            "document_tag_id",
        ],
    )

    op.drop_index(null_safe_index, table_name=handle_table)
    # The index is built over coalesce(<col>, 0) expressions rather than the
    # raw columns, one expression per reference column.
    coalesced_columns = (
        "user_id",
        "project_id",
        "code_id",
        "source_document_id",
        "span_annotation_id",
        "bbox_annotation_id",
        "sentence_annotation_id",
        "span_group_id",
        "document_tag_id",
        "memo_id",
    )
    op.create_index(
        null_safe_index,
        handle_table,
        [sa.text(f"coalesce({column_name}, 0)") for column_name in coalesced_columns],
        unique=True,
    )

    # --- 4. index + foreign key for the new column --------------------------
    op.create_index(
        op.f("ix_objecthandle_sentence_annotation_id"),
        handle_table,
        ["sentence_annotation_id"],
        unique=False,
    )
    op.create_foreign_key(
        None,  # no explicit constraint name is supplied
        handle_table,
        anno_table,
        ["sentence_annotation_id"],
        ["id"],
        ondelete="CASCADE",
    )

    # --- 5. replace the check constraint ------------------------------------
    op.drop_constraint(constraint_name=check_name, table_name=handle_table)
    op.create_check_constraint(
        constraint_name=check_name,
        table_name=handle_table,
        condition=sa.text(exactly_one_reference),
    )


def downgrade() -> None:
    """Undo ``upgrade()``: remove sentence annotations from the schema.

    Previously this was a no-op, so downgrading past this revision marked it
    as reverted while leaving the ``sentenceannotation`` table, the
    ``objecthandle.sentence_annotation_id`` column and the widened
    constraints in place.

    NOTE(review): the restored constraint/index definitions are derived from
    ``upgrade()`` by removing ``sentence_annotation_id`` and keeping the
    remaining column order; confirm against the preceding revision
    (``f3108bb5e496``) that this matches the original definitions.
    """
    anno_table = "sentenceannotation"
    handle_table = "objecthandle"
    unique_name = "UC_only_one_object_handle_per_instance"
    null_safe_index = "idx_for_uc_work_with_null"
    check_name = "CC_object_handle_refers_to_exactly_one_instance"

    # Reference columns that remain on objecthandle after the downgrade.
    unique_columns = [
        "user_id",
        "project_id",
        "code_id",
        "memo_id",
        "source_document_id",
        "span_annotation_id",
        "bbox_annotation_id",
        "span_group_id",
        "document_tag_id",
    ]
    coalesced_columns = [
        "user_id",
        "project_id",
        "code_id",
        "source_document_id",
        "span_annotation_id",
        "bbox_annotation_id",
        "span_group_id",
        "document_tag_id",
        "memo_id",
    ]

    # Handles that point at a sentence annotation would reference nothing once
    # the column is removed and would violate the restored "exactly one
    # reference" check, so they have to go first.
    op.execute("DELETE FROM objecthandle WHERE sentence_annotation_id IS NOT NULL")

    # Drop everything on objecthandle that mentions the new column.
    op.drop_constraint(check_name, handle_table, type_="check")
    op.drop_index(null_safe_index, table_name=handle_table)
    op.drop_constraint(unique_name, handle_table, type_="unique")
    op.drop_index(
        op.f("ix_objecthandle_sentence_annotation_id"), table_name=handle_table
    )
    # upgrade() created the foreign key without an explicit name, so it is not
    # dropped by name here.
    # NOTE(review): this relies on the database removing the column's foreign
    # key together with the column (PostgreSQL does) - confirm for the
    # deployment target.
    op.drop_column(handle_table, "sentence_annotation_id")

    # Restore the constraints without the sentence annotation column.
    op.create_unique_constraint(unique_name, handle_table, unique_columns)
    op.create_index(
        null_safe_index,
        handle_table,
        [sa.text(f"coalesce({column_name}, 0)") for column_name in coalesced_columns],
        unique=True,
    )
    reference_count = " + ".join(
        f"CASE WHEN {column_name} IS NULL THEN 0 ELSE 1 END"
        for column_name in unique_columns
    )
    op.create_check_constraint(
        constraint_name=check_name,
        table_name=handle_table,
        condition=sa.text(f"({reference_count}) = 1"),
    )

    # Finally remove the sentence annotation table itself, indexes first
    # (reverse of the creation order in upgrade()).
    for column_name in (
        "sentence_id_start",
        "sentence_id_end",
        "id",
        "created",
        "code_id",
        "annotation_document_id",
    ):
        op.drop_index(
            op.f(f"ix_sentenceannotation_{column_name}"), table_name=anno_table
        )
    op.drop_table(anno_table)
Loading
Loading