Skip to content

Commit

Permalink
Merge branch 'develop' into 'fb-optic-1178/memory-leak'
Browse files Browse the repository at this point in the history
  • Loading branch information
yyassi-heartex committed Dec 13, 2024
2 parents 5f84b12 + 510fe94 commit 71669af
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ jobs:
fail-fast: false
matrix:
python-version:
- '3.9'
- '3.10'

# required for poetry action
# see https://github.com/marketplace/actions/install-poetry-action#running-on-windows
Expand Down
18 changes: 12 additions & 6 deletions label_studio/core/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,14 @@
'django.contrib.staticfiles.finders.FileSystemFinder',
'django.contrib.staticfiles.finders.AppDirectoriesFinder',
)
STATICFILES_STORAGE = 'core.storage.SkipMissedManifestStaticFilesStorage'
# Storage backends keyed by alias.
# 'default' starts on the local filesystem; the cloud-storage sections further
# down overwrite STORAGES['default']['BACKEND'] in place, so the nested values
# must remain plain mutable dicts.
STORAGES = {
    'default': {'BACKEND': 'django.core.files.storage.FileSystemStorage'},
    'staticfiles': {'BACKEND': 'core.storage.SkipMissedManifestStaticFilesStorage'},
}

# Sessions and CSRF
SESSION_COOKIE_SECURE = bool(int(get_env('SESSION_COOKIE_SECURE', False)))
Expand Down Expand Up @@ -648,7 +655,7 @@ def collect_versions_dummy(**kwargs):

if get_env('MINIO_STORAGE_ENDPOINT') and not get_bool_env('MINIO_SKIP', False):
CLOUD_FILE_STORAGE_ENABLED = True
DEFAULT_FILE_STORAGE = 'storages.backends.s3boto3.S3Boto3Storage'
STORAGES['default']['BACKEND'] = 'storages.backends.s3boto3.S3Boto3Storage'
AWS_STORAGE_BUCKET_NAME = get_env('MINIO_STORAGE_BUCKET_NAME')
AWS_ACCESS_KEY_ID = get_env('MINIO_STORAGE_ACCESS_KEY')
AWS_SECRET_ACCESS_KEY = get_env('MINIO_STORAGE_SECRET_KEY')
Expand All @@ -661,7 +668,7 @@ def collect_versions_dummy(**kwargs):

if get_env('STORAGE_TYPE') == 's3':
CLOUD_FILE_STORAGE_ENABLED = True
DEFAULT_FILE_STORAGE = 'core.storage.CustomS3Boto3Storage'
STORAGES['default']['BACKEND'] = 'core.storage.CustomS3Boto3Storage'
if get_env('STORAGE_AWS_ACCESS_KEY_ID'):
AWS_ACCESS_KEY_ID = get_env('STORAGE_AWS_ACCESS_KEY_ID')
if get_env('STORAGE_AWS_SECRET_ACCESS_KEY'):
Expand All @@ -681,7 +688,7 @@ def collect_versions_dummy(**kwargs):

if get_env('STORAGE_TYPE') == 'azure':
CLOUD_FILE_STORAGE_ENABLED = True
DEFAULT_FILE_STORAGE = 'core.storage.CustomAzureStorage'
STORAGES['default']['BACKEND'] = 'core.storage.CustomAzureStorage'
AZURE_ACCOUNT_NAME = get_env('STORAGE_AZURE_ACCOUNT_NAME')
AZURE_ACCOUNT_KEY = get_env('STORAGE_AZURE_ACCOUNT_KEY')
AZURE_CONTAINER = get_env('STORAGE_AZURE_CONTAINER_NAME')
Expand All @@ -690,8 +697,7 @@ def collect_versions_dummy(**kwargs):

if get_env('STORAGE_TYPE') == 'gcs':
CLOUD_FILE_STORAGE_ENABLED = True
# DEFAULT_FILE_STORAGE = 'storages.backends.gcloud.GoogleCloudStorage'
DEFAULT_FILE_STORAGE = 'core.storage.AlternativeGoogleCloudStorage'
STORAGES['default']['BACKEND'] = 'core.storage.AlternativeGoogleCloudStorage'
GS_PROJECT_ID = get_env('STORAGE_GCS_PROJECT_ID')
GS_BUCKET_NAME = get_env('STORAGE_GCS_BUCKET_NAME')
GS_EXPIRATION = timedelta(seconds=int(get_env('STORAGE_GCS_EXPIRATION_SECS', '86400')))
Expand Down
10 changes: 5 additions & 5 deletions label_studio/data_manager/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ def annotate_annotations_results(queryset):
)
)
else:
return queryset.annotate(annotations_results=ArrayAgg('annotations__result', distinct=True))
return queryset.annotate(annotations_results=ArrayAgg('annotations__result', distinct=True, default=Value([])))


def annotate_predictions_results(queryset):
Expand All @@ -610,7 +610,7 @@ def annotate_predictions_results(queryset):
)
)
else:
return queryset.annotate(predictions_results=ArrayAgg('predictions__result', distinct=True))
return queryset.annotate(predictions_results=ArrayAgg('predictions__result', distinct=True, default=Value([])))


def annotate_annotators(queryset):
Expand All @@ -619,7 +619,7 @@ def annotate_annotators(queryset):
annotators=Coalesce(GroupConcat('annotations__completed_by'), Value(''), output_field=models.CharField())
)
else:
return queryset.annotate(annotators=ArrayAgg('annotations__completed_by', distinct=True))
return queryset.annotate(annotators=ArrayAgg('annotations__completed_by', distinct=True, default=Value([])))


def annotate_predictions_score(queryset):
Expand Down Expand Up @@ -653,7 +653,7 @@ def annotate_annotations_ids(queryset):
if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
return queryset.annotate(annotations_ids=GroupConcat('annotations__id', output_field=models.CharField()))
else:
return queryset.annotate(annotations_ids=ArrayAgg('annotations__id'))
return queryset.annotate(annotations_ids=ArrayAgg('annotations__id', default=Value([])))


def annotate_predictions_model_versions(queryset):
Expand All @@ -662,7 +662,7 @@ def annotate_predictions_model_versions(queryset):
predictions_model_versions=GroupConcat('predictions__model_version', output_field=models.CharField())
)
else:
return queryset.annotate(predictions_model_versions=ArrayAgg('predictions__model_version'))
return queryset.annotate(predictions_model_versions=ArrayAgg('predictions__model_version', default=Value([])))


def annotate_avg_lead_time(queryset):
Expand Down
83 changes: 83 additions & 0 deletions label_studio/projects/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,30 @@
import bleach
from constants import SAFE_HTML_ATTRIBUTES, SAFE_HTML_TAGS
from django.db.models import Q
from label_studio_sdk.label_interface import LabelInterface
from label_studio_sdk.label_interface.control_tags import (
BrushLabelsTag,
BrushTag,
ChoicesTag,
DateTimeTag,
EllipseLabelsTag,
EllipseTag,
HyperTextLabelsTag,
KeyPointLabelsTag,
KeyPointTag,
LabelsTag,
NumberTag,
ParagraphLabelsTag,
PolygonLabelsTag,
PolygonTag,
RatingTag,
RectangleLabelsTag,
RectangleTag,
TaxonomyTag,
TextAreaTag,
TimeSeriesLabelsTag,
VideoRectangleTag,
)
from projects.models import Project, ProjectImport, ProjectOnboarding, ProjectReimport, ProjectSummary
from rest_flex_fields import FlexFieldsModelSerializer
from rest_framework import serializers
Expand Down Expand Up @@ -66,6 +90,9 @@ class ProjectSerializer(FlexFieldsModelSerializer):
config_has_control_tags = SerializerMethodField(
default=None, read_only=True, help_text='Flag to detect is project ready for labeling'
)
config_suitable_for_bulk_annotation = serializers.SerializerMethodField(
default=None, read_only=True, help_text='Flag to detect is project ready for bulk annotation'
)
finished_task_number = serializers.IntegerField(default=None, read_only=True, help_text='Finished tasks')

queue_total = serializers.SerializerMethodField()
Expand All @@ -82,6 +109,61 @@ def user_id(self):
def get_config_has_control_tags(project):
    """Tell whether the project's parsed labeling config has at least one entry.

    :param project: object exposing ``get_parsed_config()``
    :return: True when the parsed config is non-empty, False otherwise
    """
    parsed_config = project.get_parsed_config()
    return len(parsed_config) > 0

@staticmethod
def get_config_suitable_for_bulk_annotation(project):
    """Decide whether the project's labeling config qualifies for bulk annotation.

    The config is rejected (``False``) when any of the following holds:

    * it contains a tag of one of the forbidden control classes listed below;
    * a Choices, Taxonomy, DateTime, Number, Rating or TextArea tag has
      ``perRegion`` or ``perItem`` set to "true" (case-insensitive);
    * a Choices or Taxonomy tag carries a ``value`` attribute at all;
    * a Taxonomy tag has ``labeling`` set to "true" or defines ``apiUrl``.

    :param project: object exposing the ``label_config`` to inspect
    :return: True when none of the rejection rules fire, False otherwise
    """
    interface = LabelInterface(project.label_config)

    def _is_true(control, attribute):
        # A missing attribute counts as "false"; comparison ignores letter case
        return control.attr.get(attribute, 'false').lower() == 'true'

    # Presence of a single tag of any of these classes disqualifies the config
    forbidden_classes = (
        LabelsTag,
        BrushTag,
        BrushLabelsTag,
        EllipseTag,
        EllipseLabelsTag,
        KeyPointTag,
        KeyPointLabelsTag,
        PolygonTag,
        PolygonLabelsTag,
        RectangleTag,
        RectangleLabelsTag,
        HyperTextLabelsTag,
        ParagraphLabelsTag,
        TimeSeriesLabelsTag,
        VideoRectangleTag,
    )
    if any(interface.find_tags_by_class(forbidden) for forbidden in forbidden_classes):
        return False

    # These classes are permitted, but only without per-region / per-item scoping
    scoped_check_classes = (ChoicesTag, TaxonomyTag, DateTimeTag, NumberTag, RatingTag, TextAreaTag)
    # ... and these two must additionally have no `value` attribute whatsoever
    valueless_classes = (ChoicesTag, TaxonomyTag)
    for control_class in scoped_check_classes:
        for control in interface.find_tags_by_class(control_class):
            scoping_flags = [_is_true(control, 'perRegion'), _is_true(control, 'perItem')]
            if any(scoping_flags):
                return False
            if control_class in valueless_classes and 'value' in control.attr:
                return False

    # Taxonomy-specific rules: no labeling mode and no externally loaded taxonomy
    for taxonomy in interface.find_tags_by_class(TaxonomyTag):
        if _is_true(taxonomy, 'labeling'):
            return False
        if taxonomy.attr.get('apiUrl', None) is not None:
            return False

    # Nothing disqualified the config
    return True

@staticmethod
def get_parsed_label_config(project):
    """Return the project's parsed labeling config, unchanged.

    :param project: object exposing ``get_parsed_config()``
    :return: whatever ``project.get_parsed_config()`` returns
    """
    parsed_config = project.get_parsed_config()
    return parsed_config
Expand Down Expand Up @@ -156,6 +238,7 @@ class Meta:
'finished_task_number',
'queue_total',
'queue_done',
'config_suitable_for_bulk_annotation',
]

def validate_label_config(self, value):
Expand Down
71 changes: 70 additions & 1 deletion label_studio/tests/config_validation.tavern.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1435,4 +1435,73 @@ stages:
method: POST
url: '{django_live_url}/api/projects/{pk}/validate'
response:
status_code: 200
status_code: 200

---
test_name: check_config_suitable_for_bulk_annotation
strict: false
marks:
- usefixtures:
- django_live_url
stages:

- id: signup
type: ref

- name: create classification project
request:
data:
label_config: |
<View>
<Text name="text" value="$text"/>
<Choices name="sentiment" toName="text" choice="single">
<Choice value="Positive"/>
<Choice value="Neutral"/>
<Choice value="Negative"/>
</Choices>
</View>
method: POST
url: '{django_live_url}/api/projects'
response:
status_code: 201
save:
json:
classification_project_id: id

- name: check classification project property
request:
method: GET
url: '{django_live_url}/api/projects/{classification_project_id}'
response:
status_code: 200
json:
config_suitable_for_bulk_annotation: true

- name: create object detection project
request:
data:
label_config: |
<View>
<Image name="image" value="$image"/>
<RectangleLabels name="label" toName="image">
<Label value="Car"/>
<Label value="Tree"/>
<Label value="Person"/>
</RectangleLabels>
</View>
method: POST
url: '{django_live_url}/api/projects'
response:
status_code: 201
save:
json:
detection_project_id: id

- name: check object detection project property
request:
method: GET
url: '{django_live_url}/api/projects/{detection_project_id}'
response:
status_code: 200
json:
config_suitable_for_bulk_annotation: false
34 changes: 16 additions & 18 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 71669af

Please sign in to comment.