From 6c6f17462378f5e92b0e8cb33198ab3630dff1d9 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 18 Sep 2024 16:58:44 -0500 Subject: [PATCH] initial qb age filter operations back-end --- specifyweb/specify/geo_time.py | 517 ++++++++++++++++++++ specifyweb/specify/load_datamodel.py | 12 +- specifyweb/stored_queries/query_ops.py | 53 +- specifyweb/stored_queries/queryfieldspec.py | 88 +++- 4 files changed, 657 insertions(+), 13 deletions(-) create mode 100644 specifyweb/specify/geo_time.py diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py new file mode 100644 index 00000000000..d0d8d5b9233 --- /dev/null +++ b/specifyweb/specify/geo_time.py @@ -0,0 +1,517 @@ +from itertools import chain +from typing import List, Set +from django.db.models import Q, F +from sqlalchemy import func, literal, or_, and_, exists +from sqlalchemy.sql import or_, and_ +from sqlalchemy.orm import aliased + +from specifyweb.specify.models import AbsoluteAge, RelativeAge, Geologictimeperiod, Collectionobject +from specifyweb.stored_queries import models as sq_models + +# TODO: Integrate into the query builder + +# Paths from CollectionObject to AbsoluteAge or GeologicTimePeriod: +# - collectionobject->paleocontext->chronostrat +# - collectionobject->collectionevent->paleocontext->chronostrat +# - collectionobject->collectionevent->loc->paleocontext->chronostrat +# - collectionobject->relativeage->chronostrat +# - collectionobject->absoluteage + +def assert_valid_time_range(start_time: float, end_time: float): + assert start_time <= end_time, "Start time must be less than or equal to end time." + +def search_co_ids_in_time_range(start_time: float, end_time: float, require_full_overlap: bool = False) -> Set[int]: + """ + Search for collections that overlap with the given time range. + + :param start_time: The start time of the range. + :param end_time: The end time of the range. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otheerwise partial overlap is used. + :return: A list of collection object IDs. + """ + + assert_valid_time_range(start_time, end_time) + + absolute_start_filter = Q(absoluteage__gte=start_time - F('ageuncertainty')) + absolute_end_filter = Q(absoluteage__lte=end_time + F('ageuncertainty')) + chrono_start_filter = Q(startperiod__gte=start_time - F('startuncertainty')) + chrono_end_filter = Q(startperiod__lte=end_time + F('enduncertainty')) + + absolute_overlap_filter = absolute_start_filter | absolute_end_filter + chrono_overlap_filter = chrono_start_filter | chrono_end_filter + if require_full_overlap: + absolute_overlap_filter = absolute_start_filter & absolute_end_filter + chrono_overlap_filter = chrono_start_filter & chrono_end_filter + + absolute_ids = set( + AbsoluteAge.objects.filter(absolute_overlap_filter) + .select_related("collectionobject") + .values_list("collectionobject_id", flat=True) + ) + chrono_ids = set( + Geologictimeperiod.objects.filter(chrono_overlap_filter) + .select_related("relativeages__collectionobject") + .values_list("relativeages__collectionobject_id", flat=True) + ) + + return absolute_ids.union(chrono_ids) + +def search_co_in_time_range(start_time: float, end_time: float, require_full_overlap: bool = False): + """ + Search for collections that overlap with the given time range. + + :param start_time: The start time of the range. + :param end_time: The end time of the range. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otherwise partial overlap is used. + :return: A list of collection objects. + """ + + assert_valid_time_range(start_time, end_time) + + absolute_start_filter = Q(absoluteage__gte=start_time - F('ageuncertainty')) + absolute_end_filter = Q(absoluteage__lte=end_time + F('ageuncertainty')) + chrono_start_filter = Q(startperiod__gte=start_time - F('startuncertainty')) + chrono_end_filter = Q(startperiod__lte=end_time + F('enduncertainty')) + + absolute_overlap_filter = absolute_start_filter | absolute_end_filter + chrono_overlap_filter = chrono_start_filter | chrono_end_filter + if require_full_overlap: + absolute_overlap_filter = absolute_start_filter & absolute_end_filter + chrono_overlap_filter = chrono_start_filter & chrono_end_filter + + # Combine all filters into a single query + # TODO: Fix and make more efficient + combined_filter = ( + Q(absoluteages__in=AbsoluteAge.objects.filter(absolute_overlap_filter)) | + Q(relativeages__agename__in=Geologictimeperiod.objects.filter(chrono_overlap_filter)) | + Q(collectingevent__paleocontext__chronosstrat__in=Geologictimeperiod.objects.filter(chrono_overlap_filter)) | + # Q(collectingevent__paleocontext__chronosstratend__in=Geologictimeperiod.objects.filter(chrono_overlap_filter)) | # TODO: Fix + Q(collectingevent__locality__paleocontext__chronosstrat__in=Geologictimeperiod.objects.filter(chrono_overlap_filter)) + # Q(collectingevent__locality__paleocontext__chronosstratend__in=Geologictimeperiod.objects.filter(chrono_overlap_filter)) # TODO: Fix + ) + + return Collectionobject.objects.filter(combined_filter).distinct() + +def search_co_ids_in_time_margin(time: float, uncertanty: float, require_full_overlap: bool = False) -> List[int]: + start_time = time - uncertanty + end_time = time + uncertanty + return search_co_ids_in_time_range(start_time, end_time, require_full_overlap) + +def search_co_ids_in_time_period(time_period_name: str, require_full_overlap: bool = False) -> List[int]: + """ + Search for collections that overlap with the given time period. + + :param time_period_name: The name of the time period. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otheerwise partial overlap is used. + :return: A list of collection object IDs. + """ + + time_period = Geologictimeperiod.objects.filter(name=time_period_name).first() + return search_co_ids_in_time_range(time_period.start_time, time_period.end_time) + +def subquery_co_in_time_range( + qb_query, + start_time: float, + end_time: float, + session=None, + require_full_overlap: bool = False, +): + # create initial sqlalchmey subquery 'SELECT collectionobject_id FROM collectionobject' + subquery = qb_query.session.query(sq_models.CollectionObject.id).subquery() + +# TODO: Remove once an implementation which implemention to keep +def query_co_in_time_range_1( + qb_query, + start_time: float, + end_time: float, + session=None, + require_full_overlap: bool = False, +): + """ + Edit the incoming Query Builder SQL Alchemy query to search for collections that overlap with the given time range. + + # :param session: The SQL Alchemy session. + :param qb_query: The Query Builder's sqlalchemy query to filter. + :param start_time: The start time of the range. + :param end_time: The end time of the range. + :param session: The SQL Alchemy session. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otheerwise partial overlap is used. + :return: A list of collection object IDs. + """ + + assert_valid_time_range(start_time, end_time) + + # Assert that the base table of the query is CollectionObject + assert sq_models.CollectionObject in [entity.entity_zero for entity in qb_query._entities], \ + "The base table of the query must be CollectionObject." + + # Define filters + absolute_start_filter = sq_models.AbsoluteAge.absoluteage >= (start_time - sq_models.AbsoluteAge.ageuncertainty) + absolute_end_filter = sq_models.AbsoluteAge.absoluteage <= (end_time + sq_models.AbsoluteAge.ageuncertainty) + chrono_start_filter = sq_models.Geologictimeperiod.startPeriod >= (start_time - sq_models.Geologictimeperiod.startUncertainty) + chrono_end_filter = sq_models.Geologictimeperiod.startPeriod <= (end_time + sq_models.Geologictimeperiod.endUncertainty) + + if require_full_overlap: + absolute_overlap_filter = and_(absolute_start_filter, absolute_end_filter) + chrono_overlap_filter = and_(chrono_start_filter, chrono_end_filter) + else: + absolute_overlap_filter = or_(absolute_start_filter, absolute_end_filter) + chrono_overlap_filter = or_(chrono_start_filter, chrono_end_filter) + + # Aliases for joins + relative_age_alias = aliased(sq_models.RelativeAge) + geo_time_period_alias = aliased(sq_models.Geologictimeperiod) + collecting_event_alias = aliased(sq_models.CollectingEvent) + paleocontext_alias = aliased(sq_models.PaleoContext) + + # Combine all filters into a single query + # TODO: Fix and make more efficient + if session is None: + session = sq_models.session_context() + with session as qb_session: + combined_filter = or_( + sq_models.CollectionObject.absoluteages.any(absolute_overlap_filter), + sq_models.CollectionObject.relativeages.any(relative_age_alias.agename.in_( + qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + )), + sq_models.CollectionObject.collectingevent.has( + collecting_event_alias.paleocontext.has( + or_( + paleocontext_alias.chronosstrat.in_( + qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + ), + paleocontext_alias.chronosstratend.in_( + qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + ) + ) + ) + ), + sq_models.CollectionObject.collectingevent.has( + collecting_event_alias.locality.has( + paleocontext_alias.chronosstrat.in_( + qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + ) + ) + ), + sq_models.CollectionObject.collectingevent.has( + collecting_event_alias.locality.has( + paleocontext_alias.chronosstratend.in_( + qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + ) + ) + ) + ) + + # Execute query + # return qb_session.query(sq_models.CollectionObject).filter(combined_filter).distinct().all() + # return qb_query.filter(combined_filter).distinct().all() + + # Add age filter statement to the query + # return qb_query.filter(combined_filter).distinct() + return qb_query.filter(combined_filter) + # age_subquery = session.query + # return age_subquery.filter(combined_filter).distinct() + +# TODO: Remove once an implementation which implemention to keep +def query_co_in_time_range_2( + qb_query, + start_time: float, + end_time: float, + session=None, + require_full_overlap: bool = False, +): + """ + Edit the incoming Query Builder SQL Alchemy query to search for collections that overlap with the given time range. + + # :param session: The SQL Alchemy session. + :param qb_query: The Query Builder's sqlalchemy query to filter. + :param start_time: The start time of the range. + :param end_time: The end time of the range. + :param session: The SQL Alchemy session. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otheerwise partial overlap is used. + :return: A list of collection object IDs. + """ + + start_time = float(start_time) + end_time = float(end_time) + + assert_valid_time_range(start_time, end_time) + + # Assert that the base table of the query is CollectionObject + # assert sq_models.CollectionObject in [entity.entity_zero for entity in qb_query._entities], \ + # "The base table of the query must be CollectionObject." + # base_model = qb_query.column_descriptions[0]['entity'] + # assert base_model == sq_models.CollectionObject, "The base table of the query must be CollectionObject." + + # Define filters + absolute_start_filter = sq_models.AbsoluteAge.absoluteage >= ( + literal(start_time) - sq_models.AbsoluteAge.ageuncertainty + ) + absolute_end_filter = sq_models.AbsoluteAge.absoluteage <= ( + literal(end_time) + sq_models.AbsoluteAge.ageuncertainty + ) + chrono_start_filter = sq_models.Geologictimeperiod.startPeriod >= ( + literal(start_time) - sq_models.Geologictimeperiod.startUncertainty + ) + chrono_end_filter = sq_models.Geologictimeperiod.startPeriod <= ( + literal(end_time) + sq_models.Geologictimeperiod.endUncertainty + ) + + if require_full_overlap: + absolute_overlap_filter = and_(absolute_start_filter, absolute_end_filter) + chrono_overlap_filter = and_(chrono_start_filter, chrono_end_filter) + else: + absolute_overlap_filter = or_(absolute_start_filter, absolute_end_filter) + chrono_overlap_filter = or_(chrono_start_filter, chrono_end_filter) + + # Aliases for joins + relative_age_alias = aliased(sq_models.RelativeAge) + geo_time_period_alias = aliased(sq_models.GeologicTimePeriod) + collecting_event_alias = aliased(sq_models.CollectingEvent) + paleocontext_alias = aliased(sq_models.PaleoContext) + + # Combine all filters into a single query + # TODO: Fix and make more efficient + if session is None: + session = sq_models.session_context() + with session as qb_session: + # combined_filter = or_( + # # sq_models.CollectionObject.absoluteages.any(absolute_overlap_filter), # 'CollectionObject' has no attribute 'absoluteages' + # # sq_models.CollectionObject.relativeages.any(relative_age_alias.agename.in_( 'CollectionObject' has no attribute 'relativeages' + # # qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + # # )), + # sq_models.CollectionObject.collectingevent.has( # Need to do sq_models.CollecintEvent.collectionObjects + # collecting_event_alias.paleocontext.has( + # or_( + # paleocontext_alias.chronosstrat.in_( + # qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + # ), + # paleocontext_alias.chronosstratend.in_( + # qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + # ) + # ) + # ) + # ), + # sq_models.CollectionObject.collectingevent.has( # Need to do sq_models.CollecintEvent.collectionObjects + # collecting_event_alias.locality.has( + # paleocontext_alias.chronosstrat.in_( + # qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + # ) + # ) + # ), + # sq_models.CollectionObject.collectingevent.has( # Need to do sq_models.CollecintEvent.collectionObjects + # collecting_event_alias.locality.has( + # paleocontext_alias.chronosstratend.in_( + # qb_session.query(geo_time_period_alias.id).filter(chrono_overlap_filter) + # ) + # ) + # ) + # ) + # combined_filter = or_( + # sq_models.CollectingEvent.collectionObjects.any( + # collecting_event_alias.paleoContext.has( + # or_( + # paleocontext_alias.ChronosStratID.in_( + # qb_session.query(geo_time_period_alias.geologicTimePeriodId).filter(chrono_overlap_filter) + # ), + # paleocontext_alias.ChronosStratEndID.in_( + # qb_session.query(geo_time_period_alias.geologicTimePeriodId).filter(chrono_overlap_filter) + # ) + # ) + # ) + # ), + # sq_models.CollectingEvent.collectionObjects.any( + # collecting_event_alias.locality.has( + # paleocontext_alias.ChronosStratID.in_( + # qb_session.query(geo_time_period_alias.geologicTimePeriodId).filter(chrono_overlap_filter) + # ) + # ) + # ), + # sq_models.CollectingEvent.collectionObjects.any( + # collecting_event_alias.locality.has( + # paleocontext_alias.ChronosStratEndID.in_( + # qb_session.query(geo_time_period_alias.geologicTimePeriodId).filter(chrono_overlap_filter) + # ) + # ) + # ) + # ) + + # Extract foreign key values from the subquery + foreign_key_subquery = ( + qb_session.query(geo_time_period_alias.geologicTimePeriodId) + .filter(chrono_overlap_filter) + .subquery() + ) + + # Use the foreign key values in the in_ method + combined_filter = or_( + sq_models.CollectingEvent.collectionObjects.any( + collecting_event_alias.paleoContext.has( + or_( + paleocontext_alias.ChronosStratID.in_(foreign_key_subquery), + paleocontext_alias.ChronosStratEndID.in_(foreign_key_subquery) + ) + ) + ), + sq_models.CollectingEvent.collectionObjects.any( + collecting_event_alias.locality.has( + paleocontext_alias.ChronosStratID.in_(foreign_key_subquery) + ) + ), + sq_models.CollectingEvent.collectionObjects.any( + collecting_event_alias.locality.has( + paleocontext_alias.ChronosStratEndID.in_(foreign_key_subquery) + ) + ) + ) + + # Execute query + # return qb_session.query(sq_models.CollectionObject).filter(combined_filter).distinct().all() + # return qb_query.filter(combined_filter).distinct().all() + + # Execute query + # return qb_session.query(sq_models.CollectionObject).filter(combined_filter).distinct().all() + # return qb_query.filter(combined_filter).distinct().all() + + # Add age filter statement to the query + # return qb_query.filter(combined_filter).distinct() + return qb_query.filter(combined_filter) + # age_subquery = session.query + # return age_subquery.filter(combined_filter).distinct() + +# TODO: Remove once an implementation which implemention to keep +def query_co_in_time_range(query, start_time, end_time, session=None, require_full_overlap=False): + """ + Modify the given SQLAlchemy query to include filters that only select collection objects + overlapping with the given time range. + + :param query: The existing SQLAlchemy query on CollectionObject. + :param start_time: The start time of the range. + :param end_time: The end time of the range. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned; + otherwise, partial overlap is used. + :return: A new query with the additional filters applied. + """ + + start_time = float(start_time) + end_time = float(end_time) + + # Build the absolute age filters + absolute_start_filter = sq_models.AbsoluteAge.absoluteage >= (literal(start_time) - sq_models.AbsoluteAge.ageuncertainty) + absolute_end_filter = sq_models.AbsoluteAge.absoluteage <= (literal(end_time) + sq_models.AbsoluteAge.ageuncertainty) + + if require_full_overlap: + absolute_overlap_filter = and_(absolute_start_filter, absolute_end_filter) + else: + absolute_overlap_filter = or_(absolute_start_filter, absolute_end_filter) + + # Build the geologic time period filters + chrono_start_filter = sq_models.GeologicTimePeriod.startPeriod >= ( + literal(start_time) - sq_models.GeologicTimePeriod.startUncertainty + ) + chrono_end_filter = sq_models.GeologicTimePeriod.startPeriod <= ( + literal(end_time) + sq_models.GeologicTimePeriod.endUncertainty + ) + + if require_full_overlap: + chrono_overlap_filter = and_(chrono_start_filter, chrono_end_filter) + else: + chrono_overlap_filter = or_(chrono_start_filter, chrono_end_filter) + + # Build the EXISTS clauses for absolute ages + absolute_exists = exists().where( + and_( + sq_models.AbsoluteAge.CollectionObjectID == sq_models.CollectionObject.collectionObjectId, + absolute_overlap_filter + ) + ) + + # Build the EXISTS clauses for geologic time periods via relative ages + chrono_exists = exists().where( + and_( + sq_models.RelativeAge.CollectionObjectID == sq_models.CollectionObject.collectionObjectId, + sq_models.RelativeAge.AgeNameID == sq_models.GeologicTimePeriod.geologicTimePeriodId, + chrono_overlap_filter + ) + ) + + # Modify the original query by adding the filters + query = query.filter(or_(absolute_exists, chrono_exists)) + + return query + + +# TODO: Remove once an implementation which implemention to keep +def query_co_in_time_range_with_joins(query, start_time, end_time, session=None, require_full_overlap=False): + start_time = float(start_time) + end_time = float(end_time) + + # Build the absolute age filters + absolute_start_filter = sq_models.AbsoluteAge.absoluteage >= ( + literal(start_time) - sq_models.AbsoluteAge.ageuncertainty + ) + absolute_end_filter = sq_models.AbsoluteAge.absoluteage <= ( + literal(end_time) + sq_models.AbsoluteAge.ageuncertainty + ) + + if require_full_overlap: + absolute_overlap_filter = and_(absolute_start_filter, absolute_end_filter) + else: + absolute_overlap_filter = or_(absolute_start_filter, absolute_end_filter) + + # Build the geologic time period filters + chrono_start_filter = sq_models.GeologicTimePeriod.startPeriod >= ( + literal(start_time) - sq_models.GeologicTimePeriod.startUncertainty + ) + chrono_end_filter = sq_models.GeologicTimePeriod.startPeriod <= ( + literal(end_time) + sq_models.GeologicTimePeriod.endUncertainty + ) + + if require_full_overlap: + chrono_overlap_filter = and_(chrono_start_filter, chrono_end_filter) + else: + chrono_overlap_filter = or_(chrono_start_filter, chrono_end_filter) + + # Join with AbsoluteAge and apply filter + absolute_query = query.join( + sq_models.AbsoluteAge, + sq_models.AbsoluteAge.CollectionObjectID + == sq_models.CollectionObject.collectionObjectId, + ) + absolute_query = absolute_query.filter(absolute_overlap_filter) + + # Join with RelativeAge and GeologicTimePeriod and apply filter + chrono_query = query.join( + sq_models.RelativeAge, + sq_models.RelativeAge.CollectionObjectID == sq_models.CollectionObject.collectionObjectId, + ) + chrono_query = chrono_query.join( + sq_models.GeologicTimePeriod, + sq_models.RelativeAge.AgeNameID == sq_models.GeologicTimePeriod.geologicTimePeriodId, + ) + chrono_query = chrono_query.filter(chrono_overlap_filter) + + # Combine the two queries using UNION to avoid duplicates + combined_query = absolute_query.union(chrono_query) + + return combined_query + +def query_co_in_time_margin(qb_query, time: float, uncertanty: float, session=None, require_full_overlap: bool = False): + start_time = time - uncertanty + end_time = time + uncertanty + return query_co_in_time_range(session, start_time, end_time, require_full_overlap) + +def query_co_in_time_period(qb_query, time_period_name: str, session=None, require_full_overlap: bool = False): + """ + Create SQL Alchemy query to search for collections that overlap with the given time period. + + :param session: The SQL Alchemy session. + :param qb_query: The Query Builder's sqlalchemy query to filter. + :param time_period_name: The name of the time period. + :param session: The SQL Alchemy session. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otheerwise partial overlap is used. + :return: A list of collection object IDs. + """ + + time_period = session.query(sq_models.Geologictimeperiod).filter_by(name=time_period_name).first() + return query_co_in_time_range(session, time_period.start_time, time_period.end_time, require_full_overlap) \ No newline at end of file diff --git a/specifyweb/specify/load_datamodel.py b/specifyweb/specify/load_datamodel.py index 88280c9d578..97b0ec86909 100644 --- a/specifyweb/specify/load_datamodel.py +++ b/specifyweb/specify/load_datamodel.py @@ -77,13 +77,14 @@ class Table(object): fieldAliases: List[Dict[str, str]] sp7_only: bool = False django_app: str = 'specify' + virtual_fiels: List['Field'] = [] def __init__(self, classname: str = None, table: str = None, tableId: int = None, idColumn: str = None, idFieldName: str = None, idField: 'Field' = None, view: Optional[str] = None, searchDialog: Optional[str] = None, fields: List['Field'] = None, indexes: List['Index'] = None, relationships: List['Relationship'] = None, fieldAliases: List[Dict[str, str]] = None, system: bool = False, - sp7_only: bool = False, django_app: str = 'specify'): + sp7_only: bool = False, django_app: str = 'specify', virtual_fields: List['Field'] = None): if not classname: raise ValueError("classname is required") if not table: @@ -111,6 +112,7 @@ def __init__(self, classname: str = None, table: str = None, tableId: int = None self.fieldAliases = fieldAliases if fieldAliases is not None else [] self.sp7_only = sp7_only self.django_app = django_app + self.virtual_fields = virtual_fields if virtual_fields is not None else [] @property def name(self) -> str: @@ -132,6 +134,9 @@ def af() -> Iterable[Union['Field', 'Relationship']]: return list(af()) + def is_virtual_field(self, fieldname: str) -> bool: + return fieldname in [f.name for f in self.virtual_fields] + def get_field(self, fieldname: str, strict: bool=False) -> Union['Field', 'Relationship', None]: return strict_to_optional(self.get_field_strict, fieldname, strict) @@ -140,6 +145,11 @@ def get_field_strict(self, fieldname: str) -> Union['Field', 'Relationship']: for field in self.all_fields: if field.name.lower() == fieldname: return field + for field in self.virtual_fields: + if field.name.lower() == fieldname: + return field + # if self.table == 'collectionobject' and fieldname == 'age': # TODO: This is temporary for testing, more conprehensive solution to come. + # return Field(name='age', column='age', indexed=False, unique=False, required=False, type='java.lang.Integer', length=0) raise FieldDoesNotExistError(_("Field %(field_name)s not in table %(table_name)s. ") % {'field_name':fieldname, 'table_name':self.name} + _("Fields: %(fields)s") % {'fields':[f.name for f in self.all_fields]}) diff --git a/specifyweb/stored_queries/query_ops.py b/specifyweb/stored_queries/query_ops.py index 3d706c94c23..8a45d1b841a 100644 --- a/specifyweb/stored_queries/query_ops.py +++ b/specifyweb/stored_queries/query_ops.py @@ -1,6 +1,7 @@ from collections import namedtuple import sqlalchemy +from specifyweb.specify.geo_time import query_co_in_time_period, query_co_in_time_range, search_co_ids_in_time_range from specifyweb.specify.uiformatters import CNNField, FormatMismatch @@ -27,10 +28,26 @@ class QueryOps(namedtuple("QueryOps", "uiformatter")): 'op_trueornull', # 13 'op_falseornull', # 14 'op_startswith', # 15 - ] + 'op_age_range', # 16 + 'op_age_period', # 17 + 'op_age_range_strict', # 18 + 'op_age_period_strict', # 19 + ] + + PREPROSESSED_OPERATIONS = [ + 'op_age_range,' + 'op_age_period', + 'op_age_range_strict', + 'op_age_period_strict', + ] + + PREPROSESSED_OPERATION_NUMS = {16, 17, 18, 19} def by_op_num(self, op_num): return getattr(self, self.OPERATIONS[op_num]) + + def is_preprocessed(self, op_num): + return op_num in self.PREPROSESSED_OPERATION_NUMS def format(self, value): if self.uiformatter is not None: @@ -104,3 +121,37 @@ def op_startswith(self, field, value): return field.op('REGEXP')("^0*" + value) else: return field.like(value + "%") + + # TODO: Remove once an implementation which implemention to keep + def op_age_range_simple(self, field, value): + values = [self.format(v.strip()) for v in value.split(',')[:2]] + start_time, end_time = values + co_ids = search_co_ids_in_time_range(start_time, end_time, require_full_overlap=False) + return field.in_(co_ids) + + def op_age_range(self, field, value, query): + values = [self.format(v.strip()) for v in value.split(',')[:2]] + start_time, end_time = values + return query_co_in_time_range(query.query, start_time, end_time, session=None, require_full_overlap=False) + + # TODO: Add new operations for age range and period + def op_age_range_2(self, field, value, query): + values = [self.format(v.strip()) for v in value.split(',')[:2]] + query.query = field(query.query, values[0], values[1], session=None, require_full_overlap=False) + return field.between(*values) + # TODO: Maybe call query_co_in_time_range here, or maybe outside of this function maybe in apply_filter. + # Need to either add a join a filter statement to the query, + # or do a subquery, or run query to get the data and add to the query with filter statement (in [1,2,3]). + + def op_age_period(self, field, value, query): + time_period_name = value + return query_co_in_time_period(query.query, time_period_name, session=None, require_full_overlap=False) + + def op_age_range_strict(self, field, value, query): + values = [self.format(v.strip()) for v in value.split(',')[:2]] + start_time, end_time = values + return query_co_in_time_range(query.query, start_time, end_time, session=None, require_full_overlap=True) + + def op_age_period_strict(self, field, value, query): + time_period_name = value + return query_co_in_time_period(query.query, time_period_name, session=None, require_full_overlap=True) \ No newline at end of file diff --git a/specifyweb/stored_queries/queryfieldspec.py b/specifyweb/stored_queries/queryfieldspec.py index b24ee42da7b..b75ad93a2db 100644 --- a/specifyweb/stored_queries/queryfieldspec.py +++ b/specifyweb/stored_queries/queryfieldspec.py @@ -1,11 +1,15 @@ +from dataclasses import fields import logging import re from collections import namedtuple, deque +from typing import NamedTuple, Optional, Tuple from sqlalchemy import sql +from specifyweb.specify.load_datamodel import Field, Table from specifyweb.specify.models import datamodel from specifyweb.specify.uiformatters import get_uiformatter +from specifyweb.specify.geo_time import query_co_in_time_range from . import models from .query_ops import QueryOps @@ -60,7 +64,40 @@ def make_stringid(fs, table_list): field_name += 'Numeric' + fs.date_part return table_list, fs.table.name.lower(), field_name - +# class QueryFieldSpec(NamedTuple): +# root_table: 'Table' +# root_sql_table: 'SQLTable' # type: ignore +# join_path: Tuple['Field', ...] +# table: 'Table' +# date_part: Optional[str] +# tree_rank: Optional[str] +# tree_field: Optional[str] + +# @classmethod +# def create(cls, root_table, root_sql_table, join_path, table, date_part=None, tree_rank=None, tree_field=None): +# # Create a new QueryFieldSpec instance +# instance = cls( +# root_table=root_table, +# root_sql_table=root_sql_table, +# join_path=join_path, +# table=table, +# date_part=date_part, +# tree_rank=tree_rank, +# tree_field=tree_field +# ) +# # Validate the instance +# instance.validate() +# return instance + +# def validate(self): +# valid_date_parts = ('Full Date', 'Day', 'Month', 'Year', None) +# assert self.is_temporal() or self.date_part is None +# if self.date_part not in valid_date_parts: +# raise AssertionError( +# f"Invalid date part '{self.date_part}'. Expected one of {valid_date_parts}", +# {"datePart": self.date_part, +# "validDateParts": str(valid_date_parts), +# "localizationKey": "invalidDatePart"}) class QueryFieldSpec(namedtuple("QueryFieldSpec", "root_table root_sql_table join_path table date_part tree_rank tree_field")): @classmethod def from_path(cls, path_in, add_id=False): @@ -145,13 +182,17 @@ def from_stringid(cls, stringid, is_relation): return result def __init__(self, *args, **kwargs): + self.validate() + + def validate(self): valid_date_parts = ('Full Date', 'Day', 'Month', 'Year', None) assert self.is_temporal() or self.date_part is None - if self.date_part not in valid_date_parts: raise AssertionError( - f"Invalid date part '{self.date_part}'. Expected one of {valid_date_parts}", - {"datePart" : self.date_part, - "validDateParts" : str(valid_date_parts), - "localizationKey" : "invalidDatePart"}) + if self.date_part not in valid_date_parts: + raise AssertionError( + f"Invalid date part '{self.date_part}'. Expected one of {valid_date_parts}", + {"datePart": self.date_part, + "validDateParts": str(valid_date_parts), + "localizationKey": "invalidDatePart"}) def to_spquery_attrs(self): table_list = make_table_list(self) @@ -208,9 +249,15 @@ def apply_filter(self, query, orm_field, field, table, value=None, op_num=None, uiformatter = field and get_uiformatter(query.collection, table.name, field.name) value = value - op = QueryOps(uiformatter).by_op_num(op_num) - - f = op(orm_field, value) + query_op = QueryOps(uiformatter) + op = query_op.by_op_num(op_num) + if query_op.is_preprocessed(op_num): + # f = op(orm_field, value, query) + new_query = op(orm_field, value, query) + query = query._replace(query=new_query) + f = None + else: + f = op(orm_field, value) # TODO: Handle when orm_field is a function instead of a field predicate = sql.not_(f) if negate else f else: predicate = None @@ -247,7 +294,26 @@ def add_spec_to_query(self, query, formatter=None, aggregator=None, cycle_detect if self.tree_rank is not None: query, orm_field = query.handle_tree_field(orm_model, table, self.tree_rank, self.tree_field) else: - orm_field = getattr(orm_model, self.get_field().name) + try: + field_name = self.get_field().name + orm_field = getattr(orm_model, field_name) + except AttributeError: + # TODO: Cleanup once an implementation for virtual QB fields is decieded on + # if self.is_virtual_field(field.name): + # if hasattr(self, 'is_virtual_field') and self.is_virtual_field(field.name): + # if table.is_virtual_field(self.get_field().name): + if table.is_virtual_field(field.name) and table.name == 'CollectionObject' and field_name == 'age': # TODO: Create map for all special cases + # orm_field = orm_model.catalogNumber + orm_field = orm_model.collectionObjectId + # TODO: Remove once an implementation for virtual QB fields is decieded on + # elif table.is_virtual_field(field.name): + # # TODO: Handle SQLAlchemy virtual field creation + # # NOTE: This might not be the right place to call query_co_in_time_range, maybe find a better place + # orm_field = query_co_in_time_range # Look at me, I'm a function now! + # # raise NotImplementedError("Virtual field not implemented yet") + # # query_co_in_time_range(query.query, start_time, end_time, session=None, require_full_overlap=False) + else: + raise if field.type == "java.sql.Timestamp": # Only consider the date portion of timestamp fields. @@ -259,4 +325,4 @@ def add_spec_to_query(self, query, formatter=None, aggregator=None, cycle_detect if field.is_temporal() and self.date_part != "Full Date": orm_field = sql.extract(self.date_part, orm_field) - return query, orm_field, field, table + return query, orm_field, field, table \ No newline at end of file