From 0bc00ee652744edac15bc941b8e444a6b517da43 Mon Sep 17 00:00:00 2001 From: capjamesg Date: Sat, 24 Aug 2024 16:26:53 +0100 Subject: [PATCH] add DATE index type --- jamesql/index.py | 18 ++++++++++++++---- jamesql/rewriter.py | 6 +++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/jamesql/index.py b/jamesql/index.py index a720078..78c6d8c 100644 --- a/jamesql/index.py +++ b/jamesql/index.py @@ -39,6 +39,7 @@ class GSI_INDEX_STRATEGIES(Enum): FLAT = "flat" NUMERIC = "numeric" INFER = "infer" + DATE = "date" class RANKING_STRATEGIES(Enum): @@ -323,6 +324,7 @@ def create_gsi( """ documents_in_indexed_by = [item.get(index_by) for item in self.global_index.values()] + if strategy == GSI_INDEX_STRATEGIES.INFER: if all([isinstance(item, list) for item in documents_in_indexed_by]): strategy = GSI_INDEX_STRATEGIES.FLAT @@ -354,10 +356,18 @@ def create_gsi( gsi[inner].append(item.get("uuid")) else: gsi[item.get(index_by)].append(item.get("uuid")) - elif strategy == GSI_INDEX_STRATEGIES.NUMERIC: + elif strategy == GSI_INDEX_STRATEGIES.NUMERIC or strategy == GSI_INDEX_STRATEGIES.DATE: gsi = OOBTree() - gsi.update({item.get(index_by): item.get("uuid") for item in self.global_index.values()}) + for item in self.global_index.values(): + if gsi.get(item.get(index_by)) is None: + gsi[item.get(index_by)] = [] + + if isinstance(item.get(index_by), list): + for inner in item.get(index_by): + gsi[inner].append(item.get("uuid")) + + gsi[item.get(index_by)].append(item.get("uuid")) else: raise ValueError( "Invalid GSI strategy. Must be one of: " @@ -621,7 +631,7 @@ def _run(self, query: dict, query_field: str) -> List[str]: query_terms = [query_term.replace("*", c) for c in string.ascii_lowercase] for query_term in query_terms: - if gsi_type not in (GSI_INDEX_STRATEGIES.FLAT, GSI_INDEX_STRATEGIES.NUMERIC): + if gsi_type not in (GSI_INDEX_STRATEGIES.FLAT, GSI_INDEX_STRATEGIES.NUMERIC, GSI_INDEX_STRATEGIES.DATE): if ( query_type == "starts_with" and gsi_type == GSI_INDEX_STRATEGIES.PREFIX @@ -755,7 +765,7 @@ def _run(self, query: dict, query_field: str) -> List[str]: ) elif query_type in QUERY_TYPE_COMPARISON_METHODS: matching_documents.extend( - QUERY_TYPE_COMPARISON_METHODS[query_type](query_term, gsi) + *QUERY_TYPE_COMPARISON_METHODS[query_type](query_term, gsi) ) else: for key, value in gsi.items(): diff --git a/jamesql/rewriter.py b/jamesql/rewriter.py index cfd52b5..8e71616 100644 --- a/jamesql/rewriter.py +++ b/jamesql/rewriter.py @@ -13,7 +13,7 @@ field_query: TERM ":" "'" MULTI_WORD "'" | TERM ":" WORD negate_query: "-" "'" MULTI_WORD "'" | "-" WORD OPERATOR: ">" | "<" | ">=" | "<=" -WORD: /[a-zA-Z0-9_.!?*]+/ +WORD: /[a-zA-Z0-9_.!?*-]+/ MULTI_WORD: /[a-zA-Z0-9 ]+/ TERM: /[a-zA-Z0-9_]+/ @@ -48,7 +48,7 @@ def negate_query(self, items): field = key value = items[0] - if self.indexing_strategies.get(field) == "NUMERIC": + if self.indexing_strategies.get(field) in {"NUMERIC", "DATE"}: continue result.append({field: {self.get_query_strategy(field, value): value}}) @@ -74,7 +74,7 @@ def strict_search_query(self, items): "strict": True, } for field in self.query_keys - if self.indexing_strategies.get(field) != "NUMERIC" + if self.indexing_strategies.get(field) not in {"NUMERIC", "DATE"} } }