From a69a049fb8ca12427667d343fb343d094c93b7ea Mon Sep 17 00:00:00 2001 From: capjamesg Date: Fri, 18 Oct 2024 01:00:38 +0100 Subject: [PATCH] fix date --- jamesql/index.py | 14 +++++++------- jamesql/script_lang.py | 2 +- tests/test.py | 3 +++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/jamesql/index.py b/jamesql/index.py index 44b4bae..fb7be1a 100644 --- a/jamesql/index.py +++ b/jamesql/index.py @@ -238,16 +238,16 @@ def _create_reverse_index( else: self.tf_idf[word][score] = [document["uuid"]] - for w in document[index_by].split(" "): - if self.reverse_tf_idf[w].get(index_by) is None: - self.reverse_tf_idf[w][index_by] = {} + for w in document[index_by].split(" "): + if self.reverse_tf_idf[w].get(index_by) is None: + self.reverse_tf_idf[w][index_by] = {} - self.reverse_tf_idf[w][index_by][document["uuid"]] = score + self.reverse_tf_idf[w][index_by][document["uuid"]] = score - if self.reverse_tf_idf[w.lower()].get(index_by) is None: - self.reverse_tf_idf[w.lower()][index_by] = {} + if self.reverse_tf_idf[w.lower()].get(index_by) is None: + self.reverse_tf_idf[w.lower()][index_by] = {} - self.reverse_tf_idf[w.lower()][index_by][document["uuid"]] = score + self.reverse_tf_idf[w.lower()][index_by][document["uuid"]] = score return index diff --git a/jamesql/script_lang.py b/jamesql/script_lang.py index 2d8d838..f2ee579 100644 --- a/jamesql/script_lang.py +++ b/jamesql/script_lang.py @@ -54,7 +54,7 @@ def start(self, items): def decay(self, items): # decay by half for every 30 days # item is datetime.dateime object - days_since_post = (datetime.datetime.now() - items[0]).days + days_since_post = (datetime.datetime.now() - datetime.datetime.strptime(items[0], "%Y-%m-%dT%H:%M:%S")).days return 0.9 ** (days_since_post / 30) diff --git a/tests/test.py b/tests/test.py index 4db4a5f..cb320c0 100644 --- a/tests/test.py +++ b/tests/test.py @@ -374,6 +374,9 @@ def test_search( assert float(response["query_time"]) < 0.06 +# TODO: TF/IDF needs to be calculated 
after all documents have been inserted. +# Otherwise the TF/IDF score will vary with document insertion order, +# which we don't want. @pytest.mark.parametrize( "query, top_document_name, top_document_score, raises_exception", [