Skip to content

Commit

Permalink
work on ranking
Browse files (browse the repository at this point in the history)
  • Loading branch information
capjamesg committed Nov 21, 2024
1 parent f95ab98 commit 6e9ca8c
Showing 1 changed file with 7 additions and 12 deletions.
19 changes: 7 additions & 12 deletions jamesql/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -923,24 +923,19 @@ def search(
starting_word_pos = self.gsis[field]["gsi"][term_queries[0]]["documents"]["uuid"][doc["uuid"]]
# print(term_queries[0], word_pos)
first_word_pos = set(starting_word_pos)
found = False
# add -1

for i, term in enumerate(term_queries):
word_pos = self.gsis[field]["gsi"][term]["documents"]["uuid"][doc["uuid"]]

positions = set([x - i for x in word_pos]) # | set([x + i for x in word_pos])
# first_word_pos &= positions
if len(first_word_pos.intersection(positions)) > 0: # and i != len(term_queries) - 1:
found = True
first_word_pos &= positions

if found and field != "title_lower":
# print(first_word_pos, doc["title"], field, "union")
occurences = len(first_word_pos)
first_word_pos &= positions
# if len(first_word_pos.intersection(positions)) > 0: # and i != len(term_queries) - 1:
# first_word_pos &= positions

if first_word_pos and field != "title_lower":
doc_scores[doc["uuid"]] += len(first_word_pos)
# * len(set(word_pos[term_queries[0]]))
elif found and field == "title_lower":
if field == "title_lower":
# get word overlap between title and terms
overlap = set(term_queries).intersection(set(doc["title_lower"].split(" ")))
# calculate overlap ratio
Expand All @@ -954,7 +949,7 @@ def search(

# Add weight for the first time the term is mentioned:
# the closer the mention is to the beginning of the document, the higher the weight.
if first_word_pos and field == "title_lower" and found:
if first_word_pos and field == "title_lower":
# print(first_word_pos, doc["title"], field)
min_pos = min(first_word_pos)
# print(min_pos)
Expand Down

0 comments on commit 6e9ca8c

Please sign in to comment.