Skip to content

Commit

Permalink
Replace whoosh search backend by sifts (#528)
Browse files Browse the repository at this point in the history
* Refactor existing search, part 1

* Refactor existing search, part 2

* More unit tests

* Implement sifts search backend

* Fix old Python syntax errors

* Bump sifts

* Fix delete, objects, transaction endpoints

* Update test_post.py

* Fix handle searches in test_post

* Fix tests in test_post.py

* Fix test in test_delete.py

* Fix test in test_put.py

* Fix last issue in test_delete.py

* Fix last issue in test_put.py

* Add sifts version to metadata

* Use sifts v0.6

* Add search type to api spec

* Bump version to 2.4, add search count to metadata

* Bump sifts to 0.7

* Search: support change date filter, bump sifts to 0.8

* Fix method

* Remove fix not needed on Gramps 5.2 anymore

* Add family parents' names and event participants' names to search index (#285)
  • Loading branch information
DavidMStraub authored Jul 27, 2024
1 parent c6dbfd4 commit d9c79f3
Show file tree
Hide file tree
Showing 25 changed files with 719 additions and 512 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.db

*_cache

Pipfile
Expand Down
6 changes: 2 additions & 4 deletions gramps_webapi/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
import warnings

import click
from whoosh.index import LockError

from .api.util import get_db_manager, get_search_indexer, list_trees
from .api.search import get_search_indexer
from .api.util import get_db_manager, list_trees
from .app import create_app
from .auth import add_user, delete_user, fill_tree, user_db
from .const import ENV_CONFIG_FILE, TREE_MULTI
Expand Down Expand Up @@ -158,8 +158,6 @@ def index_full(ctx):

try:
indexer.reindex_full(db, progress_cb=progress_callback_count)
except LockError:
LOG.warning("Index is locked")
except:
LOG.exception("Error during indexing")
finally:
Expand Down
2 changes: 1 addition & 1 deletion gramps_webapi/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@
#

# make sure to match this version with the one in apispec.yaml
__version__ = "2.3.1"
__version__ = "2.4.0"
24 changes: 10 additions & 14 deletions gramps_webapi/api/resources/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,11 @@
from ...auth.const import PERM_ADD_OBJ, PERM_DEL_OBJ, PERM_EDIT_OBJ
from ...const import GRAMPS_OBJECT_PLURAL
from ..auth import require_permissions
from ..search import SearchIndexer
from ..search import SearchIndexer, get_search_indexer
from ..util import (
check_quota_people,
get_db_handle,
get_locale_for_language,
get_search_indexer,
get_tree_from_jwt,
update_usage_people,
use_args,
Expand Down Expand Up @@ -264,8 +263,7 @@ def delete(self, handle: str) -> Response:
# update search index
tree = get_tree_from_jwt()
indexer: SearchIndexer = get_search_indexer(tree)
with indexer.get_writer(overwrite=False, use_async=True) as writer:
indexer.delete_object(writer, handle)
indexer.delete_object(handle=handle, class_name=self.gramps_class_name)
return self.response(200, trans_dict, total_items=len(trans_dict))

def put(self, handle: str) -> Response:
Expand All @@ -292,11 +290,10 @@ def put(self, handle: str) -> Response:
# update search index
tree = get_tree_from_jwt()
indexer: SearchIndexer = get_search_indexer(tree)
with indexer.get_writer(overwrite=False, use_async=True) as writer:
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
indexer.add_or_update_object(writer, handle, db_handle, class_name)
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
indexer.add_or_update_object(handle, db_handle, class_name)
return self.response(200, trans_dict, total_items=len(trans_dict))


Expand Down Expand Up @@ -470,11 +467,10 @@ def post(self) -> Response:
# update search index
tree = get_tree_from_jwt()
indexer: SearchIndexer = get_search_indexer(tree)
with indexer.get_writer(overwrite=False, use_async=True) as writer:
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
indexer.add_or_update_object(writer, handle, db_handle, class_name)
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
indexer.add_or_update_object(handle, db_handle, class_name)
return self.response(201, trans_dict, total_items=len(trans_dict))


Expand Down
2 changes: 1 addition & 1 deletion gramps_webapi/api/resources/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
)

from ...const import GRAMPS_OBJECT_PLURAL
from ..search import get_total_number_of_objects
from ..util import get_total_number_of_objects
from .util import transaction_to_json


Expand Down
12 changes: 11 additions & 1 deletion gramps_webapi/api/resources/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import gramps_ql as gql
import pytesseract
import sifts
from flask import Response, current_app
from gramps.cli.clidbman import CLIDbManager
from gramps.gen.const import ENV, GRAMPS_LOCALE
Expand All @@ -32,7 +33,10 @@

from gramps_webapi.const import TREE_MULTI, VERSION

from ..util import get_db_handle, use_args
from ...auth.const import PERM_VIEW_PRIVATE
from ..auth import has_permissions
from ..search import get_search_indexer
from ..util import get_db_handle, get_tree_from_jwt, use_args
from . import ProtectedResource
from .emit import GrampsJSONEncoder

Expand Down Expand Up @@ -82,6 +86,11 @@ def get(self, args) -> Response:
except pytesseract.TesseractNotFoundError:
has_ocr = False
ocr_languages = []
tree = get_tree_from_jwt()
searcher = get_search_indexer(tree)
search_count = searcher.count(
include_private=has_permissions({PERM_VIEW_PRIVATE})
)

result = {
"database": {
Expand Down Expand Up @@ -119,6 +128,7 @@ def get(self, args) -> Response:
"tags": db_handle.get_number_of_tags(),
},
"researcher": db_handle.get_researcher(),
"search": {"sifts": {"version": sifts.__version__, "count": search_count}},
"server": {
"multi_tree": is_multi_tree,
"task_queue": has_task_queue,
Expand Down
12 changes: 5 additions & 7 deletions gramps_webapi/api/resources/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,12 @@
from ...auth.const import PERM_ADD_OBJ, PERM_DEL_OBJ_BATCH, PERM_EDIT_OBJ
from ...const import GRAMPS_OBJECT_PLURAL
from ..auth import require_permissions
from ..search import SearchIndexer
from ..search import SearchIndexer, get_search_indexer
from ..tasks import AsyncResult, delete_objects, make_task_response, run_task
from ..util import (
abort_with_message,
check_quota_people,
get_db_handle,
get_search_indexer,
get_tree_from_jwt,
update_usage_people,
use_args,
Expand Down Expand Up @@ -93,11 +92,10 @@ def post(self) -> Response:
# update search index
tree = get_tree_from_jwt()
indexer: SearchIndexer = get_search_indexer(tree)
with indexer.get_writer(overwrite=False, use_async=True) as writer:
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
indexer.add_or_update_object(writer, handle, db_handle, class_name)
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
indexer.add_or_update_object(handle, db_handle, class_name)
res = Response(
response=json.dumps(trans_dict),
status=201,
Expand Down
28 changes: 26 additions & 2 deletions gramps_webapi/api/resources/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

"""Full-text search endpoint."""

from typing import Dict
import re
from typing import Dict, Optional

from flask import Response
from flask_jwt_extended import get_jwt_identity
Expand All @@ -30,7 +31,9 @@
from webargs import fields, validate

from ...auth.const import PERM_TRIGGER_REINDEX, PERM_VIEW_PRIVATE
from ...const import PRIMARY_GRAMPS_OBJECTS
from ..auth import has_permissions, require_permissions
from ..search import get_search_indexer
from ..tasks import (
AsyncResult,
make_task_response,
Expand All @@ -41,13 +44,13 @@
from ..util import (
get_db_handle,
get_locale_for_language,
get_search_indexer,
get_tree_from_jwt,
use_args,
)
from . import ProtectedResource
from .emit import GrampsJSONEncoder
from .util import (
abort_with_message,
get_citation_profile_for_object,
get_event_profile_for_object,
get_family_profile_for_object,
Expand Down Expand Up @@ -115,20 +118,41 @@ def get_object_from_handle(
),
),
"strip": fields.Boolean(load_default=False),
"type": fields.DelimitedList(
fields.Str(validate=validate.Length(min=1)),
validate=validate.ContainsOnly(
choices=[t.lower() for t in PRIMARY_GRAMPS_OBJECTS]
),
),
"change": fields.Str(validate=validate.Length(min=2)),
},
location="query",
)
def get(self, args: Dict):
"""Get search result."""
tree = get_tree_from_jwt()
searcher = get_search_indexer(tree)
if args.get("change"):
match = re.match(r"^(<=|>=|<|>)(\d+(\.\d+)?)$", args["change"])
if match:
change_op: Optional[str] = match.group(1)
change_value: Optional[float] = float(match.group(2))
else:
abort_with_message(422, "change parameter has invalid format")
else:
change_op = None
change_value = None

total, hits = searcher.search(
query=args["query"],
page=args["page"],
pagesize=args["pagesize"],
# search in private records if allowed to
include_private=has_permissions([PERM_VIEW_PRIVATE]),
sort=args.get("sort"),
object_types=args.get("type") or None,
change_op=change_op,
change_value=change_value,
)
if hits:
locale = get_locale_for_language(args["locale"], default=True)
Expand Down
18 changes: 8 additions & 10 deletions gramps_webapi/api/resources/transactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@

from ...auth.const import PERM_ADD_OBJ, PERM_DEL_OBJ, PERM_EDIT_OBJ
from ..auth import require_permissions
from ..search import SearchIndexer
from ..search import SearchIndexer, get_search_indexer
from ..util import (
abort_with_message,
check_quota_people,
get_db_handle,
get_search_indexer,
get_tree_from_jwt,
update_usage_people,
use_args,
Expand Down Expand Up @@ -118,14 +117,13 @@ def post(self, args) -> Response:
# update search index
tree = get_tree_from_jwt()
indexer: SearchIndexer = get_search_indexer(tree)
with indexer.get_writer(overwrite=False, use_async=True) as writer:
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
if _trans_dict["type"] == "delete":
indexer.delete_object(writer, handle)
else:
indexer.add_or_update_object(writer, handle, db_handle, class_name)
for _trans_dict in trans_dict:
handle = _trans_dict["handle"]
class_name = _trans_dict["_class"]
if _trans_dict["type"] == "delete":
indexer.delete_object(handle, class_name)
else:
indexer.add_or_update_object(handle, db_handle, class_name)
res = Response(
response=json.dumps(trans_dict),
status=200,
Expand Down
57 changes: 39 additions & 18 deletions gramps_webapi/api/resources/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from gramps.gen.lib import (
Citation,
Event,
EventRoleType,
Family,
Media,
Person,
Expand Down Expand Up @@ -139,7 +140,7 @@ def get_event_participants_for_handle(
db_handle: DbReadBase,
handle: Handle,
locale: GrampsLocale = glocale,
) -> Dict:
) -> Dict[str, List[Tuple[EventRoleType, Union[Person, Family]]]]:
"""Get event participants given a handle."""
result = {"people": [], "families": []}
seen = set() # to avoid duplicates
Expand All @@ -156,14 +157,10 @@ def get_event_participants_for_handle(
for event_ref in person.get_event_ref_list():
if handle == event_ref.ref:
result["people"].append(
{
"role": locale.translation.sgettext(
event_ref.get_role().xml_str()
),
"person": get_person_profile_for_handle(
db_handle, backref_handle, args=[], locale=locale
),
}
(
event_ref.get_role(),
db_handle.get_person_from_handle(backref_handle),
)
)
elif class_name == "Family":
family = db_handle.get_family_from_handle(backref_handle)
Expand All @@ -172,18 +169,42 @@ def get_event_participants_for_handle(
for event_ref in family.get_event_ref_list():
if handle == event_ref.ref:
result["families"].append(
{
"role": locale.translation.sgettext(
event_ref.get_role().xml_str()
),
"family": get_family_profile_for_handle(
db_handle, backref_handle, args=[], locale=locale
),
}
(
event_ref.get_role(),
db_handle.get_family_from_handle(backref_handle),
)
)
return result


def get_event_participants_profile_for_handle(
    db_handle: DbReadBase,
    handle: Handle,
    locale: GrampsLocale = glocale,
) -> Dict:
    """Get profiles of the participants of an event given its handle.

    The (role, object) pairs returned by `get_event_participants_for_handle`
    are turned into dictionaries with the role string passed through the
    locale's `sgettext` and the profile of the respective person or family.

    Returns a dictionary with the keys "people" and "families", each holding
    a list of such dictionaries (keys "role" and "person" or "family").
    """
    participants = get_event_participants_for_handle(
        db_handle=db_handle,
        handle=handle,
        locale=locale,
    )

    def _make_entry(profile_key, role, obj, get_profile):
        # Build the profile before translating the role, one object at a time.
        profile = get_profile(db_handle, obj, args=[], locale=locale)
        role_label = locale.translation.sgettext(role.xml_str())
        return {"role": role_label, profile_key: profile}

    return {
        "people": [
            _make_entry("person", role, person, get_person_profile_for_object)
            for role, person in participants["people"]
        ],
        "families": [
            _make_entry("family", role, family, get_family_profile_for_object)
            for role, family in participants["families"]
        ],
    }


def get_event_summary_from_object(
db_handle: DbReadBase, event: Event, locale: GrampsLocale = glocale
):
Expand Down Expand Up @@ -215,7 +236,7 @@ def get_event_profile_for_object(
if role is not None:
result["role"] = role
if "all" in args or "participants" in args:
result["participants"] = get_event_participants_for_handle(
result["participants"] = get_event_participants_profile_for_handle(
db_handle, event.handle, locale=locale
)
if "all" in args or "ratings" in args:
Expand Down
Loading

0 comments on commit d9c79f3

Please sign in to comment.