Skip to content

Commit

Permalink
Merge pull request #42 from uktrade/feature/orpd-54-search-terms
Browse files Browse the repository at this point in the history
feat:reimplemented search backend service
  • Loading branch information
hareshkainthdbt authored Nov 14, 2024
2 parents 1570675 + b822177 commit 01023c1
Show file tree
Hide file tree
Showing 21 changed files with 3,426 additions and 8,792 deletions.
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
per-file-ignores =
construction_legislation.py: E501
1 change: 0 additions & 1 deletion orp/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@
**dj_database_url.parse(
DATABASE_URL,
engine="postgresql",
conn_max_age=0,
),
"ENGINE": "django.db.backends.postgresql",
}
Expand Down
9 changes: 9 additions & 0 deletions orp/config/settings/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
# Applications that are required to load before DJANGO_APPS
BASE_APPS = [
"whitenoise.runserver_nostatic", # Serve static files via WhiteNoise
"rest_framework",
]

# REST_FRAMEWORK = {
# # Use Django's standard `django.contrib.auth` permissions,
# # or allow read-only access for unauthenticated users.
# 'DEFAULT_PERMISSION_CLASSES': [
# 'rest_framework.permissions.DjangoModelPermissionsOrAnonReadOnly',
# ]
# }

INSTALLED_APPS = BASE_APPS + INSTALLED_APPS # noqa
125 changes: 123 additions & 2 deletions orp/config/urls.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,137 @@
"""orp URL configuration."""

import logging
import time

import orp_search.views as orp_search_views

from orp_search.config import SearchDocumentConfig
from orp_search.models import DataResponseModel
from orp_search.utils.documents import clear_all_documents
from orp_search.utils.search import search
from rest_framework import routers, serializers, status, viewsets
from rest_framework.decorators import action
from rest_framework.response import Response

from django.conf import settings
from django.contrib import admin
from django.urls import path
from django.urls import include, path

import core.views as core_views

urls_logger = logging.getLogger(__name__)


# Serializers define the API representation.
class DataResponseSerializer(serializers.HyperlinkedModelSerializer):
    """Define the API representation of a ``DataResponseModel`` record.

    The serializer exposes the fields named in ``Meta.fields``; each field
    is listed exactly once (a duplicated trailing ``"id"`` entry was
    removed, which does not change the serialized output).
    """

    class Meta:
        model = DataResponseModel
        fields = [
            "id",
            "title",
            "link",
            "publisher",
            "language",
            "format",
            "description",
            "date_issued",
            "date_modified",
            "date_valid",
            "audience",
            "coverage",
            "subject",
            "type",
            "license",
            "regulatory_topics",
            "status",
            "date_uploaded_to_orp",
            "has_format",
            "is_format_of",
            "has_version",
            "is_version_of",
            "references",
            "is_referenced_by",
            "has_part",
            "is_part_of",
            "is_replaced_by",
            "replaces",
            "related_legislation",
        ]


class DataResponseViewSet(viewsets.ModelViewSet):
    """Expose the search backend as a JSON endpoint.

    NOTE(review): this is a ``ModelViewSet`` without ``queryset`` or
    ``serializer_class``; only the custom ``search`` action below is
    implemented here - confirm the inherited CRUD routes are not expected
    to work.
    """

    # Keys copied from the search context into the JSON payload. Any other
    # entry of the context (e.g. the paginator) is deliberately left out.
    _RESPONSE_KEYS = (
        "results",
        "results_count",
        "is_paginated",
        "results_total_count",
        "results_page_total",
        "current_page",
        "start_index",
        "end_index",
    )

    @action(detail=False, methods=["get"], url_path="search")
    def search(self, request, *args, **kwargs):
        """Run a search for ``request`` and return the results as JSON.

        :param request: The incoming request, passed through unchanged to
            ``orp_search.utils.search.search``.
        :return: ``Response`` with status 200 and the keys listed in
            ``_RESPONSE_KEYS`` on success, or status 500 with a
            ``message`` entry when the search raises or a key is missing.
        """
        context = {
            "service_name": settings.SERVICE_NAME_SEARCH,
        }

        try:
            # Inside the method body ``search`` resolves to the
            # module-level search function, not to this action.
            search_context = search(context, request)
            response_data = {
                key: search_context[key] for key in self._RESPONSE_KEYS
            }
        except Exception as e:
            # Record the traceback server-side; previously the failure was
            # only reported to the client and left no trace in the logs.
            urls_logger.exception("error searching")
            return Response(
                data={"message": f"error searching: {e}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        return Response(response_data, status=status.HTTP_200_OK)


class RebuildCacheViewSet(viewsets.ViewSet):
    """Expose an endpoint that clears and rebuilds the document cache."""

    @action(detail=False, methods=["post"], url_path="rebuildcache")
    def rebuild_cache(self, request, *args, **kwargs):
        """Clear all documents, then rebuild the cache from both sources.

        :param request: The incoming request; its content is not read.
        :return: ``Response`` with status 200, a ``message`` and the
            ``duration`` in seconds (rounded to 2 decimals) on success, or
            status 500 with a ``message`` entry if any step raises.
        """
        # Imported inside the method, as in the original code.
        # NOTE(review): presumably to avoid an import cycle or import-time
        # cost - confirm before moving to module level.
        from orp_search.legislation import Legislation
        from orp_search.public_gateway import PublicGateway

        # Monotonic clock: elapsed time must not be affected by system
        # clock adjustments made while the rebuild is running.
        started = time.monotonic()
        try:
            clear_all_documents()
            config = SearchDocumentConfig(search_query="", timeout=10)
            Legislation().build_cache(config)
            PublicGateway().build_cache(config)
        except Exception as e:
            # Keep the traceback in the server logs. The response wording
            # is kept unchanged for existing clients, although the failure
            # may come from any of the steps above, not only the clearing.
            urls_logger.exception("error rebuilding cache")
            return Response(
                data={"message": f"[urls] error clearing documents: {e}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        # Compute the duration once so the log and the response agree.
        duration = round(time.monotonic() - started, 2)
        urls_logger.info("time taken to rebuild cache: %s seconds", duration)
        return Response(
            data={
                "message": "rebuilt cache",
                "duration": duration,
            },
            status=status.HTTP_200_OK,
        )


# Routers provide an easy way of automatically determining the URL conf.
# Both viewsets are registered under the same "v1" prefix and are told apart
# by their basename; their custom actions declare the url_path values
# "search" and "rebuildcache" respectively.
router = routers.DefaultRouter()
router.register(r"v1", DataResponseViewSet, basename="search")
router.register(r"v1", RebuildCacheViewSet, basename="rebuildcache")

urlpatterns = [
path("", include(router.urls)),
path("", orp_search_views.search_react, name="search_react"),
path("nojs/", orp_search_views.search, name="search"),
path("nojs/", orp_search_views.search_django, name="search_django"),
# If we choose to have a start page with green button, this is it:
# path("", core_views.home, name="home"),
path(
Expand Down
48 changes: 13 additions & 35 deletions orp/orp_search/config.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
import logging

from orp_search.utils.terms import combine_search_terms, parse_search_terms

logger = logging.getLogger(__name__)


class SearchDocumentConfig:
def __init__(
self,
search_terms: str,
search_query: str,
document_types=None,
timeout=None,
dummy=False,
limit=10,
offset=1,
publisher_terms=None,
publisher_names=None,
sort_by=None,
id=None,
):
Expand All @@ -27,24 +24,15 @@ def __init__(
:param timeout: Optional. The timeout in seconds for the search
request.
"""
self.search_terms = [term.strip() for term in search_terms.split(",")]
self.search_query = search_query
self.document_types = document_types
self.timeout = None if timeout is None else int(timeout)
self.dummy = dummy
self.limit = limit
self.offset = offset
self.publisher_terms = publisher_terms
self.publisher_names = publisher_names
self.sort_by = sort_by
self.id = id

# Parse search terms
search_terms_and, search_terms_or = parse_search_terms(search_terms)
self.search_terms_and = search_terms_and
self.search_terms_or = search_terms_or
self.final_search_expression = combine_search_terms(
search_terms_and, search_terms_or
)

def validate(self):
"""
Expand Down Expand Up @@ -73,22 +61,12 @@ def validate(self):
return False
return True

def build_search_term(self):
# Rules config.search_terms
# 1. If search terms is empty, return empty string
# 2. If search terms begin with a quote and end with a quote
# then treat as a phrase
# 3. If search terms contain a + between two terms then treat
# as an AND search
# 4. If search terms contain a space between two terms then treat
# as a OR search

search_term_tmp = []

for term in self.search_terms:
if term.startswith('"') and term.endswith('"'):
search_term_tmp.append(f'"{term}"')
elif "+" in term:
search_term_tmp.append(term.replace("+", " AND "))
else:
search_term_tmp.append(term)
def print_to_log(self):
    """Write every configuration attribute to the log at INFO level.

    One ``name: value`` record is emitted per attribute, in a fixed order.
    """
    attribute_names = (
        "search_query",
        "document_types",
        "timeout",
        "limit",
        "offset",
        "publisher_names",
        "sort_by",
        "id",
    )
    for attribute_name in attribute_names:
        current_value = getattr(self, attribute_name)
        logger.info(f"{attribute_name}: {current_value}")
Loading

0 comments on commit 01023c1

Please sign in to comment.