Skip to content

Commit

Permalink
task/WP-164 Implement Workspace Search (#886)
Browse files Browse the repository at this point in the history
* Simplified and fixed search on the project listing and the project file listing
* Small fix
* Fixed the project file search bug; added search on project ID
* Fixed a test
  • Loading branch information
shayanaijaz authored Oct 19, 2023
1 parent c522d1c commit f12180a
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 37 deletions.
39 changes: 32 additions & 7 deletions server/portal/apps/projects/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
list_projects, get_project, create_shared_workspace,\
update_project, get_workspace_role, change_user_role, add_user_to_workspace,\
remove_user, transfer_ownership

from portal.apps.search.tasks import tapis_project_listing_indexer
from portal.libs.elasticsearch.indexes import IndexedProject
from elasticsearch_dsl import Q

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -63,12 +65,35 @@ def get(self, request):
}
```
"""
# TODOv3: Support Elasticsearch queries for V3 projects https://jira.tacc.utexas.edu/browse/TV3-160
# query_string = request.GET.get('query_string')
# offset = int(request.GET.get('offset', 0))
# limit = int(request.GET.get('limit', 100))
client = request.user.tapis_oauth.client
listing = list_projects(client)

query_string = request.GET.get('query_string')
offset = int(request.GET.get('offset', 0))
limit = int(request.GET.get('limit', 100))

listing = []

if query_string:
search = IndexedProject.search()

ngram_query = Q("query_string", query=query_string,
fields=["title", "id"],
minimum_should_match='100%',
default_operator='or')

wildcard_query = Q("wildcard", title=f'*{query_string}*') | Q("wildcard", id=f'*{query_string}*')

search = search.query(ngram_query | wildcard_query)
search = search.extra(from_=int(offset), size=int(limit))

res = search.execute()
hits = [hit.to_dict() for hit in res]
listing = hits
else:
client = request.user.tapis_oauth.client
listing = list_projects(client)

tapis_project_listing_indexer.delay(listing)

return JsonResponse({"status": 200, "response": listing})

def post(self, request): # pylint: disable=no-self-use
Expand Down
7 changes: 6 additions & 1 deletion server/portal/apps/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.conf import settings
from celery import shared_task
from portal.libs.agave.utils import user_account, service_account
from portal.libs.elasticsearch.utils import index_listing
from portal.libs.elasticsearch.utils import index_listing, index_project_listing
from portal.apps.users.utils import get_tas_allocations
from portal.apps.projects.models.metadata import ProjectMetadata
from portal.libs.elasticsearch.docs.base import (IndexedAllocation,
Expand Down Expand Up @@ -78,3 +78,8 @@ def index_project(self, project_id):
project_doc = IndexedProject(**project_dict)
project_doc.meta.id = project_id
project_doc.save()


@shared_task(bind=True, max_retries=3, queue='default')
def tapis_project_listing_indexer(self, projects):
    """Celery task that indexes a listing of projects.

    Forwards ``projects`` unchanged to ``index_project_listing``. The bound
    task instance (``self``) is not used in the body, so no explicit retry
    is triggered here even though ``max_retries`` is configured.

    :param projects: project listing handed straight to
        ``index_project_listing``.
    """
    index_project_listing(projects)
2 changes: 1 addition & 1 deletion server/portal/libs/agave/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def search(client, system, path='', offset=0, limit=100, query_string='', filter
if filter:
search = search.filter(filter_query)

search = search.filter('prefix', **{'path._exact': path})
search = search.filter('prefix', **{'path._exact': path.strip('/')})
search = search.filter('term', **{'system._exact': system})
search = search.extra(from_=int(offset), size=int(limit))
res = search.execute()
Expand Down
2 changes: 1 addition & 1 deletion server/portal/libs/agave/operations_unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def test_search(self, mock_search, mock_listing):
"name._exact, name._pattern"],
default_operator='and'))

mock_search().query().filter.assert_called_with('prefix', **{'path._exact': '/path'})
mock_search().query().filter.assert_called_with('prefix', **{'path._exact': 'path'})
mock_search().query().filter().filter.assert_called_with('term', **{'system._exact': 'test.system'})
mock_search().query().filter().filter().extra.assert_called_with(from_=int(0), size=int(100))
self.assertEqual(search_res, {'listing':
Expand Down
38 changes: 11 additions & 27 deletions server/portal/libs/elasticsearch/docs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,21 @@


class IndexedProject(Document):
id = Keyword(fields={'_exact': Keyword()})
title = Text(fields={'_exact': Keyword()})
description = Text()
created = Date()
lastModified = Date()
projectId = Keyword()
path = Text()
name = Text()
host = Text()
owner = Object(
properties={
'username': Keyword(),
'fullName': Text()
}
)
pi = Object(
properties={
'username': Keyword(),
'fullName': Text()
}
)
coPIs = Object(
multi=True,
properties={
'username': Keyword(),
'fullName': Text()
}
)
teamMembers = Object(
multi=True,
properties={
'username': Keyword(),
'fullName': Text()
}
properties={
'username': Keyword(),
'firstName': Text(),
'lastName': Text(),
'email': Text()
}
)
updated = Date()

@classmethod
def from_id(cls, projectId):
Expand Down
22 changes: 22 additions & 0 deletions server/portal/libs/elasticsearch/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,25 @@ def index_listing(files):
})

bulk(client, ops)


def index_project_listing(projects):
    """Bulk-upsert a listing of projects into the ``IndexedProject`` index.

    Every entry is copied with ``dict(...)``, stamped with an ``updated``
    value taken from ``current_time()``, and submitted as an ``update``
    action with ``doc_as_upsert`` enabled. The document ``_id`` is the
    result of ``get_sha256_hash`` applied to the project's ``id``.

    :param projects: iterable of project entries; each one must be
        accepted by ``dict()`` and contain an ``id`` key.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import -- TODO confirm).
    from portal.libs.elasticsearch.docs.base import IndexedProject

    index_name = IndexedProject.Index.name
    es_client = get_connection('default')

    def _as_upsert_action(project):
        # Work on a copy so the caller's entry is never mutated.
        doc = dict(project)
        doc['updated'] = current_time()
        return {
            '_index': index_name,
            '_id': get_sha256_hash(doc['id']),
            'doc': doc,
            '_op_type': 'update',
            'doc_as_upsert': True
        }

    bulk(es_client, [_as_upsert_action(project) for project in projects])

0 comments on commit f12180a

Please sign in to comment.