diff --git a/docs/openapi.yml b/docs/openapi.yml index 2cd742ad9..484398baf 100644 --- a/docs/openapi.yml +++ b/docs/openapi.yml @@ -2429,6 +2429,20 @@ components: type: string description: Type of submission enum: ["XML", "CSV", "Form"] + internal_rems: + type: object + description: Preserved REMS information for each dataset + additionalProperties: true + properties: + url: + type: string + description: URL for application to apply access for dataset in REMS + resourceId: + type: string + description: REMS id of the resource created. Created automatically during the publication process + catalogueId: + type: string + description: REMS id of the catalogue created. Created automatically during the publication process drafts: type: array items: @@ -3331,12 +3345,12 @@ components: description: ID of an organization in REMS workflowId: type: integer - description: ID of a Workflow, obtained from REMS + description: ID of a Workflow, obtained from REMS, and must belong to organizationId licenses: type: array items: type: integer - description: ID of a license, obtained from REMS, and must belong to organizationId + description: IDs of Licenses, obtained from REMS, and must belong to organizationId REMS: type: array description: Array of organizations with workflows and licenses from REMS database diff --git a/metadata_backend/api/handlers/object.py b/metadata_backend/api/handlers/object.py index f1cd88207..c66edb75e 100644 --- a/metadata_backend/api/handlers/object.py +++ b/metadata_backend/api/handlers/object.py @@ -161,15 +161,13 @@ async def post_object(self, req: Request) -> Response: # ensure only one object with the same schema exists in the submission if is_single_instance: - schemas_in_submission = set() submission = { "metadataObjects": await submission_op.get_submission_field_list(submission_id, "metadataObjects") } for _, schema in self.iter_submission_objects(submission): - if schema == schema_type and schema in schemas_in_submission: + if schema == schema_type: 
reason = f"Submission of type {workflow.name} already has a '{schema}', and it can have only one." raise web.HTTPBadRequest(reason=reason) - schemas_in_submission.add(schema) collection = f"draft-{schema_type}" if req.path.startswith(f"{API_PREFIX}/drafts") else schema_type diff --git a/metadata_backend/api/handlers/publish.py b/metadata_backend/api/handlers/publish.py index 88aacaa82..5a1683f7e 100644 --- a/metadata_backend/api/handlers/publish.py +++ b/metadata_backend/api/handlers/publish.py @@ -419,8 +419,9 @@ async def _publish_rems(self, submission: dict[str, Any], obj_op: ObjectOperator "infourl": self._make_discovery_url(object_data, submission["workflow"]), } }, - "metaxIdentifier": object_data.get("metaxIdentifier", ""), } + if schema == "dataset": + rems_ds.update({"metaxIdentifier": object_data.get("metaxIdentifier", "")}) rems_datasets.append(rems_ds) org_id = submission["rems"]["organizationId"] @@ -440,16 +441,16 @@ async def _publish_rems(self, submission: dict[str, Any], obj_op: ObjectOperator # Add rems URL to metax dataset description rems_url = self.rems_handler.application_url(catalogue_id) if "metaxIdentifier" in ds: - new_description = ds["description"] + f"\n\nREMS DAC: {rems_url}" + new_description = ds["description"] + f"\n\nSD Apply's Application link: {rems_url}" await self.metax_handler.update_draft_dataset_description(ds["metaxIdentifier"], new_description) + + # Update metadata object with preserved rems info that might need later await obj_op.update_metadata_object( ds["schema"], ds["accession_id"], { - "rems": { + "internal_rems": { "url": rems_url, - "workflowId": workflow_id, - "organizationId": org_id, "resourceId": resource_id, "catalogueId": catalogue_id, }, @@ -482,6 +483,9 @@ async def publish_submission(self, req: Request) -> Response: if "datacite" in workflow.required_schemas and "doiInfo" not in submission: raise web.HTTPBadRequest(reason=f"Submission '{submission_id}' must have the required DOI metadata.") + if "dac" in 
workflow.required_schemas and "rems" not in submission: + raise web.HTTPBadRequest(reason=f"Submission '{submission_id}' must have rems information.") + schemas_in_submission = set() has_study = False for _, schema in self.iter_submission_objects(submission): @@ -494,9 +498,7 @@ async def publish_submission(self, req: Request) -> Response: reason = f"Submission of type {workflow.name} has more than one '{schema}', and it can have only one" raise web.HTTPBadRequest(reason=reason) schemas_in_submission.add(schema) - # TO_DO: Can only be enabled after we have unified the DAC from EGA and REMS - # if "dac" in workflow.required_schemas and "rems" not in submission: - # raise web.HTTPBadRequest(reason=f"Submission '{accession_id}' must have rems.") + if workflow.name != "BigPicture" and not has_study: raise web.HTTPBadRequest(reason=f"Submission '{submission_id}' must have a study.") @@ -513,7 +515,7 @@ async def publish_submission(self, req: Request) -> Response: ) raise web.HTTPBadRequest(reason=reason) - # TO_DO: Can only be enabled after we have unified the DAC from EGA and REMS + # TO_DO: Need to include all required objects before publishing submission in unit and integration tests # missing_schemas = set() # for required_schema in workflow.required_schemas: # if required_schema not in schemas_in_submission: @@ -523,6 +525,7 @@ async def publish_submission(self, req: Request) -> Response: # raise web.HTTPBadRequest( # reason=f"{workflow.name} submission '{submission_id}' is missing '{required}' schema(s)." 
# ) + # Create draft DOI and Metax records if not existing external_user_id = await self.get_user_external_id(req) async for accession_id, schema, object_data in self.iter_submission_objects_data(submission, obj_op): diff --git a/metadata_backend/api/handlers/rems_proxy.py b/metadata_backend/api/handlers/rems_proxy.py index 1e88c97b5..5fa1d59f1 100644 --- a/metadata_backend/api/handlers/rems_proxy.py +++ b/metadata_backend/api/handlers/rems_proxy.py @@ -48,7 +48,7 @@ def _get_localized(language: str, _dict: dict[str, Any], fallback_language: str async def get_workflows_licenses_from_rems(self, request: web.Request) -> web.Response: """Get workflows and Policies for frontend.""" - language = request.query.get("language", "en").split("_")[0] + language = request.query.get("language", "en") workflows = await self.rems_handler.get_workflows() licenses = await self.rems_handler.get_licenses() @@ -68,7 +68,9 @@ def add_organization(organization_id: str, organization_name: str) -> None: org_id = workflow["organization"]["organization/id"] if org_id not in organizations: add_organization(str(org_id), str(org_name)) - organizations[org_id]["workflows"].append({"id": workflow["id"], "title": title}) + organizations[org_id]["workflows"].append( + {"id": workflow["id"], "title": title, "licenses": workflow["licenses"]} + ) for lic in licenses: title = self._get_localized(language, lic["localizations"]) diff --git a/metadata_backend/conf/workflows/2_sdsx.json b/metadata_backend/conf/workflows/2_sdsx.json index 0e2ba2df2..c8bc05d67 100644 --- a/metadata_backend/conf/workflows/2_sdsx.json +++ b/metadata_backend/conf/workflows/2_sdsx.json @@ -9,7 +9,7 @@ { "name": "dac", "required": true, - "allowMultipleObjects": true + "allowMultipleObjects": false }, { "name": "policy", @@ -36,7 +36,7 @@ { "name": "dataset", "required": true, - "allowMultipleObjects": true + "allowMultipleObjects": false } ] }, diff --git a/metadata_backend/helpers/schemas/submission.json 
b/metadata_backend/helpers/schemas/submission.json index 17698e916..ecf7f6772 100644 --- a/metadata_backend/helpers/schemas/submission.json +++ b/metadata_backend/helpers/schemas/submission.json @@ -1140,6 +1140,20 @@ ] } } + }, + "internal_rems": { + "url": { + "type": "string", + "title": "URL for application to apply access for dataset in REMS" + }, + "resourceId": { + "type": "string", + "title": "REMS id of the resource created. Created automatically during the publication process" + }, + "catalogueId": { + "type": "string", + "title": "REMS id of the catalogue created. Created automatically during the publication process" + } } } }, @@ -1208,14 +1222,6 @@ "type": "integer", "title": "REMS id of a policy/license" } - }, - "resourceId": { - "type": "string", - "title": "REMS id of the resource created. Created automatically during the publication process" - }, - "catalogueItemId": { - "type": "string", - "title": "REMS id of the catalogue created. Created automatically during the publication process" - } } } }, diff --git a/metadata_backend/services/rems_service_handler.py b/metadata_backend/services/rems_service_handler.py index c1896fb0e..4282d954e 100644 --- a/metadata_backend/services/rems_service_handler.py +++ b/metadata_backend/services/rems_service_handler.py @@ -13,7 +13,7 @@ """ import time -from typing import Any +from typing import Any, Optional from aiohttp import ClientTimeout, web from aiohttp.client_exceptions import ClientConnectorError, InvalidURL @@ -104,12 +104,15 @@ async def create_catalogue_item( result: str = created["id"] return result - async def item_ok(self, item_type: str, organization_id: str, item_id: int) -> bool: + async def item_ok( + self, item_type: str, organization_id: str, item_id: int, licenses: Optional[list[int]] = None + ) -> bool: """Check that item exists. 
:param item_type: 'workflow' or 'license' :param organization_id: REMS organization/id :param item_id: REMS item id + :param licenses: list of licence IDs :raises HTTPError :returns: True or raises """ @@ -127,6 +130,17 @@ async def item_ok(self, item_type: str, organization_id: str, item_id: int) -> b reason=f"{capitalized_item_type} '{item_id}' doesn't belong to organization '{organization_id}'", status=400, ) + if item_type == "workflow" and licenses is not None: + linked_licenses = item["licenses"] # licenses that are pre-linked to workflow in REMS + for lic in linked_licenses: + if lic["license/id"] not in licenses: + raise self.make_exception( + reason=f"Linked license '{lic['license/id']}' doesn't exist in licenses '{licenses}'", + status=400, + ) + + LOG.debug("Licenses are linked to workflow correctly.") + except KeyError as exc: raise self.make_exception( reason=f"{capitalized_item_type} '{item_id}' has unexpected structure.", status=400 @@ -138,7 +152,7 @@ async def item_ok(self, item_type: str, organization_id: str, item_id: int) -> b return True async def validate_workflow_licenses(self, organization_id: str, workflow_id: int, licenses: list[int]) -> bool: - """Check that workflow and policies exist. + """Check that workflow and licenses exist. 
:param organization_id: REMS organization/id :param workflow_id: REMS workflow id @@ -159,13 +173,13 @@ async def validate_workflow_licenses(self, organization_id: str, workflow_id: in if not isinstance(licenses, list): raise self.make_exception(reason=f"Licenses '{licenses}' must be a list of integers.", status=400) - await self.item_ok("workflow", organization_id, workflow_id) + await self.item_ok("workflow", organization_id, workflow_id, licenses) for license_id in licenses: if not isinstance(license_id, int): raise self.make_exception(reason=f"Workflow ID '{license_id}' must be an integer.", status=400) - await self.item_ok("license", organization_id, license_id) + LOG.debug("REMS All ok.") return True diff --git a/tests/http/rems.http b/tests/http/rems.http index 520d1117f..86d68d49d 100644 --- a/tests/http/rems.http +++ b/tests/http/rems.http @@ -3,17 +3,19 @@ @backend_url = http://localhost:5430 -# @auth_url = http://localhost:8000 -@rems_url = https://test-rems.sd.csc.fi/api -# @rems_url = http://localhost:8003/api -@rems_user_id = sd-submit-robot -@rems_api_key = sd-submit-test-key +@auth_url = http://localhost:8000 +# @rems_url = https://rems-dev.2.rahtiapp.fi/api +@rems_url = http://localhost:8003/api +@rems_url_2 = http://localhost:8003/ + +@rems_user_id = owner +@rems_api_key = 42 @auth_email = mock_user@test.what @resid = {{resid_req.response.body.id}} -@wfid = {{dac_req.response.body.$[0].id}} -@orgid = {{dac_req.response.body.$[0].organization.organization/id}} +@wfid = {{dac_req.response.body.id}} +@orgid = {{dac_req.response.body.organization.organization/id}} @application_id = {{catalogue_req.response.body.id}} ### Mock authentication @@ -30,7 +32,6 @@ GET {{backend_url}}/v1/rems ?language=fi ### Get workflows -# @name dac_req GET {{rems_url}}/workflows ?disabled=false &archived=false @@ -53,7 +54,6 @@ x-rems-user-id: {{rems_user_id}} x-rems-api-key: {{rems_api_key}} ### Get license -# @name dac_req GET {{rems_url}}/licenses/1 ?disabled=false 
&archived=false @@ -70,7 +70,7 @@ Content-Type: application/json { "resid": "test-doi-here", "organization": { - "organization/id": "another_org" + "organization/id": "CSC" }, "licenses": [ 1 @@ -105,9 +105,8 @@ Content-Type: application/json "archived": false } - ### Get application - only with mock api GET {{rems_url}}/application - ?items=2 + ?items=1 x-rems-user-id: {{rems_user_id}} x-rems-api-key: {{rems_api_key}} diff --git a/tests/integration/mock_rems_api.py b/tests/integration/mock_rems_api.py index 42e80392f..f84e42118 100644 --- a/tests/integration/mock_rems_api.py +++ b/tests/integration/mock_rems_api.py @@ -54,7 +54,31 @@ "handlers": ["user_id_1", "user_id_2"], "forms": [1], }, - "licenses": [], + "licenses": [ + { + "license/id": 1, + "licensetype": "link", + "organization": { + "organization/id": "CSC", + "organization/short-name": organizations["CSC"]["short-name"], + "organization/name": organizations["CSC"]["name"], + }, + "localizations": { + "fi": { + "title": "Nimeä 4.0 Kansainvälinen (CC BY 4.0)", + "textcontent": "https://creativecommons.org/licenses/by/4.0/deed.fi", + "attachment-id": None, + }, + "en": { + "title": "Creative Commons Attribution 4.0 International (CC BY 4.0)", + "textcontent": "https://creativecommons.org/licenses/by/4.0/", + "attachment-id": None, + }, + }, + "enabled": True, + "archived": False, + } + ], "enabled": True, "archived": False, }, @@ -231,16 +255,20 @@ async def get_license(request: web.Request) -> web.Response: raise web.HTTPNotFound(reason=f"License with id '{license_id}' was not found.") +# TO_DO: Check with REMS if an application includes multiple catalogues would work async def get_application(request: web.Request) -> web.Response: """REMS application.""" + application_catalogues = [] try: - items = int(request.query["items"]) - item = catalogue[items] + items = request.query["items"].split(",") + for i in items: + item = catalogue[int(i)] + application_catalogues.append(item) except (KeyError, TypeError) 
as e: LOG.exception(e) LOG.debug(request) return web.HTTPNotFound(reason="Catalogue item was not found") - return web.json_response(data=item) + return web.json_response(data=application_catalogues) async def post_resource(request: web.Request) -> web.Response: @@ -321,7 +349,7 @@ async def init() -> web.Application: app.router.add_get("/api/licenses/{license_id}", get_license) app.router.add_post("/api/resources/create", post_resource) app.router.add_post("/api/catalogue-items/create", post_catalogue_item) - app.router.add_get("/application", get_application) + app.router.add_get("/api/application", get_application) return app diff --git a/tests/integration/test_files.py b/tests/integration/test_files.py index 7c657f5ac..4aeaae606 100644 --- a/tests/integration/test_files.py +++ b/tests/integration/test_files.py @@ -311,6 +311,6 @@ async def test_creating_file_version(self, client_logged_in): posted_files3 = await post_project_files(client_logged_in, file_data) assert posted_files3[0]["version"] == 3 - # Get files. Reads each version as separate file + # Get file. 
Only file's latest version is returned project_files = await get_project_files(client_logged_in, projectId) - assert len(project_files) == 3 + assert len(project_files) == 1 diff --git a/tests/integration/test_metax.py b/tests/integration/test_metax.py index 0aefd8ab1..5e6db37c1 100644 --- a/tests/integration/test_metax.py +++ b/tests/integration/test_metax.py @@ -40,8 +40,8 @@ async def test_metax_id_created(client_logged_in, submission_fega): doi_data_raw = await create_request_json_data("doi", "test_doi.json") await put_submission_doi(client_logged_in, submission_fega, doi_data_raw) - # rems_data = await create_request_json_data("dac", "dac_rems.json") - # await put_submission_rems(client_logged_in, submission_fega, rems_data) + rems_data = await create_request_json_data("dac", "dac_rems.json") + await put_submission_rems(client_logged_in, submission_fega, rems_data) await post_object_json(client_logged_in, "run", submission_fega, "ERR000076.json") await publish_submission(client_logged_in, submission_fega) diff --git a/tests/integration/test_publications.py b/tests/integration/test_publications.py index 9862129e3..eed25b913 100644 --- a/tests/integration/test_publications.py +++ b/tests/integration/test_publications.py @@ -72,8 +72,7 @@ async def test_minimal_fega_json_publication_rems(self, client_logged_in, submis LOG.debug(f"Checking that dataset {ds_id} in submission {submission_fega} has rems data") res = await resp.json() assert res["accessionId"] == ds_id, "expected dataset id does not match" - assert "rems" in res, "expected rems field not found in dataset" - assert res["rems"]["workflowId"] == 1, "expected workflowId does not match" - assert res["rems"]["organizationId"] == "CSC", "expected organizationId does not match" - assert "resourceId" in res["rems"], "expected resourceId not found in rems field" - assert "catalogueId" in res["rems"], "expected catalogueId not found in rems field" + assert "internal_rems" in res, "expected internal_rems field 
not found in dataset" + assert "url" in res["internal_rems"], "expected url not found in internal_rems field" + assert "resourceId" in res["internal_rems"], "expected resourceId not found in internal_rems field" + assert "catalogueId" in res["internal_rems"], "expected catalogueId not found in internal_rems field" diff --git a/tests/integration/test_submissions.py b/tests/integration/test_submissions.py index ffe56ff4f..025312aca 100644 --- a/tests/integration/test_submissions.py +++ b/tests/integration/test_submissions.py @@ -568,6 +568,51 @@ async def test_linking_folder_to_submission_works(self, client_logged_in, projec submission = await get_submission(client_logged_in, submission_id) assert submission["linkedFolder"] == name + async def test_adding_rems_info_to_submission_works(self, client_logged_in, project_id): + """Test that correct REMS info can be added to submission and invalid REMS info will raise error. + + :param client_logged_in: HTTP client in which request call is made + :param project_id: id of the project the submission belongs to + """ + # Create new submission and check its creation succeeded + submission_data = { + "name": "REMS Submission", + "description": "Mock Base submission for adding REMS info", + "projectId": project_id, + "workflow": "SDSX", + } + submission_sdsx = await post_submission(client_logged_in, submission_data) + async with client_logged_in.get(f"{submissions_url}/{submission_sdsx}") as resp: + LOG.debug(f"Checking that submission {submission_sdsx} was created") + assert resp.status == 200, f"HTTP Status code error, got {resp.status}" + + # Get correctly formatted REMS info and patch it into the new submission successfully + rems_data_raw = await create_request_json_data("dac", "dac_rems.json") + rems_data = json.loads(rems_data_raw) + await put_submission_rems(client_logged_in, submission_sdsx, rems_data_raw) + + async with client_logged_in.get(f"{submissions_url}/{submission_sdsx}") as resp: + LOG.debug(f"Checking that 
submission {submission_sdsx} was patched") + res = await resp.json() + assert res["submissionId"] == submission_sdsx, "expected submission id does not match" + assert res["name"] == submission_data["name"], "expected submission name does not match" + assert res["description"] == submission_data["description"], "submission description content mismatch" + assert res["published"] is False, "submission is published, expected False" + assert res["rems"] == rems_data, "rems info does not match" + + # Test that an incorrect REMS object fails to patch into the submission + # error case: REMS's licenses do not include DAC's linked license + put_bad_rems = {"workflowId": 1, "organizationId": "CSC", "licenses": [2, 3]} + async with client_logged_in.put( + f"{submissions_url}/{submission_sdsx}/rems", data=json.dumps(put_bad_rems) + ) as resp: + LOG.debug(f"Tried updating submission {submission_sdsx}") + assert resp.status == 400, f"HTTP Status code error, got {resp.status}" + res = await resp.json() + assert ( + res["detail"] == "Rems error: Linked license '1' doesn't exist in licenses '[2, 3]'" + ), "expected error mismatch" + class TestSubmissionPagination: """Testing getting submissions, draft submissions and draft templates with pagination.""" diff --git a/tests/unit/test_handlers.py b/tests/unit/test_handlers.py index e8b3102b3..de08e554a 100644 --- a/tests/unit/test_handlers.py +++ b/tests/unit/test_handlers.py @@ -83,7 +83,24 @@ async def setUpAsync(self): ], "drafts": [], "linkedFolder": "", - "doiInfo": {"creators": [{"name": "Creator, Test"}]}, + "doiInfo": { + "creators": [ + { + "givenName": "Test", + "familyName": "Creator", + "affiliation": [ + { + "name": "affiliation place", + "schemeUri": "https://ror.org", + "affiliationIdentifier": "https://ror.org/test1", + "affiliationIdentifierScheme": "ROR", + } + ], + } + ], + "subjects": [{"subject": "999 - Other"}], + "keywords": "test,keyword", + }, "files": [{"accessionId": "file1", "version": 1, "status": 
"added"}], } self.user_id = "USR12345678" @@ -1111,6 +1128,45 @@ async def test_put_linked_folder_fails(self): self.assertEqual(response.status, 400) self.assertIn("It already has a linked folder", await response.text()) + async def test_put_submission_rems_works(self): + """Test put method for rems data to submission works.""" + self.MockedSubmissionOperator().update_submission.return_value = self.submission_id + data = ujson.load(open(self.TESTFILES_ROOT / "dac" / "dac_rems.json")) + with ( + patch( + "metadata_backend.services.rems_service_handler.RemsServiceHandler.validate_workflow_licenses", + return_value=True, + ), + self.p_get_sess_restapi, + ): + response = await self.client.put(f"{API_PREFIX}/submissions/{self.submission_id}/rems", json=data) + self.assertEqual(response.status, 200) + json_resp = await response.json() + self.assertEqual(json_resp["submissionId"], self.submission_id) + + response = await self.client.get(f"{API_PREFIX}/submissions/{self.submission_id}") + self.assertEqual(response.status, 200) + json_resp = await response.json() + self.assertIn("rems", json_resp) + + async def test_put_submission_rems_fails_with_missing_fields(self): + """Test put method for rems data to submission fails if required fields are missing.""" + self.MockedSubmissionOperator().update_submission.return_value = self.submission_id + data = {"workflowId": 1} + with self.p_get_sess_restapi: + response = await self.client.put(f"{API_PREFIX}/submissions/{self.submission_id}/rems", json=data) + self.assertEqual(response.status, 400) + self.assertIn("REMS DAC is missing one or more of the required fields", await response.text()) + + async def test_put_submission_rems_fails_with_wrong_value_type(self): + """Test put method for rems data to submission fails if values have incorrect types.""" + self.MockedSubmissionOperator().update_submission.return_value = self.submission_id + data = {"workflowId": 1, "organizationId": 1, "licenses": [1]} + with self.p_get_sess_restapi: 
+ response = await self.client.put(f"{API_PREFIX}/submissions/{self.submission_id}/rems", json=data) + self.assertEqual(response.status, 400) + self.assertIn("Organization ID '1' must be a string.", await response.text()) + class PublishSubmissionHandlerTestCase(HandlersTestCase): """Publishing API endpoint class test cases.""" @@ -1153,6 +1209,7 @@ async def tearDownAsync(self): async def test_submission_is_published(self): """Test that submission would be published and DOI would be added.""" + self.test_submission = {**self.test_submission, **{"rems": {}}} self.MockedSubmissionOperator().update_submission.return_value = self.submission_id with ( patch(f"{self._publish_handler}.create_draft_doi", return_value=self.user_id),