diff --git a/credentials.py b/credentials.py index d33b4b9..c70eda5 100644 --- a/credentials.py +++ b/credentials.py @@ -14,8 +14,10 @@ # If you do not have a UMLS account, you may apply for a license on the UMLS Terminology Services (UTS) website. # https://documentation.uts.nlm.nih.gov/rest/authentication.html -# TODO: add option for UMLS api key auth +# UMLS api key auth +umls_apikey = None - -# SNOMED authentication from international and TRUD -# TODO add arg for api key auth +# SNOMED authentication from NHS TRUD. International releases will require different API access creds. +# api key auth from NHS TRUD +# For more information please see: https://isd.digital.nhs.uk/trud/users/guest/filters/0/api +snomed_apikey = None diff --git a/data/snomed/preprocessing_snomed_ct.ipynb b/data/snomed/preprocessing_snomed_ct.ipynb index 6430dbb..7578503 100644 --- a/data/snomed/preprocessing_snomed_ct.ipynb +++ b/data/snomed/preprocessing_snomed_ct.ipynb @@ -11,21 +11,141 @@ "\n", "SNOMED CT is a standarised clinical terminology consisting of >350,000 unique concepts. 
It is owned, maintained and distributed by SNOMED International.\n", "\n", + "## Access to SNOMED CT files\n", + "\n", "Please visit and explore https://www.snomed.org/ to find out further information about the various SNOMED CT products and services which they offer.\n", "\n", "-------\n", "\n", "UK Edition files can be found via [NHS TRUD](https://isd.digital.nhs.uk/)\n", "\n", - "Download files via API coming soon...\n", - "\n", - "\n", "--------\n", "\n", "All raw files from SNOMED should be placed in the local directory [here](data/snomed)\n", "\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using the NHS TRUD API\n", + "\n", + "### Release list endpoint\n", + "\n", + "##### Request\n", + "A request to this endpoint is a HTTP GET of a URL that looks like this:\n", + "\n", + "https://isd.digital.nhs.uk/trud/api/v1/keys/deadc0de/items/123/releases\n", + "\n", + "Replace *deadc0de* with the API key, and *123* with the item number.\n", + "\n", + "Item numbers can be found in the URLs of releases pages. 
For example, the URL for the [NHS National Interim Clinical Imaging Procedures](https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/2/items/14/releases) releases page is:\n", + "\n", + "https://isd.digital.nhs.uk/trud/users/guest/filters/0/categories/2/items/14/releases\n", + "\n", + "In this example the item number is 14.\n", + "\n", + "To request only the latest release add *?latest* to the URL, like this:\n", + "\n", + "https://isd.digital.nhs.uk/trud/api/v1/keys/deadc0de/items/123/releases?latest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import sys\n", + "from getpass import getpass\n", + "sys.path.append('../..')\n", + "from credentials import * # you can store your api key here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Important URLs\n", + "if snomed_apikey is not None:\n", + " pass\n", + "else:\n", + " snomed_apikey = getpass('Please enter your SNOMED api key: ')\n", + "# SNOMED CT UK Clinical Edition\n", + "clinical_info_url = f'https://isd.digital.nhs.uk/trud/api/v1/keys/{snomed_apikey}/items/101/releases?latest'\n", + "\n", + "# SNOMED CT UK Drug Extension\n", + "drug_info_url = f'https://isd.digital.nhs.uk/trud/api/v1/keys/{snomed_apikey}/items/105/releases?latest'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download the SNOMED CT UK Clinical Edition\n", + "response = requests.get(clinical_info_url)\n", + "if response.status_code == 200:\n", + " file_name = response.json()['releases'][0]['id']\n", + " url = response.json()['releases'][0]['archiveFileUrl']\n", + " print('SNOMED information retrieved successfully')\n", + "else:\n", + " print(f'Error: {response.status_code}')\n", + " print(response.json())\n", + "\n", + "# Download the file\n", + "print(f'Downloading {file_name}...')\n", + "response = 
requests.get(url)\n", + "if response.status_code == 200:\n", + " try:\n", + " with open(f'{file_name}', 'wb') as file:\n", + " file.write(response.content)\n", + " print('Download completed successfully')\n", + "\n", + " except ValueError:\n", + " print(\"Response content is not a valid JSON\")\n", + "else:\n", + " print(f'Failed to download file. Status code: {response.status_code}')\n", + " print(response.text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download the SNOMED CT UK Drug Extension\n", + "response = requests.get(drug_info_url)\n", + "if response.status_code == 200:\n", + " file_name = response.json()['releases'][0]['id']\n", + " url = response.json()['releases'][0]['archiveFileUrl']\n", + " print('SNOMED information retrieved successfully')\n", + "else:\n", + " print(f'Error: {response.status_code}')\n", + " print(response.json())\n", + "\n", + "# Download the file\n", + "print(f'Downloading {file_name}...')\n", + "response = requests.get(url)\n", + "if response.status_code == 200:\n", + " try:\n", + " with open(f'{file_name}', 'wb') as file:\n", + " file.write(response.content)\n", + " print('Download completed successfully')\n", + "\n", + " except ValueError:\n", + " print(\"Response content is not a valid JSON\")\n", + "else:\n", + " print(f'Failed to download file. Status code: {response.status_code}')\n", + " print(response.text)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -70,7 +190,7 @@ }, "source": [ "### Load the data\n", - "Please see the section: [Access to SNOMED CT release files](#access_to_snomed_ct) for how to retrieve the zipped SNOMED CT release." + "Please see the section: [Access to SNOMED CT release files](#Access-to-SNOMED-CT-files) for how to retrieve the zipped SNOMED CT release." 
] }, { @@ -356,46 +476,7 @@ "outputs": [], "source": [ "# ICD-10\n", - "icd_df = snomed.map_snomed2icd10()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 496 - }, - "id": "SynBfXCi-Zpb", - "outputId": "f3cde34a-c5f9-428c-874a-01516832f4a1" - }, - "outputs": [], - "source": [ - "icd_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# drop codes with no mapping\n", - "icd_df = icd_df[icd_df['mapTarget']!='']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sctid2icd10 = icd_df.groupby('referencedComponentId').apply(lambda group: [{'code': row['mapTarget'],\n", - " 'mapGroup': row['mapPriority'],\n", - " 'mapPriority': row['mapPriority'],\n", - " 'mapRule': row['mapRule'],\n", - " 'mapAdvice': row['mapAdvice']} for _, row in group.iterrows()]).to_dict()" + "sctid2icd10 = snomed.map_snomed2icd10()" ] }, { @@ -437,53 +518,7 @@ }, "outputs": [], "source": [ - "opcs_df = snomed.map_snomed2opcs4()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nINW3byN-dd5" - }, - "outputs": [], - "source": [ - "opcs_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "opcs_df['refsetId'].unique() # notice how there are two codes?\n", - "# SCTID:'999002271000000101' represents ICD10 codes and SCTID:'1126441000000105' OPCS4\n", - "# Filtering by '999002271000000101' will also show more ICD10 codes. \n", - "# This is because SNOMED UK ext has duplicated information here. 
For SNOMED UK ext I would use the ICD10 in the refset rather than the internation ed.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Filter for just OPCS4\n", - "opcs_df = opcs_df[opcs_df['refsetId']=='1126441000000105']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sctid2opcs4 = opcs_df.groupby('referencedComponentId').apply(lambda group: [{'code': row['mapTarget'],\n", - " 'mapGroup': row['mapPriority'],\n", - " 'mapPriority': row['mapPriority'],\n", - " 'mapBlock': row['mapBlock'],\n", - " 'mapAdvice': row['mapAdvice']} for _, row in group.iterrows()]).to_dict()" + "sctid2opcs4 = snomed.map_snomed2opcs4()" ] }, { @@ -491,9 +526,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Optional Enrich with UMLS terms.\n", + "## Optional: Enrich with UMLS terms.\n", "\n", - "To preprocess UMLS for SNOMED CT, please look [here](/data/snomed/umls_enricher.py). For further details, please refer to the [UMLS folder](/data/umls/ReadMe.md).\n", + "To preprocess UMLS for SNOMED CT, please look [here](umls_enricher.py). 
For further details, please refer to the [UMLS folder](../umls/ReadMe.md).\n", "\n", "For offical UMLS documentation from the NLM:\n", "Please explore the [UMLS Metathesaurus Vocabulary Documentation](https://www.nlm.nih.gov/research/umls/sourcereleasedocs/index.html?_gl=1*1t5e3g7*_ga*OTQwMzA2NjEyLjE2NjI2NzEyMjU.*_ga_P1FPTH9PL4*MTY2MjY3MTIyNC4xLjEuMTY2MjY3MzE2NS4wLjAuMA..)\n", diff --git a/medcat/evaluate_mct_export/mct_analysis.py b/medcat/evaluate_mct_export/mct_analysis.py index 861b413..59e4de2 100644 --- a/medcat/evaluate_mct_export/mct_analysis.py +++ b/medcat/evaluate_mct_export/mct_analysis.py @@ -357,7 +357,6 @@ def full_annotation_df(self) -> pd.DataFrame: else: print(f"Warning: Unexpected column location type: {type(loc)}") meta_df.insert(1, f'predict_{meta_model}', pred_meta_values) - #meta_df.insert(int(meta_df.columns.get_loc(meta_model)) + 1, f'predict_{meta_model}', pred_meta_values) # TODO fix this line return meta_df diff --git a/requirements.txt b/requirements.txt index 936a5be..d86067a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -medcat==1.10.2 +medcat~=1.10.2 plotly~=5.19.0 eland==8.12.1 en_core_web_md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.6.0/en_core_web_md-3.6.0-py3-none-any.whl ipyfilechooser -jupyter_contrib_nbextensions \ No newline at end of file +jupyter_contrib_nbextensions