diff --git a/code/UI/autocomplete/GenerateQuestionTerms.py b/code/UI/autocomplete/GenerateQuestionTerms.py deleted file mode 100644 index 0b00e458f..000000000 --- a/code/UI/autocomplete/GenerateQuestionTerms.py +++ /dev/null @@ -1,84 +0,0 @@ -import sys, os -import json - -question_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), - 'reasoningtool/QuestionAnswering') -sys.path.append(question_dir) -from Question import Question - -neo4j_helper_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), - 'reasoningtool/kg-construction') -sys.path.append(neo4j_helper_dir) -from Neo4jConnection import Neo4jConnection - -sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../") # code directory -from RTXConfiguration import RTXConfiguration - - -class GenerateQuestionTerms: - @staticmethod - def __get_question_templates(): - question_templates = [] - with open(os.path.join(question_dir, 'Questions.tsv'), 'r') as fid: - for line in fid.readlines(): - if line[0] == "#": - pass - else: - question = Question(line) - question_templates.append(question) - return question_templates - - @staticmethod - def __get_node_names(type): - # # connect to Neo4j - # f = open(os.path.join(neo4j_helper_dir, 'config.json'), 'r') - # config_data = f.read() - # f.close() - # config = json.loads(config_data) - - # create the RTXConfiguration object - rtxConfig = RTXConfiguration() - - kg2_neo4j_info = rtxConfig.get_neo4j_info("KG2pre") - - conn = Neo4jConnection(kg2_neo4j_info['bolt'], - kg2_neo4j_info['username'], - kg2_neo4j_info['password']) - names = conn.get_node_names(type) - conn.close() - - return names - - @staticmethod - def generateQuetionsToTXT(): - question_templates = GenerateQuestionTerms.__get_question_templates() - - have_writen = False - for i, question in enumerate(question_templates): - - # retrieve the type and template from question_template - if len(question.parameter_names) == 0: - continue - type = (question.parameter_names)[0] - question_template = question.restated_question_template - - names = GenerateQuestionTerms.__get_node_names(type) - - if len(names) != 0: - question_content = '' - for name in names: - question_phase = question_template.safe_substitute({type: name}) - question_content = question_content + question_phase + '\n' - - # write content to file - if have_writen: - with open('question_terms.txt', 'a') as w_f: - w_f.write(question_content) - else: - with open('question_terms.txt', 'w') as w_f: - w_f.write(question_content) - have_writen = True - - -if __name__ == '__main__': - GenerateQuestionTerms.generateQuetionsToTXT() diff --git a/code/reasoningtool/sharedtrunk.py b/code/archive/sharedtrunk.py similarity index 100% rename from code/reasoningtool/sharedtrunk.py rename to code/archive/sharedtrunk.py diff --git a/code/reasoningtool/CmdPubMedNGD.py b/code/reasoningtool/CmdPubMedNGD.py deleted file mode 100644 index 65b132550..000000000 --- a/code/reasoningtool/CmdPubMedNGD.py +++ /dev/null @@ -1,45 +0,0 @@ -'''Returns the Normalized Google semantic distance between two string MeSH terms - - Usage: python3 CmdPubMedNGD.py term1 term2 - - Example: python3 CmdPubMedNGD.py atherosclerosis hypercholesterolemia - - Output: JSON dump of dict (keys are "value" and "status") -''' -__author__ = 'Stephen Ramsey' -__copyright__ = 'Oregon State University' -__credits__ = ['Stephen Ramsey'] -__license__ = 'MIT' -__version__ = '0.1.0' -__maintainer__ = '' -__email__ = '' -__status__ = 'Prototype' - -import argparse -import math -import json -from QueryNCBIeUtils import QueryNCBIeUtils - -def main(): - parser = argparse.ArgumentParser(description="Returns the Normalized Google semantic distance between two string MeSH terms", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('terms', metavar='terms', type=str, nargs=2, help='two string arguments; must be MeSH terms') - args = parser.parse_args() - mesh_terms = args.terms - ngd = QueryNCBIeUtils.normalized_google_distance(*mesh_terms) - res_dict = dict() - res_dict['value'] = ngd - - if math.isnan(ngd): - res_dict['status'] = 'unsuccessful' - if not QueryNCBIeUtils.is_mesh_term(mesh_terms[0]): - res_dict['status'] += '; Term 1 is not a valid MeSH term' - if not QueryNCBIeUtils.is_mesh_term(mesh_terms[1]): - res_dict['status'] += '; Term 2 is not a valid MeSH term' - else: - res_dict['status'] = 'success' - print(json.dumps(res_dict)) - -if __name__ == "__main__": - main() - diff --git a/code/reasoningtool/MLDrugRepurposing/MyChemGT.py b/code/reasoningtool/MLDrugRepurposing/MyChemGT.py deleted file mode 100644 index d722ece84..000000000 --- a/code/reasoningtool/MLDrugRepurposing/MyChemGT.py +++ /dev/null @@ -1,111 +0,0 @@ -import sys -import os -new_path = os.path.join(os.getcwd(), '..', 'kg-construction') -sys.path.insert(0, new_path) - -new_path2 = os.path.join(os.getcwd(), '..', 'SemMedDB') -sys.path.insert(0, new_path2) - -from SynonymMapper import SynonymMapper -from QueryMyChem import QueryMyChem -from DrugMapper import DrugMapper -from QueryUMLSApi import QueryUMLS -import requests -import pandas -import time -import requests_cache -import numpy -import urllib -import ast - -requests_cache.install_cache('MyChemCache') - -df = pandas.read_csv('data/drugs.csv') -df = df.loc[df["id"].str.upper().str.startswith('CHEMBL', na=False)].reset_index(drop=True) - -def map_drug_to_ontology(chembl_id): - """ - mapping between a drug and Disease Ontology IDs and/or Human Phenotype Ontology IDs corresponding to indications - - :param chembl_id: The CHEMBL ID for a drug - - :return: A dictionary with two fields ('indication' and 'contraindication'). Each field is a set of strings - containing the found hp / omim / doid ids or empty set if none were found - """ - indication_onto_set = set() - contraindication_onto_set = set() - if not isinstance(chembl_id, str): - return {'indications': indication_onto_set, "contraindications": contraindication_onto_set} - drug_use = QueryMyChem.get_drug_use(chembl_id) - indications = drug_use['indications'] - contraindications = drug_use['contraindications'] - sm = SynonymMapper() - for indication in indications: - if 'snomed_concept_id' in indication.keys(): - oxo_results = sm.get_all_from_oxo('SNOMEDCT:' + str(indication['snomed_concept_id']), ['DOID', 'OMIM', 'HP']) - if oxo_results is not None: - for oxo_result in oxo_results: - indication_onto_set.add(oxo_result) - else: - oxo_results = sm.get_all_from_oxo('SNOMEDCT:' + str(indication['snomed_concept_id']), ['UMLS']) - if oxo_results is not None: - for oxo_result in oxo_results: - indication_onto_set.add(oxo_result) - for contraindication in contraindications: - if 'snomed_concept_id' in contraindication.keys(): - oxo_results = sm.get_all_from_oxo('SNOMEDCT:' + str(contraindication['snomed_concept_id']), ['DOID', 'OMIM', 'HP']) - if oxo_results is not None: - for oxo_result in oxo_results: - contraindication_onto_set.add(oxo_result) - else: - oxo_results = sm.get_all_from_oxo('SNOMEDCT:' + str(contraindication['snomed_concept_id']), ['UMLS']) - if oxo_results is not None: - for oxo_result in oxo_results: - contraindication_onto_set.add(oxo_result) - return {'indications': indication_onto_set, "contraindications": contraindication_onto_set} - -# Initialized the lists used to create the dataframes -mychem_tp_list = [] -mychem_tn_list = [] -# UMLS targets will be seperated to be converted into DOID, HP, or OMIM -umls_tn_list = [] -umls_tp_list = [] - -d = 0 -for drug in df['id']: - chembl_id = drug.split(':')[1] - if not chembl_id.startswith('CHEMBL'): - chembl_id = 'CHEMBL' + chembl_id - elif chembl_id.startswith('CHEMBL.COMPOUND'): - curie_id = curie_id.split(':')[1] - res = map_drug_to_ontology(chembl_id) - # Load indications and contraintications into their respective lists - for ind in res['indications']: - if ind.startswith('UMLS:'): - umls_tp_list += [[drug,ind.split(':')[1]]] - else: - mychem_tp_list += [[drug,ind]] - for cont in res['contraindications']: - if cont.startswith('UMLS:'): - umls_tn_list += [[drug,cont.split(':')[1]]] - else: - mychem_tn_list += [[drug,cont]] - d += 1 - # This prints percentage progress every 10%. Uncomment if you want this. - #if d % int(len(df)/10 + 1) == 0: - # print(d/len(df)) - -# Convert lists to dataframes -tp_df = pandas.DataFrame(mychem_tp_list,columns = ['source','target']) -tn_df = pandas.DataFrame(mychem_tn_list,columns = ['source','target']) -umls_tp_df = pandas.DataFrame(umls_tp_list,columns = ['source','target']) -umls_tn_df = pandas.DataFrame(umls_tn_list,columns = ['source','target']) - -# Save dataframes as csvs -tp_df.to_csv("data/mychem_tp.csv",index=False) -tn_df.to_csv("data/mychem_tn.csv",index=False) -umls_tp_df.to_csv("data/mychem_tp_umls.csv",index=False) -umls_tn_df.to_csv("data/mychem_tn_umls.csv",index=False) - - - diff --git a/code/reasoningtool/MLDrugRepurposing/README.md b/code/reasoningtool/MLDrugRepurposing/README.md index 557d5d58e..95a4e8e94 100644 --- a/code/reasoningtool/MLDrugRepurposing/README.md +++ b/code/reasoningtool/MLDrugRepurposing/README.md @@ -1,10 +1,15 @@ # NOTE: dependencies have been removed -As of May 15, 2025, one of the modules on which the code in this directory +On May 15, 2025, one of the modules on which some code in this directory depends, `RTX/code/reasoningtool/SemMedDB`, was deleted from the `RTXteam/RTX` project area (see #2454). But if you need this code, you can obtain it from any -RTXteam/RTX [release](https://github.com/RTXteam/RTX/releases) (the code in the -SemMedDB project directory hasn't changed since 2019). +earlier RTXteam/RTX [release](https://github.com/RTXteam/RTX/releases). + +On Oct. 9, 2025, a module, +`RTX/code/reasoningtool/kg-construction/SynonymMapper.py`, on which the +`MyChemGT.py` module in this directory depends, was deleted from the +`RTXteam/RTX` project area (see #2582). But if you need this code, you can +obtain it from any earlier RTXteam/RTX [release](https://github.com/RTXteam/RTX/releases). # Make sure python is set up correctly diff --git a/code/reasoningtool/Neo4jToNetworkX.py b/code/reasoningtool/Neo4jToNetworkX.py deleted file mode 100644 index c2fa9c8a1..000000000 --- a/code/reasoningtool/Neo4jToNetworkX.py +++ /dev/null @@ -1,63 +0,0 @@ -import networkx as nx -import cypher -from collections import namedtuple - -import sys, os - -# Get rtxConfig -sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../") # code directory -from RTXConfiguration import RTXConfiguration -rtxConfig = RTXConfiguration() - -# Connection information for the ipython-cypher package -connection = "http://" + rtxConfig.neo4j_username + ":" + rtxConfig.neo4j_password + "@" + rtxConfig.neo4j_database -DEFAULT_CONFIGURABLE = { - "auto_limit": 0, - "style": 'DEFAULT', - "short_errors": True, - "data_contents": True, - "display_limit": 0, - "auto_pandas": False, - "auto_html": False, - "auto_networkx": False, - "rest": False, - "feedback": False, # turn off verbosity in ipython-cypher - "uri": connection, -} -DefaultConfigurable = namedtuple( - "DefaultConfigurable", - ", ".join([k for k in DEFAULT_CONFIGURABLE.keys()]) -) -config = DefaultConfigurable(**DEFAULT_CONFIGURABLE) - -# Convert neo4j subgraph (from cypher query) into a networkx graph -def get_graph(res, directed=True): - """ - This function takes the result (subgraph) of a ipython-cypher query and builds a networkx graph from it - :param res: output from an ipython-cypher query - :param directed: Flag indicating if the resulting graph should be treated as directed or not - :return: networkx graph (MultiDiGraph or MultiGraph) - """ - if nx is None: - raise ImportError("Try installing NetworkX first.") - if directed: - graph = nx.MultiDiGraph() - else: - graph = nx.MultiGraph() - for item in res._results.graph: - for node in item['nodes']: - graph.add_node(node['id'], properties=node['properties'], labels=node['labels'], names=node['properties']['name'], description=node['properties']['description']) - for rel in item['relationships']: - graph.add_edge(rel['startNode'], rel['endNode'], id=rel['id'], properties=rel['properties'], type=rel['type']) - return graph - -def test_get_graph(): - query = "MATCH path=allShortestPaths((s:omim_disease)-[*1..%d]-(t:disont_disease)) " \ - "WHERE s.name='%s' AND t.name='%s' " \ - "RETURN path" % (4, 'OMIM:137920', 'DOID:11476') - res = cypher.run(query, conn=connection, config=config) - graph = get_graph(res, directed=True) - if type(graph) is not nx.classes.MultiDiGraph: - raise(Exception("A networkx graph was not returned")) - if graph.number_of_nodes() < 1: - raise(Exception("An empty graph was returned")) diff --git a/code/reasoningtool/QuestionAnswering/LilGimTestQuestion.py b/code/reasoningtool/QuestionAnswering/LilGimTestQuestion.py deleted file mode 100644 index f9f0465bf..000000000 --- a/code/reasoningtool/QuestionAnswering/LilGimTestQuestion.py +++ /dev/null @@ -1,176 +0,0 @@ -# This script will return X that are similar to Y based on high Jaccard index of common one-hop nodes Z (X<->Z<->Y) - -import os -import sys -import argparse -# PyCharm doesn't play well with relative imports + python console + terminal -try: - from code.reasoningtool import ReasoningUtilities as RU -except ImportError: - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - import ReasoningUtilities as RU - -import FormatOutput -import networkx as nx -import QueryLilGIM -import CustomExceptions -import ast - -class LilGim: - - def __init__(self): - None - - @staticmethod - def answer(tissue_id, input_protein_list, use_json=False, num_show=20, rev=True): - - # Initialize the response class - response = FormatOutput.FormatResponse(6) - - # Make sure everything exists in the graph - if not RU.node_exists_with_property(tissue_id, "id"): - tissue_id = RU.get_node_property(tissue_id, "id", node_label="anatomical_entity") - - for i in range(len(input_protein_list)): - id = input_protein_list[i] - if not RU.node_exists_with_property(id, "id"): - input_protein_list[i] = RU.get_node_property(id, "id", node_label="protein") - - # Initialize the QueryLilGim class - q = QueryLilGIM.QueryLilGIM() - - # get the description - tissue_description = RU.get_node_property(tissue_id, 'name', node_label="anatomical_entity") - - # Get the correlated proteins - try: - correlated_proteins_dict = q.query_neighbor_genes_for_gene_set_in_a_given_anatomy(tissue_id, tuple(input_protein_list)) - #correlated_proteins_dict = {'UniProtKB:Q99618': 0.4276333333333333, 'UniProtKB:Q92698': 0.464, 'UniProtKB:P56282': 0.5810000000000001, 'UniProtKB:P49454': 0.4441, 'UniProtKB:P49642': 0.5188333333333334, 'UniProtKB:Q9BZD4': 0.5042666666666668, 'UniProtKB:P38398': 0.4464, 'UniProtKB:Q9BXL8': 0.5009, 'UniProtKB:P42166': 0.4263000000000001, 'UniProtKB:Q96CS2': 0.5844333333333332, 'UniProtKB:Q9BQP7': 0.4903333333333333, 'UniProtKB:O95997': 0.4743333333333333, 'UniProtKB:Q9H4K1': 0.4709, 'UniProtKB:Q9H967': 0.5646666666666667, 'UniProtKB:Q12834': 0.4478, 'UniProtKB:Q71F23': 0.4361, 'UniProtKB:Q9UQ84': 0.4800666666666666, 'UniProtKB:Q9NSP4': 0.4347} - except: - error_message = "Lil'GIM is experiencing a problem." - error_code = "LilGIMerror" - response.add_error_message(error_code, error_message) - response.print() - return 1 - - # as a list of tuples - correlated_proteins_tupes = [] - for k, v in correlated_proteins_dict.items(): - correlated_proteins_tupes.append((k, v)) - - # sort by freq - correlated_proteins_tupes_sorted = sorted(correlated_proteins_tupes, key=lambda x: x[1], reverse=rev) - correlated_proteins_tupes_sorted = correlated_proteins_tupes_sorted[0:num_show] - correlated_proteins_tupes = correlated_proteins_tupes_sorted - - - # return the results - if not use_json: - try: - protein_descriptions = RU.get_node_property(input_protein_list[0], "name", node_label="protein", name_type="id") - except: - protein_descriptions = input_protein_list[0] - for id in input_protein_list[1:-1]: - protein_descriptions += ", " - try: - protein_descriptions += RU.get_node_property(id, "name", node_label="protein", name_type="id") - except: - protein_descriptions += id - if len(input_protein_list) > 1: - try: - protein_descriptions += ", and %s" % RU.get_node_property(input_protein_list[-1], "name", node_label="protein", name_type="id") - except: - protein_descriptions += ", and %s" % input_protein_list[-1] - if rev: - to_print = "In the tissue: %s, the proteins that correlate most with %s" % (tissue_description, protein_descriptions) - else: - to_print = "In the tissue: %s, the proteins that correlate least with %s" % (tissue_description, protein_descriptions) - to_print += " according to Lil'GIM, are:\n" - for id, val in correlated_proteins_tupes_sorted: - try: - to_print += "protein: %s\t correlation %f\n" % (RU.get_node_property(id, "name", node_label="protein", name_type="id"), val) - except: - to_print += "protein: %s\t correlation %f\n" % (id, val) - print(to_print) - else: - # otherwise, you want a JSON output - protein_descriptions = [] - is_in_KG_list = [] - for protein, corr in correlated_proteins_tupes: - try: - description = RU.get_node_property(protein, "name", node_label="protein", name_type="id") - protein_descriptions.append(description) - is_in_KG_list.append(True) - except: - protein_description = protein - protein_descriptions.append(protein_description) - is_in_KG_list.append(False) - - # just get the ones that are actually in the KG. TODO: do something with the ones that are not in the KG - correlated_proteins_tupes_in_KG = [] - for i in range(len(correlated_proteins_tupes)): - if is_in_KG_list[i]: - correlated_proteins_tupes_in_KG.append(correlated_proteins_tupes[i]) - - # Return the results - full_g = RU.get_graph_from_nodes([id for id, val in correlated_proteins_tupes_in_KG], node_property_label="id") - id2node = dict() - for nx_id, node in full_g.nodes(data=True): - id2node[node['properties']['id']] = node - for id, corr in correlated_proteins_tupes_in_KG: - to_print = "In the tissue: %s, the protein %s has correlation %f with the given list of proteins." %(tissue_description, RU.get_node_property(id, "name", node_label="protein", name_type="id"), corr) - response.add_subgraph([(id, id2node[id])], [], to_print, corr) - response.print() - - @staticmethod - def describe(): - output = "Answers questions of the form: 'What proteins correlate with [$protein1, $protein2,...,$proteinK?] in blood?'" + "\n" - # TODO: subsample disease nodes - return output - - -def main(): - parser = argparse.ArgumentParser(description="Answers questions of the form: 'What proteins correlate with [$protein1, $protein2,...,$proteinK?] in blood?'", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-t', '--tissue', type=str, help="Tissue id/name", default="UBERON:0002384") - parser.add_argument('-r', '--reverse', action='store_true', help="Include flag if you want the least correlations.") - parser.add_argument('-p', '--proteins', type=str, help="List of proteins.", default="[\'UniProtKB:P12004\']") - parser.add_argument('-j', '--json', action='store_true', help='Flag specifying that results should be printed in JSON format (to stdout)', default=False) - parser.add_argument('--describe', action='store_true', help='Print a description of the question to stdout and quit', default=False) - parser.add_argument('--num_show', type=int, help='Maximum number of results to return', default=20) - - # Parse and check args - args = parser.parse_args() - tissue_id = args.tissue - is_reverse = args.reverse - proteins = args.proteins - use_json = args.json - describe_flag = args.describe - num_show = args.num_show - - # Convert the string to an actual list - #print(proteins) - proteins_preserved = proteins - try: - proteins = proteins.replace(",", "','").replace("[", "['").replace("]", "']") - protein_list = ast.literal_eval(proteins) - protein_list_strip = [] - for protein in protein_list: - protein_list_strip.append(protein.strip()) - - protein_list = protein_list_strip - - except: - protein_list = eval(proteins_preserved) - - # Initialize the question class - Q = LilGim() - - if describe_flag: - res = Q.describe() - print(res) - else: - Q.answer(tissue_id, protein_list, use_json=use_json, num_show=num_show, rev=not(is_reverse)) - -if __name__ == "__main__": - main() diff --git a/code/reasoningtool/QuestionAnswering/QueryLilGIM.py b/code/reasoningtool/QuestionAnswering/QueryLilGIM.py deleted file mode 100644 index 67addd85c..000000000 --- a/code/reasoningtool/QuestionAnswering/QueryLilGIM.py +++ /dev/null @@ -1,181 +0,0 @@ -""" This module defines the module QueryLilGIM. QueryLilGIM provides -a method for finding neighboring genes (in a distance space defined -by correlation similarity) for a set of query genes, based on gene -expression data that are stored in a Google BigQuery table. The search -for neighboring genes is based on correlation measurements computed -in a specific anatomical context (specified by the user of this module). - -Based on an example Jupyter notebook provided here: -https://github.com/NCATS-Tangerine/cq-notebooks/blob/master/BigGIM/lilGIM%20and%20BigCLAM%20Examples.ipynb -""" - -__author__ = 'Stephen Ramsey' -__copyright__ = 'Oregon State University' -__credits__ = ['Stephen Ramsey', 'Theo Knijnenburg', 'John Earls', 'David Palzer'] -__license__ = 'MIT' -__version__ = '0.1.0' -__maintainer__ = '' -__email__ = '' -__status__ = 'Prototype' - -import urllib.request -import urllib.parse -# NOTE: this module *WILL NOT WORK* if you use requests package if caching via requests-cache is turned on -import json -import pandas -import time -import sys -import os -import functools -import ast - -from ReasoningUtilities import get_nodes_that_match_in_list - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../kg-construction'))) # Go up one level and look for it - -from QueryEBIOLS import QueryEBIOLS -from QueryMyGene import QueryMyGene -import CachedMethods - -class QueryLilGIM: - BASE_URL = "http://biggim.ncats.io/api" - ENDPOINT = "lilgim/query" - DEFAULT_LIMIT = 100 - - def __init__(self, limit=DEFAULT_LIMIT): - self.limit = limit - self.mg = QueryMyGene() - - @staticmethod - def _get(endpoint, data={}, base_url=BASE_URL): - post_params = urllib.parse.urlencode(data) - url = '%s/%s?%s' % (base_url, endpoint, post_params) - req = urllib.request.urlopen(urllib.request.Request(url, headers={'Accept': 'application/json'})) -# print("Sent: GET %s?%s" % (req.request.url, req.request.body)) - return json.loads(req.read().decode()) - - @staticmethod - def _jprint(dct): - print(json.dumps(dct, indent=2)) - - @staticmethod - def _wrapper(endpoint, data={}, base_url=BASE_URL): - try: - response = QueryLilGIM._get(endpoint, data, base_url) -# QueryLilGIM._jprint(response) - except BaseException as e: - print(e, file=sys.stderr) - if e.response.status_code == 400: - QueryLilGIM._jprint(e.response.json(), file=sys.stderr) - raise - try: - ctr = 1 - while True: - query_status = QueryLilGIM._get('%s/status/%s' % (endpoint.split('/')[0], - response['request_id'],)) -# QueryLilGIM._jprint(query_status) - if query_status['status'] != 'running': - # query has finished - break - else: - time.sleep(ctr) - ctr += 1 - # linear backoff - except BaseException as e: - print(e, file=sys.stderr) - if e.response.status_code == 400: - QueryLilGIM._jprint(e.response.json(), file=sys.stderr) - raise - return pandas.concat(map(pandas.read_csv, query_status['request_uri'])) - - # anatomy_curie_id_str: string CURIE ID for an Uberon anatomy term - # protein_set_curie_id_str: a tuple containing one or more string CURIE IDs for proteins (UniProtKB) - # return value: a dict in which keys are string Uniprot CURIE IDs and values are correlation coeffs - @CachedMethods.register - @functools.lru_cache(maxsize=1024, typed=False) - def query_neighbor_genes_for_gene_set_in_a_given_anatomy(self, - anatomy_curie_id_str, - protein_set_curie_id_str): - - assert type(protein_set_curie_id_str) == tuple - assert len(protein_set_curie_id_str) > 0 - assert type(anatomy_curie_id_str) == str - - # convert UBERON anatomy curie ID str to a brenda anatomy ID - assert anatomy_curie_id_str.startswith("UBERON:") - bto_id_set = QueryEBIOLS.get_bto_id_for_uberon_id(anatomy_curie_id_str) - ret_dict = dict() - if len(bto_id_set) == 0: - return ret_dict - - assert len(bto_id_set) == 1 - - bto_term = QueryEBIOLS.get_bto_term_for_bto_id(next(iter(bto_id_set))).replace(" ", "_") - - entrez_gene_ids = set() - entrez_gene_ids_int = set() - - # convert uniprot IDs to Entrez gene IDs - for protein_curie_id_str in protein_set_curie_id_str: - assert protein_curie_id_str.startswith("UniProtKB:") - uniprot_acc = protein_curie_id_str.split(":")[1] - entrez_gene_id_set = self.mg.convert_uniprot_id_to_entrez_gene_ID(uniprot_acc) - for entrez_gene_id in entrez_gene_id_set: - entrez_gene_ids_int.add(entrez_gene_id) - entrez_gene_ids.add(str(entrez_gene_id)) - - entrez_gene_ids_str = ",".join(entrez_gene_ids) - - data = {"ids": entrez_gene_ids_str, - "tissue": bto_term, - "limit": self.limit} - - results = self._wrapper(self.ENDPOINT, data) - - ret_dict = dict() - gene_dict = dict() - - for index, row in results.iterrows(): - gene1 = row["Gene1"] - gene2 = row["Gene2"] - avg_corr = row["aveCorr"] - assert type(gene1) == int - assert type(gene2) == int - assert type(avg_corr) == float - if gene1 in entrez_gene_ids_int: - if gene2 in entrez_gene_ids_int: - # do nothing since this is not a new gene - new_gene_id = None - else: - # gene2 is the new gene - new_gene_id = gene2 - else: - if gene2 in entrez_gene_ids_int: - new_gene_id = gene1 - else: - print("neither gene was in the set of query genes, this should not happen", file=sys.stderr) - assert False - if new_gene_id is not None: - gene_dict[new_gene_id] = avg_corr - - for gene_id, avg_corr in gene_dict.items(): - uniprot_id_set = self.mg.convert_entrez_gene_id_to_uniprot_id(gene_id) - if len(uniprot_id_set) > 0: - for uniprot_id in uniprot_id_set: - ret_dict["UniProtKB:" + uniprot_id] = avg_corr - - query_res = get_nodes_that_match_in_list(ret_dict.keys(), 'protein') - res_list = str(query_res[0]) - res_list = ast.literal_eval(res_list[res_list.find('['):-1]) - - for uniprot_id in list(ret_dict): - if uniprot_id not in res_list: - ret_dict.pop(uniprot_id) - - return ret_dict - -if __name__ == '__main__': - qlg = QueryLilGIM() - print(qlg.query_neighbor_genes_for_gene_set_in_a_given_anatomy("UBERON:0002384", ("UniProtKB:P12004",))) - print(qlg.query_neighbor_genes_for_gene_set_in_a_given_anatomy("UBERON:0000178", ("UniProtKB:P01579",))) - # print(qlg.query_neighbor_genes_for_gene_set_in_a_given_anatomy("UBERON:0000178", {"UniProtKB:P01579"})) diff --git a/code/reasoningtool/QuestionAnswering/README.md b/code/reasoningtool/QuestionAnswering/README.md new file mode 100644 index 000000000..0fb6113b1 --- /dev/null +++ b/code/reasoningtool/QuestionAnswering/README.md @@ -0,0 +1,14 @@ +# What is the code in this directory? + +This directory contains mostly deprecated old code from the Feasibility Assessment Phase of +the Biomedical Data Translator project. Breakage of modules in this directory due to +elimination of deprecated downstream dependencies will be noted in this README.md. + +As of Oct. 9, 2025, the function `get_ngd_for_all` was +removed from the module `RTX/code/reasoningtool/kg-construction/NormGoogleDistance.py`; +this change breaks a number of modules in this directory. If you need that function, +you can get it from the `NormGoogleDistance.py` module in any older release of +the `RTXteam/RTX` project code. + + + diff --git a/code/reasoningtool/kg-construction/KGNodeIndex.py b/code/reasoningtool/kg-construction/KGNodeIndex.py deleted file mode 100644 index cebb5ace7..000000000 --- a/code/reasoningtool/kg-construction/KGNodeIndex.py +++ /dev/null @@ -1,669 +0,0 @@ -#!/usr/bin/env python3 -# -# Class to build and query an index of nodes in the KG -# -import os -import sys -import re -import timeit -import argparse -import sqlite3 - -sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../") -sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../QuestionAnswering") -import ReasoningUtilities as RU -#from RTXConfiguration import RTXConfiguration - -# Testing and debugging flags -DEBUG = False -TESTSUFFIX = "" -#TESTSUFFIX = "_test2" - - -# Main class -class KGNodeIndex: - - # Constructor - def __init__(self): - filepath = os.path.dirname(os.path.abspath(__file__)) - self.databaseLocation = filepath - self.lookup_table = {} - - #self.databaseLocation = 'C:/Users/ericd/Documents/zztmp' - #print(f"INFO: Temporarily using filepath {self.databaseLocation}") - - is_rtx_production = False - #if re.match("/mnt/data/orangeboard", filepath): - # is_rtx_production = True - #if DEBUG: - # print("INFO: is_rtx_production="+str(is_rtx_production)) - - if is_rtx_production: - self.databaseName = "RTXFeedback" - self.engine_type = "mysql" - else: - self.databaseName = "KGNodeIndex.sqlite" - self.engine_type = "sqlite" - self.connection = None - self.connect() - - - # Destructor - def __del__(self): - if self.engine_type == "mysql": - self.disconnect() - else: - pass - - - # Create and store a database connection - def connect(self): - # If already connected, don't need to do it again - if self.connection is not None: - return - # Create an engine object - if DEBUG is True: - print("INFO: Connecting to database") - if self.engine_type == "sqlite": - self.connection = sqlite3.connect(f"{self.databaseLocation}/{self.databaseName}") - else: - pass - #rtxConfig = RTXConfiguration() - #engine = create_engine("mysql+pymysql://" + rtxConfig.mysql_feedback_username + ":" + - # rtxConfig.mysql_feedback_password + "@" + rtxConfig.mysql_feedback_host + "/" + self.databaseName) - - - # Destroy the database connection - def disconnect(self): - - if self.connection is None: - if DEBUG is True: - print("INFO: Skip disconnecting from database") - return - - if DEBUG is True: - print("INFO: Disconnecting from database") - self.connection.close() - self.connection = None - - - # Delete and create the kgnode table - def create_tables(self): - if DEBUG is True: - print("INFO: Creating database "+self.databaseName) - self.connection.execute(f"DROP TABLE IF EXISTS kgnode{TESTSUFFIX}") - self.connection.execute(f"DROP TABLE IF EXISTS kg1node{TESTSUFFIX}") - self.connection.execute(f"CREATE TABLE kg1node{TESTSUFFIX}( curie VARCHAR(255), name VARCHAR(255), type VARCHAR(255), reference_curie VARCHAR(255) )" ) - self.connection.execute(f"DROP TABLE IF EXISTS kg2node{TESTSUFFIX}") - self.connection.execute(f"CREATE TABLE kg2node{TESTSUFFIX}( curie VARCHAR(255), name VARCHAR(255), type VARCHAR(255), reference_curie VARCHAR(255) )" ) - - - # Create the KG node table - def populate_table(self, kg_name): - - if kg_name == 'KG1': - table_name = 'kg1node' - file_suffix = '_KG1' - elif kg_name == 'KG2': - table_name = 'kg2node' - file_suffix = '_KG2' - else: - print("ERROR: kg_name must be either 'KG1' or 'KG2'") - sys.exit(5) - - filename = os.path.dirname(os.path.abspath(__file__)) + f"/../../../data/KGmetadata/NodeNamesDescriptions{file_suffix}.tsv" - filesize = os.path.getsize(filename) - previous_percentage = -1 - bytes_read = 0 - - lineCounter = 0 - fh = open(filename, 'r', encoding="latin-1", errors="replace") - print(f"INFO: Populating table {table_name}") - - # Have a dict for items already inserted so that we don't insert them twice - namesDict = {} - rows = [] - - for line in fh: - bytes_read += len(line) - columns = line.strip().split("\t") - curie = columns[0] - name = columns[1] - type = columns[2] - - #### For debugging problems - debug_flag = False - #if 'P06865' in curie: debug_flag = True - - # Some cleanup - - # Many MONDO names have a ' (disease)' suffix, which seems undesirable, so strip them out - if 'MONDO:' in curie: - name = re.sub(r'\s*\(disease\)\s*$','',name) - # Many PR names have a ' (human)' suffix, which seems undesirable, so strip them out - if 'PR:' in curie: - name = re.sub(r'\s*\(human\)\s*$','',name) - - # Create a list of all the possible names we will add to the database - names = [name] - - if re.match("OMIM:", curie): - multipleNames = name.split("; ") - if len(multipleNames) > 1: - for possibleName in multipleNames: - if possibleName == multipleNames[0]: - next - names.append(possibleName) - - elif re.match("R-HSA-", curie): - # Also store the path name without embedded abbreviations - if re.search(r' \([A-Z0-9]{1,8}\)', name): - newName = re.sub( - r' \([A-Z0-9]{1,8}\)', "", name, flags=re.IGNORECASE) - names.append(newName) - - # If this is a UniProt identifier, also add the CURIE and the naked identifier without the prefix - elif re.match("UniProtKB:[A-Z][A-Z0-9]{5}", curie) or re.match("UniProtKB:A[A-Z0-9]{9}", curie): - tmp = re.sub("UniProtKB:", "", curie) - names.append(tmp) - - # If this is a PR identifier, also add the CURIE and the naked identifier without the prefix - elif re.match("PR:[A-Z][A-Z0-9]{5}", curie) or re.match("PR:A[A-Z0-9]{9}", curie): - tmp = re.sub("PR:", "", curie) - names.append(tmp) - - # Create duplicates for various DoctorName's diseases - for name in names: - if re.search("'s ", name): - newName = re.sub("'s ", "s ", name) - names.append(newName) - #print(" duplicated _"+name+"_ to _"+newName+"_") - newName = re.sub("'s ", " ", name) - names.append(newName) - #print(" duplicated _"+name+"_ to _"+newName+"_") - - # A few special cases - if re.search("alzheimer ", name, flags=re.IGNORECASE): - newName = re.sub("alzheimer ", "alzheimers ", - name, flags=re.IGNORECASE) - names.append(newName) - #print(" duplicated _"+name+"_ to _"+newName+"_") - - newName = re.sub("alzheimer ", "alzheimer's ", - name, flags=re.IGNORECASE) - names.append(newName) - #print(" duplicated _"+name+"_ to _"+newName+"_") - - # Add all the possible names to the database - if debug_flag: - print() - print(names) - - for name in names: - name = name.upper() - if name in namesDict and curie in namesDict[name]: - continue - - # Hard-coded list of short abbreviations to ignore because they're also English - if name == "IS": - continue - if name == "AS": - continue - - # Check and add an entry to the lookup table - reference_curie = None - if name in self.lookup_table: - reference_curie = self.lookup_table[name] - if curie not in self.lookup_table: - self.lookup_table[curie] = reference_curie - else: - reference_curie = curie - if curie in self.lookup_table: - self.lookup_table[name] = reference_curie - else: - self.lookup_table[curie] = reference_curie - self.lookup_table[name] = reference_curie - if debug_flag: print(f"reference_curie for {name} is {reference_curie}") - - # Add a row for this node - rows.append([curie,name,type,reference_curie]) - if debug_flag: print([curie,name,type,reference_curie]) - if name not in namesDict: - namesDict[name] = {} - namesDict[name][curie] = 1 - - # Try also adding in the curie as a resolvable name - if curie not in namesDict: - if debug_flag: print(f"reference_curie for {curie} is {reference_curie}") - rows.append([curie,curie.upper(),type,reference_curie]) - if debug_flag: print([curie,curie.upper(),type,reference_curie]) - if curie not in namesDict: - namesDict[curie] = {} - namesDict[curie][curie] = 1 - - # Commit every 10000 lines - percentage = int(bytes_read*100.0/filesize) - if percentage > previous_percentage: - self.connection.executemany(f"INSERT INTO {table_name}{TESTSUFFIX}(curie,name,type,reference_curie) values (?,?,?,?)", rows) - self.connection.commit() - rows = [] - previous_percentage = percentage - print(str(percentage)+"%..", end='', flush=True) - - debug_flag = False - lineCounter += 1 - - # Write out the last rows - if len(rows) > 0: - self.connection.executemany(f"INSERT INTO {table_name}{TESTSUFFIX}(curie,name,type,reference_curie) values (?,?,?,?)", rows) - self.connection.commit() - print("100..", end='', flush=True) - - fh.close() - print("") - - - def create_indexes(self, kg_name): - - if kg_name == 'KG1': - table_name = 'kg1node' - elif kg_name == 'KG2': - table_name = 'kg2node' - else: - print("ERROR: kg_name must be either 'KG1' or 'KG2'") - sys.exit(5) - - print(f"INFO: Creating INDEXes on {table_name}{TESTSUFFIX}") - self.connection.execute(f"CREATE INDEX idx_{table_name}{TESTSUFFIX}_name ON {table_name}{TESTSUFFIX}(name)") - self.connection.execute(f"CREATE INDEX idx_{table_name}{TESTSUFFIX}_curie ON {table_name}{TESTSUFFIX}(curie)") - self.connection.execute(f"CREATE INDEX idx_{table_name}{TESTSUFFIX}_reference_curie ON {table_name}{TESTSUFFIX}(reference_curie)") - - - def get_curies_and_types(self, name, kg_name='KG1'): - - table_name = 'kg1node' - if kg_name.upper() == 'KG2': - table_name = 'kg2node' - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE name = ?", (name.upper(),) ) - rows = cursor.fetchall() - curies_and_types = [] - for row in rows: - curies_and_types.append({"curie": row[0], "type": row[2]}) - return curies_and_types - - - def get_curies_and_types_and_names(self, name, kg_name='KG1'): - - table_name = 'kg1node' - if kg_name.upper() == 'KG2': - table_name = 'kg2node' - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE name = ?", (name.upper(),) ) - rows = cursor.fetchall() - curies_and_types_and_names = [] - for row in rows: - names = self.get_names(row[0],kg_name=kg_name) - best_name = "?" - if names is not None: - best_name = names[0] - entity = {"curie": row[0], - "type": row[2], "name": best_name} - - # Also try to fetch the description from the knowledge graph - try: - properties = RU.get_node_properties(row[0]) - if 'description' in properties: - entity['description'] = properties['description'] - except: - # This will happen with this node is in KG2 but not KG1. FIXME - pass - curies_and_types_and_names.append(entity) - - return curies_and_types_and_names - - - def get_names(self, curie, kg_name='KG1'): - - table_name = 'kg1node' - if kg_name.upper() == 'KG2': - table_name = 'kg2node' - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE curie = ?", (curie,) ) - rows = cursor.fetchall() - - # Return a list of curies - curies = [] - for row in rows: - if row[1] == curie: - continue - curies.append(row[0]) - return curies - - - def get_curies(self, name, kg_name='KG1'): - curies_and_types = self.get_curies_and_types(name, kg_name) - - if curies_and_types is None: - return None - - # Return a list of curies - curies = [] - for curies_and_type in curies_and_types: - curies.append(curies_and_type["curie"]) - return(curies) - - - def is_curie_present(self, curie, kg_name='KG1'): - - table_name = 'kg1node' - if kg_name.upper() == 'KG2': - table_name = 'kg2node' - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE curie = ?", (curie,) ) - rows = cursor.fetchall() - - if len(rows) == 0: - return False - return True - - - def get_KG1_curies(self, name): - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM kg1node{TESTSUFFIX} WHERE name = ?", (name.upper(),) ) - rows = cursor.fetchall() - - if len(rows) == 0: - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM kg2node{TESTSUFFIX} WHERE name = ?", (name.upper(),) ) - rows = cursor.fetchall() - - curies = {} - curies_list = [] - for row in rows: - curie = row[3] - if curie not in curies: - if self.is_curie_present(curie): - curies_list.append(curie) - curies[curie] = 1 - return curies_list - - - def convert_curie(self, curie, namespace): - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM kg2node{TESTSUFFIX} WHERE name = ?", (curie.upper(),) ) - rows = cursor.fetchall() - - if len(rows) == 0: return [] - - reference_curie = rows[0][3] - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM kg2node{TESTSUFFIX} WHERE reference_curie = ?", (reference_curie,) ) - rows = cursor.fetchall() - - curies = {} - curies_list = [] - for row in rows: - curie = row[0] - match = re.match(namespace+':',curie) - if match: - if curie not in curies: - curies_list.append(curie) - curies[curie] = 1 - return curies_list - - - def get_equivalent_curies(self, curie, kg_name='KG2'): - - table_name = 'kg1node' - if kg_name.upper() == 'KG2': - table_name = 'kg2node' - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE curie = ?", (curie,) ) - rows = cursor.fetchall() - - if len(rows) == 0: return [] - - reference_curies = {} - reference_curie = None - for row in rows: - reference_curies[row[3]] = 1 - reference_curie = row[3] - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE reference_curie = ?", (reference_curie,) ) - rows = cursor.fetchall() - - curies = {} - for row in rows: - curies[row[0]] = 1 - - return list(curies.keys()) - - - def get_equivalent_entities(self, curie, kg_name='KG2'): - - table_name = 'kg1node' - if kg_name.upper() == 'KG2': - table_name = 'kg2node' - - equivalence = { curie: { } } - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE curie = ?", (curie,) ) - rows = cursor.fetchall() - - if len(rows) == 0: return equivalence - - reference_curie = rows[0][3] - equivalence[curie]['id'] = { 'identifier': reference_curie } - equivalence[curie]['equivalent_identifiers'] = [] - equivalence[curie]['type'] = [ rows[0][2]] - - # What if there are multiple rows returned, this is not handled. FIXME - #reference_curies = {} - #for row in rows: - # reference_curies[row[3]] = 1 - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM {table_name}{TESTSUFFIX} WHERE reference_curie = ?", (reference_curie,) ) - rows = cursor.fetchall() - - curies = {} - for row in rows: - row_curie = row[0] - if row_curie not in curies: - equivalence[curie]['equivalent_identifiers'].append( { 'identifier': row_curie, 'label': row[1] } ) - if row_curie == curie: - equivalence[curie]['id']['label'] = row[1] - curies[row_curie] = 1 - - return equivalence - - - def get_total_entity_count(self, type, kg_name='KG1'): - - table_name = 'kg1node' - if kg_name.upper() == 'KG2': - table_name = 'kg2node' - - count = None - - cursor = self.connection.cursor() - cursor.execute( f"SELECT COUNT(DISTINCT reference_curie) FROM {table_name}{TESTSUFFIX} WHERE type = ?", (type,) ) - rows = cursor.fetchall() - - if len(rows) == 0: - return count - - return rows[0][0] - - - - def test_select(self, name): - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM kg1node{TESTSUFFIX} WHERE curie = ?", (name.upper(),) ) - rows = cursor.fetchall() - for row in rows: - print('KG1:',row) - - cursor = self.connection.cursor() - cursor.execute( f"SELECT * FROM kg2node{TESTSUFFIX} WHERE curie = ?", (name.upper(),) ) - rows = cursor.fetchall() - for row in rows: - print('KG2:',row) - - -#################################################################################################### -def main(): - - import json - - parser = argparse.ArgumentParser( - description="Tests or rebuilds the KG Node Index", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-b', '--build', action="store_true", - help="If set, (re)build the index from scratch", default=False) - parser.add_argument('-t', '--test', action="store_true", - help="If set, run a test of the index by doing several lookups", default=False) - args = parser.parse_args() - - if not args.build and not args.test: - parser.print_help() - sys.exit(2) - - kgNodeIndex = KGNodeIndex() - - # To (re)build - if args.build: - kgNodeIndex.create_tables() - kgNodeIndex.populate_table(kg_name='KG1') - kgNodeIndex.create_indexes(kg_name='KG1') - kgNodeIndex.populate_table(kg_name='KG2') - kgNodeIndex.create_indexes(kg_name='KG2') - - # Exit here if tests are not requested - if not args.test: - return - - print("==== Testing for finding curies by name ====") - tests = ["APS2", "phenylketonuria", "Gaucher's disease", "Gauchers disease", "Gaucher disease", - "Alzheimer Disease", "Alzheimers disease", "Alzheimer's Disease", "kidney", "KIDney", "P06865", "HEXA", - "UniProtKB:P12004", "rickets", "fanconi anemia", "retina", "is"] - - # The first one takes a bit longer, so do one before starting the timer - test = kgNodeIndex.get_curies("ibuprofen") - - t0 = timeit.default_timer() - for test in tests: - curies = kgNodeIndex.get_curies(test) - print(test+" = "+str(curies)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - - print("==== Testing presence of CURIEs ============================") - tests = ["REACT:R-HSA-2160456", "DOID:9281", "OMIM:261600", "DOID:1926xx", "HP:0002511", - "UBERON:0002113", "UniProtKB:P06865", "P06865", "KEGG:C10399", "GO:0034187", "DOID:10652xx"] - - t0 = timeit.default_timer() - for test in tests: - is_present = kgNodeIndex.is_curie_present(test) - print(test+" = "+str(is_present)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - - print("==== Getting properties by CURIE ============================") - tests = ["REACT:R-HSA-2160456", "DOID:9281", - "OMIM:261600", "DOID:1926xx", "P06865"] - - t0 = timeit.default_timer() - for test in tests: - node_properties = kgNodeIndex.get_curies_and_types_and_names(test) - print(test+" = "+str(node_properties)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - - print("==== Testing for KG1 and KG2 ============================") - tests = ["APS2", "phenylketonuria", "Gauchers disease", "kidney", "HEXA", - "UniProtKB:P12004", "fanconi anemia", "ibuprofen"] - - t0 = timeit.default_timer() - for test in tests: - curies = kgNodeIndex.get_curies(test) - print(test+" in KG1 = "+str(curies)) - curies = kgNodeIndex.get_curies(test, kg_name='KG2') - print(test+" in KG2 = "+str(curies)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - - print("==== Getting KG1 CURIEs ============================") - tests = ["CUI:C0031485", "CUI:C0017205", "UniProtKB:P06865", "MESH:D005199", "HEXA", - "CHEBI:5855", "fanconi anemia", "ibuprofen", 'DOID:9281'] - - t0 = timeit.default_timer() - for test in tests: - curies = kgNodeIndex.get_KG1_curies(test) - print(test+" = "+str(curies)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - print("==== Convert CURIEs to requested namespace ============================") - tests = [ [ "CUI:C0031485", "DOID" ], [ "FMA:7203", "UBERON" ], [ "MESH:D005199", "DOID" ], - [ "CHEBI:5855", "CHEMBL.COMPOUND" ], [ "ibuprofen", "CUI" ] ] - - t0 = timeit.default_timer() - for test in tests: - curies = kgNodeIndex.convert_curie(test[0], test[1]) - print(f"{test[0]} -> {test[1]} = " + str(curies)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - print("==== Get all known synonyms of a CURIE using KG2 index ============================") - tests = [ "DOID:14330", "CUI:C0031485", "FMA:7203", "MESH:D005199", "CHEBI:5855", "DOID:9281" ] - tests = [ "DOID:9281" ] - - t0 = timeit.default_timer() - for test in tests: - curies = kgNodeIndex.get_equivalent_curies(test,kg_name='KG1') - print(f"{test} = " + str(curies)) - curies = kgNodeIndex.get_equivalent_curies(test,kg_name='KG2') - print(f"{test} = " + str(curies)) - equivalence_mapping = kgNodeIndex.get_equivalent_entities(test,kg_name='KG1') - print(json.dumps(equivalence_mapping,sort_keys=True,indent=2)) - equivalence_mapping = kgNodeIndex.get_equivalent_entities(test,kg_name='KG2') - print(json.dumps(equivalence_mapping,sort_keys=True,indent=2)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - print("==== Get total number of drug nodes and disease nodes ============================") - t0 = timeit.default_timer() - kg = 'KG1' - print(kgNodeIndex.get_total_entity_count('chemical_substance', kg_name=kg)) - print(kgNodeIndex.get_total_entity_count('disease', kg_name=kg)) - print(kgNodeIndex.get_total_entity_count('protein', kg_name=kg)) - print(kgNodeIndex.get_total_entity_count('drug', kg_name=kg)) - print(kgNodeIndex.get_total_entity_count('cheesecake', kg_name=kg)) - t1 = timeit.default_timer() - print("Elapsed time: "+str(t1-t0)) - - #print("==== Test SELECT ============================") - #kgNodeIndex.test_select('phenylketonuria') - #kgNodeIndex.test_select('CUI:C4710278') - #kgNodeIndex.test_select('UniProtKB:P06865') - #print(kgNodeIndex.is_curie_present('CUI:C4710278')) - -#################################################################################################### -if __name__ == "__main__": - main() diff --git a/code/reasoningtool/kg-construction/Neo4jConnection.py b/code/reasoningtool/kg-construction/Neo4jConnection.py deleted file mode 100644 index 2c042287b..000000000 --- a/code/reasoningtool/kg-construction/Neo4jConnection.py +++ /dev/null @@ -1,661 +0,0 @@ -''' This module defines the class Neo4jConnection. Neo4jConnection class is designed -to connect to Neo4j database and perform operations on a graphic model object. (e.g., -retrieve node and update node) The available methods include: - - get_xxx_nodes : query all xxx nodes - update_xxx_nodes : update xxx nodes by an array 'nodes', which contain two properties 'node_id' - and 'extended_info_json' for each node - get_xxx_node : query xxx node by ID - - xxx is the type of nodes. (e.g., anatomy, phenotype, microRNA, pathway, protein, disease) - -''' - -__author__ = 'Deqing Qu' -__copyright__ = 'Oregon State University' -__credits__ = ['Deqing Qu', 'Stephen Ramsey'] -__license__ = 'MIT' -__version__ = '0.1.0' -__maintainer__ = '' -__email__ = '' -__status__ = 'Prototype' - -from neo4j.v1 import GraphDatabase - - -class Neo4jConnection: - - def __init__(self, uri, user, password): - self._driver = GraphDatabase.driver(uri, auth=(user, password)) - - def close(self): - self._driver.close() - - def get_anatomy_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_anatomy_nodes) - - def get_phenotype_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_phenotype_nodes) - - def get_microRNA_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_microRNA_nodes) - - def get_pathway_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_pathway_nodes) - - def get_protein_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_protein_nodes) - - def get_disease_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_disease_nodes) - - def get_chemical_substance_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_chemical_substance_nodes) - - def get_bio_process_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_bio_process_nodes) - - def get_cellular_component_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_cellular_component_nodes) - - def get_molecular_function_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self._get_molecular_function_nodes) - - def get_metabolite_nodes(self): - with self._driver.session() as session: - return session.read_transaction(self._get_metabolite_nodes) - - def update_anatomy_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_anatomy_nodes, nodes) - - def update_phenotype_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_phenotype_nodes, nodes) - - def update_microRNA_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_microRNA_nodes, nodes) - - def update_pathway_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_pathway_nodes, nodes) - - def update_protein_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_protein_nodes, nodes) - - def update_disease_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_disease_nodes, nodes) - - def update_chemical_substance_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_chemical_substance_nodes, nodes) - - def update_bio_process_nodes(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_bio_process_nodes, nodes) - - def get_anatomy_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_anatomy_node, id) - - def get_phenotype_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_phenotype_node, id) - - def get_microRNA_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_microRNA_node, id) - - def get_pathway_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_pathway_node, id) - - def get_protein_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_protein_node, id) - - def get_disease_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_disease_node, id) - - def get_chemical_substance_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_chemical_substance_node, id) - - def get_bio_process_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_bio_process_node, id) - - def get_node(self, id): - with self._driver.session() as session: - return session.write_transaction(self._get_node, id) - - def update_anatomy_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_anatomy_nodes_desc, nodes) - - def update_phenotype_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_phenotype_nodes_desc, nodes) - - def update_microRNA_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_microRNA_nodes_desc, nodes) - - def update_pathway_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_pathway_nodes_desc, nodes) - - def update_protein_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_protein_nodes_desc, nodes) - - def update_disease_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_disease_nodes_desc, nodes) - - def update_chemical_substance_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_chemical_substance_nodes_desc, nodes) - - def update_bio_process_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_bio_process_nodes_desc, nodes) - - def update_cellular_component_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_cellular_component_desc, nodes) - - def update_molecular_function_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_molecular_function_desc, nodes) - - def update_protein_nodes_name(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_protein_nodes_name, nodes) - - def update_metabolite_nodes_desc(self, nodes): - with self._driver.session() as session: - return session.write_transaction(self._update_metabolite_desc, nodes) - - def get_node_names(self, type): - with self._driver.session() as session: - return session.write_transaction(self._get_node_names, type) - - def create_disease_has_phenotype(self, array): - with self._driver.session() as session: - return session.write_transaction(self.__create_disease_has_phenotype, array) - - def remove_duplicate_has_phenotype_relations(self): - with self._driver.session() as session: - return session.write_transaction(self.__remove_duplicate_has_phenotype_relations) - - def count_has_phenotype_relation(self, relation): - """ - - :param relation: {"d_id": "DOID:xxxx", "p_id": "HP:xxxx"} - :return: count of relations between d_id and p_id - """ - with self._driver.session() as session: - return session.write_transaction(self.__count_has_phenotype_relation, relation) - - def remove_duplicated_react_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self.__remove_duplicated_react_nodes) - - def count_duplicated_nodes(self): - with self._driver.session() as session: - return session.write_transaction(self.__count_duplicated_nodes) - - def get_relationship(self, r_type, s_id, t_id): - with self._driver.session() as session: - return session.write_transaction(self._get_relationship, r_type, s_id, t_id) - - @staticmethod - def _get_anatomy_nodes(tx): - result = tx.run("MATCH (n:anatomical_entity) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_phenotype_nodes(tx): - result = tx.run("MATCH (n:phenotypic_feature) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_microRNA_nodes(tx): - result = tx.run("MATCH (n:microRNA) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_pathway_nodes(tx): - result = tx.run("MATCH (n:pathway) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_protein_nodes(tx): - result = tx.run("MATCH (n:protein) RETURN n.id") - return [record["n.id"] for record in result] - - @staticmethod - def _get_disease_nodes(tx): - result = tx.run("MATCH (n:disease) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_chemical_substance_nodes(tx): - result = tx.run("MATCH (n:chemical_substance) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_bio_process_nodes(tx): - result = tx.run("MATCH (n:biological_process) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_cellular_component_nodes(tx): - result = tx.run("MATCH (n:cellular_component) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_molecular_function_nodes(tx): - result = tx.run("MATCH (n:molecular_function) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _get_metabolite_nodes(tx): - result = tx.run("MATCH (n:metabolite) RETURN n.rtx_name") - return [record["n.rtx_name"] for record in result] - - @staticmethod - def _update_anatomy_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:anatomical_entity{rtx_name:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_phenotype_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:phenotypic_feature{rtx_name:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_microRNA_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:microRNA{rtx_name:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_pathway_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:pathway{rtx_name:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_protein_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:protein{id:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_disease_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:disease{rtx_name:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_chemical_substance_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:chemical_substance{rtx_name:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_bio_process_nodes(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.extended_info_json AS extended_info_json - MATCH (n:biological_process{rtx_name:node_id}) - SET n.extended_info_json=extended_info_json - """, - nodes=nodes, - ) - return result - - @staticmethod - def _get_anatomy_node(tx, id): - result = tx.run("MATCH (n:anatomical_entity{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_phenotype_node(tx, id): - result = tx.run("MATCH (n:phenotypic_feature{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_microRNA_node(tx, id): - result = tx.run("MATCH (n:microRNA{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_pathway_node(tx, id): - result = tx.run("MATCH (n:pathway{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_protein_node(tx, id): - result = tx.run("MATCH (n:protein{id:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_disease_node(tx, id): - result = tx.run("MATCH (n:disease{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_chemical_substance_node(tx, id): - result = tx.run("MATCH (n:chemical_substance{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_bio_process_node(tx, id): - result = tx.run("MATCH (n:biological_process{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _get_node(tx, id): - result = tx.run("MATCH (n{rtx_name:'%s'}) RETURN n" % id) - return result.single() - - @staticmethod - def _update_anatomy_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:anatomical_entity{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_phenotype_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:phenotypic_feature{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_microRNA_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:microRNA{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_disease_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:disease{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_pathway_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:pathway{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_protein_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:protein{id:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_chemical_substance_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:chemical_substance{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_bio_process_nodes_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:biological_process{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_cellular_component_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:cellular_component{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_molecular_function_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:molecular_function{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_protein_nodes_name(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.name AS name - MATCH (n:protein{id:node_id}) - SET n.name=name - """, - nodes=nodes, - ) - return result - - @staticmethod - def _update_metabolite_desc(tx, nodes): - result = tx.run( - """ - UNWIND {nodes} AS row - WITH row.node_id AS node_id, row.desc AS description - MATCH (n:metabolite{rtx_name:node_id}) - SET n.description=description - """, - nodes=nodes, - ) - return result - - @staticmethod - def _get_node_names(tx, type): - result = tx.run("MATCH (n:%s) RETURN n.name" % type) - return [record["n.name"] for record in result] - - @staticmethod - def __create_disease_has_phenotype(tx, array): - result = tx.run( - """ - UNWIND {array} AS row - WITH row.d_id AS d_id, row.p_id AS p_id - MATCH (d:disease {rtx_name:d_id}), (p:phenotypic_feature {rtx_name:p_id}) - CREATE (d)-[:has_phenotype { - source_node_uuid: d.UUID, - target_node_uuid: p.UUID, - is_defined_by: \'RTX\', - provided_by: \'BioLink\', - predicate: \'has_phenotype\', - seed_node_uuid: d.seed_node_uuid, - relation: \'has_phenotype\' - }]->(p) - """, - array=array - ) - return result - - @staticmethod - def __remove_duplicate_has_phenotype_relations(tx): - result = tx.run( - """ - MATCH (a)-[r:has_phenotype]->(b) - WITH a, b, TAIL (COLLECT (r)) as rr - WHERE size(rr)>0 - FOREACH (r IN rr | DELETE r) - """ - ) - return result - - @staticmethod - def __count_has_phenotype_relation(tx, relation): - result = tx.run( - """ - MATCH p = (a {rtx_name:$relation.d_id})-[r:has_phenotype]->(b {rtx_name:$relation.p_id}) - RETURN count(p) - """, - relation=relation - ) - return result.single()['count(p)'] - - @staticmethod - def __remove_duplicated_react_nodes(tx): - result = tx.run( - """ - MATCH (n), (m) - WHERE n<>m AND n.id=m.id AND split(n.rtx_name, ':')[0] = 'REACT' - DELETE n - """ - ) - return result - - @staticmethod - def __count_duplicated_nodes(tx): - result = tx.run( - """ - MATCH (n), (m) - WHERE n<>m AND n.id=m.id return count(*) - """, - ) - return result.single()['count(*)'] - - @staticmethod - def _get_relationship(tx, r_type, s_id, t_id): - result = tx.run("MATCH p=()-[r:%s]->() where r.source_node_uuid= '%s' and r.target_node_uuid='%s' RETURN r" % - (r_type, s_id, t_id)) - return result.single() diff --git a/code/reasoningtool/kg-construction/NormGoogleDistance.py b/code/reasoningtool/kg-construction/NormGoogleDistance.py index 9746ac6ed..b07cf9a04 100644 --- a/code/reasoningtool/kg-construction/NormGoogleDistance.py +++ b/code/reasoningtool/kg-construction/NormGoogleDistance.py @@ -14,7 +14,6 @@ from cache_control_helper import CacheControlHelper from QueryNCBIeUtils import QueryNCBIeUtils -from QueryDisont import QueryDisont # DOID -> MeSH from QueryEBIOLS import QueryEBIOLS # UBERON -> MeSH from QueryMyChem import QueryMyChem import sqlite3 @@ -94,7 +93,7 @@ def get_mesh_term_for_all(curie_id, description): "UBERON" + "CL" - not supposed to be here? "NCBIGene" + - "DOID" + + "DOID" - "OMIM" + "ChEMBL" + """ @@ -148,18 +147,10 @@ def get_mesh_term_for_all(curie_id, description): elif curie_list[0] == "NCBIGene": gene_id = curie_id.split(':')[1] names = QueryNCBIeUtils.get_pubmed_from_ncbi_gene(gene_id) + elif curie_list[0] == "MONDO": + names = list(QueryEBIOLS.get_mesh_id_for_mondo_id(curie_id)) elif curie_list[0] == "DOID": - mesh_id = QueryDisont.query_disont_to_mesh_id(curie_id) - names = [] - for uid in mesh_id: - uid_num = int(uid[1:]) + 68000000 - name = QueryNCBIeUtils.get_mesh_terms_for_mesh_uid(uid_num) - if name is not None: - names += name - if len(names) == 0: - names = None - else: - names[0] = names[0] + '[MeSH Terms]' + raise ValueError(f"NormGoogleDistance.py is now unable to obtain a MeSH ID from a DOID {curie_id}") elif curie_list[0] == "OMIM": names = QueryNCBIeUtils.get_mesh_terms_for_omim_id(curie_list[1]) elif curie_list[0] == "ChEMBL": @@ -176,44 +167,6 @@ def get_mesh_term_for_all(curie_id, description): return names return [description.replace(';', '|')] - @staticmethod - # @CachedMethods.register - def get_ngd_for_all(curie_id_list, description_list): - """ - Takes a list of currie ids and descriptions then calculates the normalized google distance for the set of nodes. - Params: - curie_id_list - a list of strings containing the curie ids of the nodes. Formatted : e.g. DOID:8398 - description_list - a list of strings containing the English names for the nodes - """ - assert len(curie_id_list) == len(description_list) - terms = [None] * len(curie_id_list) - for a in range(len(description_list)): - terms[a] = NormGoogleDistance.get_mesh_term_for_all(curie_id_list[a], description_list[a]) - if type(terms[a]) != list: - terms[a] = [terms[a]] - if len(terms[a]) == 0: - terms[a] = [description_list[a]] - if len(terms[a]) > 30: - terms[a] = terms[a][:30] - terms_combined = [''] * len(terms) - mesh_flags = [True] * len(terms) - for a in range(len(terms)): - if len(terms[a]) > 1: - if not terms[a][0].endswith('[uid]'): - for b in range(len(terms[a])): - if QueryNCBIeUtils.is_mesh_term(terms[a][b]) and not terms[a][b].endswith('[MeSH Terms]'): - terms[a][b] += '[MeSH Terms]' - terms_combined[a] = '|'.join(terms[a]) - mesh_flags[a] = False - else: - terms_combined[a] = terms[a][0] - if terms[a][0].endswith('[MeSH Terms]'): - terms_combined[a] = terms[a][0][:-12] - elif not QueryNCBIeUtils.is_mesh_term(terms[a][0]): - mesh_flags[a] = False - ngd = QueryNCBIeUtils.multi_normalized_google_distance(terms_combined, mesh_flags) - return ngd - @staticmethod def api_ngd(mesh_term1, mesh_term2): response = {} diff --git a/code/reasoningtool/kg-construction/QueryChEMBL.py b/code/reasoningtool/kg-construction/QueryChEMBL.py deleted file mode 100644 index c87de8056..000000000 --- a/code/reasoningtool/kg-construction/QueryChEMBL.py +++ /dev/null @@ -1,202 +0,0 @@ -''' Queries the ChEMBL database to find target proteins for drugs. -''' - -__author__ = 'Stephen Ramsey' -__copyright__ = 'Oregon State University' -__credits__ = ['Stephen Ramsey'] -__license__ = 'MIT' -__version__ = '0.1.0' -__maintainer__ = '' -__email__ = '' -__status__ = 'Prototype' - -import urllib -# import requests -# import requests_cache -import sys -from cache_control_helper import CacheControlHelper - -from QueryUniprot import QueryUniprot - - -class QueryChEMBL: - API_BASE_URL = 'https://www.ebi.ac.uk/chembl/api/data' - TIMEOUT_SEC = 120 - - @staticmethod - def send_query_get(handler, url_suffix): - - requests = CacheControlHelper() - url = QueryChEMBL.API_BASE_URL + '/' + handler + '?' + url_suffix -# print(url) - try: - res = requests.get(url, - timeout=QueryChEMBL.TIMEOUT_SEC) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryChEMBL for URL: ' + url, file=sys.stderr) - return None - except KeyboardInterrupt: - sys.exit(0) - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryChEMBL for URL: %s' % (e, url), file=sys.stderr) - return None - status_code = res.status_code - if status_code != 200: - print(url, file=sys.stderr) - print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr) - return None - return res.json() - - @staticmethod - def get_chembl_ids_for_drug(drug_name): - if not isinstance(drug_name, str): - return set() - - drug_name_safe = urllib.parse.quote(drug_name, safe='') - res = QueryChEMBL.send_query_get(handler='compound_record.json', - url_suffix='compound_name__iexact=' + drug_name_safe) - res_chembl_set = set() - if res is not None: - compound_records = res.get('compound_records', None) - if compound_records is not None: - for compound_record in compound_records: - chembl_id = compound_record.get('molecule_chembl_id', None) - if chembl_id is not None: - res_chembl_set.add(chembl_id) - return res_chembl_set - - @staticmethod - def get_target_uniprot_ids_for_chembl_id(chembl_id): - print(chembl_id, file=sys.stderr) - if not isinstance(chembl_id, str): - return dict() - - res_targets_dict = dict() - - target_mechanisms_json = QueryChEMBL.get_mechanisms_for_chembl_id(chembl_id) - for target_mechanism in target_mechanisms_json: - target_chembl_id = target_mechanism.get("target_chembl_id", None) - if target_chembl_id is not None: - target_uniprot_ids = QueryChEMBL.map_chembl_target_to_uniprot_ids(target_chembl_id) - for target_uniprot_id in target_uniprot_ids: - res_targets_dict[target_uniprot_id] = float(1.0) - - res = QueryChEMBL.send_query_get(handler='target_prediction.json', - url_suffix='molecule_chembl_id__exact=' + chembl_id + '&target_organism__exact=Homo%20sapiens') - if res is not None: - target_predictions_list = res.get('target_predictions', None) - if target_predictions_list is not None: - for target_prediction in target_predictions_list: - # print(target_prediction) - target_uniprot_id = target_prediction.get('target_accession', None) - target_probability = target_prediction.get('probability', None) - if target_uniprot_id is not None: - target_organism = target_prediction.get('target_organism', None) - if target_organism is not None: - assert target_organism == "Homo sapiens" - # need to get the gene ID for this Uniprot ID - if target_uniprot_id not in res_targets_dict: - res_targets_dict[target_uniprot_id] = float(target_probability) - - return res_targets_dict - - @staticmethod - def map_chembl_target_to_uniprot_ids(target_chembl_id): - res_json = QueryChEMBL.send_query_get(handler="target.json", - url_suffix="target_chembl_id=" + target_chembl_id) - res_set = set() -# print(res_json) - if res_json is not None: - targets = res_json.get("targets", None) - if targets is not None and len(targets) > 0: - for target in targets: - components = target.get("target_components", None) - if components is not None: - for component in components: - xrefs = component.get("target_component_xrefs", None) - if xrefs is not None: - for xref in xrefs: - if xref is not None: - xref_src_db = xref.get("xref_src_db", None) - if xref_src_db is not None: - if xref_src_db == "UniProt": - uniprot_id = xref.get("xref_id", None) - if uniprot_id is not None: - uniprot_id_citeable = QueryUniprot.get_citeable_accession_for_accession(uniprot_id) - if uniprot_id_citeable is not None: - res_set |= set([uniprot_id_citeable]) - return res_set - - @staticmethod - def get_target_uniprot_ids_for_drug(drug_name): - if not isinstance(drug_name, str): - return dict() - - chembl_ids_for_drug = QueryChEMBL.get_chembl_ids_for_drug(drug_name) - res_uniprot_ids = dict() - for chembl_id in chembl_ids_for_drug: - # print(chembl_id) - uniprot_ids_dict = QueryChEMBL.get_target_uniprot_ids_for_chembl_id(chembl_id) - for uniprot_id in uniprot_ids_dict.keys(): - res_uniprot_ids[uniprot_id] = uniprot_ids_dict[uniprot_id] - return res_uniprot_ids - - @staticmethod - def get_mechanisms_for_chembl_id(chembl_id): - """Retrieves mechanism of action and target of each drug. - - Args: - chembl_id (str): a ChEMBL id, e.g., "CHEMBL521" - - Returns: - array: an array of mechanism of actions, or [] if no mechanism data could be obtained for the given - ChEMBL ID - - example: - [ - {"action_type": "INHIBITOR", - "binding_site_comment": null, - "direct_interaction": true, - "disease_efficacy": true, - "max_phase": 4, - "mec_id": 1180, - "mechanism_comment": null, - "mechanism_of_action": "Cyclooxygenase inhibitor", - "mechanism_refs": [ - {"ref_id": "0443-059748 PP. 229", - "ref_type": "ISBN", - "ref_url": "http://www.isbnsearch.org/isbn/0443059748" - }, - {"ref_id": "Ibuprofen", - "ref_type": "Wikipedia", - "ref_url": "http://en.wikipedia.org/wiki/Ibuprofen"} - ], - "molecular_mechanism": true, - "molecule_chembl_id": "CHEMBL521", - "record_id": 1343587, - "selectivity_comment": null, - "site_id": null, - "target_chembl_id": "CHEMBL2094253"} - ] - """ - if not isinstance(chembl_id, str): - return [] - - res = QueryChEMBL.send_query_get(handler='mechanism.json', - url_suffix='molecule_chembl_id=' + chembl_id) - res_mechanisms_array = [] - if res is not None: - mechanism_records = res.get('mechanisms', None) - if mechanism_records is not None and len(mechanism_records) > 0: - res_mechanisms_array = mechanism_records - return res_mechanisms_array - - -if __name__ == '__main__': - print(QueryChEMBL.get_target_uniprot_ids_for_chembl_id('CHEMBL521')) - print(QueryChEMBL.get_target_uniprot_ids_for_chembl_id('CHEMBL2364648')) -# print(QueryChEMBL.get_mechanisms_for_chembl_id("CHEMBL521")) -# print(QueryChEMBL.map_chembl_target_to_uniprot_ids("CHEMBL2094253")) -# print(QueryChEMBL.get_mechanisms_for_chembl_id("CHEMBL521")) diff --git a/code/reasoningtool/kg-construction/QueryDisont.py b/code/reasoningtool/kg-construction/QueryDisont.py deleted file mode 100644 index 5db49c02d..000000000 --- a/code/reasoningtool/kg-construction/QueryDisont.py +++ /dev/null @@ -1,114 +0,0 @@ -""" This module is the definition of class QueryDisont. It is written to connect - with disease-ontology to query disease ontology and mesh id of given disont_id. -""" - -__author__ = "" -__copyright__ = "" -__credits__ = [] -__license__ = "" -__version__ = "" -__maintainer__ = "" -__email__ = "" -__status__ = "Prototype" - -# import requests -import sys - -from cache_control_helper import CacheControlHelper - -class QueryDisont: - TIMEOUT_SEC = 120 - API_BASE_URL = 'http://www.disease-ontology.org/api' - - @staticmethod - def send_query_get(handler, url_suffix): - - requests = CacheControlHelper() - url = QueryDisont.API_BASE_URL + "/" + handler + "/" + url_suffix -# print(url_str) - try: - res = requests.get(url, timeout=QueryDisont.TIMEOUT_SEC) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryDisont for URL: ' + url, file=sys.stderr) - return None - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryDisont for URL: %s' % (e, url), file=sys.stderr) - return None - - status_code = res.status_code - if status_code != 200: - print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr) - return None - return res - - @staticmethod - def query_disont_to_child_disonts(disont_id): - """for a disease ontology ID (including prefix "DOID:", with zero padding), return child DOIDs - - :param disont_id: string, like ``'DOID:14069'`` - :returns: ``set`` with keys as DOIDs - """ - res = QueryDisont.send_query_get('metadata', disont_id) - ret_set = set() - if res is not None: - res_json = res.json() -# print(res_json) - disease_children_list = res_json.get("children", None) - if disease_children_list is not None: - ret_set |= set([int(disease_child_list[1].split(':')[1]) for disease_child_list in disease_children_list]) - return ret_set - - @staticmethod - def query_disont_to_label(disont_id): - res = QueryDisont.send_query_get('metadata', disont_id) - ret_label = '' - if res is not None: - res_json = res.json() - ret_label = res_json.get('name', '') - return ret_label - - @staticmethod - def query_disont_to_child_disonts_desc(disont_id): - """for a disease ontology ID (including prefix "DOID:", with zero padding), return child DOIDs - - :param disont_id: string, like ``'DOID:14069'`` - :returns: ``dict`` with keys as DOIDs and values as human-readable disease names - """ - - res = QueryDisont.send_query_get('metadata', disont_id) - ret_dict = dict() - if res is not None: - res_json = res.json() -# print(res_json) - disease_children_list = res_json.get("children", None) - if disease_children_list is not None: - ret_dict = dict([[disease_child_list[1], disease_child_list[0]] for disease_child_list in disease_children_list]) - return ret_dict - - @staticmethod - def query_disont_to_mesh_id(disont_id): - """convert a disease ontology ID (including prefix "DOID:", with zero padding) to MeSH ID - - :param disont_id: string, like ``'DOID:14069'`` - """ - res = QueryDisont.send_query_get('metadata', disont_id) - ret_set = set() - if res is not None: - res_json = res.json() - xref_strs = res_json.get("xrefs", None) - if xref_strs is not None: - ret_set |= set([xref_str.split('MESH:')[1] for xref_str in xref_strs if 'MESH:' in xref_str]) - return ret_set - -if __name__ == '__main__': - print(QueryDisont.query_disont_to_label("DOID:0050741")) - print(QueryDisont.query_disont_to_mesh_id("DOID:9352")) - print(QueryDisont.query_disont_to_mesh_id("DOID:1837")) - print(QueryDisont.query_disont_to_mesh_id("DOID:10182")) - print(QueryDisont.query_disont_to_mesh_id("DOID:11712")) - print(QueryDisont.query_disont_to_child_disonts_desc("DOID:9352")) - print(QueryDisont.query_disont_to_mesh_id("DOID:14069")) - print(QueryDisont.query_disont_to_child_disonts_desc("DOID:12365")) - print(QueryDisont.query_disont_to_mesh_id("DOID:0050741")) diff --git a/code/reasoningtool/kg-construction/QueryEBIOLS.py b/code/reasoningtool/kg-construction/QueryEBIOLS.py index 65201e3f2..20c499bab 100644 --- a/code/reasoningtool/kg-construction/QueryEBIOLS.py +++ b/code/reasoningtool/kg-construction/QueryEBIOLS.py @@ -19,7 +19,7 @@ class QueryEBIOLS: TIMEOUT_SEC = 120 - API_BASE_URL = "https://www.ebi.ac.uk/ols/api/ontologies" + API_BASE_URL = "https://www.ebi.ac.uk/ols4/api/ontologies" HANDLER_MAP = { 'get_anatomy': '{ontology}/terms/{id}', 'get_phenotype': '{ontology}/terms/{id}', @@ -65,7 +65,7 @@ def get_bto_term_for_bto_id(bto_curie_id): """ bto_iri = "http://purl.obolibrary.org/obo/" + bto_curie_id.replace(":", "_") bto_iri_double_encoded = urllib.parse.quote_plus(urllib.parse.quote_plus(bto_iri)) - res = QueryEBIOLS.send_query_get("bto/terms/", bto_iri_double_encoded) + res = QueryEBIOLS.send_query_get("bto/terms", bto_iri_double_encoded) ret_label = None if res is not None: res_json = res.json() @@ -81,7 +81,7 @@ def get_bto_id_for_uberon_id(uberon_curie_id): """ uberon_iri = "http://purl.obolibrary.org/obo/" + uberon_curie_id.replace(":", "_") uberon_iri_double_encoded = urllib.parse.quote_plus(urllib.parse.quote_plus(uberon_iri)) - res = QueryEBIOLS.send_query_get("uberon/terms/", uberon_iri_double_encoded) + res = QueryEBIOLS.send_query_get("uberon/terms", uberon_iri_double_encoded) ret_list = list() if res is not None: res_json = res.json() @@ -101,7 +101,7 @@ def get_mesh_id_for_uberon_id(uberon_curie_id): """ uberon_iri = "http://purl.obolibrary.org/obo/" + uberon_curie_id.replace(":", "_") uberon_iri_double_encoded = urllib.parse.quote_plus(urllib.parse.quote_plus(uberon_iri)) - res = QueryEBIOLS.send_query_get("uberon/terms/", uberon_iri_double_encoded) + res = QueryEBIOLS.send_query_get("uberon/terms", uberon_iri_double_encoded) ret_list = list() if res is not None: res_json = res.json() @@ -178,29 +178,30 @@ def get_cellular_component_description(cc_id): def get_molecular_function_description(mf_id): return QueryEBIOLS.__get_entity("get_molecular_function", mf_id) + @staticmethod + def get_mesh_id_for_mondo_id(mondo_curie_id): + """ + Converts a disease MONDO ID to MeSH id + :param mondo_curie_id: eg. "MONDO:0005148" + :return: a set of MeSH id's (eg. {"MESH:D003924"}) + """ + mondo_iri = "http://purl.obolibrary.org/obo/" + mondo_curie_id.replace(":", "_") + mondo_iri_double_encoded = urllib.parse.quote_plus(urllib.parse.quote_plus(mondo_iri)) + res = QueryEBIOLS.send_query_get("mondo/terms", mondo_iri_double_encoded) + ret_list = list() + if res is not None: + res_json = res.json() + res_annotation = res_json.get("annotation", None) + if res_annotation is not None: + db_x_refs = res_annotation.get("database_cross_reference", None) + if db_x_refs is not None: + ret_list = [mesh_id for mesh_id in db_x_refs if "MESH:" in mesh_id] + return set(ret_list) + if __name__ == "__main__": print(QueryEBIOLS.get_bto_id_for_uberon_id("UBERON:0000178")) print(QueryEBIOLS.get_bto_term_for_bto_id("BTO:0000089")) print(QueryEBIOLS.get_mesh_id_for_uberon_id("UBERON:0002107")) print(QueryEBIOLS.get_mesh_id_for_uberon_id("UBERON:0001162")) - - def save_to_test_file(key, value): - f = open('tests/query_desc_test_data.json', 'r+') - try: - json_data = json.load(f) - except ValueError: - json_data = {} - f.seek(0) - f.truncate() - json_data[key] = value - json.dump(json_data, f) - f.close() - - save_to_test_file('UBERON:0004476', QueryEBIOLS.get_anatomy_description('UBERON:0004476')) - save_to_test_file('CL:0000038', QueryEBIOLS.get_anatomy_description('CL:0000038')) - save_to_test_file('GO:0042535', QueryEBIOLS.get_bio_process_description('GO:0042535')) - save_to_test_file('HP:0011105', QueryEBIOLS.get_phenotype_description('HP:0011105')) - save_to_test_file('GO:0005573', QueryEBIOLS.get_cellular_component_description('GO:0005573')) - save_to_test_file('GO:0004689', QueryEBIOLS.get_molecular_function_description('GO:0004689')) - save_to_test_file('OMIM:604348', QueryEBIOLS.get_disease_description('OMIM:604348')) + print(QueryEBIOLS.get_mesh_id_for_mondo_id("MONDO:0005148")) diff --git a/code/reasoningtool/kg-construction/QueryMyChem.py b/code/reasoningtool/kg-construction/QueryMyChem.py index 1b3aecede..3693c42d5 100644 --- a/code/reasoningtool/kg-construction/QueryMyChem.py +++ b/code/reasoningtool/kg-construction/QueryMyChem.py @@ -21,9 +21,6 @@ import sys import json -from QueryPubChem import QueryPubChem - - class QueryMyChem: TIMEOUT_SEC = 120 API_BASE_URL = 'http://mychem.info/v1' @@ -201,7 +198,6 @@ def get_meddra_codes_for_side_effects(chembl_id): return meddra_code_set if chembl_id[:7].upper() == "CHEMBL:": chembl_id = "CHEMBL" + chembl_id[7:] - # pubchem_id = QueryPubChem.get_pubchem_id_for_chembl_id(chembl_id) pubchem_id = QueryMyChem.get_pubchem_cid(chembl_id) if pubchem_id is None: return meddra_code_set diff --git a/code/reasoningtool/kg-construction/QueryMyGene.py b/code/reasoningtool/kg-construction/QueryMyGene.py deleted file mode 100644 index f96c42352..000000000 --- a/code/reasoningtool/kg-construction/QueryMyGene.py +++ /dev/null @@ -1,530 +0,0 @@ -""" This module defines the class QueryMyGene. -QueryMyGene is written to query gene annotation information via python package -mygene. It can convert among gene symbol, uniprot id, entrez gene id, mirbase id. -""" - -__author__ = "" -__copyright__ = "" -__credits__ = [] -__license__ = "" -__version__ = "" -__maintainer__ = "" -__email__ = "" -__status__ = "Prototype" - -# import mygene -import sys -# import requests -import json -# import requests_cache - -from cache_control_helper import CacheControlHelper - - -class QueryMyGene: - def __init__(self, debug=False): - # self.mygene_obj = mygene.MyGeneInfo() - self.debug = debug - - ONT_NAME_TO_SIMPLE_NODE_TYPE = {'BP': 'biological_process', - 'MF': 'molecular_function', - 'CC': 'cellular_component'} - - TIMEOUT_SEC = 120 - API_BASE_URL = 'http://mygene.info/v3' - HANDLER_MAP = { - 'query': 'query', - 'gene': 'gene' - } - - @staticmethod - def __access_api(handler, url_suffix, params=None, return_raw=False): - - requests = CacheControlHelper() - if url_suffix: - url = QueryMyGene.API_BASE_URL + '/' + handler + '?' + url_suffix - else: - url = QueryMyGene.API_BASE_URL + '/' + handler - headers = {'user-agent': "mygene.py/%s python-requests/%s" % ("1.0.0", "1.0.0"), 'Accept': 'application/json'} - try: - res = requests.get(url, params=params, timeout=QueryMyGene.TIMEOUT_SEC, headers=headers) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryMyGene for URL: ' + url, file=sys.stderr) - return None - except KeyboardInterrupt: - sys.exit(0) - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryMyGene for URL: %s' % (e, url), file=sys.stderr) - return None - status_code = res.status_code - if status_code != 200: - print(url, file=sys.stderr) - print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr) - return None - if return_raw: - return res.text - else: - return res.json() - - @staticmethod - def unnest(lst, skip_type): - """ - To unnest a list like `["foo", ["bar", "baz"]]` to `["foo", "bar", "baz"]`. - Elements of `skip_type` will be leaf as is. - """ - def generate_elements(lst, skip_type): - for e in lst: - if isinstance(e, skip_type): - yield e - else: - yield from e - - return list(generate_elements(lst, skip_type)) - - def convert_gene_symbol_to_uniprot_id(self, gene_symbol): - # try: - # res = self.mygene_obj.query('symbol:' + gene_symbol, species='human', - # fields='uniprot', verbose=False) - # except requests.exceptions.HTTPError: - # print('HTTP error for querying gene symbol to uniprot in mygene: ' + gene_symbol, file=sys.stderr) - # res = None - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=symbol:" + gene_symbol + "&species=human&fields=uniprot" - res = QueryMyGene.__access_api(handler, url_suffix) - - uniprot_ids_set = set() - if res is not None and len(res) > 0: - uniprot_ids_list = [] - for hit in res['hits']: - uniprot_hit = hit.get("uniprot", None) - if uniprot_hit is not None: - uniprot_id = uniprot_hit.get("Swiss-Prot", None) - if uniprot_id is not None: - uniprot_ids_list.append(uniprot_id) - else: - if self.debug: - print("Could not find Uniprot ID for gene symbol: " + gene_symbol) - uniprot_ids_list = QueryMyGene.unnest(uniprot_ids_list, str) - uniprot_ids_set = set(uniprot_ids_list) - return uniprot_ids_set - - def convert_uniprot_id_to_gene_symbol(self, uniprot_id): - # try: - # res = self.mygene_obj.query('uniprot:' + uniprot_id, species='human', - # fields='symbol', verbose=False) - # except requests.exceptions.HTTPError: - # print('HTTP error for querying uniprot to gene symbol mygene: ' + uniprot_id, file=sys.stderr) - # res = None - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=uniprot:" + uniprot_id + "&species=human&fields=symbol" - res = QueryMyGene.__access_api(handler, url_suffix) - - gene_symbol = set() - if res is not None and len(res) > 0: - res_hits = res.get('hits', None) - if res_hits is not None: - gene_symbol = set([hit['symbol'] for hit in res_hits]) - else: - print("QueryMyGene.convert_uniprot_id_to_gene_symbol: no \'hits\' result data for uniprot_id: " + uniprot_id, file=sys.stderr) - gene_symbol = set([hit["symbol"] for hit in res_hits]) - return gene_symbol - - def convert_uniprot_id_to_entrez_gene_ID(self, uniprot_id): - # requests = CacheControlHelper() - # try: - # res = self.mygene_obj.query('uniprot:' + uniprot_id, species='human', - # fields='entrezgene', verbose=False) - # except requests.exceptions.HTTPError: - # print('HTTP error for querying uniprot-to-entrezgene in mygene: ' + uniprot_id, file=sys.stderr) - # res = None - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=uniprot:" + uniprot_id + "&species=human&fields=entrezgene" - res = QueryMyGene.__access_api(handler, url_suffix) - - entrez_ids = set() - if res is not None and len(res) > 0: - res_hits = res.get('hits', None) - if res_hits is not None: - for hit in res_hits: - entrez_id = hit.get('entrezgene', None) - if entrez_id is not None: - entrez_ids.add(entrez_id) -# entrez_ids = set([hit["entrezgene"] for hit in res_hits]) - else: - print("QueryMyGene.convert_uniprot_id_to_entrez_gene_ID: no \'hits\' result data for uniprot_id: " + uniprot_id, file=sys.stderr) - return entrez_ids - - def convert_hgnc_gene_id_to_uniprot_id(self, hgnc_id): - uniprot_ids = set() - - # requests = CacheControlHelper() - # try: - # res = self.mygene_obj.query(hgnc_id, species='human', - # fields='uniprot', verbose=False) - # except requests.exceptions.HTTPError: - # print("HTTP error in mygene_obj.query for query string: " + hgnc_id, file=sys.stderr) - # return uniprot_ids - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=" + hgnc_id + "&species=human&fields=uniprot" - res = QueryMyGene.__access_api(handler, url_suffix) - - if res is not None and len(res) > 0: - for hit in res['hits']: - uniprot_id_dict = hit.get('uniprot', None) - if uniprot_id_dict is not None: - uniprot_id = uniprot_id_dict.get('Swiss-Prot', None) - if uniprot_id is not None: - if type(uniprot_id) == str: - uniprot_ids.add(uniprot_id) - else: - uniprot_ids.union(uniprot_id) - return uniprot_ids - - def convert_gene_symbol_to_entrez_gene_ID(self, gene_symbol): - entrez_ids = set() - - # requests = CacheControlHelper() - # try: - # res = self.mygene_obj.query('symbol:' + gene_symbol, species='human', - # fields='entrezgene', verbose=False) - # except requests.exceptions.HTTPError: - # print("HTTP error in mygene_obj.query for query string: " + gene_symbol, file=sys.stderr) - # return entrez_ids - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=symbol:" + gene_symbol + "&species=human&fields=entrezgene" - res = QueryMyGene.__access_api(handler, url_suffix) - - if res is not None and len(res) > 0: - entrez_ids = set() - for hit in res['hits']: - entrez_id = hit.get('entrezgene', None) - if entrez_id is not None: - entrez_ids.add(entrez_id) - return entrez_ids - - def convert_entrez_gene_id_to_uniprot_id(self, entrez_gene_id): - assert type(entrez_gene_id) == int - uniprot_id = set() - - # requests = CacheControlHelper() - # try: - # res = self.mygene_obj.query('entrezgene:' + str(entrez_gene_id), species='human', fields='uniprot', verbose=False) - # except requests.exceptions.HTTPError: - # print("HTTP error in mygene_obj.query for query string: " + entrez_gene_id, file=sys.stderr) - # return uniprot_id - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=entrezgene:" + str(entrez_gene_id) + "&species=human&fields=uniprot" - res = QueryMyGene.__access_api(handler, url_suffix) - - if res is not None and len(res) > 0: - res_hits = res.get("hits", None) - if res_hits is not None and type(res_hits) == list: - for hit in res_hits: - res_uniprot_id_dict = hit.get("uniprot", None) - if res_uniprot_id_dict is not None: - res_uniprot_id = res_uniprot_id_dict.get("Swiss-Prot", None) - if res_uniprot_id is not None: - if type(res_uniprot_id) == str: - uniprot_id.add(res_uniprot_id) - else: - if type(res_uniprot_id) == list: - for uniprot_id_item in res_uniprot_id: - uniprot_id.add(uniprot_id_item) - return uniprot_id - - def convert_entrez_gene_ID_to_mirbase_ID(self, entrez_gene_id): - assert type(entrez_gene_id) == int - mirbase_id = set() - - # requests = CacheControlHelper() - # try: - # res = self.mygene_obj.query('entrezgene:' + str(entrez_gene_id), species='human', fields='miRBase', verbose=False) - # except requests.exceptions.HTTPError: - # print("HTTP error in mygene_obj.query for query string: " + entrez_gene_id, file=sys.stderr) - # return mirbase_id - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=entrezgene:" + str(entrez_gene_id) + "&species=human&fields=miRBase" - res = QueryMyGene.__access_api(handler, url_suffix) - - if res is not None and len(res) > 0: - res_hits = res.get("hits", None) - if res_hits is not None and type(res_hits) == list: - for hit in res_hits: - res_mirbase_id = hit.get("miRBase", None) - if res_mirbase_id is not None: - mirbase_id.add(res_mirbase_id) - else: - print("QueryMyGene.convert_entrez_gene_ID_to_mirbase_ID result missing miRBase field where it was expected; Entrez Gene ID: " + - str(entrez_gene_id), file=sys.stderr) - return mirbase_id - - def get_gene_ontology_ids_bp_for_uniprot_id(self, uniprot_id): - assert type(uniprot_id) == str - res = dict() - - # requests = CacheControlHelper() - # try: - # q_res = self.mygene_obj.query('uniprot:' + uniprot_id, species='human', fields='go', verbose=False) - # except requests.exceptions.HTTPError: - # print("HTTP error in mygene_obj.query for query string: " + uniprot_id, file=sys.stderr) - # return res - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=uniprot:" + uniprot_id + "&species=human&fields=go" - q_res = QueryMyGene.__access_api(handler, url_suffix) - - if q_res is None: - return res - - q_res_hits = q_res.get('hits', None) - if q_res_hits is not None: - if type(q_res_hits) == list and len(q_res_hits) > 0: - for q_res_hit in q_res_hits: - if type(q_res_hit) == dict: - q_res_go = q_res_hit.get('go', None) - if q_res_go is not None: - q_res_bp = q_res_go.get('BP', None) - if q_res_bp is not None: - if type(q_res_bp) == list and len(q_res_bp) > 0: - res_add = {item["id"]: item["term"] for item in q_res_bp} - res.update(res_add) - return res - - def get_gene_ontology_ids_for_uniprot_id(self, uniprot_id): - assert type(uniprot_id) == str - res = dict() - - # requests = CacheControlHelper() - # try: - # q_res = self.mygene_obj.query('uniprot:' + uniprot_id, species='human', fields='go', verbose=False) - # except requests.exceptions.HTTPError: - # print("HTTP error in mygene_obj.query for query string: " + uniprot_id, file=sys.stderr) - # return res - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=uniprot:" + uniprot_id + "&species=human&fields=go" - q_res = QueryMyGene.__access_api(handler, url_suffix) - - if q_res is None: - return res - - q_res_hits = q_res.get('hits', None) - if q_res_hits is not None: - if type(q_res_hits) == list and len(q_res_hits) > 0: - for q_res_hit in q_res_hits: - if type(q_res_hit) == dict: - q_res_go = q_res_hit.get('go', None) - if q_res_go is not None: - for ont_name, ont_dict_list in q_res_go.items(): - ont_name_simple_node_type = self.ONT_NAME_TO_SIMPLE_NODE_TYPE[ont_name] - for ont_dict in ont_dict_list: - if type(ont_dict) == dict: - term = ont_dict.get('term', None) - id = ont_dict.get('id', None) - res.update({id: {'term': term, - 'ont': ont_name_simple_node_type}}) - return res - - def get_gene_ontology_ids_bp_for_entrez_gene_id(self, entrez_gene_id): - res = dict() - assert type(entrez_gene_id) == int - # q_res = self.mygene_obj.query('entrezgene:' + str(entrez_gene_id), species='human', fields='go', verbose=False) - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=entrezgene:" + str(entrez_gene_id) + "&species=human&fields=go" - q_res = QueryMyGene.__access_api(handler, url_suffix) - - if q_res is None: - return res - - q_res_hits = q_res.get('hits', None) - if q_res_hits is not None: - if type(q_res_hits) == list and len(q_res_hits) > 0: - for q_res_hit in q_res_hits: - if type(q_res_hit) == dict: - q_res_go = q_res_hit.get('go', None) - if q_res_go is not None: - q_res_bp = q_res_go.get('BP', None) - if q_res_bp is not None: - if type(q_res_bp) == list and len(q_res_bp) > 0: - res_add = {item["id"]: item["term"] for item in q_res_bp} - res.update(res_add) - return res - - def uniprot_id_is_human(self, uniprot_id_str): - # res_json = self.mygene_obj.query("uniprot:" + uniprot_id_str, species="human", verbose=False) - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=uniprot:" + uniprot_id_str + "&species=human" - res_json = QueryMyGene.__access_api(handler, url_suffix) - - if res_json is None: - return False - - hits = res_json.get("hits", None) - return hits is not None and len(hits) > 0 - - def get_cui(self, gene_id): - if gene_id.startswith('NCBIGene'): - gene_id = int(gene_id.split(':')[1]) - # res = self.mygene_obj.getgene(gene_id, fields='umls', verbose=False) - - handler = QueryMyGene.HANDLER_MAP['gene'] + '/' + str(gene_id) - url_suffix = 'fields=umls' - res = QueryMyGene.__access_api(handler, url_suffix) - - if res is not None: - cui_res = res.get('umls', None) - else: - cui_res = None - cuis = None - if cui_res is not None: - cuis = [cui_res['cui']] - return cuis - elif gene_id.startswith('UniProt'): - uni_id = 'uniprot:' + gene_id.split(':')[1] - # res = self.mygene_obj.query(uni_id, fields='umls', verbose=False) - - handler = QueryMyGene.HANDLER_MAP['query'] - url_suffix = "q=" + uni_id + "&fields=umls" - res = QueryMyGene.__access_api(handler, url_suffix) - - if res is not None: - cuis = [] - if 'hits' in res.keys(): - for hit in res['hits']: - if 'umls' in hit.keys(): - cuis.append(hit['umls']['cui']) - if len(cuis) > 0: - return cuis - else: - return None - return None - - @staticmethod - def get_protein_entity(protein_id): - # mg = mygene.MyGeneInfo() - # results = str(mg.query(protein_id.replace('UniProtKB', 'UniProt'), fields='all', return_raw='True', verbose=False)) - - handler = QueryMyGene.HANDLER_MAP['query'] - # url_suffix = "q=" + protein_id.replace('UniProtKB', 'UniProt') + "&fields=all" - params = {'q': protein_id.replace('UniProtKB', 'UniProt'), 'fields': 'all'} - results = str(QueryMyGene.__access_api(handler, None, params=params, return_raw=True)) - - result_str = 'None' - if len(results) > 100: - json_dict = json.loads(results) - result_str = json.dumps(json_dict) - return result_str - - @staticmethod - def get_microRNA_entity(microrna_id): - # mg = mygene.MyGeneInfo() - # results = str(mg.query(microrna_id.replace('NCBIGene', 'entrezgene'), fields='all', return_raw='True', verbose=False)) - - handler = QueryMyGene.HANDLER_MAP['query'] - # url_suffix = "q=" + microrna_id.replace('NCBIGene', 'entrezgene') + "&fields=all" - params = {'q': microrna_id.replace('NCBIGene', 'entrezgene'), 'fields': 'all'} - results = str(QueryMyGene.__access_api(handler, None, params=params, return_raw=True)) - - result_str = 'None' - if len(results) > 100: - json_dict = json.loads(results) - result_str = json.dumps(json_dict) - return result_str - - def get_protein_desc(self, protein_id): - if not isinstance(protein_id, str): - return "None" - result_str = self.get_protein_entity(protein_id) - desc = "None" - if result_str != "None": - result_dict = json.loads(result_str) - if "hits" in result_dict.keys(): - if len(result_dict["hits"]) > 0: - if "summary" in result_dict["hits"][0].keys(): - desc = result_dict["hits"][0]["summary"] - return desc - - def get_microRNA_desc(self, microrna_id): - if not isinstance(microrna_id, str): - return "None" - result_str = self.get_microRNA_entity(microrna_id) - desc = "None" - if result_str != "None": - result_dict = json.loads(result_str) - if "hits" in result_dict.keys(): - if len(result_dict["hits"]) > 0: - if "summary" in result_dict["hits"][0].keys(): - desc = result_dict["hits"][0]["summary"] - return desc - - - def get_protein_name(self, protein_id): - if not isinstance(protein_id, str): - return "None" - result_str = self.get_protein_entity(protein_id) - name = "None" - if result_str != "None": - result_dict = json.loads(result_str) - if "hits" in result_dict.keys(): - if len(result_dict["hits"]) > 0: - if "name" in result_dict["hits"][0].keys(): - name = result_dict["hits"][0]["name"] - return name - -if __name__ == '__main__': - mg = QueryMyGene() - print(mg.convert_gene_symbol_to_uniprot_id('A2M')) - print(mg.convert_gene_symbol_to_uniprot_id('A1BG')) - print(mg.convert_gene_symbol_to_uniprot_id("HMOX1")) - print(mg.convert_gene_symbol_to_uniprot_id('RAD54B')) - print(mg.convert_gene_symbol_to_uniprot_id('NS2')) - print(mg.convert_uniprot_id_to_gene_symbol("P09601")) - print(mg.convert_uniprot_id_to_gene_symbol('Q05925')) - print(mg.convert_uniprot_id_to_gene_symbol('Q8NBZ7')) - print(mg.convert_uniprot_id_to_entrez_gene_ID("P09601")) - print(mg.convert_uniprot_id_to_entrez_gene_ID("XYZZY")) - print(mg.convert_hgnc_gene_id_to_uniprot_id('HGNC:4944')) - print(mg.convert_hgnc_gene_id_to_uniprot_id('HGNC:49440')) - print(mg.convert_gene_symbol_to_entrez_gene_ID('MIR96')) - print(mg.convert_entrez_gene_id_to_uniprot_id(9837)) - print(mg.convert_entrez_gene_ID_to_mirbase_ID(407053)) - print(mg.get_gene_ontology_ids_for_uniprot_id('Q05925')) - print(mg.get_gene_ontology_ids_bp_for_entrez_gene_id(406991)) - print(mg.uniprot_id_is_human("P02794")) - print(mg.uniprot_id_is_human("P10592")) - print(mg.get_cui("NCBIGene:100847086")) - print(mg.get_cui("UniProtKB:O60884")) - - - def save_to_test_file(filename, key, value): - f = open(filename, 'r+') - try: - json_data = json.load(f) - except ValueError: - json_data = {} - f.seek(0) - f.truncate() - json_data[key] = value - json.dump(json_data, f) - f.close() - - save_to_test_file('tests/query_test_data.json', 'UniProtKB:O60884', mg.get_protein_entity("UniProtKB:O60884")) - save_to_test_file('tests/query_test_data.json', 'NCBIGene:100847086', mg.get_microRNA_entity("NCBIGene:100847086")) - print(mg.get_protein_desc("UniProtKB:O60884")) - print(mg.get_protein_desc("UniProtKB:O608840")) - print(mg.get_microRNA_desc("NCBIGene:100847086")) - print(mg.get_microRNA_desc("NCBIGene:1008470860")) - - print(mg.get_protein_name("UniProtKB:P05231")) - print(mg.get_protein_name("UniProtKB:Q8IW03")) \ No newline at end of file diff --git a/code/reasoningtool/kg-construction/QueryPubChem.py b/code/reasoningtool/kg-construction/QueryPubChem.py deleted file mode 100644 index 06b537300..000000000 --- a/code/reasoningtool/kg-construction/QueryPubChem.py +++ /dev/null @@ -1,225 +0,0 @@ -__author__ = 'Stephen Ramsey' -__copyright__ = 'Oregon State University' -__credits__ = ['Stephen Ramsey', 'Finn Womack'] -__license__ = 'MIT' -__version__ = '0.1.0' -__maintainer__ = '' -__email__ = '' -__status__ = 'Prototype' - -import urllib -import pandas -# import requests -import sys -import time -import math -from io import StringIO -import re -import os -import CachedMethods -# import requests_cache -import json -from cache_control_helper import CacheControlHelper - - -class QueryPubChem: - API_BASE_URL = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug' - TIMEOUT_SEC = 120 - HANDLER_MAP = { - 'get_pubchem_cid': 'substance/sid/{sid}/JSON', - 'get_description_url': 'compound/cid/{cid}/description/JSON' - } - - @staticmethod - def __access_api(handler): - requests = CacheControlHelper() - url = QueryPubChem.API_BASE_URL + '/' + handler - # print(url) - try: - res = requests.get(url, timeout=QueryPubChem.TIMEOUT_SEC) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryPubChem for URL: ' + url, file=sys.stderr) - return None - except KeyboardInterrupt: - sys.exit(0) - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryPubChem for URL: %s' % (e, url), file=sys.stderr) - return None - status_code = res.status_code - if status_code != 200: - print(url, file=sys.stderr) - print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr) - return None - return res.json() - - @staticmethod - def send_query_get(handler, url_suffix): - requests = CacheControlHelper() - url = QueryPubChem.API_BASE_URL + '/' + handler + '/' + url_suffix - # print(url) - try: - res = requests.get(url, timeout=QueryPubChem.TIMEOUT_SEC) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryPubChem for URL: ' + url, file=sys.stderr) - return None - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryPubChem for URL: %s' % (e, url), file=sys.stderr) - return None - status_code = res.status_code - if status_code != 200: - print(url, file=sys.stderr) - print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr) - return None - return res.json() - - @staticmethod - def get_chembl_ids_for_drug(drug_name): - drug_name_safe = urllib.parse.quote(drug_name, safe='') - res = QueryPubChem.send_query_get(handler='compound/name', - url_suffix=drug_name_safe + '/synonyms/JSON') - res_chembl_set = set() - if res is not None: - information_list_dict = res.get('InformationList', None) - if information_list_dict is not None: - information_list = information_list_dict.get('Information', None) - if information_list is not None: - for information_dict in information_list: - synonyms = information_dict.get('Synonym', None) - if synonyms is not None: - for syn in synonyms: - if syn.startswith('CHEMBL'): - res_chembl_set.add(syn) - # res_chembl_set.add('ChEMBL:' + syn.replace('CHEMBL', '')) - return res_chembl_set - - # @staticmethod - # def test(): - # print(QueryPubChem.get_chembl_ids_for_drug('gne-493')) - # print(QueryChEMBL.get_target_uniprot_ids_for_drug('clothiapine')) - - @staticmethod - # @CachedMethods.register - def get_pubchem_id_for_chembl_id(chembl_id): - """This takes a chembl id and then looks up the corresponding pubchem id from a pre-generated .tsv - - NOTE: pubchem-chembl mappings .tsv generated using https://pubchem.ncbi.nlm.nih.gov/idexchange/idexchange.cgi - it took ~3 or so seconds to map all ids in the KG (2226 ids) and not all ids were successful (missed 204 terms -> ~91% success rate) - """ - dir_path = os.path.dirname(os.path.realpath(__file__)) - df = pandas.read_csv(dir_path + '/chemblMap.tsv', sep='\t', index_col=0, header=None) - try: - ans = df.loc[chembl_id].iloc[0] - except KeyError: - return None - if math.isnan(ans): - return None - else: - return str(int(ans)) - - @staticmethod - # @CachedMethods.register - def get_pubmed_id_for_pubchem_id(pubchem_id): - """ - This takes a PubChem id and then gets the PMIDs for articles on PubMed from PubChem which include this entity. - """ - if not isinstance(pubchem_id, str): - return None - - requests = CacheControlHelper() - url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/' + str(pubchem_id) + '/xrefs/PubMedID/JSON' - try: - r = requests.get(url, timeout=10) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryPubChem for URL: ' + url, file=sys.stderr) - return None - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryPubChem for URL: %s' % (e, url), file=sys.stderr) - return None - if r is not None: - if 'Fault' in r.json().keys(): - return None - else: - ans = [str(x) + '[uid]' for x in r.json()['InformationList']['Information'][0]['PubMedID']] - return ans - else: - return None - - @staticmethod - def get_pubchem_cid(pubchem_sid): - pubchem_cid = None - if not isinstance(pubchem_sid, str): - return pubchem_cid - handler = QueryPubChem.HANDLER_MAP['get_pubchem_cid'].format(sid=pubchem_sid) - res = QueryPubChem.__access_api(handler) - if res is not None: - if 'PC_Substances' in res.keys(): - substance = res['PC_Substances'][0] - if len(substance) > 0: - if 'compound' in substance.keys(): - compounds = substance['compound'] - if len(compounds) > 1: - compound = compounds[1] - if 'id' in compound.keys(): - obj = compound['id'] - if 'id' in obj.keys(): - id_obj = obj['id'] - if 'cid' in id_obj.keys(): - pubchem_cid = str(id_obj['cid']) - return pubchem_cid - - - @staticmethod - def get_description_url_from_cid(pubchem_cid): - """ query the description URL from HMDB - Args: - pubchem_cid (str): PubChem CID, e.g. 123689 - Returns: - desc_url (str): the URL of HMDB website, which contains the description of the compound - """ - res_url = None - if not isinstance(pubchem_cid, str): - return res_url - handler = QueryPubChem.HANDLER_MAP['get_description_url'].format(cid=pubchem_cid) - res = QueryPubChem.__access_api(handler) - if res is not None: - if 'InformationList' in res.keys(): - info_list = res['InformationList'] - if 'Information' in info_list.keys(): - infos = info_list['Information'] - for info in infos: - if 'DescriptionSourceName' in info.keys() and 'DescriptionURL' in info.keys(): - if info['DescriptionSourceName'] == "Human Metabolome Database (HMDB)": - return info['DescriptionURL'] - return res_url - - - @staticmethod - def get_description_url(pubchem_sid): - res_url = None - if not isinstance(pubchem_sid, str): - return res_url - pubchem_cid = QueryPubChem.get_pubchem_cid(pubchem_sid) - if pubchem_cid is not None: - res_url = QueryPubChem.get_description_url_from_cid(pubchem_cid) - return res_url - -if __name__ == '__main__': - print(QueryPubChem.get_chembl_ids_for_drug('gne-493')) - print(QueryPubChem.get_pubchem_id_for_chembl_id('CHEMBL521')) - print(QueryPubChem.get_pubchem_id_for_chembl_id('chembl521')) - print(QueryPubChem.get_pubchem_id_for_chembl_id('3400')) - print(QueryPubChem.get_pubmed_id_for_pubchem_id('3672')) - print(QueryPubChem.get_pubmed_id_for_pubchem_id('3500')) - print(QueryPubChem.get_pubmed_id_for_pubchem_id('3400')) - print(QueryPubChem.get_description_url('6921')) - print(QueryPubChem.get_description_url('3500')) - print(QueryPubChem.get_description_url('3400')) - print(QueryPubChem.get_description_url(3400)) - print(QueryPubChem.get_description_url('3324')) - diff --git a/code/reasoningtool/kg-construction/QueryUniprot.py b/code/reasoningtool/kg-construction/QueryUniprot.py deleted file mode 100644 index 0b6439e11..000000000 --- a/code/reasoningtool/kg-construction/QueryUniprot.py +++ /dev/null @@ -1,218 +0,0 @@ -""" This module defines the class QueryUniprot which connects to APIs at -http://www.uniprot.org/uploadlists/, querying reactome pathways from uniprot id. - -* map_enzyme_commission_id_to_uniprot_ids(ec_id) - - Description: - map enzyme commission id to UniProt ids - - Args: - ec_id (str): enzyme commission id, e.g., "ec:1.4.1.17" - - Returns: - ids (set): a set of the enzyme commission ids, or empty set if no UniProt id can be obtained or the response - status code is not 200. - -""" - -__author__ = "" -__copyright__ = "" -__credits__ = [] -__license__ = "" -__version__ = "" -__maintainer__ = "" -__email__ = "" -__status__ = "Prototype" - -# import requests -# import requests_cache -from cache_control_helper import CacheControlHelper -import CachedMethods -import sys -import urllib.parse -import xmltodict - - -class QueryUniprot: - API_BASE_URL = "http://www.uniprot.org/uploadlists/" - TIMEOUT_SEC = 120 - HANDLER_MAP = { - 'map_enzyme_commission_id_to_uniprot_ids': 'uniprot/?query=({id})&format=tab&columns=id', - 'get_protein': 'uniprot/{id}.xml' - } - - @staticmethod - @CachedMethods.register - def uniprot_id_to_reactome_pathways(uniprot_id): - """returns a ``set`` of reactome IDs of pathways associated with a given string uniprot ID - - :param uniprot_id: a ``str`` uniprot ID, like ``"P68871"`` - :returns: a ``set`` of string Reactome IDs - """ - - payload = { 'from': 'ACC', - 'to': 'REACTOME_ID', - 'format': 'tab', - 'query': uniprot_id } - contact = "stephen.ramsey@oregonstate.edu" - header = {'User-Agent': 'Python %s' % contact} - - requests = CacheControlHelper() - - try: - url =QueryUniprot.API_BASE_URL - res = requests.post(QueryUniprot.API_BASE_URL, data=payload, headers=header) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryUniprot for URL: ' + QueryUniprot.API_BASE_URL, file=sys.stderr) - return None - except KeyboardInterrupt: - sys.exit(0) - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryUniprot for URL: %s' % (e, url), file=sys.stderr) - return None - status_code = res.status_code - if status_code != 200: - print(QueryUniprot.API_BASE_URL, file=sys.stderr) - print('Status code ' + str(status_code) + ' for url: ' + QueryUniprot.API_BASE_URL, file=sys.stderr) - return None -# assert 200 == res.status_code - res_set = set() - for line in res.text.splitlines(): - field_str = line.split("\t")[1] - if field_str != "To": - res_set.add(field_str) - return res_set - - @staticmethod - def __access_api(handler): - - api_base_url = 'http://www.uniprot.org' - url = api_base_url + '/' + handler - #print(url) - contact = "stephen.ramsey@oregonstate.edu" - header = {'User-Agent': 'Python %s' % contact} - - requests = CacheControlHelper() - try: - res = requests.get(url, timeout=QueryUniprot.TIMEOUT_SEC, headers=header) - except requests.exceptions.Timeout: - print(url, file=sys.stderr) - print('Timeout in QueryUniprot for URL: ' + url, file=sys.stderr) - return None - except requests.exceptions.ChunkedEncodingError: - print(url, file=sys.stderr) - print('ChunkedEncodingError for URL: ' + url, file=sys.stderr) - return None - except BaseException as e: - print(url, file=sys.stderr) - print('%s received in QueryUniprot for URL: %s' % (e, url), file=sys.stderr) - return None - status_code = res.status_code - if status_code != 200: - print(url, file=sys.stderr) - print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr) - return None - return res.text - - @staticmethod - def map_enzyme_commission_id_to_uniprot_ids(ec_id): - res_set = set() - if not isinstance(ec_id, str): - return res_set - ec_id_encoded = urllib.parse.quote_plus(ec_id) - handler = QueryUniprot.HANDLER_MAP['map_enzyme_commission_id_to_uniprot_ids'].format(id=ec_id_encoded) - res = QueryUniprot.__access_api(handler) - if res is not None: - res = res[res.find('\n')+1:] - for line in res.splitlines(): - res_set.add(line) - return res_set - - @staticmethod - def __get_entity(entity_type, entity_id): - if entity_id[:10] == 'UniProtKB:': - entity_id = entity_id[10:] - handler = QueryUniprot.HANDLER_MAP[entity_type].format(id=entity_id) - results = QueryUniprot.__access_api(handler) - entity = None - if results is not None: - obj = xmltodict.parse(results) - if 'uniprot' in obj.keys(): - if 'entry' in obj['uniprot'].keys(): - entity = obj['uniprot']['entry'] - return entity - - @staticmethod - def get_protein_gene_symbol(entity_id): - ret_symbol = "None" - if not isinstance(entity_id, str): - return ret_symbol - entity_obj = QueryUniprot.__get_entity("get_protein", entity_id) - if entity_obj is not None: - if 'gene' in entity_obj.keys(): - if "name" in entity_obj["gene"].keys(): - gene_name_obj = entity_obj["gene"]["name"] - if not type(gene_name_obj) == list: - gene_name_obj = [gene_name_obj] - for name_dict in gene_name_obj: - # print(name_dict) - if "primary" in name_dict.values() and "#text" in name_dict.keys(): - ret_symbol = name_dict["#text"] - return ret_symbol - - @staticmethod - def __get_name(entity_type, entity_id): - entity_obj = QueryUniprot.__get_entity(entity_type, entity_id) - name = "UNKNOWN" - if entity_obj is not None: - if 'protein' in entity_obj.keys(): - if 'recommendedName' in entity_obj['protein'].keys(): - if 'fullName' in entity_obj['protein']['recommendedName'].keys(): - name = entity_obj['protein']['recommendedName']['fullName'] - if isinstance(name, dict): - name = name['#text'] - return name - - @staticmethod - def get_protein_name(protein_id): - if not isinstance(protein_id, str): - return "UNKNOWN" - return QueryUniprot.__get_name("get_protein", protein_id) - - @staticmethod - def get_citeable_accession_for_accession(accession_number): - res_acc = None - res_tab = QueryUniprot.__access_api("uniprot/" + accession_number + ".tab") - if res_tab is None: - return res_acc - res_lines = res_tab.splitlines() - if len(res_lines) > 1: - res_acc = res_lines[1].split("\t")[0] - return res_acc - -if __name__ == '__main__': - print(QueryUniprot.get_citeable_accession_for_accession("P35354")) - print(QueryUniprot.get_citeable_accession_for_accession("A8K802")) - print(QueryUniprot.get_citeable_accession_for_accession("Q16876")) - # print(QueryUniprot.uniprot_id_to_reactome_pathways("P68871")) - # print(QueryUniprot.uniprot_id_to_reactome_pathways("Q16621")) - # print(QueryUniprot.uniprot_id_to_reactome_pathways("P09601")) - print(CachedMethods.cache_info()) - print(QueryUniprot.map_enzyme_commission_id_to_uniprot_ids("ec:1.4.1.17")) # small results - print(QueryUniprot.map_enzyme_commission_id_to_uniprot_ids("ec:1.3.1.110")) # empty result - print(QueryUniprot.map_enzyme_commission_id_to_uniprot_ids("ec:1.2.1.22")) # large results - print(QueryUniprot.map_enzyme_commission_id_to_uniprot_ids("ec:4.4.1.xx")) # fake id - print(QueryUniprot.map_enzyme_commission_id_to_uniprot_ids("R-HSA-1912422")) # wrong id - print(QueryUniprot.get_protein_gene_symbol('UniProtKB:P20848')) - print(QueryUniprot.get_protein_gene_symbol("UniProtKB:P01358")) - print(QueryUniprot.get_protein_gene_symbol("UniProtKB:Q96P88")) - print(QueryUniprot.get_protein_name('UniProtKB:P01358')) - print(QueryUniprot.get_protein_name('UniProtKB:P20848')) - print(QueryUniprot.get_protein_name('UniProtKB:Q9Y471')) - print(QueryUniprot.get_protein_name('UniProtKB:O60397')) - print(QueryUniprot.get_protein_name('UniProtKB:Q8IZJ3')) - print(QueryUniprot.get_protein_name('UniProtKB:Q7Z2Y8')) - print(QueryUniprot.get_protein_name('UniProtKB:Q8IWN7')) - print(QueryUniprot.get_protein_name('UniProtKB:Q156A1')) diff --git a/code/reasoningtool/kg-construction/SynonymMapper.py b/code/reasoningtool/kg-construction/SynonymMapper.py deleted file mode 100644 index 86ebc7ba5..000000000 --- a/code/reasoningtool/kg-construction/SynonymMapper.py +++ /dev/null @@ -1,172 +0,0 @@ -import sys -import os - -from NormGoogleDistance import NormGoogleDistance -from QueryMyGene import QueryMyGene -import mygene -import requests -from QueryMyChem import QueryMyChem -import requests_cache -import pandas -#import _mysql_exceptions - - -class SynonymMapper(): - - def __init__(self): - self.biothings_url = "http://c.biothings.io/v1/query?q=" - self.mygene_obj = mygene.MyGeneInfo() - self.qmg = QueryMyGene() - - def prot_to_gene(self, curie_id): - """ - This takes a uniprot curie id and converts it into a few different gene ids - """ - if len(curie_id.split(':'))>1: - uniprot_id = curie_id.split(':')[1] - else: - return None - entrez_ids = self.qmg.convert_uniprot_id_to_entrez_gene_ID(uniprot_id) - if entrez_ids is not None: - entrez_ids = set(entrez_ids) - else: - entrez_ids = set() - hgnc_ids = set() - mim_ids = set() - vega_ids = set() - ensembl_ids = set() - synonyms = [] - - symbols = self.qmg.convert_uniprot_id_to_gene_symbol(uniprot_id) - for symbol in symbols: - synonyms += ['HGNC.Symbol:' + symbol] - - for gene_id in entrez_ids: - synonyms += ['NCBIGene:' + str(gene_id)] - try: - res = self.mygene_obj.getgene(int(gene_id), fields = 'HGNC,MIM,Vega,ensembl', verbose = False) - except requests.exceptions.HTTPError: - print('HTTP error for querying uniprot to gene symbol mygene: ' + uniprot_id, file=sys.stderr) - res = None - if res is not None: - hgnc_res = res.get('HGNC', None) - mim_res = res.get('MIM', None) - vega_res = res.get('Vega', None) - ensembl_res = res.get('ensembl', None) - else: - hgnc_res = None - mim_res = None - vega_res = None - ensembl_res = None - if hgnc_res is not None: - hgnc_ids |= set([hgnc_res]) - if mim_res is not None: - mim_ids |= set([mim_res]) - if vega_res is not None: - vega_ids |= set([vega_res]) - if ensembl_res is not None: - if type(ensembl_res) == list: - for ens_res in ensembl_res: - ensembl_gene_res = ens_res.get('gene', None) - if ensembl_gene_res is not None: - ensembl_ids |= set([ensembl_gene_res]) - else: - ensembl_gene_res = ensembl_res.get('gene', None) - if ensembl_gene_res is not None: - ensembl_ids |= set([ensembl_gene_res]) - - for hgnc_id in hgnc_ids: - synonyms += ['HGNC:' + str(hgnc_id)] - for mim_id in mim_ids: - synonyms += ['OMIM:' + str(mim_id)] - for vega_id in vega_ids: - synonyms += ['Vega:' + str(vega_id)] - for ensembl_id in ensembl_ids: - synonyms += ['ensembl:' + str(ensembl_id)] - - if len(synonyms)>0: - return synonyms - else: - return None - - def get_all_from_oxo(self, curie_id, map_to = None): - """ - this takes a curie id and gets all the mappings that oxo has for the given id - - :param curie_id: The string for the curie id to submit to OXO (e.g. 'HP:0001947') - :param map_to: A string containing the prefix for the resulting ids. If set to None it will return all mappings. (default is none) - - :return: A list of strings containing the found mapped ids or None if none where found - """ - if map_to is None: - map_to = '' - if type(curie_id) != str: - curie_id = str(curie_id) - if curie_id.startswith('REACT:'): - curie_id = curie_id.replace('REACT', 'Reactome') - prefix = curie_id.split(':')[0] - res = NormGoogleDistance.query_oxo(curie_id) - synonym_ids=None - if res is not None: - res = res.json() - synonym_ids = set() - n_res = res['page']['totalElements'] - if int(n_res) > 0: - mappings = res['_embedded']['mappings'] - for mapping in mappings: - if type(map_to) == list: - for elm in map_to: - if mapping['fromTerm']['curie'].startswith(prefix): - if mapping['toTerm']['curie'].startswith(elm): - synonym_ids |= set([mapping['toTerm']['curie']]) - elif mapping['toTerm']['curie'].startswith(prefix): - if mapping['fromTerm']['curie'].startswith(elm): - synonym_ids |= set([mapping['fromTerm']['curie']]) - else: - if mapping['fromTerm']['curie'].startswith(prefix): - if mapping['toTerm']['curie'].startswith(map_to): - synonym_ids |= set([mapping['toTerm']['curie']]) - elif mapping['toTerm']['curie'].startswith(prefix): - if mapping['fromTerm']['curie'].startswith(map_to): - synonym_ids |= set([mapping['fromTerm']['curie']]) - if len(synonym_ids) == 0: - synonym_ids = None - else: - synonym_ids = list(synonym_ids) - return synonym_ids - - - def chembl_to_chebi(self, chemical_substance_id): - """ - This takes a chembl curie id and return a chebi curie id - """ - if chemical_substance_id[:7] == "ChEMBL:": - chemical_substance_id = chemical_substance_id.replace("ChEMBL:", "CHEMBL") - if chemical_substance_id.startswith('CHEMBL:CHEMBL'): - chemical_substance_id = chemical_substance_id.replace("CHEMBL:", "") - handler = 'chem/' + chemical_substance_id + '?fields=chebi.chebi_id' - - url = QueryMyChem.API_BASE_URL + '/' + handler - - try: - res = requests.get(url, timeout=QueryMyChem.TIMEOUT_SEC) - except requests.exceptions.Timeout: - #print(url, file=sys.stderr) - #print('Timeout in QueryMyChem for URL: ' + url, file=sys.stderr) - return None - if res is None: - return None - status_code = res.status_code - if status_code != 200: - #print(url, file=sys.stderr) - #print('Status code ' + str(status_code) + ' for url: ' + url, file=sys.stderr) - return None - id_json = res.json() - if 'chebi' in id_json.keys(): - return id_json['chebi']['chebi_id'] - else: - return None - - - - diff --git a/code/reasoningtool/kg-construction/chemblMap.tsv b/code/reasoningtool/kg-construction/chemblMap.tsv deleted file mode 100644 index 1158110c7..000000000 --- a/code/reasoningtool/kg-construction/chemblMap.tsv +++ /dev/null @@ -1,2024 +0,0 @@ -CHEMBL1201317 5563 -CHEMBL1491 2162 -CHEMBL1200608 5355130 -CHEMBL542 978 -CHEMBL1201217 3180 -CHEMBL638 71616 -CHEMBL329203 7608 -CHEMBL846 5280453 -CHEMBL374478 5458213 -CHEMBL1115 4991 -CHEMBL106 3365 -CHEMBL76 2719 -CHEMBL1201 941651 -CHEMBL1887666 10646 -CHEMBL1676 9854489 -CHEMBL1316 2581 -CHEMBL989 6215 -CHEMBL490 43815 -CHEMBL1437065 4511 -CHEMBL970 31640 -CHEMBL761 4436 -CHEMBL1179 19493 -CHEMBL1228 4641 -CHEMBL1096885 454216 -CHEMBL965 2551 -CHEMBL1076347 7547 -CHEMBL1201756 3036780 -CHEMBL440 3032285 -CHEMBL448 4737 -CHEMBL1201220 2140 -CHEMBL2104232 70695634 -CHEMBL1237068 66964417 -CHEMBL571 3825 -CHEMBL334966 9854012 -CHEMBL1200766 31011 -CHEMBL626 47641 -CHEMBL1493 3354 -CHEMBL55400 7099 -CHEMBL1201129 451668 -CHEMBL19299 3965 -CHEMBL1622 6037 -CHEMBL81 5035 -CHEMBL1539 17134 -CHEMBL1445 6446 -CHEMBL1434 54675783 -CHEMBL1503 4594 -CHEMBL253592 5902 -CHEMBL1463345 13789 -CHEMBL705 449171 -CHEMBL484785 44591583 -CHEMBL1162 6230 -CHEMBL2110626 6615 -CHEMBL1201325 24199 -CHEMBL822 1549008 -CHEMBL3085436 3037981 -CHEMBL972 26757 -CHEMBL2105385 11226 -CHEMBL1495 4642 -CHEMBL1744 12460 -CHEMBL2403108 57379345 -CHEMBL416 4114 -CHEMBL583 72474 -CHEMBL1554 457193 -CHEMBL1450486 11061 -CHEMBL1477036 11339 -CHEMBL3707307 9862937 -CHEMBL1619 20279 -CHEMBL566534 68911 -CHEMBL1619528 4832 -CHEMBL1597 3080 -CHEMBL1118 125017 -CHEMBL37390 4977 -CHEMBL2103752 16135415 -CHEMBL483254 6918837 -CHEMBL653 3033637 -CHEMBL1193 4761 -CHEMBL3770406 56646803 -CHEMBL2219418 56959087 -CHEMBL135 5757 -CHEMBL409 2375 -CHEMBL24147 5054 -CHEMBL1240977 5281087 -CHEMBL1622638 7084 -CHEMBL981 64929 -CHEMBL308954 193962 -CHEMBL1100 8280 -CHEMBL288441 5328940 -CHEMBL911 5732 -CHEMBL472 3488 -CHEMBL2106161 443935 -CHEMBL1201334 25074470 -CHEMBL1697849 5284614 -CHEMBL1561 441314 -CHEMBL1725880 5536 -CHEMBL96783 6912 -CHEMBL1580 439693 -CHEMBL139367 154234 -CHEMBL168815 2684 -CHEMBL712 2197 -CHEMBL494 5311181 -CHEMBL727 2723601 -CHEMBL114 441243 -CHEMBL1200714 2717 -CHEMBL1210954 65958 -CHEMBL1200853 9051 -CHEMBL1341 4116 -CHEMBL1129 6256 -CHEMBL1488165 65944 -CHEMBL606 5282381 -CHEMBL785 33572 -CHEMBL1471 6918365 -CHEMBL160 5284373 -CHEMBL1201355 16129675 -CHEMBL8085 5962 -CHEMBL468837 10518 -CHEMBL454446 6437877 -CHEMBL990 2484 -CHEMBL1201320 9568614 -CHEMBL2107830 11949646 -CHEMBL429910 9887712 -CHEMBL3127326 54767916 -CHEMBL1498 6166 -CHEMBL23 39186 -CHEMBL340801 16106 -CHEMBL118 2662 -CHEMBL659 9651 -CHEMBL472566 6103 -CHEMBL2103846 13559281 -CHEMBL259209 11622909 -CHEMBL435 3639 -CHEMBL1093 32170 -CHEMBL1043 2955 -CHEMBL1233584 6306 -CHEMBL115 5362440 -CHEMBL2104786 452306 -CHEMBL33 149096 -CHEMBL3544909 6441383 -CHEMBL2105667 9852175 -CHEMBL455917 2707 -CHEMBL3833301 9977518 -CHEMBL1165268 3739 -CHEMBL797 6839 -CHEMBL1005 60815 -CHEMBL108541 6954 -CHEMBL1201195 42008 -CHEMBL451 2712 -CHEMBL708 60854 -CHEMBL254328 132971 -CHEMBL1517 54675779 -CHEMBL1514 15130 -CHEMBL2104860 9909202 -CHEMBL442 8223 -CHEMBL16 1775 -CHEMBL567 4748 -CHEMBL22150 2710 -CHEMBL609 5565 -CHEMBL1505 123606 -CHEMBL1200570 8567 -CHEMBL1256786 3410 -CHEMBL1440 54675776 -CHEMBL959 5071 -CHEMBL605846 11504295 -CHEMBL249837 54675785 -CHEMBL1618102 160883 -CHEMBL1660 6323497 -CHEMBL1489 9444 -CHEMBL534 3827 -CHEMBL1733 107994 -CHEMBL3306382 24672 -CHEMBL927 6915944 -CHEMBL2104627 6436061 -CHEMBL2111125 20054992 -CHEMBL1094 3331 -CHEMBL1355299 7181 -CHEMBL802 4201 -CHEMBL461522 3111 -CHEMBL1697841 22297 -CHEMBL1186894 3035016 -CHEMBL353972 71310 -CHEMBL185073 441289 -CHEMBL1350 60937 -CHEMBL12856 3698 -CHEMBL1182 247 -CHEMBL428 62959 -CHEMBL141 60825 -CHEMBL814 5324346 -CHEMBL1201268 49800047 -CHEMBL544 9015 -CHEMBL777 5280980 -CHEMBL17423 5663 -CHEMBL1565476 26098 -CHEMBL3561695 10154203 -CHEMBL643 4927 -CHEMBL652 3356 -CHEMBL2105689 3003141 -CHEMBL493 31101 -CHEMBL2059073 19988 -CHEMBL985 1176 -CHEMBL3544986 12068809 -CHEMBL564829 16718576 -CHEMBL79 3676 -CHEMBL46516 3396 -CHEMBL1621597 657309 -CHEMBL753 4768 -CHEMBL667 187 -CHEMBL1200544 62921 -CHEMBL1308 3406 -CHEMBL1196 4935 -CHEMBL1192519 5463984 -CHEMBL1201234 3677 -CHEMBL2096635 11457650 -CHEMBL946 19861 -CHEMBL406117 55362 -CHEMBL1490 5572 -CHEMBL1301 16051925 -CHEMBL1464 54678486 -CHEMBL403664 5460769 -CHEMBL343633 175540 -CHEMBL654 4205 -CHEMBL1200386 6714002 -CHEMBL412873 7014 -CHEMBL3544537 69046 -CHEMBL3137312 56640146 -CHEMBL855 5282443 -CHEMBL127 441130 -CHEMBL1070 4409 -CHEMBL1200979 131204 -CHEMBL458875 73115 -CHEMBL1405447 10209 -CHEMBL1263 5152 -CHEMBL1360 47319 -CHEMBL1201295 35330 -CHEMBL831 3964 -CHEMBL2105637 487101 -CHEMBL588 3341 -CHEMBL253376 2442 -CHEMBL226345 190 -CHEMBL1451 31307 -CHEMBL3301678 90683487 -CHEMBL1873475 24821094 -CHEMBL1200971 19150 -CHEMBL2104761 3034015 -CHEMBL787 5281040 -CHEMBL1231723 656641 -CHEMBL779 110635 -CHEMBL1201284 156419 -CHEMBL1201313 2795 -CHEMBL579 3033702 -CHEMBL1372 65359 -CHEMBL1615487 35740 -CHEMBL1631107 5464355 -CHEMBL2110703 26533 -CHEMBL220492 5284627 -CHEMBL2110596 8966 -CHEMBL1084647 46832510 -CHEMBL1077896 175805 -CHEMBL601 137 -CHEMBL495 5280723 -CHEMBL849 5564 -CHEMBL1354 517045 -CHEMBL553025 5311497 -CHEMBL762 4636 -CHEMBL43068 6287 -CHEMBL685 4030 -CHEMBL811 6834 -CHEMBL550348 6098188 -CHEMBL1200694 5206 -CHEMBL1404 56959 -CHEMBL1016 2541 -CHEMBL917 5790 -CHEMBL2104461 3563 -CHEMBL642 1978 -CHEMBL3039597 9898639 -CHEMBL2135460 72081 -CHEMBL1623738 5457 -CHEMBL460 23897 -CHEMBL681 667484 -CHEMBL267495 6445230 -CHEMBL1414320 5362 -CHEMBL2105737 24775005 -CHEMBL60745 9336 -CHEMBL1200490 25074887 -CHEMBL396778 131682 -CHEMBL63857 4764 -CHEMBL3183126 60196264 -CHEMBL2103870 16678941 -CHEMBL2107774 6433107 -CHEMBL1201239 3723 -CHEMBL760 2182 -CHEMBL2105395 3036505 -CHEMBL1201361 5311000 -CHEMBL13291 3369 -CHEMBL1639 5493444 -CHEMBL148 104838 -CHEMBL1520 110634 -CHEMBL1230914 161500 -CHEMBL577 5462501 -CHEMBL409542 11663568 -CHEMBL312311 6271 -CHEMBL1198 4886 -CHEMBL954 1548953 -CHEMBL12 3016 -CHEMBL514800 11561674 -CHEMBL1200799 5282226 -CHEMBL1249 135300 -CHEMBL1201243 5241 -CHEMBL2111176 5648 -CHEMBL54976 6305 -CHEMBL620 2784 -CHEMBL836 129211 -CHEMBL1389 13109 -CHEMBL771 6234 -CHEMBL560 441278 -CHEMBL1200607 9639 -CHEMBL1200622 5281104 -CHEMBL31 5379 -CHEMBL413 5284616 -CHEMBL997 60852 -CHEMBL1490300 75905 -CHEMBL1496806 16367 -CHEMBL734 1990 -CHEMBL564085 202225 -CHEMBL1144 54687 -CHEMBL416755 4628 -CHEMBL1619785 71447 -CHEMBL1161681 24529 -CHEMBL279516 33625 -CHEMBL1229211 54726191 -CHEMBL455706 28204 -CHEMBL3183658 53384398 -CHEMBL578 5388962 -CHEMBL3039498 11677589 -CHEMBL1294 441074 -CHEMBL370143 165580 -CHEMBL726 3372 -CHEMBL1201096 66380 -CHEMBL489411 54260 -CHEMBL2110739 10938 -CHEMBL422 5566 -CHEMBL744 5070 -CHEMBL1201362 5311051 -CHEMBL1200356 19003 -CHEMBL1599 30699 -CHEMBL1201134 157920 -CHEMBL1089221 2313 -CHEMBL2107417 3038503 -CHEMBL1697744 65632 -CHEMBL2110774 6475 -CHEMBL2105300 5050 -CHEMBL24 2249 -CHEMBL1229846 31236 -CHEMBL1200922 6151 -CHEMBL729 92727 -CHEMBL467 3657 -CHEMBL2103868 25102846 -CHEMBL1200934 6540478 -CHEMBL1441 2761171 -CHEMBL898 3059 -CHEMBL1525 40326 -CHEMBL6 3715 -CHEMBL606258 72493 -CHEMBL3833408 44152596 -CHEMBL17962 6274 -CHEMBL1449 36921 -CHEMBL109 3121 -CHEMBL1200507 3724 -CHEMBL1224 5284514 -CHEMBL385517 11243969 -CHEMBL282052 6292 -CHEMBL809 68617 -CHEMBL821 3520 -CHEMBL1399 2187 -CHEMBL422648 65947 -CHEMBL1201187 3002977 -CHEMBL2365712 168871 -CHEMBL1200656 5284447 -CHEMBL546 4631 -CHEMBL1094966 4845 -CHEMBL1256472 6252 -CHEMBL1556 6021 -CHEMBL170988 8249 -CHEMBL741 3878 -CHEMBL923 5245 -CHEMBL2111066 3781 -CHEMBL1200558 10909430 -CHEMBL21731 4011 -CHEMBL175247 3034010 -CHEMBL1688852 9850878 -CHEMBL2107831 49843517 -CHEMBL1182833 5281042 -CHEMBL87708 3947 -CHEMBL1736 2528 -CHEMBL871 3305 -CHEMBL1643 37542 -CHEMBL152067 68770 -CHEMBL224436 5282165 -CHEMBL857 171548 -CHEMBL105608 93550 -CHEMBL1199324 219090 -CHEMBL1741 84029 -CHEMBL178 30323 -CHEMBL766 6435415 -CHEMBL2105348 71741 -CHEMBL170052 71186 -CHEMBL1743263 114948 -CHEMBL742 3821 -CHEMBL1272 65981 -CHEMBL2105760 11978813 -CHEMBL607 4058 -CHEMBL689 6251 -CHEMBL223228 64139 -CHEMBL558913 23725126 -CHEMBL498466 21724551 -CHEMBL2105605 5632 -CHEMBL514446 6674 -CHEMBL69308 5282203 -CHEMBL839 2583 -CHEMBL487253 65628 -CHEMBL2110588 23652731 -CHEMBL455 5320 -CHEMBL1200907 656583 -CHEMBL783 5311309 -CHEMBL1902981 3236 -CHEMBL5 4421 -CHEMBL426926 68682 -CHEMBL509 4037 -CHEMBL1200945 25513 -CHEMBL1200596 2722 -CHEMBL477874 9528 -CHEMBL1213351 10100 -CHEMBL1626 26987 -CHEMBL1373 4236 -CHEMBL285802 5736 -CHEMBL2104208 6526 -CHEMBL1480 3333 -CHEMBL486208 6552009 -CHEMBL848 216326 -CHEMBL58 4212 -CHEMBL184412 208898 -CHEMBL1550 5284607 -CHEMBL174 6249 -CHEMBL1200736 11029 -CHEMBL1679 6006 -CHEMBL273264 4413 -CHEMBL1909072 6761 -CHEMBL782 5505 -CHEMBL683 2797 -CHEMBL1200610 5284584 -CHEMBL2110824 50225 -CHEMBL92161 66901 -CHEMBL1201772 6918456 -CHEMBL41355 121892 -CHEMBL218490 5284549 -CHEMBL897 4911 -CHEMBL572 6604200 -CHEMBL1200666 5288783 -CHEMBL1197 10770 -CHEMBL1200668 5284359 -CHEMBL1442422 9419 -CHEMBL127643 6857 -CHEMBL1625 4632 -CHEMBL888 60750 -CHEMBL250270 65866 -CHEMBL533 60753 -CHEMBL1736151 3275 -CHEMBL1087630 4622 -CHEMBL3739769 23453101 -CHEMBL1082407 15951529 -CHEMBL1201338 2905 -CHEMBL506 4908 -CHEMBL91397 133017 -CHEMBL1201165 9046 -CHEMBL1762 38945 -CHEMBL604608 439709 -CHEMBL252556 3686 -CHEMBL1201776 9838022 -CHEMBL2095208 25151504 -CHEMBL127592 4621 -CHEMBL2107516 6419536 -CHEMBL1201219 39765 -CHEMBL54943 594 -CHEMBL2105618 70695647 -CHEMBL419 3998 -CHEMBL592435 9953065 -CHEMBL2106989 165675 -CHEMBL1248 10197600 -CHEMBL1274 4493 -CHEMBL1200568 61680 -CHEMBL514 3950 -CHEMBL691 5991 -CHEMBL1023 82146 -CHEMBL773 750 -CHEMBL1535 3652 -CHEMBL1201251 60789 -CHEMBL1201232 3775 -CHEMBL1788401 35803 -CHEMBL1139 5282411 -CHEMBL92401 3748 -CHEMBL245807 31315 -CHEMBL1103 5323714 -CHEMBL2105722 68783 -CHEMBL59 681 -CHEMBL1201798 3085017 -CHEMBL1443 8982 -CHEMBL866 446541 -CHEMBL1201321 34328 -CHEMBL54 3559 -CHEMBL1106 3241 -CHEMBL1235872 444237 -CHEMBL1502 4679 -CHEMBL2105909 71365 -CHEMBL1630 16051921 -CHEMBL1900528 5487427 -CHEMBL1398654 6244 -CHEMBL1201056 5322 -CHEMBL492 37497 -CHEMBL100259 6029 -CHEMBL1237054 52945526 -CHEMBL180570 16124 -CHEMBL47 14985 -CHEMBL1655 3005573 -CHEMBL1002 123600 -CHEMBL409153 6918485 -CHEMBL3039471 6473883 -CHEMBL1425 667490 -CHEMBL1615777 135326 -CHEMBL452859 65780 -CHEMBL1233511 890 -CHEMBL1521 5719 -CHEMBL220491 68844 -CHEMBL471737 132999 -CHEMBL3545432 56844015 -CHEMBL2147777 11949652 -CHEMBL2110788 66248 -CHEMBL290578 33309 -CHEMBL445 4543 -CHEMBL269732 445643 -CHEMBL1200342 443928 -CHEMBL1138 150311 -CHEMBL1447 3000540 -CHEMBL1537 6479523 -CHEMBL1257051 11234049 -CHEMBL1062 6238 -CHEMBL1336 216239 -CHEMBL9 4539 -CHEMBL1790041 5039 -CHEMBL1201754 129228 -CHEMBL1951143 9870652 -CHEMBL1200685 66376 -CHEMBL713 153941 -CHEMBL14060 996 -CHEMBL1697737 2811 -CHEMBL189963 5330286 -CHEMBL1902627 2810 -CHEMBL864 2564 -CHEMBL716 5002 -CHEMBL90593 5881 -CHEMBL1007 638793 -CHEMBL1200359 5326 -CHEMBL2218885 21872805 -CHEMBL770 5504 -CHEMBL411 448537 -CHEMBL1200555 3738 -CHEMBL1201151 6291 -CHEMBL1670 4211 -CHEMBL612 5826 -CHEMBL1433 54671203 -CHEMBL1173055 9931954 -CHEMBL2104895 5205 -CHEMBL1536 5280793 -CHEMBL459265 3468 -CHEMBL1590 7028 -CHEMBL158 5742832 -CHEMBL1533 40973 -CHEMBL1207444 71327 -CHEMBL1541 5362065 -CHEMBL7728 3608 -CHEMBL434394 71301 -CHEMBL1371 2733 -CHEMBL1534 493570 -CHEMBL589586 5369209 -CHEMBL1201388 31378 -CHEMBL2133806 46911863 -CHEMBL443052 11499245 -CHEMBL1291 31477 -CHEMBL238804 9913767 -CHEMBL967 5391 -CHEMBL117785 6018 -CHEMBL1201287 16960 -CHEMBL658 41693 -CHEMBL1177 4723 -CHEMBL796 4158 -CHEMBL20883 5386 -CHEMBL523 2997 -CHEMBL1201203 1201549 -CHEMBL268869 5330 -CHEMBL452231 452548 -CHEMBL1229517 42611257 -CHEMBL3350037 13690207 -CHEMBL1200829 7027 -CHEMBL1200453 12620 -CHEMBL231813 3010818 -CHEMBL1200600 9878 -CHEMBL452 2802 -CHEMBL92870 3926 -CHEMBL240163 65957 -CHEMBL1233 2576 -CHEMBL2218896 39860 -CHEMBL590799 2536 -CHEMBL1079604 15459 -CHEMBL53463 31703 -CHEMBL1751 2724385 -CHEMBL395429 439302 -CHEMBL1201224 9570757 -CHEMBL389621 5754 -CHEMBL830 3009 -CHEMBL108436 8158 -CHEMBL427069 6321411 -CHEMBL621 5533 -CHEMBL832 5342 -CHEMBL449 2479 -CHEMBL2105345 15004 -CHEMBL111 104850 -CHEMBL1764 72287 -CHEMBL609109 9429 -CHEMBL1201182 6918289 -CHEMBL1201747 19371515 -CHEMBL70566 3042090 -CHEMBL1200680 24087 -CHEMBL2104036 12551 -CHEMBL1201191 1549000 -CHEMBL934 4174 -CHEMBL1200690 644076 -CHEMBL1224207 49865963 -CHEMBL1256841 4472 -CHEMBL369475 5362471 -CHEMBL1725 3736 -CHEMBL1626223 708857 -CHEMBL1540 4725 -CHEMBL476 5351166 -CHEMBL1037 38521 -CHEMBL1516 158781 -CHEMBL561 3948 -CHEMBL1551 31401 -CHEMBL867 3735 -CHEMBL1201131 5794 -CHEMBL55 4735 -CHEMBL315838 48041 -CHEMBL1017 65999 -CHEMBL32838 6917655 -CHEMBL1882461 31800 -CHEMBL765 3518 -CHEMBL537 785 -CHEMBL1201147 22571 -CHEMBL650 6741 -CHEMBL354541 9853053 -CHEMBL470670 16666 -CHEMBL291747 6288 -CHEMBL709 2092 -CHEMBL1020 5509 -CHEMBL41 3386 -CHEMBL1518 657298 -CHEMBL1089 3675 -CHEMBL1236962 25167777 -CHEMBL1384 6032 -CHEMBL469 3826 -CHEMBL128988 33334 -CHEMBL646 5556 -CHEMBL475903 11475094 -CHEMBL615 6869 -CHEMBL637 5656 -CHEMBL1237021 213046 -CHEMBL1088 4078 -CHEMBL340978 39941 -CHEMBL2107455 124246 -CHEMBL1200660 12597 -CHEMBL358040 4538 -CHEMBL58323 219078 -CHEMBL982 5284594 -CHEMBL2107457 3792 -CHEMBL315985 9881504 -CHEMBL1172 124087 -CHEMBL135400 5735 -CHEMBL1314 5483 -CHEMBL2104662 6917795 -CHEMBL415606 16136245 -CHEMBL566315 447715 -CHEMBL33986 5361092 -CHEMBL1200374 60198 -CHEMBL1908331 6917906 -CHEMBL193240 449193 -CHEMBL86 4168 -CHEMBL211456 9294 -CHEMBL553 176870 -CHEMBL49 2477 -CHEMBL1201248 62887 -CHEMBL1200468 4004 -CHEMBL1200774 5876 -CHEMBL464 5282375 -CHEMBL1624 5819 -CHEMBL596 3345 -CHEMBL46 4595 -CHEMBL1342 10413 -CHEMBL1614 28620 -CHEMBL2170177 24753719 -CHEMBL511115 151170 -CHEMBL1201784 6433083 -CHEMBL2010601 16220172 -CHEMBL1201288 6914273 -CHEMBL817 5503 -CHEMBL304902 16351 -CHEMBL1398031 6603935 -CHEMBL2105979 11071 -CHEMBL1182210 2335 -CHEMBL1739 24244 -CHEMBL1569746 23676755 -CHEMBL1697833 25223 -CHEMBL717 6279 -CHEMBL706 2682 -CHEMBL76370 5362436 -CHEMBL1542 2265 -CHEMBL2111030 14670 -CHEMBL189 4197 -CHEMBL376685 16051933 -CHEMBL1200732 443958 -CHEMBL1265 60164 -CHEMBL1201039 2343 -CHEMBL254316 54671008 -CHEMBL1278 4440 -CHEMBL1697767 9470 -CHEMBL137 4173 -CHEMBL1200585 5281034 -CHEMBL1436 5479529 -CHEMBL282724 216235 -CHEMBL1697851 6762 -CHEMBL1370 5281004 -CHEMBL3275188 7606 -CHEMBL1201346 5362070 -CHEMBL730 4510 -CHEMBL44657 36462 -CHEMBL36 4993 -CHEMBL261641 9549297 -CHEMBL239243 1123 -CHEMBL810 5394 -CHEMBL1112 60795 -CHEMBL186 5479537 -CHEMBL501122 9852981 -CHEMBL83668 5510 -CHEMBL270190 5488548 -CHEMBL834 4674 -CHEMBL1113 2170 -CHEMBL1201407 5311236 -CHEMBL1684 2315 -CHEMBL417 41867 -CHEMBL1757 446987 -CHEMBL631 4932 -CHEMBL1201266 44564 -CHEMBL1601669 5282181 -CHEMBL2104816 20266 -CHEMBL1418176 6300 -CHEMBL2105293 6617 -CHEMBL334491 68778 -CHEMBL121 77999 -CHEMBL524 6082 -CHEMBL2221250 73952296 -CHEMBL119443 443884 -CHEMBL435298 72099 -CHEMBL715 4585 -CHEMBL1509 68873 -CHEMBL2146126 3028194 -CHEMBL1303 59227 -CHEMBL3740903 45357115 -CHEMBL1166 92722 -CHEMBL2107834 11304743 -CHEMBL1009 6047 -CHEMBL2105581 47979 -CHEMBL1201319 5906 -CHEMBL186720 7077 -CHEMBL1201794 643976 -CHEMBL291962 6106 -CHEMBL171679 134601 -CHEMBL1000 2678 -CHEMBL942 2391 -CHEMBL1725250 4613 -CHEMBL19 4100 -CHEMBL1201222 11597698 -CHEMBL1117 42890 -CHEMBL1187833 11519070 -CHEMBL425 5462337 -CHEMBL1413199 6719 -CHEMBL1605443 54158 -CHEMBL423 2369 -CHEMBL1206 3290 -CHEMBL1256391 40632 -CHEMBL526 4943 -CHEMBL105 5746 -CHEMBL671 5453 -CHEMBL969 4890 -CHEMBL477772 10113978 -CHEMBL829 5574 -CHEMBL657 3100 -CHEMBL1201168 3759 -CHEMBL1591 54680690 -CHEMBL591 5281077 -CHEMBL398435 9871419 -CHEMBL1201760 10178705 -CHEMBL464345 10690 -CHEMBL75753 155774 -CHEMBL1237 5362119 -CHEMBL2103756 70695610 -CHEMBL108358 457954 -CHEMBL1201213 3762 -CHEMBL525610 16132393 -CHEMBL1306 441383 -CHEMBL1286 5284583 -CHEMBL768 59768 -CHEMBL233406 24436 -CHEMBL415 2801 -CHEMBL595 4829 -CHEMBL978 1993 -CHEMBL1159717 5606 -CHEMBL1727 27447 -CHEMBL842 2720 -CHEMBL415324 5282230 -CHEMBL1189513 9553856 -CHEMBL645 2405 -CHEMBL597 5775 -CHEMBL507870 3081362 -CHEMBL1200598 3032325 -CHEMBL22587 55283 -CHEMBL931 3562 -CHEMBL2103867 70693500 -CHEMBL284348 1727 -CHEMBL1201345 8771 -CHEMBL42 2818 -CHEMBL1560 44093 -CHEMBL129 35370 -CHEMBL813 5281037 -CHEMBL2159122 11250029 -CHEMBL3707331 10311306 -CHEMBL844 2435 -CHEMBL3301668 16681432 -CHEMBL429 3869 -CHEMBL1235535 7510 -CHEMBL1652 2131 -CHEMBL3317857 56649692 -CHEMBL6622 6883 -CHEMBL939 123631 -CHEMBL1008 2351 -CHEMBL957 104865 -CHEMBL406 3702 -CHEMBL417007 10430040 -CHEMBL576127 3344 -CHEMBL1387 6231 -CHEMBL549473 3109 -CHEMBL1483 2082 -CHEMBL996 441199 -CHEMBL723 2585 -CHEMBL1201109 5311066 -CHEMBL636 77991 -CHEMBL1200515 8550 -CHEMBL1305 2200 -CHEMBL360055 3450 -CHEMBL12198 602 -CHEMBL480 3883 -CHEMBL232202 440936 -CHEMBL180022 9915743 -CHEMBL2103749 16129704 -CHEMBL1441961 5282219 -CHEMBL979 4064 -CHEMBL1194 4906 -CHEMBL808 3198 -CHEMBL2105435 5217 -CHEMBL424 338 -CHEMBL2362906 13218779 -CHEMBL1279 77992 -CHEMBL2105075 3360 -CHEMBL1075 34633 -CHEMBL84 60700 -CHEMBL1201405 55331 -CHEMBL2105458 27901 -CHEMBL1068 34312 -CHEMBL1617 6436173 -CHEMBL580 3958 -CHEMBL1289926 6450551 -CHEMBL1773 60953 -CHEMBL1201353 33036 -CHEMBL788 5905 -CHEMBL1201210 4940 -CHEMBL1663 6432 -CHEMBL1697829 65755 -CHEMBL2063090 44603531 -CHEMBL54922 145742 -CHEMBL2104900 22425 -CHEMBL2095212 9865528 -CHEMBL827 154101 -CHEMBL1730 5742673 -CHEMBL154126 98941 -CHEMBL2141296 25183872 -CHEMBL3545110 44631912 -CHEMBL1201261 5611 -CHEMBL1201746 148121 -CHEMBL1239 2345 -CHEMBL1200862 441350 -CHEMBL1201206 50192 -CHEMBL1201274 60657 -CHEMBL27810 2663 -CHEMBL702 43672 -CHEMBL405110 6099 -CHEMBL1201139 11683 -CHEMBL570 5568 -CHEMBL64 3767 -CHEMBL83906 11350 -CHEMBL277535 2378 -CHEMBL878 4170 -CHEMBL701 2284 -CHEMBL1131 5284513 -CHEMBL505132 3002143 -CHEMBL1201271 6729 -CHEMBL1201075 3743 -CHEMBL778 5311068 -CHEMBL1173655 10184653 -CHEMBL251940 4747 -CHEMBL568 4616 -CHEMBL1908311 15662541 -CHEMBL491571 73303 -CHEMBL177 37768 -CHEMBL1200810 5281107 -CHEMBL1201198 57697 -CHEMBL1255 712 -CHEMBL1222251 892 -CHEMBL3084803 5284517 -CHEMBL477 60961 -CHEMBL243712 2159 -CHEMBL225072 446556 -CHEMBL3187032 62277 -CHEMBL1095 3292 -CHEMBL90 774 -CHEMBL603 5717 -CHEMBL1574 4771 -CHEMBL69998 208910 -CHEMBL669 2895 -CHEMBL1398373 636397 -CHEMBL2105755 54732242 -CHEMBL406393 222528 -CHEMBL2107342 59364992 -CHEMBL1200751 2724350 -CHEMBL466246 502272 -CHEMBL1180725 4934 -CHEMBL52 4534 -CHEMBL684 3052 -CHEMBL218394 10324367 -CHEMBL25 2244 -CHEMBL820 2478 -CHEMBL83 2733526 -CHEMBL330546 107689 -CHEMBL1271 5850 -CHEMBL1101 2381 -CHEMBL1201322 5456 -CHEMBL651 4095 -CHEMBL1200706 10176082 -CHEMBL1241 5587 -CHEMBL1200617 5311412 -CHEMBL852 460612 -CHEMBL1455 2123 -CHEMBL1201758 164522 -CHEMBL964 3117 -CHEMBL2105275 7558 -CHEMBL720 244 -CHEMBL36506 54675769 -CHEMBL1072 2471 -CHEMBL2028663 44462760 -CHEMBL21 5333 -CHEMBL1972224 89216 -CHEMBL1200633 9812710 -CHEMBL386630 6013 -CHEMBL1191 5328 -CHEMBL131 5755 -CHEMBL54126 8246 -CHEMBL1276258 52943456 -CHEMBL2111101 10071196 -CHEMBL1448 4477 -CHEMBL8 2764 -CHEMBL3039583 73348210 -CHEMBL276832 3637 -CHEMBL772 5770 -CHEMBL509924 11954369 -CHEMBL1218 208902 -CHEMBL2106329 65646 -CHEMBL1200614 3741 -CHEMBL450 4099 -CHEMBL432103 4853 -CHEMBL1771 60606 -CHEMBL185 3385 -CHEMBL1651913 5489013 -CHEMBL1732 10531 -CHEMBL1421 3062316 -CHEMBL1054 5560 -CHEMBL1697842 10599 -CHEMBL2104294 3091 -CHEMBL563 3394 -CHEMBL1201262 3105 -CHEMBL1229908 9363 -CHEMBL224325 6301 -CHEMBL152 60613 -CHEMBL316157 5773 -CHEMBL134 2803 -CHEMBL415284 5284595 -CHEMBL2110926 10734 -CHEMBL404520 54675777 -CHEMBL1697830 5284533 -CHEMBL2107117 3033151 -CHEMBL46740 154257 -CHEMBL312448 5709 -CHEMBL511142 9848990 -CHEMBL520400 72075 -CHEMBL25202 108143 -CHEMBL316257 26197 -CHEMBL229383 44424838 -CHEMBL545 702 -CHEMBL357995 44112 -CHEMBL1200959 14677 -CHEMBL576 1110 -CHEMBL1185 60857 -CHEMBL124211 11117 -CHEMBL562 441140 -CHEMBL2074922 119171 -CHEMBL410414 9917862 -CHEMBL1683590 11354606 -CHEMBL1201027 11693 -CHEMBL56367 4495 -CHEMBL267345 118705500 -CHEMBL1201328 18104 -CHEMBL557555 2763 -CHEMBL70418 2789 -CHEMBL1200737 452110 -CHEMBL1200845 443943 -CHEMBL24778 5312125 -CHEMBL1252 16131215 -CHEMBL3137326 45255840 -CHEMBL1201087 54746 -CHEMBL286452 68570 -CHEMBL387675 16134395 -CHEMBL547 5282379 -CHEMBL374731 159269 -CHEMBL204656 5277135 -CHEMBL167911 3039 -CHEMBL37161 3334 -CHEMBL1237119 6918140 -CHEMBL34259 126941 -CHEMBL1229 65028 -CHEMBL52440 5360696 -CHEMBL673 4688 -CHEMBL1431 4091 -CHEMBL1174 448812 -CHEMBL1095097 443872 -CHEMBL1200868 8609 -CHEMBL16699 42723 -CHEMBL126 441401 -CHEMBL17 3038 -CHEMBL963 5284604 -CHEMBL372795 19649 -CHEMBL1096562 157922 -CHEMBL1215 6041 -CHEMBL1738990 200742 -CHEMBL107 6167 -CHEMBL1589896 3609 -CHEMBL1644 47965 -CHEMBL1077 60726 -CHEMBL80 5284596 -CHEMBL2106589 71734 -CHEMBL29 5904 -CHEMBL428880 2882 -CHEMBL11298 5951 -CHEMBL601719 11626560 -CHEMBL1146 456255 -CHEMBL1231649 5287879 -CHEMBL584 64143 -CHEMBL941 5291 -CHEMBL360328 11658860 -CHEMBL2303618 20055063 -CHEMBL206253 6451149 -CHEMBL1231 4634 -CHEMBL512351 10275777 -CHEMBL2366014 72160 -CHEMBL2023898 25154714 -CHEMBL36633 4626 -CHEMBL1201294 13505 -CHEMBL1094636 24958200 -CHEMBL1201360 5311167 -CHEMBL104 2812 -CHEMBL20835 1979 -CHEMBL414357 16157882 -CHEMBL2106871 15063 -CHEMBL497613 9920539 -CHEMBL142438 947 -CHEMBL1201263 84088 -CHEMBL466659 6518 -CHEMBL1643895 108000 -CHEMBL53 6005 -CHEMBL3 89594 -CHEMBL416146 123619 -CHEMBL592 5359272 -CHEMBL1697766 4744 -CHEMBL86715 4919 -CHEMBL521686 23725625 -CHEMBL92915 22565 -CHEMBL1472 638678 -CHEMBL196 54670067 -CHEMBL237500 10096344 -CHEMBL90555 5978 -CHEMBL1496 446157 -CHEMBL1018 667476 -CHEMBL3301669 16134627 -CHEMBL193 4485 -CHEMBL48361 216210 -CHEMBL622 3308 -CHEMBL1393 5833 -CHEMBL1514715 17506 -CHEMBL2097081 11963622 -CHEMBL1405 5870 -CHEMBL170 3034034 -CHEMBL2110700 6067 -CHEMBL108 2554 -CHEMBL2028019 11154555 -CHEMBL1908315 5282237 -CHEMBL1487 60823 -CHEMBL528 6533629 -CHEMBL18442 65015 -CHEMBL22498 5929 -CHEMBL567597 9838675 -CHEMBL1167 15541 -CHEMBL1712170 5359271 -CHEMBL1179047 8612 -CHEMBL549 2771 -CHEMBL139 3033 -CHEMBL1201749 443936 -CHEMBL2111003 70691438 -CHEMBL634 51263 -CHEMBL550 5910 -CHEMBL14376 71360 -CHEMBL529 447043 -CHEMBL639 2267 -CHEMBL49080 2783 -CHEMBL2111014 27490 -CHEMBL1276308 55245 -CHEMBL70972 2449 -CHEMBL1868702 27812 -CHEMBL356479 21782 -CHEMBL1885437 3245 -CHEMBL1617285 40247 -CHEMBL1888176 5284632 -CHEMBL833 5472 -CHEMBL1200 4633 -CHEMBL325041 387447 -CHEMBL3137309 49846579 -CHEMBL2 4893 -CHEMBL1466 54676038 -CHEMBL1734 154059 -CHEMBL1107 37393 -CHEMBL1796997 23987 -CHEMBL94 5983 -CHEMBL1201117 4107 -CHEMBL42336 6137 -CHEMBL1789941 25126798 -CHEMBL408 5591 -CHEMBL314854 107970 -CHEMBL254219 441207 -CHEMBL2110773 20054871 -CHEMBL125 3599 -CHEMBL660 2130 -CHEMBL1292 2794 -CHEMBL1201752 6445540 -CHEMBL968 3393 -CHEMBL1073 3478 -CHEMBL3792763 460392 -CHEMBL454 2481 -CHEMBL2325741 25227436 -CHEMBL1044 999 -CHEMBL895 5311304 -CHEMBL218650 6480466 -CHEMBL1200647 10079874 -CHEMBL190 2153 -CHEMBL1219 5029 -CHEMBL558 4178 -CHEMBL1201212 4195 -CHEMBL1095777 6918554 -CHEMBL1200391 6503 -CHEMBL633 2157 -CHEMBL625 5430 -CHEMBL114655 4567 -CHEMBL1697771 5042 -CHEMBL235668 6323266 -CHEMBL502835 9809715 -CHEMBL2111099 204108 -CHEMBL13828 4615 -CHEMBL1042 5280795 -CHEMBL936 3055 -CHEMBL46102 5105 -CHEMBL98 5311 -CHEMBL2105953 6537431 -CHEMBL289469 3510 -CHEMBL862 3519 -CHEMBL319706 11771731 -CHEMBL608 4912 -CHEMBL1206690 119828 -CHEMBL301267 3000469 -CHEMBL932 3108 -CHEMBL1200788 656684 -CHEMBL422330 5311253 -CHEMBL2105897 24414 -CHEMBL132767 5531 -CHEMBL1237122 5281075 -CHEMBL896 3658 -CHEMBL1501 9642 -CHEMBL1190 2968 -CHEMBL1165 91270 -CHEMBL1544 5920 -CHEMBL1201196 65863 -CHEMBL1082 33613 -CHEMBL267044 688272 -CHEMBL295433 60605 -CHEMBL1399124 6769 -CHEMBL1200604 5593 -CHEMBL157548 71335 -CHEMBL451887 11556711 -CHEMBL2104385 70697633 -CHEMBL1963683 9868491 -CHEMBL249856 53708 -CHEMBL1261 311 -CHEMBL1201273 4969 -CHEMBL1411979 4098 -CHEMBL1200889 49800036 -CHEMBL1083659 24965990 -CHEMBL1220 5479 -CHEMBL781 4020 -CHEMBL2105487 402 -CHEMBL264241 166548 -CHEMBL1187011 3149 -CHEMBL3039514 71661251 -CHEMBL589 5095 -CHEMBL581 62956 -CHEMBL373742 644077 -CHEMBL559 8730 -CHEMBL2104993 9966051 -CHEMBL714 2083 -CHEMBL623 4449 -CHEMBL780 3053 -CHEMBL427 4033 -CHEMBL1200689 145068 -CHEMBL1747 36294 -CHEMBL2304327 49863538 -CHEMBL24646 4823 -CHEMBL403 130313 -CHEMBL44354 5481173 -CHEMBL3039525 16118392 -CHEMBL1908841 149436 -CHEMBL398707 5284570 -CHEMBL2104987 70683012 -CHEMBL103 5994 -CHEMBL1059 5486971 -CHEMBL1029 51634 -CHEMBL1236970 32051 -CHEMBL696 3291 -CHEMBL1428 4497 -CHEMBL1766 5311067 -CHEMBL1774461 24822371 -CHEMBL2079587 25249 -CHEMBL1531 6917715 -CHEMBL566752 11501341 -CHEMBL1726 4499 -CHEMBL1479 28417 -CHEMBL980 3516 -CHEMBL1423 16362 -CHEMBL301523 6140 -CHEMBL485696 5343 -CHEMBL1525826 9047 -CHEMBL573 938 -CHEMBL493982 10077130 -CHEMBL6966 2520 -CHEMBL175691 6451164 -CHEMBL443 5329 -CHEMBL815 5280363 -CHEMBL1201864 68861 -CHEMBL421 5353980 -CHEMBL1460 50599 -CHEMBL1668 5280954 -CHEMBL501849 24873435 -CHEMBL1187846 11968014 -CHEMBL951 7458 -CHEMBL1377 6178111 -CHEMBL600 12035 -CHEMBL1200454 110632 -CHEMBL1200807 9568628 -CHEMBL9225 192197 -CHEMBL531 47811 -CHEMBL925 6057 -CHEMBL960 3899 -CHEMBL61006 4786 -CHEMBL1577 4121 -CHEMBL1071 4614 -CHEMBL1605 5282242 -CHEMBL1201270 4101 -CHEMBL1289 3561 -CHEMBL3182355 25015 -CHEMBL1508 146570 -CHEMBL205596 221493 -CHEMBL144673 3607 -CHEMBL1480987 2893 -CHEMBL1688530 16136912 -CHEMBL697 6476 -CHEMBL1571 13769 -CHEMBL945 16231 -CHEMBL847 4612 -CHEMBL710 57363 -CHEMBL1206440 7533 -CHEMBL231068 6918248 -CHEMBL1738 71384 -CHEMBL1450 74989 -CHEMBL1108 3168 -CHEMBL1121 9833444 -CHEMBL184 2022 -CHEMBL15770 1548887 -CHEMBL1489254 5541 -CHEMBL1027 60648 -CHEMBL222813 60855 -CHEMBL2104404 6918632 -CHEMBL2104624 3032307 -CHEMBL1584 19675 -CHEMBL35 3440 -CHEMBL489326 44565117 -CHEMBL305380 2475 -CHEMBL1346 444031 -CHEMBL457 3463 -CHEMBL1065 9681 -CHEMBL2103872 71457955 -CHEMBL1200936 22502 -CHEMBL594 3219 -CHEMBL1213353 20749 -CHEMBL668 4976 -CHEMBL19019 5360515 -CHEMBL2096623 56928188 -CHEMBL47050 33630 -CHEMBL116 65016 -CHEMBL1293 5280489 -CHEMBL1200790 4162 -CHEMBL1055 2732 -CHEMBL1963681 9869929 -CHEMBL608533 9829523 -CHEMBL1201201 10836 -CHEMBL776 4086 -CHEMBL2106357 10617 -CHEMBL1200584 23690428 -CHEMBL15891 727 -CHEMBL1587 4870 -CHEMBL498 2727 -CHEMBL84158 5467 -CHEMBL367149 445580 -CHEMBL346977 3089 -CHEMBL506247 16129778 -CHEMBL1430 5852 -CHEMBL51149 1234 -CHEMBL32573 5524 -CHEMBL3137327 76322221 -CHEMBL1201780 121396 -CHEMBL1371412 72157 -CHEMBL85 5073 -CHEMBL95855 9507 -CHEMBL728 4917 -CHEMBL1525287 5859 -CHEMBL2040682 6918155 -CHEMBL1161632 767 -CHEMBL89598 5665 -CHEMBL870 2088 -CHEMBL159 13342 -CHEMBL2106830 4854 -CHEMBL3039567 73357346 -CHEMBL18 3295 -CHEMBL295698 47576 -CHEMBL686 4044 -CHEMBL1524273 4806 -CHEMBL1201046 43507 -CHEMBL255863 644241 -CHEMBL1865135 5476 -CHEMBL2105567 21743 -CHEMBL502 3152 -CHEMBL1481 3476 -CHEMBL1201753 5282452 -CHEMBL184618 8378 -CHEMBL1234579 948 -CHEMBL474579 53025 -CHEMBL1214124 9924495 -CHEMBL564 4926 -CHEMBL680 51039 -CHEMBL290960 6842999 -CHEMBL446 5327 -CHEMBL569 4914 -CHEMBL1290 37464 -CHEMBL97137 9570438 -CHEMBL452076 5282138 -CHEMBL1004 3162 -CHEMBL1200963 5311027 -CHEMBL1201189 119432 -CHEMBL677 164739 -CHEMBL1504 6436 -CHEMBL1682 5780 -CHEMBL152231 2169 -CHEMBL2110725 10629256 -CHEMBL1079 5487 -CHEMBL1774055 25099184 -CHEMBL2103855 25025298 -CHEMBL1444 3902 -CHEMBL1201193 92253 -CHEMBL1148 41781 -CHEMBL905 5078 -CHEMBL229128 4059 -CHEMBL4 4583 -CHEMBL1738797 49806720 -CHEMBL575060 33032 -CHEMBL2103774 444008 -CHEMBL1201227 2911 -CHEMBL3397300 57390074 -CHEMBL2105041 21878 -CHEMBL719 446596 -CHEMBL43452 134780 -CHEMBL1730601 54679224 -CHEMBL2111112 9930049 -CHEMBL2042122 15950376 -CHEMBL1201192 9690109 -CHEMBL1110 2099 -CHEMBL1201264 4097 -CHEMBL1201236 34359 -CHEMBL16694 54680692 -CHEMBL1908370 11567473 -CHEMBL192 5212 -CHEMBL398440 2723 -CHEMBL130 5959 -CHEMBL1328219 91734 -CHEMBL1237046 54682938 -CHEMBL1182247 16958 -CHEMBL2048484 24812758 -CHEMBL1200866 9873 -CHEMBL1484 4474 -CHEMBL1175 60835 -CHEMBL504 679 -CHEMBL2104700 4249 -CHEMBL1454 26879 -CHEMBL1297 3342 -CHEMBL1589 1989 -CHEMBL1201229 5966 -CHEMBL2103837 11001318 -CHEMBL1123 3042 -CHEMBL3707372 122197547 -CHEMBL1086440 50248 -CHEMBL1189679 6337614 -CHEMBL655 4192 -CHEMBL869 5447130 -CHEMBL43064 1547484 -CHEMBL3301675 16158208 -CHEMBL1621 115237 -CHEMBL2105711 46908928 -CHEMBL1257753 204103 -CHEMBL1200370 7187 -CHEMBL471 5253 -CHEMBL674 449381 -CHEMBL2110646 656603 -CHEMBL1201352 5311399 -CHEMBL1435 33255 -CHEMBL1201208 4762 -CHEMBL1201283 49800004 -CHEMBL1515611 2447 -CHEMBL1200338 55329 -CHEMBL404215 4904 -CHEMBL1475 5585 -CHEMBL71 2726 -CHEMBL37853 5311053 -CHEMBL1200761 11289 -CHEMBL222645 21319 -CHEMBL2111107 70693550 -CHEMBL488 2145 -CHEMBL473417 24776445 -CHEMBL1242 4756 -CHEMBL2110862 10610 -CHEMBL1208 2762 -CHEMBL908 667467 -CHEMBL127487 10839 -CHEMBL1359 150610 -CHEMBL1237044 5523 -CHEMBL2105128 8992 -CHEMBL641 54841 -CHEMBL1443577 6634 -CHEMBL1201891 189821 -CHEMBL640 4913 -CHEMBL703 5314 -CHEMBL498847 71815 -CHEMBL503 53232 -CHEMBL1200937 6323289 -CHEMBL8659 445639 -CHEMBL431 5311447 -CHEMBL3545184 71511839 -CHEMBL2096648 636363 -CHEMBL2106411 65667 -CHEMBL1457 5284569 -CHEMBL893 18381 -CHEMBL2028850 6918173 -CHEMBL2107720 3824 -CHEMBL548 5280360 -CHEMBL1201774 44257 -CHEMBL484 60172 -CHEMBL1397 468595 -CHEMBL1200547 168060 -CHEMBL1201404 441335 -CHEMBL19490 5733 -CHEMBL221886 36119 -CHEMBL611 5401 -CHEMBL408513 6918638 -CHEMBL1221 5318 -CHEMBL2105131 4047 -CHEMBL2110922 71927 -CHEMBL891 6098 -CHEMBL1601 43594 -CHEMBL1697845 5858 -CHEMBL644 5584 -CHEMBL1482 2370 -CHEMBL376488 5388906 -CHEMBL439849 6918314 -CHEMBL1201309 49800011 -CHEMBL502384 5799 -CHEMBL2205250 71454116 -CHEMBL2104391 16137271 -CHEMBL297302 16363 -CHEMBL940 3446 -CHEMBL860 667477 -CHEMBL1672 6335986 -CHEMBL757 9904 -CHEMBL87385 5372683 -CHEMBL1532 6307 -CHEMBL9967 4848 -CHEMBL1615784 4274 -CHEMBL3426621 9863672 -CHEMBL2107192 70697657 -CHEMBL1583 441397 -CHEMBL1201173 247839 -CHEMBL363295 23480 -CHEMBL221959 9926791 -CHEMBL649 39147 -CHEMBL453 5344 -CHEMBL1102 3487 -CHEMBL647 2216 -CHEMBL3621988 12136798 -CHEMBL2103758 70691388 -CHEMBL1575 259331 -CHEMBL1234004 28486 -CHEMBL853 24066 -CHEMBL361812 123879 -CHEMBL1201225 807 -CHEMBL1763 3647 -CHEMBL1021 151075 -CHEMBL1615438 54385 -CHEMBL1527608 50942 -CHEMBL1585 4842 -CHEMBL1467 2094 -CHEMBL930 5961 -CHEMBL2095207 14989482 -CHEMBL1257 3226 -CHEMBL1395 6010 -CHEMBL290916 4021 -CHEMBL458769 6034 -CHEMBL1201197 3086677 -CHEMBL3736045 447043 -CHEMBL1908391 10074640 -CHEMBL2103873 16004692 -CHEMBL1614637 272833 -CHEMBL2106347 6607 -CHEMBL803 6253 -CHEMBL1311 27661 -CHEMBL556 2973 -CHEMBL679 5816 -CHEMBL460026 446284 -CHEMBL900 4601 -CHEMBL2110746 10764 -CHEMBL267744 38409 -CHEMBL1568 657237 -CHEMBL1358 104741 -CHEMBL1687 9917490 -CHEMBL2107215 5752 -CHEMBL344159 216237 -CHEMBL2105224 5311356 -CHEMBL1164729 134018 -CHEMBL535 5329102 -CHEMBL1201204 636405 -CHEMBL1200969 6918296 -CHEMBL865 119607 -CHEMBL1200932 65492 -CHEMBL2103822 10220503 -CHEMBL635 5865 -CHEMBL163672 12604 -CHEMBL305660 3191 -CHEMBL57 4463 -CHEMBL1689063 9835049 -CHEMBL27 4946 -CHEMBL1194666 7029 -CHEMBL1289601 9823820 -CHEMBL1589793 12450 -CHEMBL926 36811 -CHEMBL57242 9825285 -CHEMBL461101 25181561 -CHEMBL894 444 -CHEMBL122 5090 -CHEMBL2105317 9886190 -CHEMBL1364 1054 -CHEMBL1296 43708 -CHEMBL1566956 5597 -CHEMBL1897362 9454 -CHEMBL22108 21855 -CHEMBL1987462 2292 -CHEMBL1419 5210 -CHEMBL1382 443879 -CHEMBL1040 5283731 -CHEMBL2104790 6257 -CHEMBL1136 3002190 -CHEMBL1576 3284 -CHEMBL1201392 16490 -CHEMBL1374379 7699 -CHEMBL1149 10917 -CHEMBL1546 3651 -CHEMBL1064 54454 -CHEMBL2103875 11707110 -CHEMBL1750 119182 -CHEMBL428647 36314 -CHEMBL141446 9844 -CHEMBL22 5578 -CHEMBL877 5526 -CHEMBL1201199 657181 -CHEMBL503565 5284636 -CHEMBL1201356 8226 -CHEMBL1213252 2809 -CHEMBL1437 439260 -CHEMBL430 9571107 -CHEMBL1648 3784 -CHEMBL93 60490 -CHEMBL30008 941361 -CHEMBL42710 3314 -CHEMBL973 54684141 -CHEMBL632 9782 -CHEMBL1200472 4999 -CHEMBL704 4075 -CHEMBL13341 4496 -CHEMBL524004 4057 -CHEMBL2105570 65858 -CHEMBL7413 9034 -CHEMBL3736368 6445788 -CHEMBL301265 119570 -CHEMBL1046 564 -CHEMBL272080 3037209 -CHEMBL916 60947 -CHEMBL1709464 40113 -CHEMBL1096 2161 -CHEMBL72 2995 -CHEMBL1488 6194 -CHEMBL1201342 4167 -CHEMBL1324 4659569 -CHEMBL1396 170361 -CHEMBL1908355 54683953 -CHEMBL1200436 5878 -CHEMBL1025 5936 -CHEMBL1200438 5482 -CHEMBL1237132 153994 -CHEMBL127508 15723 -CHEMBL363449 4410 -CHEMBL746 50294 -CHEMBL1024 3690 -CHEMBL1372341 4846 -CHEMBL1594 6613 -CHEMBL541 243 -CHEMBL1615775 27304 -CHEMBL1201764 6918558 -CHEMBL25146 2136 -CHEMBL516 2913 -CHEMBL397420 54676537 -CHEMBL61593 2910 -CHEMBL1362 28179 -CHEMBL1582 6011 -CHEMBL1547 1130 -CHEMBL153479 2132 -CHEMBL629 2160 -CHEMBL14309 204163 -CHEMBL66092 54677946 -CHEMBL1819440 14257660 -CHEMBL1201291 3742 -CHEMBL1201012 15209 -CHEMBL532 12560 -CHEMBL2040681 17754356 -CHEMBL1413 2749 -CHEMBL1328 9853654 -CHEMBL1201256 5577 -CHEMBL507674 44187 -CHEMBL478120 60651 -CHEMBL1098 2474 -CHEMBL505 2725 -CHEMBL376140 54686904 -CHEMBL1201358 2341 -CHEMBL2216870 11625818 -CHEMBL918 4753 -CHEMBL187709 6536 -CHEMBL1213490 49863499 -CHEMBL2105720 53465279 -CHEMBL27193 134044 -CHEMBL1200673 166973 -CHEMBL515 2708 -CHEMBL1169 4649 -CHEMBL87563 3447 -CHEMBL285674 3261 -CHEMBL517 3114 -CHEMBL338802 5324 -CHEMBL136737 39214 -CHEMBL1510 77993 -CHEMBL599 54677470 -CHEMBL30 2756 -CHEMBL585 5546 -CHEMBL266481 44278361 -CHEMBL956 5359 -CHEMBL2096649 9915879 -CHEMBL807 4054 -CHEMBL1171086 24827068 -CHEMBL1201863 9578005 -CHEMBL459 38853 -CHEMBL297362 5707 -CHEMBL409803 10351092 -CHEMBL2048028 11965427 -CHEMBL128 5358 -CHEMBL2106324 21678311 -CHEMBL998 3957 -CHEMBL113313 216208 -CHEMBL496 3598 -CHEMBL91 4189 -CHEMBL101253 151194 -CHEMBL1722501 4994 -CHEMBL276568 5281006 -CHEMBL750 11967800 -CHEMBL858 6049 -CHEMBL841 3955 -CHEMBL225071 104758 -CHEMBL481 60838 -CHEMBL1201368 72022 -CHEMBL1200802 8275 -CHEMBL54661 5281881 -CHEMBL376359 11450633 -CHEMBL513 2578 -CHEMBL198362 9875401 -CHEMBL562318 11581936 -CHEMBL299175 5581 -CHEMBL1863513 6918670 -CHEMBL1200709 688020 -CHEMBL27769 10635 -CHEMBL317052 219024 -CHEMBL56337 1549120 -CHEMBL1231871 280 -CHEMBL1200623 13765 -CHEMBL1085 17676 -CHEMBL1184360 8708 -CHEMBL191 3961 -CHEMBL692 753 -CHEMBL2146883 16222096 -CHEMBL614 1046 -CHEMBL1201255 25077993 -CHEMBL1473 444036 -CHEMBL1269025 10280735 -CHEMBL456 3278 -CHEMBL1685 3033053 -CHEMBL2107254 2816 -CHEMBL3343679 10176142 -CHEMBL914 3348 -CHEMBL1084926 25242324 -CHEMBL2103827 92974 -CHEMBL1378 5440 -CHEMBL2110809 10240 -CHEMBL1513 3749 -CHEMBL231779 10182969 -CHEMBL177756 3383 -CHEMBL2374220 67505836 -CHEMBL294199 1548943 -CHEMBL222559 54682461 -CHEMBL2103784 16133802 -CHEMBL407135 44450615 -CHEMBL1200368 6957673 -CHEMBL601773 107715 -CHEMBL1765291 5720 -CHEMBL181 3019 -CHEMBL1201247 5311128 -CHEMBL666 3415 -CHEMBL1235508 5313082 -CHEMBL13 4171 -CHEMBL1908360 6442177 -CHEMBL468 5426 -CHEMBL887 3052776 -CHEMBL1755 151171 -CHEMBL1420 5353894 -CHEMBL902 5702160 -CHEMBL661 2118 -CHEMBL1469 4775 -CHEMBL95 1935 -CHEMBL1189432 5281071 -CHEMBL1201054 66254 -CHEMBL2103877 489181 -CHEMBL1581 107807 -CHEMBL530 36273 -CHEMBL1566 444254 -CHEMBL1422 4369359 -CHEMBL1474900 4593 -CHEMBL262777 14969 -CHEMBL1752 3182 -CHEMBL850 60464 -CHEMBL420 5702063 -CHEMBL1163 148192 -CHEMBL19224 4680 -CHEMBL991 18283 -CHEMBL707 3157 -CHEMBL1623992 4839 -CHEMBL1407943 2782 -CHEMBL416956 4046 -CHEMBL1620144 4990 -CHEMBL373081 3037206 -CHEMBL1337 115355 -CHEMBL188185 2472 -CHEMBL70927 2972 -CHEMBL1200357 441242 -CHEMBL521 3672 -CHEMBL554 208908 -CHEMBL1201327 6806 -CHEMBL1201406 14687 -CHEMBL240597 10133 -CHEMBL457547 477468 -CHEMBL861 4060 -CHEMBL1214 20824 -CHEMBL178803 125564 -CHEMBL1109 5335 -CHEMBL1168 5362129 -CHEMBL493682 5311064 -CHEMBL473 71329 -CHEMBL1201748 9854073 -CHEMBL367463 4888 -CHEMBL113 2519 -CHEMBL583042 11634725 -CHEMBL1477 446156 -CHEMBL278398 11291 -CHEMBL2051960 5360410 -CHEMBL799 2754 -CHEMBL1572 441306 -CHEMBL1201293 71158 -CHEMBL24828 3081361 -CHEMBL1318150 71961 -CHEMBL3039508 56837137 -CHEMBL110458 176077 -CHEMBL1201365 5464096 -CHEMBL2111164 11499 -CHEMBL154 156391 -CHEMBL698 5411 -CHEMBL1200649 5388937 -CHEMBL6273 3357 -CHEMBL127865 10782 -CHEMBL254468 450601 -CHEMBL438 5325 -CHEMBL2368925 3033818 -CHEMBL1051 5311221 -CHEMBL1201354 20299 -CHEMBL1201242 52195 -CHEMBL1623 4034 -CHEMBL1162144 1060 -CHEMBL500 4828 -CHEMBL1463 3366 -CHEMBL434 3779 -CHEMBL1201380 71415 -CHEMBL1385514 5388959 -CHEMBL1201244 441290 -CHEMBL82970 6872 -CHEMBL1619758 8367 -CHEMBL2110802 11482 -CHEMBL1251 16130957 -CHEMBL32 152946 -CHEMBL607400 9837243 -CHEMBL1767408 148196 -CHEMBL13280 3380 -CHEMBL465 16078 -CHEMBL700 5336 -CHEMBL22097 13314 -CHEMBL1697840 3032445 -CHEMBL92 148124 -CHEMBL439 5215 -CHEMBL258918 15020 -CHEMBL592943 5480 -CHEMBL1200574 5234 -CHEMBL656 5284603 -CHEMBL1201341 10548 -CHEMBL1201148 5284587 -CHEMBL267548 4971 -CHEMBL1515 1349907 -CHEMBL1522 969472 -CHEMBL695 5576 -CHEMBL61946 6472 -CHEMBL924 68740 -CHEMBL1266 5419 -CHEMBL1200733 42113 -CHEMBL1194325 11434515 -CHEMBL119 5583 -CHEMBL575 6087 -CHEMBL1090 21704 -CHEMBL1595 5284543 -CHEMBL13209 4506 -CHEMBL358150 65777 -CHEMBL1201340 6127 -CHEMBL572964 1118 -CHEMBL167731 134019 -CHEMBL1908316 5895404 -CHEMBL819 6196 -CHEMBL506871 11960529 -CHEMBL976 4891 -CHEMBL1140 936 -CHEMBL1200471 3005837 -CHEMBL1197792 10304 -CHEMBL384467 5743 -CHEMBL15721 6478035 -CHEMBL1295 47472 -CHEMBL110 31593 -CHEMBL386051 447077 -CHEMBL1282 57469 -CHEMBL1586 20469 -CHEMBL1363 3083544 -CHEMBL499 33624 -CHEMBL1380 441300 -CHEMBL112 1983 -CHEMBL316004 32593 -CHEMBL806 3397 -CHEMBL325109 2780 -CHEMBL11 3696 -CHEMBL790 9552079 -CHEMBL1731 656511 -CHEMBL61 10607 -CHEMBL1323 213039 -CHEMBL1200851 10429215 -CHEMBL502097 44564107 -CHEMBL1657 5381 -CHEMBL99946 6917779 -CHEMBL447 5193 -CHEMBL161 5479530 -CHEMBL1388 7638 -CHEMBL1201237 39468 -CHEMBL1946170 11167602 -CHEMBL502896 34001 -CHEMBL52939 5160 -CHEMBL463 2148 -CHEMBL444633 6323490 -CHEMBL81697 54685524 -CHEMBL2110651 5473 -CHEMBL1069 60846 -CHEMBL826 3229 -CHEMBL1206211 24749 -CHEMBL182 3454 -CHEMBL2104994 178039 -CHEMBL1200559 612 -CHEMBL1200424 164209 -CHEMBL1760 5403 -CHEMBL405 3007 -CHEMBL1555183 134669 -CHEMBL20 1986 -CHEMBL628 4740 -CHEMBL1201112 3011155 -CHEMBL602 6058 -CHEMBL1186579 5361918 -CHEMBL607710 2724 -CHEMBL525076 16130199 -CHEMBL1201216 3033538 -CHEMBL1238 2266 -CHEMBL1201300 3737 -CHEMBL511 4992 -CHEMBL617 6024 -CHEMBL407 3373 -CHEMBL1754 3156 -CHEMBL1321 4915 -CHEMBL1276010 5634 -CHEMBL1753 446598 -CHEMBL1234886 977 -CHEMBL44884 14052 -CHEMBL1200455 3730 -CHEMBL117287 3052762 -CHEMBL582 3776 -CHEMBL2111038 20294 -CHEMBL1256 3763 -CHEMBL1087 4062 -CHEMBL88 2907 -CHEMBL1453 4369270 -CHEMBL2220442 5353627 -CHEMBL404 123630 -CHEMBL856 4909 -CHEMBL1111 6918493 -CHEMBL2146121 5460341 -CHEMBL274323 5960 -CHEMBL395110 6075 -CHEMBL3391662 45110509 -CHEMBL499808 2826718 -CHEMBL1485 6322 -CHEMBL2104426 70685100 -CHEMBL1200686 6509979 -CHEMBL70 5288826 -CHEMBL111861 5867 -CHEMBL1545 394397 -CHEMBL1262 5353853 -CHEMBL3545062 67683363 -CHEMBL1201185 6918011 -CHEMBL517712 174174 -CHEMBL1570 5351 -CHEMBL885 60877 -CHEMBL1201387 5464097 -CHEMBL1255800 10034073 -CHEMBL437 5340 -CHEMBL3353410 71496458 -CHEMBL1200522 51040 -CHEMBL593 5625 -CHEMBL163 392622 -CHEMBL527 54676228 -CHEMBL479 5452 -CHEMBL672 3339 -CHEMBL222863 439501 -CHEMBL1098319 598 diff --git a/code/reasoningtool/kg-construction/request_cache_helper.py b/code/reasoningtool/kg-construction/request_cache_helper.py deleted file mode 100644 index 2011bb7d6..000000000 --- a/code/reasoningtool/kg-construction/request_cache_helper.py +++ /dev/null @@ -1,65 +0,0 @@ -import requests -import requests_cache -import hashlib -import time -import re, os - -_DEFAULT_HEADERS = requests.utils.default_headers() - -#requests_cache.install_cache("orangeboard") -# specifiy the path of orangeboard database -tmppath = re.compile(".*/RTX/") -dbpath = tmppath.search(os.path.realpath(__file__)).group(0) + 'data/orangeboard' -requests_cache.install_cache(dbpath) - -def get_timestamp(url): - """ - get the timestamp of an HTTP get request - :param url: the URL of the request - :return the timestamp of the request, of None if the request is not in the cache - """ - def _to_bytes(s, encoding='utf-8'): - return bytes(s, encoding) - - def create_key(request): - url, body = request.url, request.body - key = hashlib.sha256() - key.update(_to_bytes(request.method.upper())) - key.update(_to_bytes(url)) - if request.body: - key.update(_to_bytes(body)) - return key.hexdigest() - - def url_to_key(url): - session = requests.Session() - return create_key(session.prepare_request(requests.Request('GET', url))) - - # get the cache from request_cache - results = requests_cache.get_cache() - # create the key according to the url - key_url = url_to_key(url) - # results.responses is a dictionary and follows the following format: - # { 'key': (requests_cache.backends objects, timestamp), ..., } - # for example: '4c28e3e4a61e325e520d9c02e0caee99e30c00951a223e67': - # (, - # datetime.datetime(2018, 10, 16, 0, 19, 8, 130204)), - if key_url in results.responses: - back_obj, timestamp = results.responses[key_url] - return timestamp - return None - - -if __name__ == '__main__': - - url = 'http://cohd.io/api/association/obsExpRatio?dataset_id=1&concept_id_1=192855&domain=Procedure' - url1 = 'http://cohd.io/api/association/obsExpRatio?dataset_id=1&concept_id_1=192853&domain=Procedure' - url2 = 'http://cohd.io/api/association/obsExpRatio?dataset_id=1&concept_id_1=192854&domain=Procedure' - - res = requests.get(url) - res = requests.get(url1) - - t = time.time() - print(get_timestamp(url)) - print(get_timestamp(url1)) - print(get_timestamp(url2)) - print("Time used: ", time.time() - t)