diff --git a/notebooks/Tutorial.ipynb b/notebooks/Tutorial.ipynb
index 1f397a7..3d27b06 100644
--- a/notebooks/Tutorial.ipynb
+++ b/notebooks/Tutorial.ipynb
@@ -13,8 +13,23 @@
"cell_type": "code",
"execution_count": 1,
"id": "d5507142",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2021-12-06T09:09:20.179225Z",
+ "start_time": "2021-12-06T09:07:56.474635Z"
+ }
+ },
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Collecting all CT data: 100%|████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1.29it/s]\n",
+ "Parsing and importing clinical trials data: 100%|███████████████████████████████| 1600/1600 [00:00<00:00, 69571.70it/s]\n",
+ "Parsing and importing drug data: 100%|█████████████████████████████████████████| 10000/10000 [00:04<00:00, 2280.53it/s]\n",
+ "Parsing and importing drug data: 100%|███████████████████████████████████████████| 3580/3580 [00:01<00:00, 2682.83it/s]\n"
+ ]
+ },
{
"data": {
"text/html": [
@@ -36,95 +51,89 @@
" \n",
" \n",
" \n",
" \n",
" \n",
- " id \n",
- " pmod_type \n",
- " target_bel \n",
- " target_symbol \n",
- " target_type \n",
- " relation_type \n",
- " interactor_bel \n",
- " interactor_name \n",
- " interactor_type \n",
+ " target_species \n",
" pmid \n",
" pmc \n",
- " target_species \n",
+ " interactor_type \n",
+ " interactor_name \n",
+ " interactor_bel \n",
+ " relation_type \n",
+ " target_bel \n",
+ " target_type \n",
+ " target_symbol \n",
+ " pmod_type \n",
"
967 rows × 12 columns
\n", + "1152 rows × 11 columns
\n", "" ], "text/plain": [ - " id pmod_type target_bel \\\n", - "0 1 pho p(HGNC:\"MAPT\",loc(MESHA:\"Cerebrospinal Fluid\")... \n", - "1 4 pho p(HGNC:\"MAPT\",pmod(Ph,S,357)) \n", - "2 5 pho p(HGNC:\"MAPT\",pmod(Ph,S,357)) \n", - "3 6 pho p(HGNC:\"MAPT\",pmod(Ph,S,199)) \n", - "4 7 pho p(HGNC:\"MAPT\",pmod(Ph,S,199)) \n", - ".. ... ... ... \n", - "962 1147 pho p(HGNC:\"MAPT\",pmod(Ph,T)) \n", - "963 1148 pho p(HGNC:\"MAPT\",pmod(Ph,T)) \n", - "964 1149 pho p(HGNC:\"MAPT\",pmod(Ph,T)) \n", - "965 1150 pho p(HGNC:\"MAPT\",loc(CONSO:\"microtubule-binding r... \n", - "966 1151 pho p(HGNC:\"MAPT\",pmod(Ph,S,369)) \n", + " target_species pmid pmc interactor_type interactor_name \\\n", + "0 9606 28768545 PMC5541421 protein TREM2 \n", + "1 10116 24270208 None protein Dkk1 \n", + "2 0 14642273 None activity None \n", + "3 9606 17389597 None activity None \n", + "4 9606 17360711 None activity None \n", + "... ... ... ... ... ... \n", + "1147 9606 23362255 PMC3597833 protein PIN1 \n", + "1148 9606 23362255 PMC3597833 activity None \n", + "1149 9606 29661268 PMC6033068 protein MAPT \n", + "1150 9606 30935091 PMC6480207 protein HDAC6 \n", + "1151 10090 22419736 None protein Ak1 \n", "\n", - " target_symbol target_type relation_type \\\n", - "0 MAPT protein increases \n", - "1 MAPT protein increases \n", - "2 MAPT protein increases \n", - "3 MAPT protein increases \n", - "4 MAPT protein increases \n", - ".. ... ... ... \n", - "962 MAPT protein increases \n", - "963 MAPT protein decreases \n", - "964 MAPT protein decreases \n", - "965 MAPT protein decreases \n", - "966 MAPT protein increases \n", + " interactor_bel relation_type \\\n", + "0 p(HGNC:\"TREM2\",var(\"p.Arg47His\")) increases \n", + "1 p(MGI:\"Dkk1\") increases \n", + "2 act(p(MGI:\"Cdk5\"),ma(kin)) directly_increases \n", + "3 act(p(HGNC:\"GSK3B\"),ma(kin)) increases \n", + "4 act(p(HGNC:\"GSK3B\"),ma(kin)) increases \n", + "... ... ... \n", + "1147 p(HGNC:\"PIN1\") decreases \n", + "1148 act(p(HGNC:\"PPP2CA\"),ma(phos)) decreases \n", + "1149 p(HGNC:\"MAPT\",loc(CONSO:\"KXGS motif\"),pmod(Ac,... decreases \n", + "1150 p(HGNC:\"HDAC6\") increases \n", + "1151 p(MGI:\"Ak1\") increases \n", "\n", - " interactor_bel interactor_name \\\n", - "0 p(HGNC:\"TREM2\",var(\"p.Arg47His\")) TREM2 \n", - "1 act(p(HGNC:\"GSK3B\"),ma(kin)) None \n", - "2 act(p(HGNC:\"GSK3B\"),ma(kin)) None \n", - "3 a(MESHC:\"calyculin A\") calyculin A \n", - "4 act(p(HGNC:\"CDK5\"),ma(kin)) None \n", - ".. ... ... \n", - "962 act(p(HGNC:\"CDK5\"),ma(kin)) None \n", - "963 p(HGNC:\"PIN1\") PIN1 \n", - "964 act(p(HGNC:\"PPP2CA\"),ma(phos)) None \n", - "965 p(HGNC:\"MAPT\",loc(CONSO:\"KXGS motif\"),pmod(Ac,... MAPT \n", - "966 p(HGNC:\"HDAC6\") HDAC6 \n", + " target_bel target_type \\\n", + "0 p(HGNC:\"MAPT\",loc(MESHA:\"Cerebrospinal Fluid\")... protein \n", + "1 p(RGD:\"Mapt\",pmod(Ph,S,199)) protein \n", + "2 p(MGI:\"Mapt\",pmod(Ph,S,239)) protein \n", + "3 p(HGNC:\"MAPT\",pmod(Ph,S,357)) protein \n", + "4 p(HGNC:\"MAPT\",pmod(Ph,S,357)) protein \n", + "... ... ... \n", + "1147 p(HGNC:\"MAPT\",pmod(Ph,T)) protein \n", + "1148 p(HGNC:\"MAPT\",pmod(Ph,T)) protein \n", + "1149 p(HGNC:\"MAPT\",loc(CONSO:\"microtubule-binding r... protein \n", + "1150 p(HGNC:\"MAPT\",pmod(Ph,S,369)) protein \n", + "1151 p(MGI:\"Mapt\",pmod(Ph,T,235)) protein \n", "\n", - " interactor_type pmid pmc target_species \n", - "0 protein 28768545 PMC5541421 9606 \n", - "1 activity 17389597 None 9606 \n", - "2 activity 17360711 None 9606 \n", - "3 abundance 21297267 None 9606 \n", - "4 activity 12387894 None 9606 \n", - ".. ... ... ... ... \n", - "962 activity 23362255 PMC3597833 9606 \n", - "963 protein 23362255 PMC3597833 9606 \n", - "964 activity 23362255 PMC3597833 9606 \n", - "965 protein 29661268 PMC6033068 9606 \n", - "966 protein 30935091 PMC6480207 9606 \n", + " target_symbol pmod_type \n", + "0 MAPT pho \n", + "1 Mapt pho \n", + "2 Mapt pho \n", + "3 MAPT pho \n", + "4 MAPT pho \n", + "... ... ... \n", + "1147 MAPT pho \n", + "1148 MAPT pho \n", + "1149 MAPT pho \n", + "1150 MAPT pho \n", + "1151 Mapt pho \n", "\n", - "[967 rows x 12 columns]" + "[1152 rows x 11 columns]" ] }, "execution_count": 1, @@ -286,6 +289,8 @@ "#!pip install drugintfinder\n", "from drugintfinder.finder import InteractorFinder\n", "\n", + "import pandas as pd\n", + "\n", "# Initialize with base information\n", "finder = InteractorFinder(symbol=\"MAPT\", pmods=[\"pho\"], edge=\"causal\")\n", "\n", @@ -307,7 +312,12 @@ "cell_type": "code", "execution_count": 2, "id": "176a87ce", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2021-12-06T09:14:31.420305Z", + "start_time": "2021-12-06T09:14:10.856319Z" + } + }, "outputs": [ { "data": { @@ -330,149 +340,143 @@ " \n", "57956 rows × 21 columns
\n", + "57956 rows × 20 columns
\n", "" ], "text/plain": [ - " id drug drugbank_id \\\n", - "0 1 N'-(Pyrrolidino[2,1-B]Isoindolin-4-On-8-Yl)-N-... DB04186 \n", - "1 2 1-(3,5-DICHLOROPHENYL)-5-METHYL-1H-1,2,4-TRIAZ... DB07852 \n", - "2 3 N(6)-dimethylallyladenine DB08768 \n", - "3 4 (5E)-2-Amino-5-(2-pyridinylmethylene)-1,3-thia... DB07529 \n", - "4 5 4-{5-[(Z)-(2-IMINO-4-OXO-1,3-THIAZOLIDIN-5-YLI... DB07534 \n", - "... ... ... ... \n", - "57951 57952 Bufexamac DB13346 \n", - "57952 57953 Debio-1347 None \n", - "57953 57954 indirubin-3'-monoxime None \n", - "57954 57955 quercetin None \n", - "57955 57956 GW441756 None \n", + " drug \\\n", + "0 N'-(Pyrrolidino[2,1-B]Isoindolin-4-On-8-Yl)-N-... \n", + "1 1-(3,5-DICHLOROPHENYL)-5-METHYL-1H-1,2,4-TRIAZ... \n", + "2 N(6)-dimethylallyladenine \n", + "3 (5E)-2-Amino-5-(2-pyridinylmethylene)-1,3-thia... \n", + "4 4-{5-[(Z)-(2-IMINO-4-OXO-1,3-THIAZOLIDIN-5-YLI... \n", + "... ... \n", + "54760 Bufexamac \n", + "54761 Debio-1347 \n", + "54762 indirubin-3'-monoxime \n", + "54763 quercetin \n", + "54764 GW441756 \n", "\n", - " chembl_id pubchem_id interactor_type interactor_bel \\\n", - "0 CHEMBL141247 445840.0 protein p(HGNC:\"CDK2\") \n", - "1 None 2763754.0 protein p(HGNC:\"CDK2\") \n", - "2 CHEMBL476189 92180.0 protein p(HGNC:\"CDK2\") \n", - "3 None 46937079.0 protein p(HGNC:\"CDK2\") \n", - "4 CHEMBL233149 5729339.0 protein p(HGNC:\"CDK2\") \n", - "... ... ... ... ... \n", - "57951 CHEMBL94394 2466.0 protein p(HGNC:\"HDAC6\") \n", - "57952 None None protein p(HGNC:\"HDAC6\") \n", - "57953 None None protein p(HGNC:\"HDAC6\") \n", - "57954 None None protein p(HGNC:\"HDAC6\") \n", - "57955 None None protein p(HGNC:\"HDAC6\") \n", + " capsule_interactor_type capsule_interactor_bel interactor_bel \\\n", + "0 NaN NaN p(HGNC:\"CDK2\") \n", + "1 NaN NaN p(HGNC:\"CDK2\") \n", + "2 NaN NaN p(HGNC:\"CDK2\") \n", + "3 NaN NaN p(HGNC:\"CDK2\") \n", + "4 NaN NaN p(HGNC:\"CDK2\") \n", + "... ... ... ... \n", + "54760 protein p(HGNC:\"HDAC6\") p(HGNC:\"HDAC6\") \n", + "54761 protein p(HGNC:\"HDAC6\") p(HGNC:\"HDAC6\") \n", + "54762 protein p(HGNC:\"HDAC6\") p(HGNC:\"HDAC6\") \n", + "54763 protein p(HGNC:\"HDAC6\") p(HGNC:\"HDAC6\") \n", + "54764 protein p(HGNC:\"HDAC6\") p(HGNC:\"HDAC6\") \n", "\n", - " interactor_name capsule_interactor_bel capsule_interactor_type ... \\\n", - "0 CDK2 None None ... \n", - "1 CDK2 None None ... \n", - "2 CDK2 None None ... \n", - "3 CDK2 None None ... \n", - "4 CDK2 None None ... \n", - "... ... ... ... ... \n", - "57951 HDAC6 p(HGNC:\"HDAC6\") protein ... \n", - "57952 HDAC6 p(HGNC:\"HDAC6\") protein ... \n", - "57953 HDAC6 p(HGNC:\"HDAC6\") protein ... \n", - "57954 HDAC6 p(HGNC:\"HDAC6\") protein ... \n", - "57955 HDAC6 p(HGNC:\"HDAC6\") protein ... \n", + " interactor_type interactor_name relation_type \\\n", + "0 protein CDK2 directly_increases \n", + "1 protein CDK2 directly_increases \n", + "2 protein CDK2 directly_increases \n", + "3 protein CDK2 directly_increases \n", + "4 protein CDK2 directly_increases \n", + "... ... ... ... \n", + "54760 protein HDAC6 increases \n", + "54761 protein HDAC6 increases \n", + "54762 protein HDAC6 increases \n", + "54763 protein HDAC6 increases \n", + "54764 protein HDAC6 increases \n", "\n", - " target_bel target_symbol target_type \\\n", - "0 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein \n", - "1 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein \n", - "2 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein \n", - "3 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein \n", - "4 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein \n", - "... ... ... ... \n", - "57951 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein \n", - "57952 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein \n", - "57953 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein \n", - "57954 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein \n", - "57955 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein \n", + " target_bel target_symbol target_type pmid \\\n", + "0 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein 8282104 \n", + "1 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein 8282104 \n", + "2 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein 8282104 \n", + "3 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein 8282104 \n", + "4 p(HGNC:\"MAPT\",pmod(Ph,S,199)) MAPT protein 8282104 \n", + "... ... ... ... ... \n", + "54760 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein 30935091 \n", + "54761 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein 30935091 \n", + "54762 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein 30935091 \n", + "54763 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein 30935091 \n", + "54764 p(HGNC:\"MAPT\",pmod(Ph,S,369)) MAPT protein 30935091 \n", "\n", - " relation_type pmod_type pmid pmc rel_rid \\\n", - "0 directly_increases pho 8282104 None #570:10 \n", - "1 directly_increases pho 8282104 None #570:10 \n", - "2 directly_increases pho 8282104 None #570:10 \n", - "3 directly_increases pho 8282104 None #570:10 \n", - "4 directly_increases pho 8282104 None #570:10 \n", - "... ... ... ... ... ... \n", - "57951 increases pho 30935091 PMC6480207 #557:788 \n", - "57952 increases pho 30935091 PMC6480207 #557:788 \n", - "57953 increases pho 30935091 PMC6480207 #557:788 \n", - "57954 increases pho 30935091 PMC6480207 #557:788 \n", - "57955 increases pho 30935091 PMC6480207 #557:788 \n", + " pmc rel_pub_year rel_rid drug_rel_rid drug_rel_actions \\\n", + "0 None 1994 #570:10 #1898:10624 None \n", + "1 None 1994 #570:10 #1898:10917 None \n", + "2 None 1994 #570:10 #1898:11306 None \n", + "3 None 1994 #570:10 #1898:11332 None \n", + "4 None 1994 #570:10 #1899:10523 None \n", + "... ... ... ... ... ... \n", + "54760 PMC6480207 2019 #557:788 #1900:11395 inhibitor \n", + "54761 PMC6480207 2019 #557:788 #1911:11461 inhibitor \n", + "54762 PMC6480207 2019 #557:788 #1912:11461 inhibitor \n", + "54763 PMC6480207 2019 #557:788 #1913:11461 inhibitor \n", + "54764 PMC6480207 2019 #557:788 #1898:11465 inhibitor \n", "\n", - " drug_rel_rid drug_rel_actions \n", - "0 #1898:10624 None \n", - "1 #1898:10917 None \n", - "2 #1898:11306 None \n", - "3 #1898:11332 None \n", - "4 #1899:10523 None \n", - "... ... ... \n", - "57951 #1900:11395 inhibitor \n", - "57952 #1911:11461 inhibitor \n", - "57953 #1912:11461 inhibitor \n", - "57954 #1913:11461 inhibitor \n", - "57955 #1898:11465 inhibitor \n", + " drugbank_id chembl_id pubchem_id pmod_type \n", + "0 DB04186 CHEMBL141247 445840.0 pho \n", + "1 DB07852 None 2763754.0 pho \n", + "2 DB08768 CHEMBL476189 92180.0 pho \n", + "3 DB07529 None 46937079.0 pho \n", + "4 DB07534 CHEMBL233149 5729339.0 pho \n", + "... ... ... ... ... \n", + "54760 DB13346 CHEMBL94394 2466.0 pho \n", + "54761 None None NaN pho \n", + "54762 None None NaN pho \n", + "54763 None None NaN pho \n", + "54764 None None NaN pho \n", "\n", - "[57956 rows x 21 columns]" + "[57956 rows x 20 columns]" ] }, "execution_count": 2, @@ -732,13 +730,19 @@ "cell_type": "code", "execution_count": 3, "id": "ffcc5bd7", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2021-12-06T09:19:05.740057Z", + "start_time": "2021-12-06T09:14:57.167220Z" + } + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Counting edges: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 881.72it/s]\n" + "Counting BioAssays for targets: 100%|██████████████████████████████████████████████████| 80/80 [01:18<00:00, 1.02it/s]\n", + "Counting edges: 100%|██████████████████████████████████████████████████████████████████| 80/80 [02:48<00:00, 2.11s/it]\n" ] }, { @@ -775,58 +779,58 @@ " \n", "\n", + " | Protein | \n", + "BioAssays per Drug | \n", + "
---|---|---|
70 | \n", + "F2 | \n", + "1106 | \n", + "
64 | \n", + "STAT3 | \n", + "613 | \n", + "
63 | \n", + "RPS6KB1 | \n", + "387 | \n", + "
73 | \n", + "CSNK1D | \n", + "367 | \n", + "
42 | \n", + "GSK3A | \n", + "365 | \n", + "
62 | \n", + "MAPK11 | \n", + "235 | \n", + "
40 | \n", + "MAPK13 | \n", + "207 | \n", + "
33 | \n", + "HDAC6 | \n", + "204 | \n", + "
54 | \n", + "CDK5R1 | \n", + "201 | \n", + "
76 | \n", + "MARK1 | \n", + "188 | \n", + "
\n", + " | Drug | \n", + "Target | \n", + "Synergizes | \n", + "Number of BioAssays for Target | \n", + "Number of Causal Edges for Target | \n", + "Drug Patent Ongoing | \n", + "Generic Version of Drug Available | \n", + "Number of Drug Targets | \n", + "BioAssays per Known Drug Target | \n", + "
---|---|---|---|---|---|---|---|---|---|
802 | \n", + "5-[3-(2-METHOXYPHENYL)-1H-PYRROLO[2,3-B]PYRIDI... | \n", + "ABL1 | \n", + "N/A | \n", + "1970 | \n", + "10 | \n", + "No | \n", + "No | \n", + "1 | \n", + "1970 | \n", + "
713 | \n", + "2-{[(6-OXO-1,6-DIHYDROPYRIDIN-3-YL)METHYL]AMIN... | \n", + "ABL1 | \n", + "N/A | \n", + "1970 | \n", + "10 | \n", + "No | \n", + "No | \n", + "1 | \n", + "1970 | \n", + "
758 | \n", + "Radotinib | \n", + "ABL1 | \n", + "N/A | \n", + "1970 | \n", + "10 | \n", + "No | \n", + "No | \n", + "1 | \n", + "1970 | \n", + "
242 | \n", + "2-amino-5-[3-(1-ethyl-1H-pyrazol-5-yl)-1H-pyrr... | \n", + "ABL1 | \n", + "N/A | \n", + "1970 | \n", + "10 | \n", + "No | \n", + "No | \n", + "1 | \n", + "1970 | \n", + "
612 | \n", + "PD-166326 | \n", + "ABL1 | \n", + "N/A | \n", + "1970 | \n", + "10 | \n", + "No | \n", + "No | \n", + "1 | \n", + "1970 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
296 | \n", + "quercetin | \n", + "CAMK2B | \n", + "N/A | \n", + "181 | \n", + "10 | \n", + "No | \n", + "No | \n", + "N/A | \n", + "0 | \n", + "
289 | \n", + "Pyridoxal phosphate | \n", + "DDC | \n", + "No | \n", + "8 | \n", + "2 | \n", + "No | \n", + "Yes | \n", + "56 | \n", + "0 | \n", + "
777 | \n", + "Polaprezinc | \n", + "IL6 | \n", + "N/A | \n", + "7 | \n", + "99 | \n", + "No | \n", + "No | \n", + "8 | \n", + "0 | \n", + "
279 | \n", + "Foreskin fibroblast (neonatal) | \n", + "IL6 | \n", + "No | \n", + "7 | \n", + "99 | \n", + "No | \n", + "No | \n", + "11 | \n", + "0 | \n", + "
268 | \n", + "quercetin | \n", + "HDAC6 | \n", + "N/A | \n", + "2042 | \n", + "180 | \n", + "No | \n", + "No | \n", + "N/A | \n", + "0 | \n", + "
858 rows × 9 columns
\n", + "