Merge pull request #2255 from RTXteam/issue2254

dkoslicki · web-flow · commit 03862b124fa9 · 2024-03-20T22:19:03.000-04:00
Issue2254
diff --git a/code/ARAX/ARAXQuery/ARAX_query.py b/code/ARAX/ARAXQuery/ARAX_query.py
@@ -1060,10 +1060,10 @@ def main():
             "add_qnode(name=acetaminophen, key=n0)",
             "add_qnode(categories=biolink:Protein, key=n1)",
             "add_qedge(subject=n0, object=n1, key=e0)",
-            "expand(edge_key=e0)",
+            "expand(edge_key=e0, kp=infores:rtx-kg2)",
             "overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n0, object_qnode_key=n1)",
             "resultify(ignore_edge_direction=true)",
-            "filter_results(action=limit_number_of_results, max_results=10)",
+            "#filter_results(action=limit_number_of_results, max_results=5)",
             "return(message=true, store=true)",
         ]}}
 
@@ -1838,10 +1838,12 @@ def main():
 
 
     #### Print out the logging stream
-    print(response.show(level=ARAXResponse.DEBUG))
+    #if verbose:
+    #    print(response.show(level=ARAXResponse.DEBUG))
 
     #### Print out the message that came back
-    print(json.dumps(ast.literal_eval(repr(envelope)), sort_keys=True, indent=2))
+    #if verbose:
+    #    print(json.dumps(ast.literal_eval(repr(envelope)), sort_keys=True, indent=2))
 
     #### Other stuff that could be dumped
     #print(json.dumps(message.to_dict(),sort_keys=True,indent=2))
@@ -1858,10 +1860,11 @@ def main():
     #print(f"Essence names in the answers: {[x.essence for x in message.results]}")
     print("Results:")
     for result in message.results:
-        confidence = result.confidence
-        if confidence is None:
-            confidence = 0.0
-        print("  -" + '{:6.3f}'.format(confidence) + f"\t{result.essence}")
+        analysis = result.analyses[0]
+        score = analysis.score
+        if score is None:
+            score = 0.0
+        print("  -" + '{:6.3f}'.format(score) + f"\t{result.essence}")
 
     # print the response id at the bottom for convenience too:
     print(f"Returned response id: {envelope.id}")
diff --git a/code/ARAX/ARAXQuery/ARAX_ranker.py b/code/ARAX/ARAXQuery/ARAX_ranker.py
@@ -168,6 +168,33 @@ def _score_result_graphs_by_networkx_graph_scorer(kg_edge_id_to_edge: Dict[str,
     return nx_graph_scorer(result_graphs_nx)
 
 
+def _break_ties_and_preserve_order(scores, limit=1000):
+    """Return ``scores`` rounded to 3 decimals with ties among the leading entries broken.
+
+    ``scores`` is expected in descending order. Within the first ``limit`` entries, any
+    score not strictly below its (rounded) predecessor is set 0.001 below that predecessor.
+    Entries past ``limit`` are never tie-broken, only capped at their predecessor, so an
+    adjusted head can no longer end up beneath the untouched tail. Results are floored at
+    0 (ties at exactly 0 may therefore remain). The input sequence is not modified.
+    """
+    adjusted_scores = []
+    previous = None  # last value produced, before flooring at 0
+    for position, raw_score in enumerate(scores):
+        # Round before comparing: scores closer together than the rounding step look
+        # distinct unrounded but would collapse into a tie once rounded afterwards.
+        current = round(raw_score, 3)
+        if previous is not None:
+            if position < limit and current >= previous:
+                current = round(previous - 0.001, 3)
+            elif current > previous:
+                current = previous
+        previous = current
+        # make sure no scores are below 0
+        adjusted_scores.append(max(current, 0))
+
+    return adjusted_scores
+
+
 class ARAXRanker:
 
     # #### Constructor
@@ -657,11 +684,12 @@ def aggregate_scores_dmk(self, response):
                                   [_score_networkx_graphs_by_max_flow,
                                    _score_networkx_graphs_by_longest_path,
                                    _score_networkx_graphs_by_frobenius_norm])))
-        #print(ranks_list)
-        #print(float(len(ranks_list)))
+
+
         result_scores = sum(ranks_list)/float(len(ranks_list))
         #print(result_scores)
 
+
         # Replace Inferred Results Score with Probability score calculated by xDTD model
         inferred_qedge_keys = [qedge_key for qedge_key, qedge in message.query_graph.edges.items() 
                                if qedge.knowledge_type == "inferred"]
@@ -699,6 +727,13 @@ def aggregate_scores_dmk(self, response):
 
         # Re-sort the final results
         message.results.sort(key=lambda result: result.analyses[0].score, reverse=True)
+        # break ties and preserve order, round to 3 digits and make sure none are < 0
+        scores_with_ties = [result.analyses[0].score for result in message.results]
+        scores_without_ties = _break_ties_and_preserve_order(scores_with_ties)
+        # reinsert these scores into the results
+        for result, score in zip(message.results, scores_without_ties):
+            result.analyses[0].score = score
+            result.row_data[0] = score
         response.debug("Results have been ranked and sorted")
 
 
diff --git a/code/ARAX/Examples/ARAX_Example1.ipynb b/code/ARAX/Examples/ARAX_Example1.ipynb
@@ -19,10 +19,10 @@
    "outputs": [],
    "source": [
     "# Set the base URL for the ARAX reasoner and its endpoint\n",
-    "endpoint_url = 'https://arax.transltr.io/api/rtx/v1.4/query'\n",
+    "endpoint_url = 'https://arax.transltr.io/api/arax/v1.4/query'\n",
     "\n",
     "# Create a dict of the request, specifying a start previous Message and the list of DSL commands\n",
-    "query = {\"previous_message_processing_plan\": {\"processing_actions\": [\n",
+    "query = {\"message\":{}, \"operations\": {\"actions\": [\n",
     "            \"add_qnode(name=acetaminophen, key=n0)\",\n",
     "            \"add_qnode(categories=biolink:Protein, key=n1)\",\n",
     "            \"add_qedge(subject=n0, object=n1, key=e0)\",\n",
@@ -58,9 +58,9 @@
    "source": [
     "# Unpack respsonse from JSON and display the information log\n",
     "response_dict = response_content.json()\n",
-    "for message in response_dict['log']:\n",
-    "    if message['level'] >= 20:\n",
-    "        print(message['prefix']+message['message'])"
+    "for message in response_dict['logs']:\n",
+    "    if message['level'] == \"INFO\":\n",
+    "        print(message['level']+\": \"+message['message'])"
    ]
   },
   {
diff --git a/code/ARAX/Examples/ARAX_Example2.ipynb b/code/ARAX/Examples/ARAX_Example2.ipynb
@@ -15,27 +15,29 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "pycharm": {
+     "is_executing": true
+    }
+   },
    "outputs": [],
    "source": [
     "# Set the base URL for the ARAX reasoner and its endpoint\n",
-    "endpoint_url = 'https://arax.rtx.ai/api/rtx/v1/query'\n",
+    "endpoint_url = 'https://arax.transltr.io/api/arax/v1.4/query'\n",
     "\n",
     "# Create a dict of the request, specifying the list of DSL commands\n",
-    "query = { \"previous_message_processing_plan\": { \"processing_actions\": [\n",
+    "query = {\"message\":{}, \"operations\": { \"actions\": [\n",
     "            \"add_qnode(name=DOID:14330, key=n00)\",\n",
-    "            \"add_qnode(category=biolink:Protein, is_set=true, key=n01)\",\n",
-    "            \"add_qnode(category=biolink:ChemicalSubstance, key=n02)\",\n",
-    "            \"add_qedge(subject=n00, object=n01, id=e00)\",\n",
-    "            \"add_qedge(subject=n01, object=n02, id=e01, predicate=biolink:physically_interacts_with)\",\n",
-    "            \"expand(edge_key=[e00,e01], kp=ARAX/KG1)\",\n",
+    "            \"add_qnode(categories=biolink:Protein, is_set=true, key=n01)\",\n",
+    "            \"add_qnode(categories=biolink:ChemicalEntity, key=n02)\",\n",
+    "            \"add_qedge(subject=n00, object=n01, key=e00)\",\n",
+    "            \"add_qedge(subject=n01, object=n02, key=e01, predicates=biolink:physically_interacts_with)\",\n",
+    "            \"expand(edge_key=[e00,e01], kp=infores:rtx-kg2)\",\n",
     "            \"overlay(action=compute_jaccard, start_node_key=n00, intermediate_node_key=n01, end_node_key=n02, virtual_relation_label=J1)\",\n",
-    "            \"filter_kg(action=remove_edges_by_attribute, edge_attribute=jaccard_index, direction=below, threshold=.2, remove_connected_nodes=t, qnode_key=n02)\",\n",
-    "            \"filter_kg(action=remove_edges_by_property, edge_property=provided_by, property_value=Pharos)\",\n",
-    "            \"overlay(action=predict_drug_treats_disease, subject_qnode_key=n02, object_qnode_key=n00, virtual_relation_label=P1)\", \n",
+    "            \"filter_kg(action=remove_edges_by_continuous_attribute,edge_attribute=jaccard_index,threshold=0.2,remove_connected_nodes=true,qnode_keys=n02, direction=below)\",\n",
     "            \"resultify(ignore_edge_direction=true)\",\n",
-    "            \"filter_results(action=sort_by_edge_attribute, edge_attribute=probability_drug_treats, direction=descending, max_results=15)\", \n",
-    "            \"return(message=true, store=true)\",\n",
+    "            \"filter_results(action=limit_number_of_results,max_results=50,prune_kg=true)\",\n",
+    "            \"return(message=true, store=true)\"\n",
     "            ] } }"
    ]
   },
@@ -64,9 +66,9 @@
    "source": [
     "# Unpack respsonse from JSON and display the information log\n",
     "response_dict = response_content.json()\n",
-    "for message in response_dict['log']:\n",
-    "    if message['level'] >= 20:\n",
-    "        print(message['prefix']+message['message'])"
+    "for message in response_dict['logs']:\n",
+    "    if message['level'] == \"INFO\":\n",
+    "        print(message['level']+\": \"+message['message'])"
    ]
   },
   {
@@ -80,7 +82,7 @@
     "    print(f\"Data: {response_dict['id']}\")\n",
     "    match = re.search(r'(\\d+)$', response_dict['id'])\n",
     "    if match:\n",
-    "        print(f\"GUI: https://arax.rtx.ai/?m={match.group(1)}\")\n",
+    "        print(f\"GUI: https://arax.transltr.io/?m={match.group(1)}\")\n",
     "else:\n",
     "    print(\"No id was returned in response\")"
    ]
diff --git a/code/ARAX/Examples/ARAX_Example3.ipynb b/code/ARAX/Examples/ARAX_Example3.ipynb
@@ -19,21 +19,23 @@
    "outputs": [],
    "source": [
     "# Set the base URL for the ARAX reasoner and its endpoint\n",
-    "endpoint_url = 'https://arax.rtx.ai/api/rtx/v1/query'\n",
+    "endpoint_url = 'https://arax.transltr.io/api/arax/v1.4/query'\n",
     "\n",
     "# Create a dict of the request, specifying the list of DSL commands\n",
-    "query = {\"previous_message_processing_plan\": {\"processing_actions\": [\n",
-    "            \"add_qnode(name=DOID:9406, key=n00)\",\n",
-    "            \"add_qnode(type=biolink:ChemicalSubstance, is_set=true, key=n01)\",\n",
-    "            \"add_qnode(type=biolink:Protein, key=n02)\",\n",
+    "query = {\"message\":{}, \"operations\": {\"actions\": [\n",
+    "            \"add_qnode(ids=DOID:9406, key=n00)\",\n",
+    "            \"add_qnode(categories=biolink:ChemicalEntity, is_set=true, key=n01)\",\n",
+    "            \"add_qnode(categories=biolink:Protein, key=n02)\",\n",
     "            \"add_qedge(subject=n00, object=n01, key=e00)\",  \n",
     "            \"add_qedge(subject=n01, object=n02, key=e01)\",\n",
     "            \"expand(edge_key=[e00,e01])\", \n",
     "            \"overlay(action=overlay_clinical_info, observed_expected_ratio=true, virtual_relation_label=C1, subject_qnode_key=n00, object_qnode_key=n01)\", \n",
-    "            \"filter_kg(action=remove_edges_by_attribute, edge_attribute=observed_expected_ratio, direction=below, threshold=3, remove_connected_nodes=t, qnode_id=n01)\",   \n",
-    "            \"filter_kg(action=remove_orphaned_nodes, node_type=protein)\",\n",
+    "            \"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=observed_expected_ratio, \"\n",
+    "            \"direction=below, threshold=3, remove_connected_nodes=t, qnode_keys=n01)\",\n",
+    "            \"filter_kg(action=remove_orphaned_nodes, node_category=biolink:Protein)\",\n",
     "            \"overlay(action=compute_ngd, virtual_relation_label=N1, subject_qnode_key=n01, object_qnode_key=n02)\", \n",
-    "            \"filter_kg(action=remove_edges_by_attribute, edge_attribute=ngd, direction=above, threshold=0.85, remove_connected_nodes=t, qnode_id=n02)\",\n",
+    "            \"filter_kg(action=remove_edges_by_continuous_attribute, edge_attribute=ngd, direction=above, threshold=0.85, \"\n",
+    "            \"remove_connected_nodes=t, qnode_keys=n02)\",\n",
     "            \"resultify(ignore_edge_direction=true)\",\n",
     "            \"return(message=true, store=true)\"\n",
     "        ]}}"
@@ -51,7 +53,10 @@
     "status_code = response_content.status_code\n",
     "if status_code != 200:\n",
     "    print(\"ERROR returned with status \"+str(status_code))\n",
-    "    print(response_content.json())\n",
+    "    #print(response_content.json())\n",
+    "    for x in response_content.json()['logs']:\n",
+    "        if x['level'] == \"ERROR\":\n",
+    "            print(x)\n",
     "else:\n",
     "    print(f\"Response returned with status {status_code}\")"
    ]
@@ -64,9 +69,9 @@
    "source": [
     "# Unpack respsonse from JSON and display the information log\n",
     "response_dict = response_content.json()\n",
-    "for message in response_dict['log']:\n",
-    "    if message['level'] >= 20:\n",
-    "        print(message['prefix']+message['message'])"
+    "for message in response_dict['logs']:\n",
+    "    if message['level'] == \"INFO\":\n",
+    "        print(message['level']+\": \"+message['message'])"
    ]
   },
   {
@@ -80,7 +85,7 @@
     "    print(f\"Data: {response_dict['id']}\")\n",
     "    match = re.search(r'(\\d+)$', response_dict['id'])\n",
     "    if match:\n",
-    "        print(f\"GUI: https://arax.rtx.ai/?m={match.group(1)}\")\n",
+    "        print(f\"GUI: https://arax.transltr.io/?m={match.group(1)}\")\n",
     "else:\n",
     "    print(\"No id was returned in response\")"
    ]
diff --git a/code/ARAX/test/test_ARAX_filter_results.py b/code/ARAX/test/test_ARAX_filter_results.py
@@ -190,7 +190,7 @@ def test_sort_by_score():
     assert len(message.results) == 8
     result_scores = [x.analyses[0].score for x in message.results]
     assert result_scores == sorted(result_scores)
-    assert max(result_scores) < 1
+    assert max(result_scores) <= 1
 
 @pytest.mark.external
 def test_issue1506():