From 7177723306ffe353433052824571abd3147816f7 Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Apr 2022 09:48:04 +0100 Subject: [PATCH 01/13] new command --- cre.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/cre.py b/cre.py index 31794f9b1..6fac238e6 100644 --- a/cre.py +++ b/cre.py @@ -142,7 +142,21 @@ def main() -> None: default=None, help="export all data into yaml files under the directory pointed to by this argument", ) - + parser.add_argument( + "--compare_datasets", + action="store_true", + help="compare the CRE datasets pointed to by --dataset1 and --dataset2", + ) + parser.add_argument( + "--dataset1", + default=None, + help="used with --compare_datasets, dataset1", + ) + parser.add_argument( + "--dataset2", + default=None, + help="used with --compare_datasets, dataset2", + ) args = parser.parse_args() from application.cmd import cre_main From a80ccd0866ac88d0e79e3f909ad88aa3ed3fb66a Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Apr 2022 09:48:35 +0100 Subject: [PATCH 02/13] skeleton for new command --- application/cmd/cre_main.py | 48 +++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 462dc712d..b8805bfb3 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -375,6 +375,8 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover if args.owasp_proj_meta: owasp_metadata_to_cre(args.owasp_proj_meta) + if args.compare_datasets: + compare_datasets(args.dataset1,args.dataset2) def db_connect(path: str) -> db.Node_collection: @@ -457,6 +459,52 @@ def export_to_osib(file_loc: str, cache: str) -> None: with open(file_loc, "w") as f: f.write(json.dumps(tree.todict())) +def compare_datasets(db1:str,db2:str)->Dict: + """ + Given two CRE datasets in databases with connection strings db1 and db2 + Print their differences. 
+ + (make db load descriptions etc in memory) + ensure that both graphs have same number of nodes and edges + + for every cre node in g1 + find node in g2 with same external_id + compare metadata dicts + get g1 edges and compare to g2 edges <-- this will be interesting, need to compare which "unique" info it leads to and whatS the edge typs as i can't trust ids + + do the same for g2 + """ + # def graph_nodes_equal(g1,node1,g2,node2): + # if node1.startswith("CRE"): + # if g1.nodes[node1]["external_id"] != g2.nodes[node2]["external_id"]: + # return False + # elif node1.startswith("Node"): + # if g1.nodes[node1]["name"] != g2.nodes[node2]["name"] or\ + # g1.nodes[node1]["section"] != g2.nodes[node2]["section"] or \ + # g1.nodes[node1]["subsection"] != g2.nodes[node2]["subsection"] or \ + # g1.nodes[node1]["description"] != g2.nodes[node2]["description"] or \ + # g1.nodes[node1]["version"] != g2.nodes[node2]["version"] or\ + # g1.nodes[node1]["infosum"] != g2.nodes[node2]["infosum"]: + # return False + # # TODO: i think i need to change the way i tag nodes, + # # currently it's DB ids which are mutable + # # it needs to be infosums of nodes so that I can compare nodes and edges simply + # # but then i need to update the node and all it's edges when I import CREs or nodes and the infosum changes + + # [ (edge) for edge in g1.edges(node1) + # if g1.get_edge_data(*edge)["infosum"] == g2.get_edge_data + + # print('connecting db1') + # database1 = db_connect(path=db1) + # print('connecting db2') + # database2 = db_connect(path=db2) + # import networkx as nx + # from pprint import pprint + # pprint([node for node in database1.graph.graph ]) + # pprint([node for node in database2.graph.graph ]) + # print("calculating equality") + # pprint(graphs_equal(database1.graph.graph,database2.graph.graph)) + input() def owasp_metadata_to_cre(meta_file: str): """given a file with entries like below From c03fd03786b364c0efea00130dfa1f11b277091f Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Apr 2022 12:19:45 +0100 Subject: [PATCH 03/13] progress --- application/cmd/cre_main.py | 119 +++++++++++++++++++---------- application/database/db.py | 27 ++++++- application/tests/cre_main_test.py | 37 ++++++++- 3 files changed, 140 insertions(+), 43 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index b8805bfb3..d0694ae0e 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -376,7 +376,8 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover owasp_metadata_to_cre(args.owasp_proj_meta) if args.compare_datasets: - compare_datasets(args.dataset1,args.dataset2) + compare_datasets(args.dataset1, args.dataset2) + def db_connect(path: str) -> db.Node_collection: @@ -459,52 +460,88 @@ def export_to_osib(file_loc: str, cache: str) -> None: with open(file_loc, "w") as f: f.write(json.dumps(tree.todict())) -def compare_datasets(db1:str,db2:str)->Dict: + +def compare_datasets(db1: str, db2: str) -> List[Dict]: """ Given two CRE datasets in databases with connection strings db1 and db2 - Print their differences. + Print their differefnces. 
(make db load descriptions etc in memory) ensure that both graphs have same number of nodes and edges - - for every cre node in g1 - find node in g2 with same external_id - compare metadata dicts - get g1 edges and compare to g2 edges <-- this will be interesting, need to compare which "unique" info it leads to and whatS the edge typs as i can't trust ids - - do the same for g2 """ - # def graph_nodes_equal(g1,node1,g2,node2): - # if node1.startswith("CRE"): - # if g1.nodes[node1]["external_id"] != g2.nodes[node2]["external_id"]: - # return False - # elif node1.startswith("Node"): - # if g1.nodes[node1]["name"] != g2.nodes[node2]["name"] or\ - # g1.nodes[node1]["section"] != g2.nodes[node2]["section"] or \ - # g1.nodes[node1]["subsection"] != g2.nodes[node2]["subsection"] or \ - # g1.nodes[node1]["description"] != g2.nodes[node2]["description"] or \ - # g1.nodes[node1]["version"] != g2.nodes[node2]["version"] or\ - # g1.nodes[node1]["infosum"] != g2.nodes[node2]["infosum"]: - # return False - # # TODO: i think i need to change the way i tag nodes, - # # currently it's DB ids which are mutable - # # it needs to be infosums of nodes so that I can compare nodes and edges simply - # # but then i need to update the node and all it's edges when I import CREs or nodes and the infosum changes - - # [ (edge) for edge in g1.edges(node1) - # if g1.get_edge_data(*edge)["infosum"] == g2.get_edge_data - - # print('connecting db1') - # database1 = db_connect(path=db1) - # print('connecting db2') - # database2 = db_connect(path=db2) - # import networkx as nx - # from pprint import pprint - # pprint([node for node in database1.graph.graph ]) - # pprint([node for node in database2.graph.graph ]) - # print("calculating equality") - # pprint(graphs_equal(database1.graph.graph,database2.graph.graph)) - input() + + database1 = db_connect(path=db1) + database2 = db_connect(path=db2) + + def make_hashtable(graph): + nodes = {} + edges = {} + for node in graph.nodes(): + if node.startswith("CRE"): + nodes[graph.nodes[node]["external_id"]] = node + elif node.startswith("Node"): + nodes[graph.nodes[node]["infosum"]] = node + else: + logger.fatal("Graph seems corrupted") + + for edge in graph.edges(): + key = graph.nodes[edge[0]]["external_id"] + if edge[1].startswith("CRE"): + key = key + "-" + graph.nodes[edge[1]]["external_id"] + else: + key = key + "-" + graph.nodes[edge[1]]["infosum"] + edges[key] = edge + return nodes, edges + + def node_differences(nodes1, nodes2, db2): + # get n1 nodes not in n2 and n1 nodes with different attrs than n2 + differences = {} + for node, attrs in nodes1.items(): + if node not in nodes2: + logger.error(f"{node} not present in {db2}") + differences["not_present"] = (node, db2) + elif nodes2[node] != attrs: + logger.error( + f"Dataset 2 {db2} node:{node} has different data from dataset 1 equivalent, data1 is {attrs} data 2 is {nodes2[node]} " + ) + differences["different data"] = { + "node": node, + "attributes1": attrs, + "attributes2": nodes2[node], + } + return differences + + def edge_differences(edges1, edges2, db2): + # get n1 nodes not in n2 and n1 nodes with different attrs than n2 + differences = {} + for edge, attrs in edges1.items(): + if edge not in edges2: + logger.error(f"{edge} not present in {db2}") + differences["not_present"] = (edge, db2) + else: + if edges2[edge] != attrs: + logger.error( + f"Dataset 2{db2} edge:{edge} has different data from dataset 1 equivalent, data1 is {attrs} data 2 is {edges2[edge]}" + ) + differences["different data"] = { + "edge": edge, + 
"attributes1": attrs, + "attributes2": edges2[edge], + } + return differences + + g1 = database1.graph.graph + g2 = database2.graph.graph + n1, e1 = make_hashtable(g1) + n2, e2 = make_hashtable(g2) + + d1 = node_differences(n1, n2, db2) + d2 = node_differences(n2, n1, db1) + + ed1 = edge_differences(e1, e2, db2) + ed2 = edge_differences(e2, e1, db1) + return [d1, d2, ed1, ed2] + def owasp_metadata_to_cre(meta_file: str): """given a file with entries like below diff --git a/application/database/db.py b/application/database/db.py index 7915ce694..0fb93cc7e 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,4 +1,5 @@ import logging +import hashlib import re from collections import Counter from itertools import permutations @@ -28,6 +29,18 @@ def generate_uuid(): class Node(BaseModel): # type: ignore + def serialise(self): + return "".join( + [ + self.name, + self.section or "", + self.subsection or "", + self.tags or "", + self.ntype, + self.description or "", + self.version or "", + ] + ).encode() __tablename__ = "node" id = sqla.Column(sqla.String, primary_key=True, default=generate_uuid) @@ -58,6 +71,10 @@ class Node(BaseModel): # type: ignore class CRE(BaseModel): # type: ignore + def serialise(self): + return "".join( + [self.name, self.external_id or "", self.description or "", self.tags or ""] + ).encode() __tablename__ = "cre" id = sqla.Column(sqla.String, primary_key=True, default=generate_uuid) @@ -152,11 +169,19 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: @classmethod def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph: if dbnode: + sum = hashlib.sha256( + dbnode.serialise() + ) # using md5 would have been way more performant but then I'd have to triage every beg-hunter's SAST scanner results graph.add_node( - "Node: " + str(dbnode.id), + f"Node: {dbnode.id}", internal_id=dbnode.id, name=dbnode.name, section=dbnode.section, + subsection=dbnode.subsection, + type=dbnode.ntype, + description=dbnode.description, + version=dbnode.version, + infosum=sum.hexdigest(), ) else: logger.error("Called with dbnode being none") diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index f0447c7ac..3eeaf8204 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -4,7 +4,7 @@ import tempfile import unittest from pprint import pprint -from typing import Any, Dict, List +from typing import Any, Dict, List, NamedTuple from unittest import mock from unittest.mock import Mock, patch @@ -671,6 +671,41 @@ def test_export_to_osib( mocked_db_connect.assert_called_with(path=cache) mocked_cre2osib.assert_called_with([defs.CRE(name="c0")]) + @patch("application.cmd.cre_main.db_connect") + def test_compare_datasets(self, mock_connect): + import networkx as nx + + g1 = nx.DiGraph() + + c0 = db.CRE(external_id="111-000", description="CREdesc", name="CREname") + c1 = db.CRE(external_id="111-001", description="Groupdesc", name="GroupName") + s456 = db.Node( + ntype="Standard", + subsection="4.5.6", + section="FooStand", + name="BarStand", + link="https://example.com", + tags="a,b,c", + ) + s_unlinked = db.Node( + ntype="Standard", + subsection="4.5.6", + section="Unlinked", + name="Unlinked", + link="https://example.com", + ) + g1 = db.CRE_Graph.add_cre(c0, g1) + g1 = db.CRE_Graph.add_cre(c1, g1) + g1 = db.CRE_Graph.add_node(s456, g1) + g1 = db.CRE_Graph.add_node(s_unlinked, g1) + + CREGraph = NamedTuple("CREGraph", ("graph")) + Graph = NamedTuple("Graph", ("graph")) + graph = 
Graph(graph=CREGraph(graph=g1)) + mock_connect.return_value = graph + + self.assertEqual(main.compare_datasets("foo", "bar"), []) + # def test_prepare_for_Review(self): # raise NotImplementedError From c48f9329c056e7d26c6d5b78f863b997225003e9 Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Apr 2022 22:17:19 +0100 Subject: [PATCH 04/13] temporarily rm test --- application/tests/cre_main_test.py | 35 ------------------------------ 1 file changed, 35 deletions(-) diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index 3eeaf8204..69bce5b20 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -671,41 +671,6 @@ def test_export_to_osib( mocked_db_connect.assert_called_with(path=cache) mocked_cre2osib.assert_called_with([defs.CRE(name="c0")]) - @patch("application.cmd.cre_main.db_connect") - def test_compare_datasets(self, mock_connect): - import networkx as nx - - g1 = nx.DiGraph() - - c0 = db.CRE(external_id="111-000", description="CREdesc", name="CREname") - c1 = db.CRE(external_id="111-001", description="Groupdesc", name="GroupName") - s456 = db.Node( - ntype="Standard", - subsection="4.5.6", - section="FooStand", - name="BarStand", - link="https://example.com", - tags="a,b,c", - ) - s_unlinked = db.Node( - ntype="Standard", - subsection="4.5.6", - section="Unlinked", - name="Unlinked", - link="https://example.com", - ) - g1 = db.CRE_Graph.add_cre(c0, g1) - g1 = db.CRE_Graph.add_cre(c1, g1) - g1 = db.CRE_Graph.add_node(s456, g1) - g1 = db.CRE_Graph.add_node(s_unlinked, g1) - - CREGraph = NamedTuple("CREGraph", ("graph")) - Graph = NamedTuple("Graph", ("graph")) - graph = Graph(graph=CREGraph(graph=g1)) - mock_connect.return_value = graph - - self.assertEqual(main.compare_datasets("foo", "bar"), []) - # def test_prepare_for_Review(self): # raise NotImplementedError From 3a17e958eb20b054f800a2710fb249e53176a2b1 Mon Sep 17 00:00:00 2001 From: Spyros Date: Mon, 11 Apr 2022 15:07:37 +0100 Subject: [PATCH 05/13] get diff to return one on diffs --- application/cmd/cre_main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index d0694ae0e..8f44d4740 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -467,7 +467,7 @@ def compare_datasets(db1: str, db2: str) -> List[Dict]: Print their differefnces. 
(make db load descriptions etc in memory) - ensure that both graphs have same number of nodes and edges + ensure that both graphs have same number of nodes and edges and both graphs have the same data """ database1 = db_connect(path=db1) @@ -540,7 +540,9 @@ def edge_differences(edges1, edges2, db2): ed1 = edge_differences(e1, e2, db2) ed2 = edge_differences(e2, e1, db1) - return [d1, d2, ed1, ed2] + if len(d1) or len(d2) or len(ed1) or len(ed2): + exit(1) + # return [d1, d2, ed1, ed2] # TODO uncomment when this becomes a library method def owasp_metadata_to_cre(meta_file: str): From 9623ffc7a33e6ca3c085dadd4380972002ace8cb Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 13 Apr 2022 02:38:39 +0100 Subject: [PATCH 06/13] poc script to compare latest imports to heroku --- scripts/data-equivalency.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100755 scripts/data-equivalency.sh diff --git a/scripts/data-equivalency.sh b/scripts/data-equivalency.sh new file mode 100755 index 000000000..1d44c8bea --- /dev/null +++ b/scripts/data-equivalency.sh @@ -0,0 +1,21 @@ +#!/bin/bash +curr_dir=$(pwd) + +rm -rf import.dump latest.backup latest.dump latest.dump.1 + +docker run -d -e POSTGRES_HOST_AUTH_METHOD=trust --rm --network host postgres:13.6 +sleep 10 + +export PROD_DATABASE_URL=postgres://postgres@0.0.0.0:5432 +make migrate-upgrade +make import-all + +rm -rf /tmp/diff_data +mkdir -p /tmp/diff_data +cd /tmp/diff_data + +heroku login && heroku pg:backups:download -a opencreorg + +source $curr_dir/venv/bin/activate +python $curr_dir/cre.py --compare_datasets --dataset1=$PROD_DATABASE_URL --dataset2=sqlite://cres/db.sqlite +exit $? \ No newline at end of file From 9a1d617ee0d2c7109cc8072b529661fa720bd131 Mon Sep 17 00:00:00 2001 From: Spyros Date: Tue, 26 Apr 2022 18:17:06 +0100 Subject: [PATCH 07/13] progress --- application/cmd/cre_main.py | 69 ++++++++++++++----------- application/database/db.py | 17 +++++-- application/tests/cre_main_test.py | 82 +++++++++++++++++++++++++++--- 3 files changed, 127 insertions(+), 41 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 8f44d4740..1f878ec06 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -226,7 +226,7 @@ def add_from_spreadsheet(spreadsheet_url: str, cache_loc: str, cre_loc: str) -> import new mappings from export db to ../../cres/ """ - database = db_connect(path=cache_loc) + database, _, _ = db_connect(path=cache_loc) spreadsheet = sheet_utils.readSpreadsheet( url=spreadsheet_url, cres_loc=cre_loc, alias="new spreadsheet", validate=False ) @@ -246,7 +246,7 @@ def add_from_disk(cache_loc: str, cre_loc: str) -> None: import new mappings from export db to ../../cres/ """ - database = db_connect(path=cache_loc) + database, _, _ = db_connect(path=cache_loc) for file in get_cre_files_from_disk(cre_loc): with open(file, "rb") as standard: parse_file( @@ -265,7 +265,7 @@ def review_from_spreadsheet(cache: str, spreadsheet_url: str, share_with: str) - create new spreadsheet of the new CRE landscape for review """ loc, cache = prepare_for_review(cache) - database = db_connect(path=cache) + database, _, _ = db_connect(path=cache) spreadsheet = sheet_utils.readSpreadsheet( url=spreadsheet_url, cres_loc=loc, alias="new spreadsheet", validate=False ) @@ -294,7 +294,7 @@ def review_from_disk(cache: str, cre_file_loc: str, share_with: str) -> None: create new spreadsheet of the new CRE landscape for review """ loc, cache = prepare_for_review(cache) - database = 
db_connect(path=cache) + database, _, _ = db_connect(path=cache) for file in get_cre_files_from_disk(cre_file_loc): with open(file, "rb") as standard: parse_file( @@ -359,27 +359,25 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover elif args.osib_out: export_to_osib(file_loc=args.osib_out, cache=args.cache_file) if args.zap_in: - zap_alerts_parser.parse_zap_alerts(db_connect(args.cache_file)) + cache, _, _ = db_connect(args.cache_file) + zap_alerts_parser.parse_zap_alerts(cache) if args.cheatsheets_in: - cheatsheets_parser.parse_cheatsheets(db_connect(args.cache_file)) + cache, _, _ = db_connect(args.cache_file) + cheatsheets_parser.parse_cheatsheets(cache) if args.github_tools_in: for url in misc_tools_parser.tool_urls: - misc_tools_parser.parse_tool( - cache=db_connect(args.cache_file), tool_repo=url - ) - if args.capec_in: - capec_parser.parse_capec(cache=db_connect(args.cache_file)) - if args.export: - cache = db_connect(args.cache_file) - cache.export(args.export) + cache, _, _ = db_connect(args.cache_file) + misc_tools_parser.parse_tool(cache=cache, tool_repo=url) if args.owasp_proj_meta: owasp_metadata_to_cre(args.owasp_proj_meta) if args.compare_datasets: - compare_datasets(args.dataset1, args.dataset2) + d1, d2, ed1, ed2 = compare_datasets(args.dataset1, args.dataset2) + if len(d1) or len(d2) or len(ed1) or len(ed2): + exit(1) -def db_connect(path: str) -> db.Node_collection: +def db_connect(path: str) -> Tuple[db.Node_collection, Any, Any]: global app conf = CMDConfig(db_uri=path) @@ -387,8 +385,7 @@ def db_connect(path: str) -> db.Node_collection: collection = db.Node_collection() app_context = app.app_context() app_context.push() - - return collection + return (collection, app, app_context) def create_spreadsheet( @@ -422,7 +419,7 @@ def review_osib_from_file(file_loc: str, cache: str, cre_loc: str) -> None: """Given the location of an osib.yaml, parse osib, convert to cres and add to db export db to yamls and spreadsheet for review""" loc, cache = prepare_for_review(cache) - database = db_connect(path=cache) + database, _, _ = db_connect(path=cache) ymls = odefs.read_osib_yaml(file_loc) osibs = odefs.try_from_file(ymls) for osib in osibs: @@ -443,7 +440,7 @@ def review_osib_from_file(file_loc: str, cache: str, cre_loc: str) -> None: def add_osib_from_file(file_loc: str, cache: str, cre_loc: str) -> None: - database = db_connect(path=cache) + database, _, _ = db_connect(path=cache) ymls = odefs.read_osib_yaml(file_loc) osibs = odefs.try_from_file(ymls) for osib in osibs: @@ -454,7 +451,8 @@ def add_osib_from_file(file_loc: str, cache: str, cre_loc: str) -> None: def export_to_osib(file_loc: str, cache: str) -> None: - docs = db_connect(path=cache).export(file_loc, dry_run=True) + cache, _, _ = db_connect(path=cache) + docs = cache.export(file_loc, dry_run=True) tree = odefs.cre2osib(docs) with open(file_loc, "x"): with open(file_loc, "w") as f: @@ -478,18 +476,18 @@ def make_hashtable(graph): edges = {} for node in graph.nodes(): if node.startswith("CRE"): - nodes[graph.nodes[node]["external_id"]] = node + nodes[graph.nodes[node].get("external_id")] = node elif node.startswith("Node"): - nodes[graph.nodes[node]["infosum"]] = node + nodes[graph.nodes[node].get("infosum")] = node else: logger.fatal("Graph seems corrupted") for edge in graph.edges(): - key = graph.nodes[edge[0]]["external_id"] + key = graph.nodes[edge[0]].get("external_id") if edge[1].startswith("CRE"): - key = key + "-" + graph.nodes[edge[1]]["external_id"] + key = 
f"{key}-{graph.nodes[edge[1]].get('external_id')}" else: - key = key + "-" + graph.nodes[edge[1]]["infosum"] + key = f"{key}-{graph.nodes[edge[1]].get('infosum')}" edges[key] = edge return nodes, edges @@ -530,9 +528,22 @@ def edge_differences(edges1, edges2, db2): } return differences + database1, _, _ = db_connect(path=db1) g1 = database1.graph.graph - g2 = database2.graph.graph n1, e1 = make_hashtable(g1) + + print("$" * 90) + database1.graph.print_graph() + print("$" * 90) + database1.graph._instance = None + database1.graph = None + + database2, _, _ = db_connect(path=db2) + g2 = database2.graph.graph + print("$" * 90) + database2.graph.print_graph() + print("$" * 90) + input() n2, e2 = make_hashtable(g2) d1 = node_differences(n1, n2, db2) @@ -540,9 +551,7 @@ def edge_differences(edges1, edges2, db2): ed1 = edge_differences(e1, e2, db2) ed2 = edge_differences(e2, e1, db1) - if len(d1) or len(d2) or len(ed1) or len(ed2): - exit(1) - # return [d1, d2, ed1, ed2] # TODO uncomment when this becomes a library method + return [d1, d2, ed1, ed2] # TODO uncomment when this becomes a library method def owasp_metadata_to_cre(meta_file: str): diff --git a/application/database/db.py b/application/database/db.py index 0fb93cc7e..41061566e 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -13,6 +13,7 @@ from sqlalchemy import func from sqlalchemy.sql.expression import desc # type: ignore import uuid +from matplotlib import pyplot from .. import sqla # type: ignore @@ -140,6 +141,15 @@ class CRE_Graph: graph: nx.Graph = None __instance = None + def print_graph(self, png_path: str = None): + """DEbug method to dump the graph, if png_path is provided it shows the graph in png format + if not, it returns the graph as dict of dicts""" + if png_path: + nx.draw(self.graph, with_labels=True) + pyplot.savefig(png_path) + pyplot.show() + return nx.to_dict_of_dicts(self.graph) + @classmethod def instance(cls, session): if cls.__instance is None: @@ -219,12 +229,11 @@ def load_cre_graph(cls, session) -> nx.Graph: class Node_collection: graph: nx.Graph = None - session = sqla.session + session = None - def __init__(self) -> None: + def __init__(self, session=sqla.session) -> None: self.graph = CRE_Graph.instance(sqla.session) - # self.graph = CRE_Graph.instance(session=sqla.session) - self.session = sqla.session + self.session = session def __get_external_links(self) -> List[Tuple[CRE, Node, str]]: external_links: List[Tuple[CRE, Node, str]] = [] diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index 69bce5b20..a6cfe1fdc 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -1,3 +1,4 @@ +import copy import logging import os import shutil @@ -372,7 +373,7 @@ def test_add_from_spreadsheet( self.tmpdirs.append(dir) cache = tempfile.mkstemp(dir=dir, suffix=".sqlite")[1] - mocked_db_connect.return_value = self.collection + mocked_db_connect.return_value = self.collection, self.app, self.app_context mocked_export.return_value = [ defs.CRE(name="c0"), defs.Standard(name="s0", section="s1"), @@ -415,7 +416,7 @@ def test_review_from_spreadsheet( loc = tempfile.mkstemp(dir=dir)[1] cache = tempfile.mkstemp(dir=dir)[1] mocked_prepare_for_review.return_value = (loc, cache) - mocked_db_connect.return_value = self.collection + mocked_db_connect.return_value = self.collection, self.app, self.app_context mocked_create_spreadsheet.return_value = "https://example.com/sheeet" mocked_export.return_value = [ @@ -467,7 +468,7 @@ def 
test_review_from_disk( loc = tempfile.mkstemp(dir=dir)[1] cache = tempfile.mkstemp(dir=dir, suffix=".sqlite")[1] mocked_prepare_for_review.return_value = (loc, cache) - mocked_db_connect.return_value = self.collection + mocked_db_connect.return_value = self.collection, self.app, self.app_context mocked_get_standards_files_from_disk.return_value = [yml for i in range(0, 3)] mocked_export.return_value = [ defs.CRE(name="c0"), @@ -511,7 +512,7 @@ def test_add_from_disk( yml = tempfile.mkstemp(dir=dir, suffix=".yaml")[1] loc = tempfile.mkstemp(dir=dir)[1] cache = tempfile.mkstemp(dir=dir, suffix=".sqlite")[1] - mocked_db_connect.return_value = self.collection + mocked_db_connect.return_value = self.collection, self.app, self.app_context mocked_get_standards_files_from_disk.return_value = [yml for i in range(0, 3)] mocked_export.return_value = [ defs.CRE(name="c0"), @@ -557,7 +558,7 @@ def test_review_osib_from_file( loc = tempfile.mkstemp(dir=dir)[1] cach = tempfile.mkstemp(dir=dir)[1] mocked_prepare_for_review.return_value = (loc, cach) - mocked_db_connect.return_value = self.collection + mocked_db_connect.return_value = self.collection, self.app, self.app_context mocked_read_osib_yaml.return_value = [{"osib": "osib"}] mocked_try_from_file.return_value = [ Osib_tree(aliases=[Osib_id("t1")]), @@ -619,7 +620,7 @@ def test_add_osib_from_file( osib_yaml = tempfile.mkstemp(dir=dir, suffix=".yaml")[1] loc = tempfile.mkstemp(dir=dir)[1] cache = tempfile.mkstemp(dir=dir, suffix=".sqlite")[1] - mocked_db_connect.return_value = self.collection + mocked_db_connect.return_value = self.collection, self.app, self.app_context mocked_read_osib_yaml.return_value = [{"osib": "osib"}] mocked_try_from_file.return_value = [ odefs.Osib_tree(aliases=[Osib_id("t1")]), @@ -663,7 +664,7 @@ def test_export_to_osib( # osib_yaml = tempfile.mkstemp(dir=dir,suffix=".yaml")[1] loc = tempfile.mkstemp(dir=dir)[1] cache = tempfile.mkstemp(dir=dir, suffix=".sqlite")[1] - mocked_db_connect.return_value = self.collection + mocked_db_connect.return_value = self.collection, self.app, self.app_context mocked_cre2osib.return_value = odefs.Osib_tree(aliases=[Osib_id("t1")]) mocked_export.return_value = [defs.CRE(name="c0")] @@ -671,6 +672,73 @@ def test_export_to_osib( mocked_db_connect.assert_called_with(path=cache) mocked_cre2osib.assert_called_with([defs.CRE(name="c0")]) + def test_compare_datasets(self): + _, t1 = tempfile.mkstemp() + _, t2 = tempfile.mkstemp() + _, tdiff = tempfile.mkstemp() + self.tmpdirs.extend([t1, t2, tdiff]) + + c0 = defs.CRE(id="111-000", description="CREdesc", name="CREname") + s456 = defs.Standard( + subsection="4.5.6", + section="FooStand", + name="BarStand", + hyperlink="https://example.com", + tags=["a", "b", "c"], + ) + c1 = defs.CRE( + id="111-001", + description="Groupdesc", + name="GroupName", + links=[defs.Link(document=s456)], + ) + s_unlinked = defs.Standard( + subsection="4.5.6", + section="Unlinked", + name="Unlinked", + hyperlink="https://example.com", + ) + connection_1, app1, context1 = main.db_connect(path=t1) + sqla.create_all(app=app1) + connection_1.graph.graph = db.CRE_Graph.load_cre_graph(connection_1.session) + connection_1.add_cre(c0) + connection_1.add_node(s_unlinked) + connection_1.add_link(connection_1.add_cre(c1), connection_1.add_node(s456)) + + pprint("%" * 90) + pprint(t1) + pprint(connection_1.graph.print_graph()) + input() + + # connection_2,app2,context2 = main.db_connect(path=t2) + # sqla.create_all(app=app2) + # connection_2.graph.graph = 
db.CRE_Graph.load_cre_graph(sqla.session) + # connection_2.add_cre(c0) + # connection_2.add_node(s_unlinked) + # connection_2.add_link(connection_2.add_cre(c1),connection_2.add_node(s456)) + + connection_diff, appdiff, contextdiff = main.db_connect(path=tdiff) + connection_diff.graph.graph = db.CRE_Graph.load_cre_graph( + connection_diff.session + ) + sqla.create_all(app=appdiff) + connection_diff.add_cre(c0) + connection_diff.add_cre(defs.CRE(id="000-111", name="asdfa232332sdf")) + + pprint("#" * 90) + pprint(tdiff) + pprint(connection_diff.graph.print_graph()) + input() + pprint("#" * 90) + + # self.assertEqual(main.compare_datasets("foo", "bar"), [{},{},{},{}]) + # self.assertEqual(main.compare_datasets(t1,t2), [{},{},{},{}]) + self.assertNotEqual(main.compare_datasets(t1, tdiff), [{}, {}, {}, {}]) + + contextdiff.pop() + # context2.pop() + context1.pop() + # def test_prepare_for_Review(self): # raise NotImplementedError From d2d13cccdc66600476f3a39197e5e686b76f9e76 Mon Sep 17 00:00:00 2001 From: Spyros Date: Wed, 1 Jun 2022 23:22:21 +0100 Subject: [PATCH 08/13] not sure if progress --- application/cmd/cre_main.py | 28 ++++++++++++++++++++++------ application/database/db.py | 7 +++++-- application/tests/cre_main_test.py | 7 +++++-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 1f878ec06..c4b01de85 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -23,6 +23,10 @@ from dacite import from_dict from dacite.config import Config +from application import sqla +from sqlalchemy import create_engine +from sqlalchemy.orm import scoped_session, sessionmaker + logging.basicConfig() logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -377,14 +381,17 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover exit(1) -def db_connect(path: str) -> Tuple[db.Node_collection, Any, Any]: - +def db_connect( + path: str, session=None, mk_app=True +) -> Tuple[db.Node_collection, Any, Any]: global app + app_context = None conf = CMDConfig(db_uri=path) app = create_app(conf=conf) - collection = db.Node_collection() app_context = app.app_context() app_context.push() + collection = db.Node_collection() + return (collection, app, app_context) @@ -527,8 +534,13 @@ def edge_differences(edges1, edges2, db2): "attributes2": edges2[edge], } return differences - - database1, _, _ = db_connect(path=db1) + # sqla = create_engine(db1) + # session1 = scoped_session( + # sessionmaker(autocommit=False, autoflush=False, bind=sqla) + # ) + # database1 = db.Node_collection(session=session1) + database1, app1, context1 = db_connect(path=db1) + database1.graph.graph = db.CRE_Graph.load_cre_graph(session=database1.session) g1 = database1.graph.graph n1, e1 = make_hashtable(g1) @@ -538,7 +550,11 @@ def edge_differences(edges1, edges2, db2): database1.graph._instance = None database1.graph = None - database2, _, _ = db_connect(path=db2) + engine2 = create_engine(db2) + session2 = scoped_session( + sessionmaker(autocommit=False, autoflush=False, bind=engine2) + ) + database2 = db.Node_collection (session=session2) g2 = database2.graph.graph print("$" * 90) database2.graph.print_graph() diff --git a/application/database/db.py b/application/database/db.py index 41061566e..6aab740d9 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -231,8 +231,11 @@ class Node_collection: graph: nx.Graph = None session = None - def __init__(self, session=sqla.session) -> None: - self.graph = 
CRE_Graph.instance(sqla.session) + def __init__(self, session=sqla.session, graph:CRE_Graph=None) -> None: + if graph: + self.graph = graph + else: + self.graph = CRE_Graph.instance(sqla.session) self.session = session def __get_external_links(self) -> List[Tuple[CRE, Node, str]]: diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index a6cfe1fdc..6ef9a4160 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -676,7 +676,7 @@ def test_compare_datasets(self): _, t1 = tempfile.mkstemp() _, t2 = tempfile.mkstemp() _, tdiff = tempfile.mkstemp() - self.tmpdirs.extend([t1, t2, tdiff]) + # self.tmpdirs.extend([t1, t2, tdiff]) c0 = defs.CRE(id="111-000", description="CREdesc", name="CREname") s456 = defs.Standard( @@ -698,6 +698,7 @@ def test_compare_datasets(self): name="Unlinked", hyperlink="https://example.com", ) + connection_1, app1, context1 = main.db_connect(path=t1) sqla.create_all(app=app1) connection_1.graph.graph = db.CRE_Graph.load_cre_graph(connection_1.session) @@ -733,7 +734,9 @@ def test_compare_datasets(self): # self.assertEqual(main.compare_datasets("foo", "bar"), [{},{},{},{}]) # self.assertEqual(main.compare_datasets(t1,t2), [{},{},{},{}]) - self.assertNotEqual(main.compare_datasets(t1, tdiff), [{}, {}, {}, {}]) + pprint("sqlite://"+t1) + pprint("sqlite://"+tdiff) + self.assertNotEqual(main.compare_datasets("sqlite://"+t1, "sqlite://"+tdiff), [{}, {}, {}, {}]) contextdiff.pop() # context2.pop() From e78acad2b02ea3160101e757ac83d307c0b5c41b Mon Sep 17 00:00:00 2001 From: Spyros Date: Tue, 7 Jun 2022 08:43:13 +0100 Subject: [PATCH 09/13] wip --- application/cmd/cre_main.py | 56 +++++++++++++++++------------- application/database/db.py | 24 +++++++------ application/tests/cre_main_test.py | 56 ++++++++++++++++-------------- 3 files changed, 74 insertions(+), 62 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index c4b01de85..b18cb5341 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -1,3 +1,4 @@ +from pprint import pprint import argparse import json import logging @@ -385,7 +386,6 @@ def db_connect( path: str, session=None, mk_app=True ) -> Tuple[db.Node_collection, Any, Any]: global app - app_context = None conf = CMDConfig(db_uri=path) app = create_app(conf=conf) app_context = app.app_context() @@ -482,16 +482,16 @@ def make_hashtable(graph): nodes = {} edges = {} for node in graph.nodes(): - if node.startswith("CRE"): + if node.startswith("CRE-id"): nodes[graph.nodes[node].get("external_id")] = node - elif node.startswith("Node"): + elif node.startswith("Node-id"): nodes[graph.nodes[node].get("infosum")] = node else: logger.fatal("Graph seems corrupted") for edge in graph.edges(): key = graph.nodes[edge[0]].get("external_id") - if edge[1].startswith("CRE"): + if edge[1].startswith("CRE-id"): key = f"{key}-{graph.nodes[edge[1]].get('external_id')}" else: key = f"{key}-{graph.nodes[edge[1]].get('infosum')}" @@ -505,7 +505,8 @@ def node_differences(nodes1, nodes2, db2): if node not in nodes2: logger.error(f"{node} not present in {db2}") differences["not_present"] = (node, db2) - elif nodes2[node] != attrs: + elif not (attrs.startswith("CRE") nodes2[node] != attrs: + logger.error( f"Dataset 2 {db2} node:{node} has different data from dataset 1 equivalent, data1 is {attrs} data 2 is {nodes2[node]} " ) @@ -540,27 +541,32 @@ def edge_differences(edges1, edges2, db2): # ) # database1 = db.Node_collection(session=session1) database1, app1, context1 = 
db_connect(path=db1) + sqla.create_all(app=app1) database1.graph.graph = db.CRE_Graph.load_cre_graph(session=database1.session) - g1 = database1.graph.graph - n1, e1 = make_hashtable(g1) - - print("$" * 90) - database1.graph.print_graph() - print("$" * 90) - database1.graph._instance = None - database1.graph = None - - engine2 = create_engine(db2) - session2 = scoped_session( - sessionmaker(autocommit=False, autoflush=False, bind=engine2) - ) - database2 = db.Node_collection (session=session2) - g2 = database2.graph.graph - print("$" * 90) - database2.graph.print_graph() - print("$" * 90) - input() - n2, e2 = make_hashtable(g2) + n1, e1 = make_hashtable(database1.graph.graph) + database1.session.remove() + context1.pop() + + # print("$" * 90) + # pprint(database1.get_node_names()) + # pprint(database1.graph.print_graph()) + # print("$" * 90) + # # database1.graph.__instance = None + # database1.graph = None + + database2, app2, context2 = db_connect(path=db2) + sqla.create_all(app=app2) + database2.graph.graph = db.CRE_Graph.load_cre_graph(session=database2.session) + context2.pop() + # print("$" * 90) + # pprint(database2.get_node_names()) + # pprint(database2.graph.print_graph()) + # print("$" * 90) + # input() + + # database2.session.remove() + + n2, e2 = make_hashtable(database2.graph.graph) d1 = node_differences(n1, n2, db2) d2 = node_differences(n2, n1, db1) diff --git a/application/database/db.py b/application/database/db.py index 6aab740d9..9cdbef851 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -170,7 +170,9 @@ def add_node(self, *args, **kwargs): def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: if dbcre: graph.add_node( - f"CRE: {dbcre.id}", internal_id=dbcre.id, external_id=dbcre.external_id + f"CRE-id: {dbcre.id}", + internal_id=dbcre.id, + external_id=dbcre.external_id ) else: logger.error("Called with dbcre being none") @@ -183,7 +185,7 @@ def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph: dbnode.serialise() ) # using md5 would have been way more performant but then I'd have to triage every beg-hunter's SAST scanner results graph.add_node( - f"Node: {dbnode.id}", + f"Node-id: {dbnode.id}", internal_id=dbnode.id, name=dbnode.name, section=dbnode.section, @@ -212,7 +214,7 @@ def load_cre_graph(cls, session) -> nx.Graph: logger.error(f"CRE {il.cre} does not exist?") graph = cls.add_cre(dbcre=cre, graph=graph) - graph.add_edge(f"CRE: {il.group}", f"CRE: {il.cre}", ltype=il.type) + graph.add_edge(f"CRE-id: {il.group}", f"CRE-id: {il.cre}", ltype=il.type) for lnk in session.query(Links).all(): node = session.query(Node).filter(Node.id == lnk.node).first() @@ -223,7 +225,7 @@ def load_cre_graph(cls, session) -> nx.Graph: cre = session.query(CRE).filter(CRE.id == lnk.cre).first() graph = cls.add_cre(dbcre=cre, graph=graph) - graph.add_edge(f"CRE: {lnk.cre}", f"Node: {str(lnk.node)}", ltype=lnk.type) + graph.add_edge(f"CRE-id: {lnk.cre}", f"Node-id: {str(lnk.node)}", ltype=lnk.type) return graph @@ -926,14 +928,16 @@ def add_internal_link( f" {group.external_id}:{group.name}" f" == {cre.external_id}:{cre.name} ,adding" ) - cycle = self.__introduces_cycle(f"CRE: {group.id}", f"CRE: {cre.id}") + cycle = self.__introduces_cycle(f"CRE-id: {group.id}", f"CRE-id: {cre.id}") if not cycle: self.session.add( InternalLinks(type=type.value, cre=cre.id, group=group.id) ) self.session.commit() self.graph.add_edge( - f"CRE: {group.id}", f"CRE: {cre.id}", ltype=type.value + f"CRE-id: {group.id}", + f"CRE-id: {cre.id}", + ltype=type.value ) 
else: logger.warning( @@ -972,7 +976,7 @@ def add_link( return else: cycle = self.__introduces_cycle( - f"CRE: {cre.id}", f"Standard: {str(node.id)}" + f"CRE-id: {cre.id}", f"Node-id: {str(node.id)}" ) if not cycle: logger.debug( @@ -982,7 +986,7 @@ def add_link( ) self.session.add(Links(type=type.value, cre=cre.id, node=node.id)) self.graph.add_edge( - f"CRE: {cre.id}", f"Node: {str(node.id)}", ltype=type.value + f"CRE-id: {cre.id}", f"Node-id: {str(node.id)}", ltype=type.value ) else: logger.warning( @@ -1001,8 +1005,8 @@ def find_path_between_nodes( this starts getting complicated when we have more linktypes""" res: bool = nx.has_path( self.graph.graph.to_undirected(), - "Node: " + str(node_source_id), - "Node: " + str(node_destination_id), + "Node-id: " + str(node_source_id), + "Node-id: " + str(node_destination_id), ) return res diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index 6ef9a4160..80a91fbbb 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -19,14 +19,15 @@ class TestMain(unittest.TestCase): def tearDown(self) -> None: - for tmpdir in self.tmpdirs: - shutil.rmtree(tmpdir) + [shutil.rmtree(tmpdir) for tmpdir in self.tmpdirs] + [os.remove(tmpfile) for tmpfile in self.tmpfiles] sqla.session.remove() sqla.drop_all(app=self.app) self.app_context.pop() def setUp(self) -> None: self.tmpdirs: List[str] = [] + self.tmpfiles: List[str] = [] self.app = create_app(mode="test") sqla.create_all(app=self.app) self.app_context = self.app.app_context() @@ -676,7 +677,8 @@ def test_compare_datasets(self): _, t1 = tempfile.mkstemp() _, t2 = tempfile.mkstemp() _, tdiff = tempfile.mkstemp() - # self.tmpdirs.extend([t1, t2, tdiff]) + self.tmpfiles.extend([t1, t2, tdiff]) + self.maxDiff = None c0 = defs.CRE(id="111-000", description="CREdesc", name="CREname") s456 = defs.Standard( @@ -705,18 +707,18 @@ def test_compare_datasets(self): connection_1.add_cre(c0) connection_1.add_node(s_unlinked) connection_1.add_link(connection_1.add_cre(c1), connection_1.add_node(s456)) + context1.pop() - pprint("%" * 90) - pprint(t1) - pprint(connection_1.graph.print_graph()) - input() + self.assertNotEqual(main.compare_datasets(t1, tdiff), [{}, {}, {}, {}]) + - # connection_2,app2,context2 = main.db_connect(path=t2) - # sqla.create_all(app=app2) - # connection_2.graph.graph = db.CRE_Graph.load_cre_graph(sqla.session) - # connection_2.add_cre(c0) - # connection_2.add_node(s_unlinked) - # connection_2.add_link(connection_2.add_cre(c1),connection_2.add_node(s456)) + connection_2,app2,context2 = main.db_connect(path=t2) + sqla.create_all(app=app2) + connection_2.graph.graph = db.CRE_Graph.load_cre_graph(sqla.session) + connection_2.add_cre(c0) + connection_2.add_node(s_unlinked) + connection_2.add_link(connection_2.add_cre(c1),connection_2.add_node(s456)) + context2.pop() connection_diff, appdiff, contextdiff = main.db_connect(path=tdiff) connection_diff.graph.graph = db.CRE_Graph.load_cre_graph( @@ -725,22 +727,22 @@ def test_compare_datasets(self): sqla.create_all(app=appdiff) connection_diff.add_cre(c0) connection_diff.add_cre(defs.CRE(id="000-111", name="asdfa232332sdf")) - - pprint("#" * 90) - pprint(tdiff) - pprint(connection_diff.graph.print_graph()) - input() - pprint("#" * 90) - - # self.assertEqual(main.compare_datasets("foo", "bar"), [{},{},{},{}]) - # self.assertEqual(main.compare_datasets(t1,t2), [{},{},{},{}]) - pprint("sqlite://"+t1) - pprint("sqlite://"+tdiff) - self.assertNotEqual(main.compare_datasets("sqlite://"+t1, 
"sqlite://"+tdiff), [{}, {}, {}, {}]) - contextdiff.pop() + + # pprint("#" * 90) + # pprint(tdiff) + # pprint(connection_diff.graph.print_graph()) + # input() + # pprint("#" * 90) + + self.assertEqual(main.compare_datasets("foo", "bar"), [{},{},{},{}]) + self.assertEqual(main.compare_datasets(t1,t2), [{},{},{},{}]) + # pprint("sqlite://"+t1) + # pprint("sqlite://"+tdiff) + self.assertNotEqual(main.compare_datasets(t1, tdiff), [{}, {}, {}, {}]) + + # contextdiff.pop() # context2.pop() - context1.pop() # def test_prepare_for_Review(self): # raise NotImplementedError From e1bc9e94cbb421561c6c68ec021387f5e5b0ea61 Mon Sep 17 00:00:00 2001 From: Spyros Date: Tue, 7 Jun 2022 16:23:35 +0100 Subject: [PATCH 10/13] wip --- application/cmd/cre_main.py | 41 ++++++++--------------- application/database/db.py | 12 +++---- application/tests/cre_main_test.py | 52 +++++++++++++++++------------- 3 files changed, 49 insertions(+), 56 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index b18cb5341..276812a79 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -505,8 +505,10 @@ def node_differences(nodes1, nodes2, db2): if node not in nodes2: logger.error(f"{node} not present in {db2}") differences["not_present"] = (node, db2) - elif not (attrs.startswith("CRE") nodes2[node] != attrs: - + elif nodes2[node] != attrs and not ( + attrs.startswith("CRE-id") or attrs.startswith("Node-id") + ): + logger.error( f"Dataset 2 {db2} node:{node} has different data from dataset 1 equivalent, data1 is {attrs} data 2 is {nodes2[node]} " ) @@ -525,7 +527,11 @@ def edge_differences(edges1, edges2, db2): logger.error(f"{edge} not present in {db2}") differences["not_present"] = (edge, db2) else: - if edges2[edge] != attrs: + if edges2[edge] != attrs and [ + e + for e in attrs + if not (e.startswith("CRE-id") or e.startswith("Node-id")) + ]: logger.error( f"Dataset 2{db2} edge:{edge} has different data from dataset 1 equivalent, data1 is {attrs} data 2 is {edges2[edge]}" ) @@ -535,45 +541,26 @@ def edge_differences(edges1, edges2, db2): "attributes2": edges2[edge], } return differences - # sqla = create_engine(db1) - # session1 = scoped_session( - # sessionmaker(autocommit=False, autoflush=False, bind=sqla) - # ) - # database1 = db.Node_collection(session=session1) + database1, app1, context1 = db_connect(path=db1) sqla.create_all(app=app1) database1.graph.graph = db.CRE_Graph.load_cre_graph(session=database1.session) n1, e1 = make_hashtable(database1.graph.graph) database1.session.remove() context1.pop() - - # print("$" * 90) - # pprint(database1.get_node_names()) - # pprint(database1.graph.print_graph()) - # print("$" * 90) - # # database1.graph.__instance = None - # database1.graph = None database2, app2, context2 = db_connect(path=db2) - sqla.create_all(app=app2) + sqla.create_all(app=app2) database2.graph.graph = db.CRE_Graph.load_cre_graph(session=database2.session) - context2.pop() - # print("$" * 90) - # pprint(database2.get_node_names()) - # pprint(database2.graph.print_graph()) - # print("$" * 90) - # input() - - # database2.session.remove() - n2, e2 = make_hashtable(database2.graph.graph) + database2.session.remove() + context2.pop() d1 = node_differences(n1, n2, db2) d2 = node_differences(n2, n1, db1) - ed1 = edge_differences(e1, e2, db2) ed2 = edge_differences(e2, e1, db1) - return [d1, d2, ed1, ed2] # TODO uncomment when this becomes a library method + return [d1, d2, ed1, ed2] def owasp_metadata_to_cre(meta_file: str): diff --git 
a/application/database/db.py b/application/database/db.py index 9cdbef851..b8a8a8cd9 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -172,7 +172,7 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: graph.add_node( f"CRE-id: {dbcre.id}", internal_id=dbcre.id, - external_id=dbcre.external_id + external_id=dbcre.external_id, ) else: logger.error("Called with dbcre being none") @@ -225,7 +225,9 @@ def load_cre_graph(cls, session) -> nx.Graph: cre = session.query(CRE).filter(CRE.id == lnk.cre).first() graph = cls.add_cre(dbcre=cre, graph=graph) - graph.add_edge(f"CRE-id: {lnk.cre}", f"Node-id: {str(lnk.node)}", ltype=lnk.type) + graph.add_edge( + f"CRE-id: {lnk.cre}", f"Node-id: {str(lnk.node)}", ltype=lnk.type + ) return graph @@ -233,7 +235,7 @@ class Node_collection: graph: nx.Graph = None session = None - def __init__(self, session=sqla.session, graph:CRE_Graph=None) -> None: + def __init__(self, session=sqla.session, graph: CRE_Graph = None) -> None: if graph: self.graph = graph else: @@ -935,9 +937,7 @@ def add_internal_link( ) self.session.commit() self.graph.add_edge( - f"CRE-id: {group.id}", - f"CRE-id: {cre.id}", - ltype=type.value + f"CRE-id: {group.id}", f"CRE-id: {cre.id}", ltype=type.value ) else: logger.warning( diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index 80a91fbbb..b33a8ecb6 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -674,9 +674,9 @@ def test_export_to_osib( mocked_cre2osib.assert_called_with([defs.CRE(name="c0")]) def test_compare_datasets(self): - _, t1 = tempfile.mkstemp() - _, t2 = tempfile.mkstemp() - _, tdiff = tempfile.mkstemp() + _, t1 = tempfile.mkstemp(suffix="dataset1") + _, t2 = tempfile.mkstemp(suffix="dataset2") + _, tdiff = tempfile.mkstemp(suffix="datasetdiff") self.tmpfiles.extend([t1, t2, tdiff]) self.maxDiff = None @@ -700,7 +700,7 @@ def test_compare_datasets(self): name="Unlinked", hyperlink="https://example.com", ) - + connection_1, app1, context1 = main.db_connect(path=t1) sqla.create_all(app=app1) connection_1.graph.graph = db.CRE_Graph.load_cre_graph(connection_1.session) @@ -709,15 +709,22 @@ def test_compare_datasets(self): connection_1.add_link(connection_1.add_cre(c1), connection_1.add_node(s456)) context1.pop() - self.assertNotEqual(main.compare_datasets(t1, tdiff), [{}, {}, {}, {}]) - + self.assertEqual( + main.compare_datasets(t1, tdiff), + [ + {"not_present": (c1, id, tdiff)}, + {}, + {"not_present": (f"{c1.id}-", tdiff)}, + {}, + ], + ) - connection_2,app2,context2 = main.db_connect(path=t2) + connection_2, app2, context2 = main.db_connect(path=t2) sqla.create_all(app=app2) connection_2.graph.graph = db.CRE_Graph.load_cre_graph(sqla.session) connection_2.add_cre(c0) connection_2.add_node(s_unlinked) - connection_2.add_link(connection_2.add_cre(c1),connection_2.add_node(s456)) + connection_2.add_link(connection_2.add_cre(c1), connection_2.add_node(s456)) context2.pop() connection_diff, appdiff, contextdiff = main.db_connect(path=tdiff) @@ -728,21 +735,20 @@ def test_compare_datasets(self): connection_diff.add_cre(c0) connection_diff.add_cre(defs.CRE(id="000-111", name="asdfa232332sdf")) contextdiff.pop() - - # pprint("#" * 90) - # pprint(tdiff) - # pprint(connection_diff.graph.print_graph()) - # input() - # pprint("#" * 90) - - self.assertEqual(main.compare_datasets("foo", "bar"), [{},{},{},{}]) - self.assertEqual(main.compare_datasets(t1,t2), [{},{},{},{}]) - # pprint("sqlite://"+t1) - # 
pprint("sqlite://"+tdiff) - self.assertNotEqual(main.compare_datasets(t1, tdiff), [{}, {}, {}, {}]) - - # contextdiff.pop() - # context2.pop() + + self.assertEqual(main.compare_datasets("foo", "bar"), [{}, {}, {}, {}]) + self.assertEqual(main.compare_datasets(t1, t2), [{}, {}, {}, {}]) + self.assertEqual( + main.compare_datasets(t1, tdiff), + [ + {"not_present": (c1.id, tdiff)}, + {}, + { + "not_present": (f"{c1.id}-", tdiff) + }, # here the make_hashtable method creates edges with the format of - so need to find the infosum of the node conencted to c1 + {}, + ], + ) # def test_prepare_for_Review(self): # raise NotImplementedError From 4e4a778feee3ac0807a65e9feb4ed23431cf89dd Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 26 Jun 2022 17:25:00 +0100 Subject: [PATCH 11/13] fix tests --- application/tests/cre_main_test.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index b33a8ecb6..51012f4db 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -706,18 +706,15 @@ def test_compare_datasets(self): connection_1.graph.graph = db.CRE_Graph.load_cre_graph(connection_1.session) connection_1.add_cre(c0) connection_1.add_node(s_unlinked) - connection_1.add_link(connection_1.add_cre(c1), connection_1.add_node(s456)) - context1.pop() + db_s456 = connection_1.add_node(s456) + connection_1.add_link(connection_1.add_cre(c1), db_s456) + infosum = [ + connection_1.graph.graph.nodes[x].get("infosum") + for x in connection_1.graph.graph.nodes + if db_s456.id in x + ][0] - self.assertEqual( - main.compare_datasets(t1, tdiff), - [ - {"not_present": (c1, id, tdiff)}, - {}, - {"not_present": (f"{c1.id}-", tdiff)}, - {}, - ], - ) + context1.pop() connection_2, app2, context2 = main.db_connect(path=t2) sqla.create_all(app=app2) @@ -743,9 +740,7 @@ def test_compare_datasets(self): [ {"not_present": (c1.id, tdiff)}, {}, - { - "not_present": (f"{c1.id}-", tdiff) - }, # here the make_hashtable method creates edges with the format of - so need to find the infosum of the node conencted to c1 + {"not_present": (f"{c1.id}-{infosum}", tdiff)}, {}, ], ) From 91cf851a2c92ada5fb4114f7aa98ff2d910ba5f8 Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 26 Jun 2022 17:27:32 +0100 Subject: [PATCH 12/13] fix tests --- application/tests/cre_main_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index 51012f4db..ca66fdca9 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -716,6 +716,16 @@ def test_compare_datasets(self): context1.pop() + self.assertEqual( + main.compare_datasets(t1, tdiff), + [ + {"not_present": (c1.id, tdiff)}, + {}, + {"not_present": (f"{c1.id}-{infosum}", tdiff)}, + {}, + ], + ) + connection_2, app2, context2 = main.db_connect(path=t2) sqla.create_all(app=app2) connection_2.graph.graph = db.CRE_Graph.load_cre_graph(sqla.session) From 41ee700e4d8b0712a4df673bd94c518e0d64a9f8 Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 26 Jun 2022 17:35:08 +0100 Subject: [PATCH 13/13] new deps --- requirements.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/requirements.txt b/requirements.txt index e2de0dfee..7abfe5893 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ click-didyoumean==0.0.3 click-plugins==1.1.1 click-repl==0.2.0 coverage==5.5 +cycler==0.11.0 dacite==1.6.0 dataclasses-json==0.5.6 
decorator==4.4.2 @@ -24,6 +25,7 @@ Flask-Cors==3.0.10 Flask-Migrate==3.1.0 Flask-SQLAlchemy==2.5.1 flask-sqlalchemy-stubs==0.2 +fonttools==4.33.3 gitdb==4.0.5 github2==0.6.2 GitPython==3.1.9 @@ -40,19 +42,24 @@ isort==5.9.3 itsdangerous==1.1.0 Jinja2==2.11.3 jsonschema==3.2.0 +kiwisolver==1.4.3 lazy-object-proxy==1.6.0 Mako==1.1.5 MarkupSafe==1.1.1 marshmallow==3.14.1 marshmallow-enum==1.5.1 +matplotlib==3.5.2 mccabe==0.6.1 mypy==0.910 mypy-extensions==0.4.3 networkx==2.5.1 +numpy==1.23.0 oauthlib==3.1.0 +packaging==21.3 pathspec==0.9.0 pbr==5.8.0 pep517==0.8.2 +Pillow==9.1.1 pip-autoremove==0.9.1 platformdirs==2.2.0 prompt-toolkit==3.0.19
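
For reference, the comparison technique that patches 03-10 converge on can be exercised without the Flask/SQLAlchemy plumbing. The sketch below is a minimal, illustrative reconstruction, not the shipped code: it assumes bare networkx DiGraphs whose "CRE-id: ..." nodes carry an external_id attribute and whose "Node-id: ..." nodes carry an infosum attribute (the sha256 of the node's serialised fields, per Node.serialise in patch 03 — the infosum() helper here hashes only name/section/subsection for brevity). The sample ids and standard names echo the fixtures used in test_compare_datasets.

import hashlib
from typing import Dict, List, Tuple

import networkx as nx


def infosum(name: str, section: str = "", subsection: str = "") -> str:
    # Stable content hash for a standard node, independent of mutable DB ids.
    return hashlib.sha256("".join([name, section, subsection]).encode()).hexdigest()


def hashtables(graph: nx.DiGraph) -> Tuple[Dict, Dict]:
    # Key every node and edge by content (external_id / infosum), never by DB id.
    nodes: Dict[str, str] = {}
    edges: Dict[str, Tuple[str, str]] = {}
    for node, attrs in graph.nodes(data=True):
        if node.startswith("CRE-id"):
            nodes[attrs["external_id"]] = node
        else:
            nodes[attrs["infosum"]] = node
    for src, dst in graph.edges():
        dst_key = (
            graph.nodes[dst]["external_id"]
            if dst.startswith("CRE-id")
            else graph.nodes[dst]["infosum"]
        )
        # Edges originate from CREs in this dataset, so the source key is an external_id.
        edges[f"{graph.nodes[src]['external_id']}-{dst_key}"] = (src, dst)
    return nodes, edges


def diff(g1: nx.DiGraph, g2: nx.DiGraph) -> Dict[str, List[str]]:
    # Content keys present in g1 but missing from g2; run both ways for a full diff.
    n1, e1 = hashtables(g1)
    n2, e2 = hashtables(g2)
    return {
        "missing_nodes": [k for k in n1 if k not in n2],
        "missing_edges": [k for k in e1 if k not in e2],
    }


if __name__ == "__main__":
    g1, g2 = nx.DiGraph(), nx.DiGraph()
    for g in (g1, g2):
        g.add_node("CRE-id: 1", external_id="111-001")
        g.add_node("Node-id: 2", infosum=infosum("BarStand", "FooStand", "4.5.6"))
        g.add_edge("CRE-id: 1", "Node-id: 2")
    g1.add_node("CRE-id: 3", external_id="111-000")  # present only in dataset 1
    print(diff(g1, g2))  # {'missing_nodes': ['111-000'], 'missing_edges': []}
    print(diff(g2, g1))  # {'missing_nodes': [], 'missing_edges': []}

Keying the hashtables by content rather than by primary key is the point of the infosum: as the TODO in patch 02 notes, the database ids are mutable across imports, so only a hash of each node's own fields gives two independently imported datasets a common vocabulary for "the same node" and "the same edge".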