From 9944643bf4fd955e4f35b276877ac8676ed4bef4 Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 3 Apr 2022 23:52:52 +0100 Subject: [PATCH 1/4] Make db ids UUID (#191) * make migrations a bit more resilient when run locally * foreign key cascades * make database ids uuid4 * lint --- Makefile | 8 + application/cmd/cre_main.py | 8 +- application/database/db.py | 35 +- .../0d267ae11945_make_database_ids_be_uuid.py | 300 ++++++++++++++++++ ...27871a6_change_standards_table_to_nodes.py | 18 +- ...f052a44ea_add_cascades_for_foreign_keys.py | 87 +++++ .../versions/7a17989aa1e3_first_migration.py | 32 +- 7 files changed, 454 insertions(+), 34 deletions(-) create mode 100644 migrations/versions/0d267ae11945_make_database_ids_be_uuid.py create mode 100644 migrations/versions/455f052a44ea_add_cascades_for_foreign_keys.py diff --git a/Makefile b/Makefile index 85ee61b67..1620fe3de 100644 --- a/Makefile +++ b/Makefile @@ -55,11 +55,19 @@ clean: find . -type f -name '*.orig' -delete migrate-upgrade: + if ! [ -f "standards_cache.sqlite" ]; then cp cres/db.sqlite standards_cache.sqlite; fi [ -d "./venv" ] && . ./venv/bin/activate export FLASK_APP=$(CURDIR)/cre.py flask db upgrade + migrate-downgrade: [ -d "./venv" ] && . ./venv/bin/activate export FLASK_APP=$(CURDIR)/cre.py flask db downgrade + +import-all: + [ -d "./venv" ] && . 
./venv/bin/activate + export FLASK_APP=$(CURDIR)/cre.py + python cre.py --zap_in --cheatsheets_in --github_tools_in --add --from_spreadsheet https://docs.google.com/spreadsheets/d/1eZOEYgts7d_-Dr-1oAbogPfzBLh6511b58pX3b59kvg/edit#gid=260321921 + all: clean lint test dev dev-run diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 5ed9e699d..444cd54b4 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -363,16 +363,16 @@ def run(args: argparse.Namespace) -> None: elif args.osib_out: export_to_osib(file_loc=args.osib_out, cache=args.cache_file) - elif args.zap_in: + if args.zap_in: zap_alerts_parser.parse_zap_alerts(db_connect(args.cache_file)) - elif args.cheatsheets_in: + if args.cheatsheets_in: cheatsheets_parser.parse_cheatsheets(db_connect(args.cache_file)) - elif args.github_tools_in: + if args.github_tools_in: for url in misc_tools_parser.tool_urls: misc_tools_parser.parse_tool( cache=db_connect(args.cache_file), tool_repo=url ) - elif args.owasp_proj_meta: + if args.owasp_proj_meta: owasp_metadata_to_cre(args.owasp_proj_meta) diff --git a/application/database/db.py b/application/database/db.py index 3c006fffe..5fa98418b 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -11,6 +11,7 @@ from flask_sqlalchemy.model import DefaultMeta from sqlalchemy import func from sqlalchemy.sql.expression import desc # type: ignore +import uuid from .. 
import sqla # type: ignore @@ -22,10 +23,14 @@ BaseModel: DefaultMeta = sqla.Model +def generate_uuid(): + return str(uuid.uuid4()) + + class Node(BaseModel): # type: ignore __tablename__ = "node" - id = sqla.Column(sqla.Integer, primary_key=True) + id = sqla.Column(sqla.String, primary_key=True, default=generate_uuid) # ASVS or standard name, what are we linking to name = sqla.Column(sqla.String) # which part of are we linking to @@ -55,7 +60,7 @@ class Node(BaseModel): # type: ignore class CRE(BaseModel): # type: ignore __tablename__ = "cre" - id = sqla.Column(sqla.Integer, primary_key=True) + id = sqla.Column(sqla.String, primary_key=True, default=generate_uuid) external_id = sqla.Column(sqla.String, default="") description = sqla.Column(sqla.String, default="") @@ -72,8 +77,16 @@ class InternalLinks(BaseModel): # type: ignore __tablename__ = "cre_links" type = sqla.Column(sqla.String, default="SAME") - group = sqla.Column(sqla.Integer, sqla.ForeignKey("cre.id"), primary_key=True) - cre = sqla.Column(sqla.Integer, sqla.ForeignKey("cre.id"), primary_key=True) + group = sqla.Column( + sqla.String, + sqla.ForeignKey("cre.id", onupdate="CASCADE", ondelete="CASCADE"), + primary_key=True, + ) + cre = sqla.Column( + sqla.String, + sqla.ForeignKey("cre.id", onupdate="CASCADE", ondelete="CASCADE"), + primary_key=True, + ) __table_args__ = ( sqla.UniqueConstraint( group, @@ -86,9 +99,17 @@ class InternalLinks(BaseModel): # type: ignore class Links(BaseModel): # type: ignore __tablename__ = "cre_node_links" - type = sqla.Column(sqla.String, default="SAM") - cre = sqla.Column(sqla.Integer, sqla.ForeignKey("cre.id"), primary_key=True) - node = sqla.Column(sqla.Integer, sqla.ForeignKey("node.id"), primary_key=True) + type = sqla.Column(sqla.String, default="SAME") + cre = sqla.Column( + sqla.String, + sqla.ForeignKey("cre.id", onupdate="CASCADE", ondelete="CASCADE"), + primary_key=True, + ) + node = sqla.Column( + sqla.String, + sqla.ForeignKey("node.id", onupdate="CASCADE", 
ondelete="CASCADE"), + primary_key=True, + ) __table_args__ = ( sqla.UniqueConstraint( cre, diff --git a/migrations/versions/0d267ae11945_make_database_ids_be_uuid.py b/migrations/versions/0d267ae11945_make_database_ids_be_uuid.py new file mode 100644 index 000000000..35b187480 --- /dev/null +++ b/migrations/versions/0d267ae11945_make_database_ids_be_uuid.py @@ -0,0 +1,300 @@ +"""make database ids be uuid instead of incremental ints + +Revision ID: 0d267ae11945 +Revises: 455f052a44ea +Create Date: 2022-04-03 16:05:31.487481 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import engine_from_config, text +from application.database.db import generate_uuid +from random import randint + +# revision identifiers, used by Alembic. +revision = "0d267ae11945" +down_revision = "455f052a44ea" +branch_labels = None +depends_on = None +temp_table_number = randint(1, 100) + + +def copy_tables(cre_table, node_table, cre_link_table, cre_node_link_table): + config = op.get_context().config + engine = engine_from_config( + config.get_section(config.config_ini_section), prefix="sqlalchemy." 
+ ) + connection = op.get_bind() + nodes = connection.execute( + "Select id,name,section,subsection,link,ntype,tags,version,description from node" + ) + nodes_data = nodes.fetchall() if nodes else [] + + cre = connection.execute("Select id,name,description,external_id,tags from cre") + cre_data = cre.fetchall() if cre else [] + + cre_link = connection.execute('Select type, "group" ,cre from cre_links') + cre_link_data = cre_link.fetchall() if cre_link else [] + + cre_node_link = connection.execute("Select type, cre, node from cre_node_links") + cre_node_link_data = cre_node_link.fetchall() if cre_node_link else [] + + nodes = [ + { + "id": dat[0], + "name": dat[1], + "section": dat[2], + "subsection": dat[3], + "link": dat[4], + "ntype": dat[5], + "tags": dat[6], + "version": dat[7], + "description": dat[8], + } + for dat in nodes_data + ] + cres = [ + { + "id": dat[0], + "name": dat[1], + "description": dat[2], + "external_id": dat[3], + "tags": dat[4], + } + for dat in cre_data + ] + cre_links = [ + {"type": dat[0], "group": dat[1], "cre": dat[2]} for dat in cre_link_data + ] + + cre_node_links = [ + {"type": dat[0], "cre": dat[1], "node": dat[2]} for dat in cre_node_link_data + ] + + op.bulk_insert(cre_table, cres) + op.bulk_insert(node_table, nodes) + op.bulk_insert(cre_link_table, cre_links) + op.bulk_insert(cre_node_link_table, cre_node_links) + + +def update_ids_to_uuid(): + config = op.get_context().config + engine = engine_from_config( + config.get_section(config.config_ini_section), prefix="sqlalchemy." 
+ ) + connection = op.get_bind() + + nodes = connection.execute(f"Select id from node{temp_table_number}") + nodes_data = nodes.fetchall() if nodes else [] + + cre = connection.execute(f"Select id from cre{temp_table_number}") + cre_data = cre.fetchall() if cre else [] + + for id in nodes_data: + node_uuid = generate_uuid() + connection.execute( + f"UPDATE node{temp_table_number} set id='{node_uuid}' WHERE id={id[0]}" + ) + connection.execute( + f"UPDATE cre_node_links{temp_table_number} set node='{node_uuid}' WHERE node={id[0]}" + ) + + for id in cre_data: + cre_uuid = generate_uuid() + connection.execute( + f"UPDATE cre{temp_table_number} set id='{cre_uuid}' WHERE id={id[0]}" + ) + connection.execute( + f"UPDATE cre_links{temp_table_number} set cre='{cre_uuid}' WHERE cre={id[0]}" + ) + connection.execute( + f'UPDATE cre_links{temp_table_number} set "group"=\'{cre_uuid}\' WHERE "group"={id[0]}' + ) + connection.execute( + f'UPDATE cre_node_links{temp_table_number} set "cre"=\'{cre_uuid}\' WHERE "cre"={id[0]}' + ) + + +def downgrade_uuid_to_id(): + config = op.get_context().config + engine = engine_from_config( + config.get_section(config.config_ini_section), prefix="sqlalchemy." 
+ ) + connection = op.get_bind() + nodes = connection.execute("Select id from node") + nodes_data = nodes.fetchall() if nodes else [] + + cre = connection.execute("Select id from cre") + cre_data = cre.fetchall() if cre else [] + + node_id = 1 + for id in nodes_data: + connection.execute( + f"UPDATE node{temp_table_number} set id='{node_id}' WHERE id='{id[0]}'" + ) + connection.execute( + f"UPDATE cre_node_links set node{temp_table_number}='{node_id}' WHERE node='{id[0]}'" + ) + node_id = node_id + 1 + + cre_id = 1 + for id in cre_data: + connection.execute( + f"UPDATE cre{temp_table_number} set id='{cre_id}' WHERE id='{id[0]}'" + ) + connection.execute( + f"UPDATE cre_links{temp_table_number} set cre='{cre_id}' WHERE cre='{id[0]}'" + ) + connection.execute( + f"UPDATE cre_links{temp_table_number} set \"group\"='{cre_id}' WHERE \"group\"='{id[0]}'" + ) + connection.execute( + f"UPDATE cre_node_links{temp_table_number} set \"cre\"='{cre_id}' WHERE \"cre\"='{id[0]}'" + ) + cre_id = cre_id + 1 + + +def create_tmp_tables(id_datatype): + cre2 = op.create_table( + f"cre{temp_table_number}", + sa.Column("id", id_datatype, primary_key=True), + sa.Column("external_id", sa.String(), nullable=True), + sa.Column("description", sa.String(), nullable=True), + sa.Column("name", sa.String(), nullable=True), + sa.Column("tags", sa.String(), nullable=True), + ) + node2 = op.create_table( + f"node{temp_table_number}", + sa.Column("id", id_datatype, primary_key=True), + sa.Column("name", sa.String()), + sa.Column("section", sa.String(), nullable=True), + sa.Column("subsection", sa.String()), + sa.Column("tags", sa.String()), + sa.Column("version", sa.String()), + sa.Column("description", sa.String()), + sa.Column("ntype", sa.String()), + sa.Column("link", sa.String()), + ) + cre_links2 = op.create_table( + f"cre_links{temp_table_number}", + sa.Column("type", sa.String()), + sa.Column( + "group", + id_datatype, + sa.ForeignKey( + f"cre{temp_table_number}.id", onupdate="CASCADE", 
ondelete="CASCADE" + ), + primary_key=True, + ), + sa.Column( + "cre", + id_datatype, + sa.ForeignKey( + f"cre{temp_table_number}.id", onupdate="CASCADE", ondelete="CASCADE" + ), + primary_key=True, + ), + ) + cre_node_links2 = op.create_table( + f"cre_node_links{temp_table_number}", + sa.Column("type", sa.String()), + sa.Column( + "cre", + id_datatype, + sa.ForeignKey( + f"cre{temp_table_number}.id", onupdate="CASCADE", ondelete="CASCADE" + ), + primary_key=True, + ), + sa.Column( + "node", + id_datatype, + sa.ForeignKey( + f"node{temp_table_number}.id", onupdate="CASCADE", ondelete="CASCADE" + ), + primary_key=True, + ), + ) + return cre2, node2, cre_links2, cre_node_links2 + + +def drop_old_tables(): + op.drop_table("cre_links") + op.drop_table("cre_node_links") + op.drop_table("cre") + op.drop_table("node") + + +def cleanup(): + # op.drop_table("cre_links2") + # op.drop_table("cre_node_links2") + # op.drop_table("cre2") + # op.drop_table("node2") + pass + + +def rename_tables(): + op.rename_table(f"cre{temp_table_number}", "cre") + op.rename_table(f"node{temp_table_number}", "node") + op.rename_table(f"cre_links{temp_table_number}", "cre_links") + op.rename_table(f"cre_node_links{temp_table_number}", "cre_node_links") + + +def add_constraints(): + with op.batch_alter_table("cre") as batch_op: + batch_op.create_unique_constraint( + columns=["name", "external_id"], constraint_name="unique_cre_fields" + ) + + with op.batch_alter_table("node") as batch_op: + batch_op.create_unique_constraint( + columns=[ + "name", + "section", + "subsection", + "ntype", + "description", + "version", + ], + constraint_name="uq_node", + ) + with op.batch_alter_table("cre_links") as batch_op: + batch_op.create_unique_constraint( + columns=["group", "cre"], constraint_name="uq_cre_link_pair" + ) + with op.batch_alter_table("cre_node_links") as batch_op: + batch_op.create_unique_constraint( + columns=["cre", "node"], + constraint_name="uq_cre_node_link_pair", + ) + + +# WARNING: The 
following recreates the entire DB, hence will be relatively slow for big databases +# Necessary since we are changing all primary and foreign keys +def upgrade(): + cre2, node2, cre_links2, cre_node_links2 = create_tmp_tables(sa.String()) + copy_tables( + cre_table=cre2, + node_table=node2, + cre_link_table=cre_links2, + cre_node_link_table=cre_node_links2, + ) + update_ids_to_uuid() + drop_old_tables() + rename_tables() + add_constraints() + + +def downgrade(): + cleanup() + cre2, node2, cre_links2, cre_node_links2 = create_tmp_tables(sa.Integer()) + copy_tables( + cre_table=cre2, + node_table=node2, + cre_link_table=cre_links2, + cre_node_link_table=cre_node_links2, + ) + downgrade_uuid_to_id() + drop_old_tables() + rename_tables() + add_constraints() diff --git a/migrations/versions/3c65127871a6_change_standards_table_to_nodes.py b/migrations/versions/3c65127871a6_change_standards_table_to_nodes.py index 21437ce41..2a75e1b35 100644 --- a/migrations/versions/3c65127871a6_change_standards_table_to_nodes.py +++ b/migrations/versions/3c65127871a6_change_standards_table_to_nodes.py @@ -23,9 +23,10 @@ def migrate_data_between_standards_and_node(new_table, old_table_name): config.get_section(config.config_ini_section), prefix="sqlalchemy." ) connection = op.get_bind() - standards_data = connection.execute( + standards = connection.execute( f"Select id,name,section,subsection,link from {old_table_name}" - ).fetchall() + ) + standards_data = standards.fetchall() if standards else [] if old_table_name == "standard": nodes = [ { @@ -63,9 +64,10 @@ def migrate_data_between_links_and_cre_node_links( config.get_section(config.config_ini_section), prefix="sqlalchemy." 
) connection = op.get_bind() - links_data = connection.execute( + links = connection.execute( f"Select type,cre,{standard_column_name} from {old_table_name}" - ).fetchall() + ) + links_data = links.fetchall() if links else [] cre_node_links = [ {"type": dat[0], "cre": dat[1], new_column_name: dat[2]} for dat in links_data ] @@ -131,15 +133,17 @@ def downgrade(): standard = op.create_table( "standard", - sa.Column("id", sa.INTEGER(), nullable=False), + sa.Column("id", sa.INTEGER(), primary_key=True), sa.Column("name", sa.VARCHAR(), nullable=True), sa.Column("section", sa.VARCHAR(), nullable=False), sa.Column("subsection", sa.VARCHAR(), nullable=True), sa.Column("tags", sa.VARCHAR(), nullable=True), sa.Column("version", sa.VARCHAR(), nullable=True), sa.Column("link", sa.VARCHAR(), nullable=True), - sa.PrimaryKeyConstraint("id", name="pk_standard"), - sa.UniqueConstraint("name", "section", "subsection", name="standard_section"), + sa.UniqueConstraint( + columns=["name", "section", "subsection"], + constraint_name="standard_section", + ), ) links = op.create_table( "links", diff --git a/migrations/versions/455f052a44ea_add_cascades_for_foreign_keys.py b/migrations/versions/455f052a44ea_add_cascades_for_foreign_keys.py new file mode 100644 index 000000000..614820b66 --- /dev/null +++ b/migrations/versions/455f052a44ea_add_cascades_for_foreign_keys.py @@ -0,0 +1,87 @@ +"""add cascades for foreign keys + +Revision ID: 455f052a44ea +Revises: 3c65127871a6 +Create Date: 2022-04-03 17:40:00.616539 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "455f052a44ea" +down_revision = "3c65127871a6" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table("cre_links", schema=None) as batch_op: + batch_op.drop_constraint("fk_crelinks_group_cre", type_="foreignkey") + batch_op.drop_constraint("fk_crelinks_cre_cre", type_="foreignkey") + batch_op.create_foreign_key( + batch_op.f("fk_cre_links_group_cre"), + "cre", + ["group"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + batch_op.create_foreign_key( + batch_op.f("fk_cre_links_cre_cre"), + "cre", + ["cre"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + + with op.batch_alter_table("cre_node_links", schema=None) as batch_op: + batch_op.drop_constraint("fk_cre_node_links_cre_cre", type_="foreignkey") + batch_op.drop_constraint("fk_cre_node_links_node_node", type_="foreignkey") + batch_op.create_foreign_key( + batch_op.f("fk_cre_node_links_cre_cre"), + "cre", + ["cre"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + batch_op.create_foreign_key( + batch_op.f("fk_cre_node_links_node_node"), + "node", + ["node"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + + with op.batch_alter_table("cre_node_links", schema=None) as batch_op: + batch_op.drop_constraint( + batch_op.f("fk_cre_node_links_node_node"), type_="foreignkey" + ) + batch_op.drop_constraint( + batch_op.f("fk_cre_node_links_cre_cre"), type_="foreignkey" + ) + batch_op.create_foreign_key( + "fk_cre_node_links_node_node", "node", ["node"], ["id"] + ) + batch_op.create_foreign_key("fk_cre_node_links_cre_cre", "cre", ["cre"], ["id"]) + + with op.batch_alter_table("cre_links", schema=None) as batch_op: + batch_op.drop_constraint(batch_op.f("fk_cre_links_cre_cre"), type_="foreignkey") + batch_op.drop_constraint( + batch_op.f("fk_cre_links_group_cre"), type_="foreignkey" + ) + batch_op.create_foreign_key("fk_crelinks_cre_cre", "cre", ["cre"], ["id"]) + batch_op.create_foreign_key("fk_crelinks_group_cre", "cre", ["group"], ["id"]) + + # ### end Alembic commands ### diff --git a/migrations/versions/7a17989aa1e3_first_migration.py b/migrations/versions/7a17989aa1e3_first_migration.py index 7f4fdfb07..611359a1f 100644 --- a/migrations/versions/7a17989aa1e3_first_migration.py +++ b/migrations/versions/7a17989aa1e3_first_migration.py @@ -43,30 +43,30 @@ def upgrade(): op.create_table( "crelinks", sa.Column("type", sa.String(), nullable=True), - sa.Column("group", sa.Integer(), nullable=False), - sa.Column("cre", sa.Integer(), nullable=False), - sa.ForeignKeyConstraint( - ["cre"], - ["cre.id"], + sa.Column( + "group", + sa.Integer(), + sa.ForeignKey("cre.id", onupdate="CASCADE", ondelete="CASCADE"), ), - sa.ForeignKeyConstraint( - ["group"], - ["cre.id"], + sa.Column( + "cre", + sa.Integer(), + sa.ForeignKey("cre.id", onupdate="CASCADE", ondelete="CASCADE"), ), sa.PrimaryKeyConstraint("group", "cre"), ) op.create_table( "links", sa.Column("type", sa.String(), nullable=True), - sa.Column("cre", sa.Integer(), nullable=False), - sa.Column("standard", sa.Integer(), nullable=False), - sa.ForeignKeyConstraint( - ["cre"], - ["cre.id"], + sa.Column( + "cre", + sa.Integer(), + 
sa.ForeignKey("cre.id", onupdate="CASCADE", ondelete="CASCADE"), ), - sa.ForeignKeyConstraint( - ["standard"], - ["standard.id"], + sa.Column( + "standard", + sa.Integer(), + sa.ForeignKey("standard.id", onupdate="CASCADE", ondelete="CASCADE"), ), sa.PrimaryKeyConstraint("cre", "standard"), ) From 2cb5310af63f0acedd3659f689c47f9a3a34e7a3 Mon Sep 17 00:00:00 2001 From: Spyros Date: Sun, 10 Apr 2022 22:59:27 +0100 Subject: [PATCH 2/4] coverage improvements (#198) --- application/cmd/cre_main.py | 10 +---- application/tests/cre_main_test.py | 59 +++++++++++++++++++----------- cre.py | 7 +++- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 444cd54b4..ed1a6be51 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -89,18 +89,12 @@ def register_cre(cre: defs.CRE, collection: db.Node_collection) -> db.CRE: collection.add_internal_link( dbcre, register_cre(link.document, collection), type=link.ltype ) - elif type(link.document) == defs.Standard: + else: collection.add_link( cre=dbcre, node=register_node(node=link.document, collection=collection), type=link.ltype, ) - elif type(link.document) == defs.Tool: - collection.add_link( - cre=dbcre, - tool=register_tool(tool=link.document, collection=collection), - type=link.ltype, - ) return dbcre @@ -327,7 +321,7 @@ def print_graph() -> None: raise NotImplementedError -def run(args: argparse.Namespace) -> None: +def run(args: argparse.Namespace) -> None: # pragma: no cover script_path = os.path.dirname(os.path.realpath(__file__)) os.path.join(script_path, "../cres") diff --git a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index a2ac238cd..f0447c7ac 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -39,6 +39,7 @@ def test_register_node_with_links(self) -> None: id="", description="", name="standard_with_links", + section="Standard With Links", links=[ 
defs.Link( document=defs.Standard( @@ -54,9 +55,15 @@ def test_register_node_with_links(self) -> None: name="CodemcCodeFace", ) ), + defs.Link( + document=defs.Tool( + description="awesome hacking tool", + name="ToolmcToolFace", + ) + ), ], - section="Standard With Links", ) + ret = main.register_node(node=standard_with_links, collection=self.collection) # assert returned value makes sense self.assertEqual(ret.name, "standard_with_links") @@ -68,46 +75,51 @@ def test_register_node_with_links(self) -> None: self.assertIsNone(thing.cre) self.assertEqual(self.collection.session.query(db.Links).all(), []) - # 3 cre-less nodes in the db - self.assertEqual(len(self.collection.session.query(db.Node).all()), 3) + + # 4 cre-less nodes in the db + self.assertEqual(len(self.collection.session.query(db.Node).all()), 4) def test_register_node_with_cre(self) -> None: + known_standard_with_cre = defs.Standard( + name="CWE", + section="598", + links=[ + defs.Link(document=defs.CRE(id="101-202", name="crename")), + ], + ) standard_with_cre = defs.Standard( - doctype=defs.Credoctypes.Standard, id="", description="", name="standard_with_cre", links=[ - defs.Link( - document=defs.CRE( - doctype=defs.Credoctypes.CRE, - id="101-202", - description="cre desc", - name="crename", - links=[], - tags=[], - metadata={}, - ) - ), defs.Link( document=defs.Tool( - doctype=defs.Credoctypes.Tool, tooltype=defs.ToolTypes.Offensive, name="zap", ) ), + defs.Link( + document=defs.Standard( + name="CWE", + section="598", + links=[ + defs.Link(document=defs.CRE(id="101-202", name="crename")), + ], + ) + ), ], section="standard_with_cre", ) + main.register_node(node=known_standard_with_cre, collection=self.collection) main.register_node(node=standard_with_cre, collection=self.collection) # assert db structure makes sense self.assertEqual( - len(self.collection.session.query(db.Links).all()), 2 - ) # 2 links in the db + len(self.collection.session.query(db.Links).all()), 3 + ) # 3 links in the db 
self.assertEqual( - len(self.collection.session.query(db.Node).all()), 2 - ) # 2 standards in the db + len(self.collection.session.query(db.Node).all()), 3 + ) # 3 standards in the db self.assertEqual( len(self.collection.session.query(db.CRE).all()), 1 ) # 1 cre in the db @@ -181,16 +193,16 @@ def test_register_standard_with_groupped_cre_links(self) -> None: def test_register_cre(self) -> None: standard = defs.Standard( - doctype=defs.Credoctypes.Standard, name="ASVS", section="SESSION-MGT-TOKEN-DIRECTIVES-DISCRETE-HANDLING", subsection="3.1.1", ) + tool = defs.Tool(name="Tooly", tooltype=defs.ToolTypes.Defensive) cre = defs.CRE( id="100", description="CREdesc", name="CREname", - links=[defs.Link(document=standard)], + links=[defs.Link(document=standard), defs.Link(document=tool)], tags=["CREt1", "CREt2"], metadata={"tags": ["CREl1", "CREl2"]}, ) @@ -199,6 +211,9 @@ def test_register_cre(self) -> None: self.assertEqual( len(self.collection.session.query(db.CRE).all()), 1 ) # 1 cre in the db + self.assertEqual( + len(self.collection.session.query(db.Node).all()), 2 + ) # 2 nodes in the db def test_parse_file(self) -> None: file: List[Dict[str, Any]] = [ diff --git a/cre.py b/cre.py index fec0b945c..27ff3b849 100644 --- a/cre.py +++ b/cre.py @@ -9,7 +9,6 @@ from flask_migrate import Migrate # type: ignore from application import create_app, sqla # type: ignore -from application.cmd import cre_main # Hacky solutions to make this both a command line application with argparse and a flask application @@ -34,6 +33,9 @@ def test(cover: coverage.Coverage, test_names: List[str]) -> None: config_file="application/tests/.coveragerc", ) COV.start() + # Hack to get coverage to cover method and class defs + from application import create_app, sqla # type: ignore + from application.cmd import cre_main if test_names: tests = unittest.TestLoader().loadTestsFromNames(test_names) @@ -131,6 +133,9 @@ def main() -> None: help="import supported github tools, urls can be found in 
misc_tools_parser.py", ) args = parser.parse_args() + + from application.cmd import cre_main + cre_main.run(args) From 8ecd747735fa71e156cd3d85d7c0211f17dd0123 Mon Sep 17 00:00:00 2001 From: Spyros Date: Tue, 12 Apr 2022 19:46:48 +0100 Subject: [PATCH 3/4] Improve coverage (#199) * coverage improvements * add zap parser tests, start on adding alerts tags parsing * add cheatsheet parser tests * made zap parser also recognise top10 tags --- .../cheatsheets_parser.py | 17 +++--- .../zap_alerts_parser.py | 53 +++++++++++++++---- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/application/utils/external_project_parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/cheatsheets_parser.py index f105fd898..405126a7f 100644 --- a/application/utils/external_project_parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/cheatsheets_parser.py @@ -18,13 +18,18 @@ def cheatsheet(section: str, hyperlink: str, tags: List[str]) -> defs.Standard: def parse_cheatsheets(cache: db.Node_collection): c_repo = "https://github.com/OWASP/CheatSheetSeries.git" - cheasheets_path = "cheatsheets/" + cheatsheets_path = "cheatsheets/" + repo = git.clone(c_repo) + register_cheatsheets(repo=repo, cache=cache, cheatsheets_path=cheatsheets_path) + + +def register_cheatsheets(cache: db.Node_collection, repo, cheatsheets_path, repo_path): + title_regexp = r"# (?P<title>.+)" cre_link = r"(https://www\.)?opencre.org/cre/(?P<cre>\d+-\d+)" - repo = git.clone(c_repo) - files = os.listdir(os.path.join(repo.working_dir, cheasheets_path)) + files = os.listdir(os.path.join(repo.working_dir, cheatsheets_path)) for mdfile in files: - pth = os.path.join(repo.working_dir, cheasheets_path, mdfile) + pth = os.path.join(repo.working_dir, cheatsheets_path, mdfile) name = None tag = None section = None @@ -39,9 +44,7 @@ def parse_cheatsheets(cache: db.Node_collection): name = title.group("title") cre_id = cre.group("cre") cres = cache.get_CREs(external_id=cre_id) - 
hyperlink = ( - f"{c_repo.replace('.git','')}/tree/master/{cheasheets_path}{mdfile}" - ) + hyperlink = f"{repo_path.replace('.git','')}/tree/master/{cheatsheets_path}{mdfile}" for dbcre in cres: cs = cheatsheet( section=name, diff --git a/application/utils/external_project_parsers/zap_alerts_parser.py b/application/utils/external_project_parsers/zap_alerts_parser.py index 8fbe02155..cf50d7b89 100644 --- a/application/utils/external_project_parsers/zap_alerts_parser.py +++ b/application/utils/external_project_parsers/zap_alerts_parser.py @@ -17,10 +17,10 @@ def zap_alert( name: str, id: str, description: str, tags: List[str], code: str ) -> defs.Tool: + tags.append(id) return defs.Tool( tooltype=defs.ToolTypes.Offensive, name=f"ZAP Rule: {name}", - id=id, description=description, tags=tags, hyperlink=code, @@ -30,14 +30,19 @@ def zap_alert( def parse_zap_alerts(cache: db.Node_collection): zaproxy_website = "https://github.com/zaproxy/zaproxy-website.git" alerts_path = "site/content/docs/alerts/" + repo = git.clone(zaproxy_website) + register_alerts(repo=repo, cache=cache, alerts_path=alerts_path) + + +def register_alerts(cache: db.Node_collection, repo: git.git, alerts_path: str): zap_md_cwe_regexp = r"cwe: ?(?P<cweId>\d+)" zap_md_title_regexp = r"title: ?(?P<title>\".+\")" zap_md_alert_id_regexp = r"alertid: ?(?P<id>\d+)" zap_md_alert_type_regexp = r"alerttype: ?(?P<type>\".+\")" zap_md_solution_regexp = r"solution: ?(?P<solution>\".+\")" zap_md_code_regexp = r"code: ?(?P<code>.+)" + zap_md_top10_regexp = r"OWASP_(?P<year>\d\d\d\d)_A(?P<num>\d\d?)" - repo = git.clone(zaproxy_website) for mdfile in os.listdir(os.path.join(repo.working_dir, alerts_path)): pth = os.path.join(repo.working_dir, alerts_path, mdfile) name = None @@ -72,20 +77,48 @@ def parse_zap_alerts(cache: db.Node_collection): ) continue cwe = re.search(zap_md_cwe_regexp, mdtext) + alert = zap_alert( + name=name, + id=externalId, + description=description, + tags=[tag], + code=code, + ) + dbnode = 
cache.add_node(alert) + + top10 = re.finditer(zap_md_top10_regexp, mdtext) + if top10: + for match in top10: + year = match.group("year") + num = match.group("num") + entries = cache.get_nodes(name=f"Top10 {year}", ntype="Standard") + entry = [e for e in entries if str(int(num)) in e.section] + if entry: + logger.info( + f"Found zap alert {name} linking to {entry[0].name}{entry[0].section}" + ) + for cre in [ + nl + for nl in entry[0].links + if nl.document.doctype == defs.Credoctypes.CRE + ]: + cache.add_link( + cre=db.dbCREfromCRE(cre.document), node=dbnode + ) + else: + logger.error( + f"Zap Alert {name} links to OWASP top 10 {year}:{num} but CRE doesn't know about it, incomplete data?" + ) if cwe: cweId = cwe.group("cweId") + logger.info(f"Found zap alert {name} linking to CWE {cweId}") cwe_nodes = cache.get_nodes(name="CWE", section=cweId) for node in cwe_nodes: for link in node.links: if link.document.doctype == defs.Credoctypes.CRE: - alert = zap_alert( - name=name, - id=externalId, - description=description, - tags=[tag], - code=code, - ) - dbnode = cache.add_node(alert) + cache.add_link( cre=db.dbCREfromCRE(link.document), node=dbnode ) + else: + logger.info(f"CWE id not found in alert {externalId}, skipping linking") From bf1ad2656a0d60ccb9a9e8917ec0e11b0fa95b4d Mon Sep 17 00:00:00 2001 From: Spyros <northdpole@users.noreply.github.com> Date: Tue, 12 Apr 2022 19:53:05 +0100 Subject: [PATCH 4/4] make zap alerts have the correct link type (#200) --- .../utils/external_project_parsers/zap_alerts_parser.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/application/utils/external_project_parsers/zap_alerts_parser.py b/application/utils/external_project_parsers/zap_alerts_parser.py index cf50d7b89..94c6d0401 100644 --- a/application/utils/external_project_parsers/zap_alerts_parser.py +++ b/application/utils/external_project_parsers/zap_alerts_parser.py @@ -103,7 +103,9 @@ def register_alerts(cache: db.Node_collection, repo: git.git, 
alerts_path: str): if nl.document.doctype == defs.Credoctypes.CRE ]: cache.add_link( - cre=db.dbCREfromCRE(cre.document), node=dbnode + cre=db.dbCREfromCRE(cre.document), + node=dbnode, + type=defs.LinkTypes.LinkedTo, ) else: logger.error( @@ -118,7 +120,9 @@ def register_alerts(cache: db.Node_collection, repo: git.git, alerts_path: str): if link.document.doctype == defs.Credoctypes.CRE: cache.add_link( - cre=db.dbCREfromCRE(link.document), node=dbnode + cre=db.dbCREfromCRE(link.document), + node=dbnode, + type=defs.LinkTypes.LinkedTo, ) else: logger.info(f"CWE id not found in alert {externalId}, skipping linking")