diff --git a/Analytic_Process.py b/Analytic_Process.py index de919ac..ae514a8 100644 --- a/Analytic_Process.py +++ b/Analytic_Process.py @@ -1,69 +1,81 @@ -import threading as th -import multiprocessing as mp -import json import io +import json +import multiprocessing as mp import os -import validity_constants as validity +import threading as th from itertools import count -import schema_graph -from schema_checker import schema_checker -from load_schema_from_web import load_schema -from Count_String_Visitor import Count_String_Visitor +import schema_graph +import validity_constants as validity from AdditionalProperties_Visitor import AdditionalProperties_Visitor from AllOf_Visitor import AllOf_Visitor from AnyOf_Visitor import AnyOf_Visitor from Arrays_Visitor import Arrays_Visitor +from Boolean_Visitor import Boolean_Visitor + +from Count_String_Visitor import Count_String_Visitor from CountReferences_Visitor import CountReferences_Visitor from Enum_Visitor import Enum_Visitor +from load_schema_from_web import load_schema from MultipleOf_Visitor import MultipleOf_Visitor from Not_Visitor import Not_Visitor +from NullType_Visitor import NullType_Visitor from Number_Visitor import Number_Visitor +from Object_Visitor import Object_Visitor from Pattern_Visitor import Pattern_Visitor from Required_Visitor import Required_Visitor +from schema_checker import schema_checker from UniqueItems_Visitor import UniqueItems_Visitor from ValueRestriction_Visitor import ValueRestriction_Visitor -from Boolean_Visitor import Boolean_Visitor -from NullType_Visitor import NullType_Visitor -from Object_Visitor import Object_Visitor class Analytic_Process(mp.Process): """! @brief A definition of a process to analyze a JSON Schema files - An Analytic_Process gets a list of files to analyze, a dictionary for the - outputs, a list for all processed filenames and three locks for all shared - data. In this way the analysis is parallelized over files. - Additionally, with this approach, a blocking (e.g. a big) file does only - block one thread while the other threads can execute and analyse other - files in parallel. + An Analytic_Process gets a list of files to analyze, a dictionary for the + outputs, a list for all processed filenames and three locks for all shared + data. In this way the analysis is parallelized over files. + Additionally, with this approach, a blocking (e.g. a big) file does only + block one thread while the other threads can execute and analyse other + files in parallel. """ + _ids = count() - def __init__(self, verbose_flag = True, filelist = [], cat_list_dict = dict(), namedict = dict(), \ - filename_cat_dict = dict(), print_lock = th.Lock(), file_lock = th.Lock(), res_lock = th.Lock()): + + def __init__( + self, + verbose_flag=True, + filelist=[], + cat_list_dict=dict(), + namedict=dict(), + filename_cat_dict=dict(), + print_lock=th.Lock(), + file_lock=th.Lock(), + res_lock=th.Lock(), + ): """! @brief The constructor of of an Analytic_Process - To create an Analytic_Process, the main process has to set up a list of all files to be analyzed, - a dictionary with categories as keys and lists of attribute dictionaries as values for the results, - a list to store all valid and processed filenames, and a dicitionary to match filenames with categories. - Additiononally the main process has to prepare thread locks for printing to console, for fetching - new files from the list of files to analyse and one to store the results. All lists and dictionaries - are shared resources and therefor protected with locks. - - @param verbose_flag: specifies whether the output shall be shwon on the console - @param filelist a list of all files (i.e. their pathes) that shall be analysed - @param cat_list_dict a dictionary with categories as keys and lists of result attribute_dicts as values - @param namedict a dictionary of all valid and processed filenames sorted by category - as keys and lists of filenames as values - @param filename_cat_dict a dictionary to match files against their category - @param print_lock lock for console outputs - @param file_lock lock for files to analyse list - @param res_lock lock for result dictionary cat_list_dict - - @return void + To create an Analytic_Process, the main process has to set up a list of all files to be analyzed, + a dictionary with categories as keys and lists of attribute dictionaries as values for the results, + a list to store all valid and processed filenames, and a dicitionary to match filenames with categories. + Additiononally the main process has to prepare thread locks for printing to console, for fetching + new files from the list of files to analyse and one to store the results. All lists and dictionaries + are shared resources and therefor protected with locks. + + @param verbose_flag: specifies whether the output shall be shwon on the console + @param filelist a list of all files (i.e. their pathes) that shall be analysed + @param cat_list_dict a dictionary with categories as keys and lists of result attribute_dicts as values + @param namedict a dictionary of all valid and processed filenames sorted by category + as keys and lists of filenames as values + @param filename_cat_dict a dictionary to match files against their category + @param print_lock lock for console outputs + @param file_lock lock for files to analyse list + @param res_lock lock for result dictionary cat_list_dict + + @return void """ super().__init__() - ## Process ID + ## Process ID self.id = next(self._ids) ## Path to logfile self.schema_graph_log_path = "../../schema_graph.log" @@ -109,17 +121,16 @@ def __init__(self, verbose_flag = True, filelist = [], cat_list_dict = dict(), n self.visitor_dict["nulltype_visitor"] = NullType_Visitor() self.visitor_dict["object_visitor"] = Object_Visitor() - def setFilepath(self, filepath): - """! @brief Setter for self.filepath - @param filepath Path to the file that shall later be analysed - @return void + """! @brief Setter for self.filepath + @param filepath Path to the file that shall later be analysed + @return void """ self.filepath = filepath def resetVisitors(self): """! @brief Reset all used visitors - This is necessary because otherwise counts are counted up over files. + This is necessary because otherwise counts are counted up over files. """ self.visitor_dict["add_prop_visitor"] = AdditionalProperties_Visitor() self.visitor_dict["all_of_visitor"] = AllOf_Visitor() @@ -140,37 +151,37 @@ def resetVisitors(self): def getFilepath(self): """! @brief Getter for self.filepath - @return self.filepath Path to currently analysed file + @return self.filepath Path to currently analysed file """ return self.filepath def getAttributeDict(self): """! @brief Getter for the dictionary that stores analysis results. - @return self.attribute_dict Analysis results as dictionary + @return self.attribute_dict Analysis results as dictionary """ return self.attribute_dict def getID(self): """!@brief Getter for Process' ID - @return self.id Process' instance ID + @return self.id Process' instance ID """ return self.id def run(self): """! @brief Process' main function to analyse one file after another as long as files are available. - - The thread's core functionality is to analyse multiple files, one after another. Therefor - a thread gets the next file to process from the list given in constructor. A thread deletes the - file it is going to process from the list. Race conditions arepossible here. Thats - why the block is secured with a lock. - As long as there are files available to process, threads continue to take files from the list and - analyse them. The result is stored in the result dictionary in a protected manner. + The thread's core functionality is to analyse multiple files, one after another. Therefor + a thread gets the next file to process from the list given in constructor. A thread deletes the + file it is going to process from the list. Race conditions arepossible here. Thats + why the block is secured with a lock. - @return void + As long as there are files available to process, threads continue to take files from the list and + analyse them. The result is stored in the result dictionary in a protected manner. + + @return void """ @@ -182,7 +193,7 @@ def run(self): if len(self.filelist) == 0: files_available = False self.file_lock.release() - break; + break else: self.filepath = self.filelist[0] self.filelist.pop(0) @@ -198,7 +209,7 @@ def run(self): return # analyse the schema.... - if(self.verbose): + if self.verbose: # ... verbose self.validity_flag = self.analyse_schema_verbose(schema_dict) else: @@ -211,17 +222,17 @@ def run(self): cat = self.filename_cat_dict[f_name] if self.validity_flag == validity.SCHEMA_REFERENCE_EXCEPTION: self.print_lock.acquire(True) - with open(self.schema_graph_log_path, 'a+') as fp: - fp.write(f_name + " contains invalid references!\n") + with open(self.schema_graph_log_path, "a+") as fp: + fp.write(f_name + " contains invalid references!\n") self.print_lock.release() elif self.validity_flag == validity.SCHEMA_VALIDATOR_EXCEPTION: self.print_lock.acquire(True) - with open(self.schema_graph_log_path, 'a+') as fp: - fp.write(f_name + " is invalid according to validator!\n") + with open(self.schema_graph_log_path, "a+") as fp: + fp.write(f_name + " is invalid according to validator!\n") self.print_lock.release() elif self.attribute_dict is None: self.print_lock.acquire(True) - with open(self.schema_graph_log_path, 'a+') as fp: + with open(self.schema_graph_log_path, "a+") as fp: fp.write(f_name + "'s validity check went terribly wrong!\n") self.print_lock.release() else: @@ -231,22 +242,21 @@ def run(self): self.cat_list_dict[cat].append(self.attribute_dict) self.res_lock.release() - def analyse_schema(self, schema): """! @brief Analyze all features of the given schema. - - This function performs all analysis steps and stores the results of every step in - the internal result dictionary self.attribute_dictionary. - @param schema dictionary representation of the schema produced by json parser module + This function performs all analysis steps and stores the results of every step in + the internal result dictionary self.attribute_dictionary. - @return Indicator "enum" for valid schemas + @param schema dictionary representation of the schema produced by json parser module + + @return Indicator "enum" for valid schemas """ # reset attribute dict in case of failure in previous file - self.attribute_dict = dict() + self.attribute_dict = dict() self.resetVisitors() ret_val = validity.SCHEMA_VALID - validator = schema_checker() + validator = schema_checker() # check whether schema is valid according to validator module jsonschema self.is_valid = validator.check_schema(schema) @@ -260,16 +270,18 @@ def analyse_schema(self, schema): # all counts are implemented using the visitor pattern # make all visitors visit the resolved reference graph and store the results - for (name,visitor) in self.visitor_dict.items(): + for (name, visitor) in self.visitor_dict.items(): sg.visit_res_graph(visitor) - key = name[:-7] + "count" #replace _visitor with _count + key = name[:-7] + "count" # replace _visitor with _count self.attribute_dict[key] = visitor.getCount() # perform all other analysis steps and store results self.attribute_dict["filename"] = os.path.basename(self.filepath) self.attribute_dict["ref_count"] = sg.getNoReferences() self.attribute_dict["depth_schema"] = sg.depth_schema() - self.attribute_dict["depth_resolvedTree"] = sg.depth_resolvedReferenceGraph() + self.attribute_dict[ + "depth_resolvedTree" + ] = sg.depth_resolvedReferenceGraph() self.attribute_dict["fan_in"] = sg.getMaxFanIn() self.attribute_dict["fan_out"] = sg.getMaxFanOut() self.attribute_dict["has_recursion"] = sg.check_recursion() @@ -280,10 +292,9 @@ def analyse_schema(self, schema): self.attribute_dict["fan_out_list"] = sg.getFanOutList() self.attribute_dict["blow_up"] = sg.getBlowUpFactor() - if sg.getInvalidReferenceFlag() == True: - #Schema contains invalid references, what means that it is not valid - #in terms of semantics --> not taken into account + # Schema contains invalid references, what means that it is not valid + # in terms of semantics --> not taken into account ret_val = validity.SCHEMA_REFERENCE_EXCEPTION self.attribute_dict = None else: @@ -294,32 +305,30 @@ def analyse_schema(self, schema): def analyse_schema_verbose(self, schema): """! @brief Analyse the given schema and print all results to the console. - This function uses analyse_schema() to analyse the given schema. - - @param schema dictionary representation of schema produced by json module parser - - @return Indicator "enum" if schema is valid - + This function uses analyse_schema() to analyse the given schema. + + @param schema dictionary representation of schema produced by json module parser + + @return Indicator "enum" if schema is valid + """ - - ret_val = self.analyse_schema(schema) - #synchronized console output of the analysis results + ret_val = self.analyse_schema(schema) + + # synchronized console output of the analysis results if sg is not None: self.print_lock.acquire(True) self.print_results() self.print_lock.release() return ret_val - + def print_results(self): - """! @brief This function creates a console output of the analysis results. - - """ + """! @brief This function creates a console output of the analysis results.""" print("File at " + self.filepath + ":") print("Is Schema valid:", end=" ") - print(str(self.is_valid)) + print(str(self.is_valid)) print("Depth of Schema:", end=" ") print(str(self.attribute_dict["depth_schema"])) @@ -354,6 +363,6 @@ def print_results(self): def getValidityFlag(self): """! @brief Return the validity flag of the current processed schema. - @return Indictator "enum" if schema is valid + @return Indictator "enum" if schema is valid """ return self.validity_flag diff --git a/JSON_Schema_Analysis.py b/JSON_Schema_Analysis.py index 6ddf315..96f2ac8 100644 --- a/JSON_Schema_Analysis.py +++ b/JSON_Schema_Analysis.py @@ -12,18 +12,21 @@ - -c | --count analyse only first files in directory ./JSON """ -import sys, getopt +import getopt, sys + sys.path.append("./Visitors") sys.path.append("./NodeTypes") import os + absFilePath = os.path.abspath(__file__) os.chdir(os.path.dirname(absFilePath)) -from Analytic_Process import Analytic_Process -import threading as th -import multiprocessing as mp import csv, json +import multiprocessing as mp +import threading as th + import pandas as pd import validity_constants as validity +from Analytic_Process import Analytic_Process from array import * from schema_graph import schema_graph @@ -31,18 +34,18 @@ def main(argv): """! @brief This is the main entry function of JSON_Schema_Analysis. - The main function parses the command line arguments and starts the analyses as specified by them. - The JSON Schema documents are analysed in parallel with n threads, where n represents the number - of virtual CPU cores found by os.cpu_count(). - This method sets up all lists and dictionaries needed by an Analytic_Thread to perform the analysis - of the JSON Schema documents and to store the results. - After all files were analysed completely, the results are output as different csv files and as a - Excel sheet. - The resulting Excel sheet will look as follows: - - Filename | Category | add_prop_count | all_of_count | any_of_count | array_count | ref_count | str_count | enum_count | mult_of_count | not_count | number_count | pattern_count | required_count | unique_items_count | value_restriction_count | boolean_count | nulltype_count | object_count | depth_schema | depth_resolvedTree | fan_in | fan_out | has_recursion | no_path_or_cycle | width | reachability - --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- - File1 | example category | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val + The main function parses the command line arguments and starts the analyses as specified by them. + The JSON Schema documents are analysed in parallel with n threads, where n represents the number + of virtual CPU cores found by os.cpu_count(). + This method sets up all lists and dictionaries needed by an Analytic_Thread to perform the analysis + of the JSON Schema documents and to store the results. + After all files were analysed completely, the results are output as different csv files and as a + Excel sheet. + The resulting Excel sheet will look as follows: + + Filename | Category | add_prop_count | all_of_count | any_of_count | array_count | ref_count | str_count | enum_count | mult_of_count | not_count | number_count | pattern_count | required_count | unique_items_count | value_restriction_count | boolean_count | nulltype_count | object_count | depth_schema | depth_resolvedTree | fan_in | fan_out | has_recursion | no_path_or_cycle | width | reachability + --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- + File1 | example category | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val | val """ verbose_output = False file_count = 0 @@ -50,59 +53,63 @@ def main(argv): # parse command line arguments try: - opts, args = getopt.getopt(argv, "hvac:",["verbose","all","count="]) + opts, args = getopt.getopt(argv, "hvac:", ["verbose", "all", "count="]) except getopt.GetoptError: - print('JSON_Schema_Analysis.py [-v | --verbose] [(-a | --all) | ((-c | --count) )') + print( + "JSON_Schema_Analysis.py [-v | --verbose] [(-a | --all) | ((-c | --count) )" + ) sys.exit(2) for opt, arg in opts: - if opt == '-h': - print('JSON_Schema_Analysis.py [-v | --verbose] [(-a | --all) | ((-c | --count) )') + if opt == "-h": + print( + "JSON_Schema_Analysis.py [-v | --verbose] [(-a | --all) | ((-c | --count) )" + ) sys.exit() - elif opt in ('-v', '--verbose'): + elif opt in ("-v", "--verbose"): verbose_output = True - elif opt in ('-a', '--all'): + elif opt in ("-a", "--all"): file_count = 0 - elif opt in ('-c', '--count'): + elif opt in ("-c", "--count"): file_count = int(arg) # set up the environment as specified in the loaded files ## directory with JSON Schema documents - json_dir_path = "./JSON" + json_dir_path = "./JSON" ## csv that stores nicknames of files and there actual filename - file_spec_path = "./filename_spec.csv" + file_spec_path = "./filename_spec.csv" ## csv that stores nicknames of files and there category - cat_spec_path = "./categorisation.csv" + cat_spec_path = "./categorisation.csv" ## path to logfile - schema_graph_log_path = "./schema_graph.log" + schema_graph_log_path = "./schema_graph.log" ## multiprocessing manager manager = mp.Manager() ## matching nicknames to filenames filename_dict = manager.dict() ## matching nicknames to categories - cat_dict = manager.dict() + cat_dict = manager.dict() ## matching filenames to categories - filename_cat_dict = manager.dict() + filename_cat_dict = manager.dict() ## result dictionary - cat_list_dict = manager.dict() + cat_list_dict = manager.dict() ## dict of all valid and processed files - filename_res_dict = manager.dict() + filename_res_dict = manager.dict() - open(schema_graph_log_path, 'w+').close() #clean logfile + open(schema_graph_log_path, "w+").close() # clean logfile - with open(file_spec_path, 'r', newline='') as csvfile: + with open(file_spec_path, "r", newline="") as csvfile: csv_reader = csv.DictReader(csvfile) for row in csv_reader: filename_dict[row["name"]] = row["filename"] - with open(cat_spec_path, 'r', newline='') as csvfile: + with open(cat_spec_path, "r", newline="") as csvfile: csv_reader = csv.DictReader(csvfile) - for row in csv_reader: + for row in csv_reader: cat_dict[row["name"]] = row["category"] for name, filename in filename_dict.items(): filename_cat_dict[filename] = cat_dict[name] - help_cat_set = set(cat for cat in cat_dict.values()) #unique categories + help_cat_set = set(cat for cat in cat_dict.values()) # unique categories for cat in help_cat_set: cat_list_dict[cat] = manager.list() @@ -111,7 +118,7 @@ def main(argv): # create list of JSON Schema documents to be analysed unmanaged_pathes = list() pathes = manager.list() - + for file in os.listdir(json_dir_path): unmanaged_pathes.append(json_dir_path + "/" + file) @@ -130,60 +137,97 @@ def main(argv): print_lock = mp.Lock() list_lock = mp.Lock() res_lock = mp.Lock() - + print("Analysis of files started!") pathes_available = True - #creating and starting of threads - #threads now handle multiple files by them self + # creating and starting of threads + # threads now handle multiple files by them self if verbose_output: for i in range(0, thread_count): - thread_list.append(Analytic_Process(True, pathes, cat_list_dict, filename_res_dict, filename_cat_dict,\ - print_lock, list_lock, res_lock)) + thread_list.append( + Analytic_Process( + True, + pathes, + cat_list_dict, + filename_res_dict, + filename_cat_dict, + print_lock, + list_lock, + res_lock, + ) + ) else: for i in range(0, thread_count): - thread_list.append(Analytic_Process(False, pathes, cat_list_dict, filename_res_dict, filename_cat_dict,\ - print_lock, list_lock, res_lock)) + thread_list.append( + Analytic_Process( + False, + pathes, + cat_list_dict, + filename_res_dict, + filename_cat_dict, + print_lock, + list_lock, + res_lock, + ) + ) for i in range(0, thread_count): thread_list[i].start() - #wait for thread without timeout + # wait for thread without timeout for i in range(0, thread_count): - thread_list[i].join() + thread_list[i].join() unmanaged_cat_list_dict = dict() for key in cat_list_dict: unmanaged_cat_list_dict[key] = list() - - #create output by storing analysis results to csv and xlsx + # create output by storing analysis results to csv and xlsx createKeywordCountTable(cat_list_dict) saveAllInformation(cat_list_dict, filename_res_dict) createFanInCSV(cat_list_dict) createFanOutCSV(cat_list_dict) print("Finished analysis!") - + def createKeywordCountTable(cat_list_dict): """! @brief Output a CSV file with categories and each counted value. - - The ouput csv file specifies summed keyword count per category - @param cat_list_dict dictionary with all analysis results + The ouput csv file specifies summed keyword count per category + + @param cat_list_dict dictionary with all analysis results """ # this list specifies all relevant attributes for this csv file - attribute_name_list = ["add_prop_count", "all_of_count", "any_of_count", \ - "array_count", "ref_count", "str_count", "enum_count", "mult_of_count", \ - "not_count", "number_count", "pattern_count", "required_count", "unique_items_count", \ - "value_restriction_count", "boolean_count", "nulltype_count", "object_count"] + attribute_name_list = [ + "add_prop_count", + "all_of_count", + "any_of_count", + "array_count", + "ref_count", + "str_count", + "enum_count", + "mult_of_count", + "not_count", + "number_count", + "pattern_count", + "required_count", + "unique_items_count", + "value_restriction_count", + "boolean_count", + "nulltype_count", + "object_count", + ] # sum up the keyword counts for all relevant attributes per category attr_count = 0 cat_count = 0 - table = [[0 for x in range(len(cat_list_dict.keys()))] for y in range(len(attribute_name_list))] + table = [ + [0 for x in range(len(cat_list_dict.keys()))] + for y in range(len(attribute_name_list)) + ] for attr in attribute_name_list: cat_count = 0 for cat in cat_list_dict: @@ -193,9 +237,9 @@ def createKeywordCountTable(cat_list_dict): table[attr_count][cat_count] = cat_att_sum cat_count += 1 attr_count += 1 - + # write the csv file to ../../KeywordCount.csv - with open("./KeywordCount.csv", 'w+') as csvfile: + with open("./KeywordCount.csv", "w+") as csvfile: csvwriter = csv.writer(csvfile, delimiter=",") csvwriter.writerow(["Attribute"] + list(cat_list_dict.keys())) attr_count = 0 @@ -203,17 +247,18 @@ def createKeywordCountTable(cat_list_dict): csvwriter.writerow([attribute_name_list[attr_count]] + row) attr_count += 1 + def saveAllInformation(cat_list_dict, filenamedict): """! @brief This function saves all analysed information in a csv-File and a Excel sheet. - This function creates a csv file where all analysis results of all Schema documents are stored. - It also creates an equivalent Excel sheet with the same information + This function creates a csv file where all analysis results of all Schema documents are stored. + It also creates an equivalent Excel sheet with the same information + + @param cat_list_dict a dictionary with all analysis results + @param filenemlist a list with the names of all valid and analysed JSON Schema documents + """ - @param cat_list_dict a dictionary with all analysis results - @param filenemlist a list with the names of all valid and analysed JSON Schema documents - """ - keyword_list = list() - #exclude fan_in and fan_out lists from this csv + # exclude fan_in and fan_out lists from this csv for key in cat_list_dict["data"][0].keys(): if (key == "fan_in_list") or (key == "fan_out_list") or (key == "filename"): continue @@ -222,14 +267,14 @@ def saveAllInformation(cat_list_dict, filenamedict): # create csv-file at ../../AnalysisResults.csv data = [] filenames = [] - with open("./AnalysisResults.csv", 'w+') as csvfile: - csvwriter = csv.writer(csvfile, delimiter=',') - # Header with all attribute names. These are extracted from example category conf and the + with open("./AnalysisResults.csv", "w+") as csvfile: + csvwriter = csv.writer(csvfile, delimiter=",") + # Header with all attribute names. These are extracted from example category conf and the # first element in the category's dictionary list - + csvwriter.writerow(["Category"] + keyword_list) for cat in cat_list_dict: - #filenames.extend(filenamedict[cat]) + # filenames.extend(filenamedict[cat]) for attr_dict in cat_list_dict[cat]: csv_row = [cat] filenames.append(attr_dict["filename"]) @@ -239,36 +284,38 @@ def saveAllInformation(cat_list_dict, filenamedict): csvwriter.writerow(csv_row) # create equivalent Excel Sheet at ../../AnalysisResults.xlsx - col_list = keyword_list; + col_list = keyword_list col_list.insert(0, "Category") df = pd.DataFrame(data, filenames, col_list) df.to_excel("./AnalysisResults.xlsx") - + def createFanInCSV(cat_list_dict): """! @brief This function creates a csv file with all fan-ins of all elements in all files. - The output csv file is used to generate plots of all fan-ins contained in a file. + The output csv file is used to generate plots of all fan-ins contained in a file. - @param cat_list_dict a dictionary containing all analysis results + @param cat_list_dict a dictionary containing all analysis results """ - with open("./FanInList.csv", 'w+') as csvfile: - csvwriter = csv.writer(csvfile, delimiter=',') + with open("./FanInList.csv", "w+") as csvfile: + csvwriter = csv.writer(csvfile, delimiter=",") for cat in cat_list_dict: for attr_dict in cat_list_dict[cat]: csvwriter.writerow([cat] + attr_dict["fan_in_list"]) + def createFanOutCSV(cat_list_dict): """! @brief This function creates a csv file with all fan-outs of all elements in all files. - The output csv file is used to generate plots of all fan-outs contained in a file. + The output csv file is used to generate plots of all fan-outs contained in a file. - @param cat_list_dict a dictionary containing all analysis results + @param cat_list_dict a dictionary containing all analysis results """ - with open("./FanOutList.csv", 'w+') as csvfile: - csvwriter = csv.writer(csvfile, delimiter=',') + with open("./FanOutList.csv", "w+") as csvfile: + csvwriter = csv.writer(csvfile, delimiter=",") for cat in cat_list_dict: for attr_dict in cat_list_dict[cat]: csvwriter.writerow([cat] + attr_dict["fan_out_list"]) + # entry point if __name__ == "__main__": main(sys.argv[1:]) diff --git a/NodeTypes/ArrayNode.py b/NodeTypes/ArrayNode.py index 1000253..4f3dfa0 100644 --- a/NodeTypes/ArrayNode.py +++ b/NodeTypes/ArrayNode.py @@ -1,8 +1,9 @@ from SchemaNode import SchemaNode + class ArrayNode(SchemaNode): """! @brief This class is used to represent JSON Schema arrays as nodes in the schema_graph.""" - + def __init__(self, name): ## Node's name self.name = name @@ -27,5 +28,3 @@ def getID(self): def setID(self, id): self.nodeID = id - - diff --git a/NodeTypes/KeyValueNode.py b/NodeTypes/KeyValueNode.py index d42dbfe..7690628 100644 --- a/NodeTypes/KeyValueNode.py +++ b/NodeTypes/KeyValueNode.py @@ -1,17 +1,19 @@ from SchemaNode import SchemaNode + class KeyValueNode(SchemaNode): """! @brief This class is used to represent Key-Value pairs of JSON Schema Documents as nodes in - the schema graph - A key-value pair looks as follows in the schema document: - @code{.json} - "type" : "string" - @endcode + the schema graph + A key-value pair looks as follows in the schema document: + @code{.json} + "type" : "string" + @endcode """ + def __init__(self, name, value): self.name = name self.value = value - self.nodeID = 0 + self.nodeID = 0 def getName(self): return self.name @@ -30,5 +32,3 @@ def getID(self): def setID(self, id): self.nodeID = id - - diff --git a/NodeTypes/ObjectNode.py b/NodeTypes/ObjectNode.py index 1c977c8..848b001 100644 --- a/NodeTypes/ObjectNode.py +++ b/NodeTypes/ObjectNode.py @@ -1,8 +1,9 @@ from SchemaNode import SchemaNode + class ObjectNode(SchemaNode): - """! @brief Class to represent objects in JSON Schema Documents as Nodes in the schema graph. - """ + """! @brief Class to represent objects in JSON Schema Documents as Nodes in the schema graph.""" + def __init__(self, name): self.name = name self.nodeID = 0 @@ -23,4 +24,4 @@ def getID(self): return self.nodeID def setID(self, id): - self.nodeID = id \ No newline at end of file + self.nodeID = id diff --git a/NodeTypes/SchemaNode.py b/NodeTypes/SchemaNode.py index fe29f5c..d4871d7 100644 --- a/NodeTypes/SchemaNode.py +++ b/NodeTypes/SchemaNode.py @@ -1,5 +1,6 @@ import abc + class SchemaNode(abc.ABC): """! @brief Abstract class Schema Node for nodes in schema_graph.""" @@ -10,13 +11,14 @@ def getName(self): @abc.abstractmethod def getValue(self): pass + @abc.abstractclassmethod def setValue(self, value): pass @abc.abstractmethod def accept(self, visitor): - pass + pass @abc.abstractmethod def getID(self): @@ -25,7 +27,3 @@ def getID(self): @abc.abstractmethod def setID(self, id): pass - - - - diff --git a/PyTest/test_schemagraph.py b/PyTest/test_schemagraph.py index 60a3955..a4e574e 100644 --- a/PyTest/test_schemagraph.py +++ b/PyTest/test_schemagraph.py @@ -1,22 +1,24 @@ -import sys, pytest +import pytest, sys + sys.path.append("../") sys.path.append("../NodeTypes") sys.path.append("../Visitors/") -from schema_graph import schema_graph import json -import pandas as pd + import networkx as nx +import pandas as pd from Count_String_Visitor import Count_String_Visitor from CountReferences_Visitor import CountReferences_Visitor +from schema_graph import schema_graph """! @package This file runs the unit test for JSON_Schema_Analysis project It loads all test cases with files as input and the expected values per category from the Excel Sheet TestDefintions.xlsx """ -#getting test data from ExcelSheet +# getting test data from ExcelSheet xl = pd.ExcelFile("TestDefinitions.xlsx") -df = xl.parse('Tests') +df = xl.parse("Tests") xl.close() ex_depth_list = list() @@ -43,56 +45,60 @@ filename_list.append(file) i += 1 + @pytest.mark.parametrize("test_input, expected", ex_depth_list) - def test_depth(test_input, expected): assert depth(test_input) == expected -@pytest.mark.parametrize("test_input, expected", ex_resdepth_list) +@pytest.mark.parametrize("test_input, expected", ex_resdepth_list) def test_resolvedDepth(test_input, expected): assert resDepth(test_input) == expected -@pytest.mark.parametrize("test_input, expected", ex_has_recursion_list) +@pytest.mark.parametrize("test_input, expected", ex_has_recursion_list) def test_recursion(test_input, expected): assert recursion(test_input) == expected -@pytest.mark.parametrize("test_input, expected", ex_string_count_list) +@pytest.mark.parametrize("test_input, expected", ex_string_count_list) def test_string_count(test_input, expected): assert string_count(test_input) == expected -@pytest.mark.parametrize("test_input, expected", ex_fan_in_list) +@pytest.mark.parametrize("test_input, expected", ex_fan_in_list) def test_fan_in(test_input, expected): assert fan_in(test_input) == expected -@pytest.mark.parametrize("test_input, expected", ex_reachability_list) +@pytest.mark.parametrize("test_input, expected", ex_reachability_list) def test_reachability(test_input, expected): assert reachability(test_input) == expected -@pytest.mark.parametrize("test_input, expected", ex_ref_count_list) +@pytest.mark.parametrize("test_input, expected", ex_ref_count_list) def test_refCount(test_input, expected): - assert refCount(test_input) == expected + assert refCount(test_input) == expected + def depth(filename): sg = getSg(filename) - + return sg.depth_schema() + def resDepth(filename): sg = getSg(filename) ret_val = sg.depth_resolvedReferenceGraph() return ret_val + def recursion(filename): sg = getSg(filename) return sg.check_recursion() + def string_count(filename): sg = getSg(filename) visitor = Count_String_Visitor() @@ -100,26 +106,31 @@ def string_count(filename): return visitor.getCount() + def fan_in(filename): sg = getSg(filename) return sg.getMaxFanIn() + def reachability(filename): sg = getSg(filename) return sg.check_reachability() + def refCount(filename): sg = getSg(filename) return sg.getNoReferences() + def origNodes(filename): sg = getSg(filename) return len(list(sg.nodes)) + def expNodes(filename): sg = getSg(filename) @@ -131,13 +142,14 @@ def expNodes(filename): return ret_val + def getSg(filename): - """! @brief Load Schema Graph from file - @param filename Name of the file located in ./TestSchemas directory + """! @brief Load Schema Graph from file + @param filename Name of the file located in ./TestSchemas directory - @return schema_graph of the given file + @return schema_graph of the given file """ - with open("./TestSchemas/"+filename, 'r') as fp: + with open("./TestSchemas/" + filename, "r") as fp: sg = schema_graph(filename) sg.load_schema(json.loads(fp.read())) diff --git a/Visitors/AdditionalProperties_Visitor.py b/Visitors/AdditionalProperties_Visitor.py index 78992d4..7fdf000 100644 --- a/Visitors/AdditionalProperties_Visitor.py +++ b/Visitors/AdditionalProperties_Visitor.py @@ -1,32 +1,34 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class AdditionalProperties_Visitor(Visitor): """! @brief Visitor to count the additionalProperties keyword in the Schema - This visitor counts all appearances of the additionalProperties keyword in the schema. + This visitor counts all appearances of the additionalProperties keyword in the schema. """ + def __init__(self): """! @brief Constructor of AdditionalProperties_Visitor. - It sets the additionalProperties count result value to zero. + It sets the additionalProperties count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a additionalProperties keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a additionalProperties keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "additionalProperties"): + if node.getName() == "additionalProperties": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of additionalProperties keyword in the Schema + @return The amount of appearances of additionalProperties keyword in the Schema """ return self.cnt diff --git a/Visitors/AllOf_Visitor.py b/Visitors/AllOf_Visitor.py index bb40d26..abd52c9 100644 --- a/Visitors/AllOf_Visitor.py +++ b/Visitors/AllOf_Visitor.py @@ -1,32 +1,34 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class AllOf_Visitor(Visitor): """! @brief Visitor to count the allOf keyword in the Schema - This visitor counts all appearances of the allOf keyword in the schema. + This visitor counts all appearances of the allOf keyword in the schema. """ + def __init__(self): """! @brief Constructor of AllOf_Visitor. - It sets the allOf count result value to zero. + It sets the allOf count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a allOf keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a allOf keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "allOf"): + if node.getName() == "allOf": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of allOf keyword in the Schema + @return The amount of appearances of allOf keyword in the Schema """ return self.cnt diff --git a/Visitors/AnyOf_Visitor.py b/Visitors/AnyOf_Visitor.py index 9951923..a863fb1 100644 --- a/Visitors/AnyOf_Visitor.py +++ b/Visitors/AnyOf_Visitor.py @@ -1,32 +1,34 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class AnyOf_Visitor(Visitor): """! @brief Visitor to count the anyOf keyword in the Schema - This visitor counts all appearances of the anyOf keyword in the schema. + This visitor counts all appearances of the anyOf keyword in the schema. """ + def __init__(self): """! @brief Constructor of AnyOf_Visitor. - It sets the anyOf count result value to zero. + It sets the anyOf count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a anyOf keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a anyOf keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "anyOf"): + if node.getName() == "anyOf": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of anyOf keyword in the Schema + @return The amount of appearances of anyOf keyword in the Schema """ return self.cnt diff --git a/Visitors/Arrays_Visitor.py b/Visitors/Arrays_Visitor.py index c54199e..4441323 100644 --- a/Visitors/Arrays_Visitor.py +++ b/Visitors/Arrays_Visitor.py @@ -1,32 +1,36 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Arrays_Visitor(Visitor): """! @brief Visitor to count the array keyword in the Schema - This visitor counts all appearances of the array keyword in the schema. + This visitor counts all appearances of the array keyword in the schema. """ + def __init__(self): """! @brief Constructor of Arrays_Visitor. - It sets the array count result value to zero. + It sets the array count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a array keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a array keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "type" and node.getValue() == "array"): #item-keyword implicitly expected + if ( + node.getName() == "type" and node.getValue() == "array" + ): # item-keyword implicitly expected self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of array keyword in the Schema + @return The amount of appearances of array keyword in the Schema """ return self.cnt diff --git a/Visitors/Boolean_Visitor.py b/Visitors/Boolean_Visitor.py index eb243c2..d73e6b2 100644 --- a/Visitors/Boolean_Visitor.py +++ b/Visitors/Boolean_Visitor.py @@ -1,38 +1,39 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Boolean_Visitor(Visitor): """! @brief Visitor to count the boolean type keyword in the Schema - This visitor counts all appearances of the boolean type keyword in the schema. + This visitor counts all appearances of the boolean type keyword in the schema. - A boolean type keyword in a JSON Schema document looks as follows: - @code{.json} - "type" : "boolean" - @endcode + A boolean type keyword in a JSON Schema document looks as follows: + @code{.json} + "type" : "boolean" + @endcode """ def __init__(self): """! @brief Constructor of Boolean_Visitor. - It sets the boolean type count result value to zero. + It sets the boolean type count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a boolean type keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a boolean type keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "type" and str(node.getValue()) == "boolean"): + if node.getName() == "type" and str(node.getValue()) == "boolean": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of boolean type keyword in the Schema + @return The amount of appearances of boolean type keyword in the Schema """ return self.cnt diff --git a/Visitors/Check_Ref_Visitor.py b/Visitors/Check_Ref_Visitor.py index 13125de..8286596 100644 --- a/Visitors/Check_Ref_Visitor.py +++ b/Visitors/Check_Ref_Visitor.py @@ -1,41 +1,41 @@ from Visitor import Visitor + class Check_Ref_Visitor(Visitor): """! @brief This visitor checks whether there are references in the JSON Schema. - The result value is True if and only if there is at least one $ref in the JSON Schema. + The result value is True if and only if there is at least one $ref in the JSON Schema. """ def __init__(self): """! @brief Constructor of Check_Ref_Visitor. - It sets the boolean result value to False. + It sets the boolean result value to False. """ self.contains_ref = False def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and changes the result value to True if the node is a representation - of a $ref keyword in the schema. + This function visits a node and changes the result value to True if the node is a representation + of a $ref keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ if node.getName() == "$ref": self.contains_ref = True - def getCount(self): + def getCount(self): """! @brief Basic getter for the result value implementation - @return Integer conversion of the boolean result value. - Result value is True if and only if the Schema contains at least one $ref keyword. + @return Integer conversion of the boolean result value. + Result value is True if and only if the Schema contains at least one $ref keyword. """ - return int(self.contains_ref) + return int(self.contains_ref) def contains_ref(self): """! @brief Additional getter for the boolean result value - @return True if the Schema contains at least one $ref keyword. - False otherwise. + @return True if the Schema contains at least one $ref keyword. + False otherwise. """ return self.concontains_ref - diff --git a/Visitors/CountReferences_Visitor.py b/Visitors/CountReferences_Visitor.py index 88ec791..171cf05 100644 --- a/Visitors/CountReferences_Visitor.py +++ b/Visitors/CountReferences_Visitor.py @@ -1,25 +1,27 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class CountReferences_Visitor(Visitor): """! @brief Visitor to count the $ref keyword in the Schema - This visitor counts all appearances of the $ref keyword in the schema. + This visitor counts all appearances of the $ref keyword in the schema. """ + def __init__(self): """! @brief Constructor of CountReference_Visitor. - It sets the $ref count result value to zero. + It sets the $ref count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a $ref keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a $ref keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ if node.getName() == "$ref": self.cnt = self.cnt + 1 @@ -27,6 +29,6 @@ def visit(self, node): def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of $ref keyword in the Schema + @return The amount of appearances of $ref keyword in the Schema """ return self.cnt diff --git a/Visitors/Count_String_Visitor.py b/Visitors/Count_String_Visitor.py index c293a17..5c6fd54 100644 --- a/Visitors/Count_String_Visitor.py +++ b/Visitors/Count_String_Visitor.py @@ -1,37 +1,39 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Count_String_Visitor(Visitor): """! @brief Visitor to count the string type keyword in the Schema - This visitor counts all appearances of the string type keyword in the schema. + This visitor counts all appearances of the string type keyword in the schema. - A string type keyword in a JSON Schema document looks as follows: - @code{.json} - "type" : "string" - @endcode + A string type keyword in a JSON Schema document looks as follows: + @code{.json} + "type" : "string" + @endcode """ + def __init__(self): """! @brief Constructor of Count_String_Visitor. - It sets the string count result value to zero. + It sets the string count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a string type keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a string type keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "type" and str(node.getValue()) == "string"): + if node.getName() == "type" and str(node.getValue()) == "string": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of string type keyword in the Schema + @return The amount of appearances of string type keyword in the Schema """ return self.cnt diff --git a/Visitors/Enum_Visitor.py b/Visitors/Enum_Visitor.py index 41ac472..5c3d936 100644 --- a/Visitors/Enum_Visitor.py +++ b/Visitors/Enum_Visitor.py @@ -1,32 +1,34 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Enum_Visitor(Visitor): """! @brief Visitor to count the enum keyword in the Schema - This visitor counts all appearances of the enum keyword in the schema. + This visitor counts all appearances of the enum keyword in the schema. """ + def __init__(self): """! @brief Constructor of Enum_Visitor. - It sets the enum count result value to zero. + It sets the enum count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a enum keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a enum keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "enum"): + if node.getName() == "enum": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of enum keyword in the Schema + @return The amount of appearances of enum keyword in the Schema """ return self.cnt diff --git a/Visitors/MultipleOf_Visitor.py b/Visitors/MultipleOf_Visitor.py index 9a2c68a..4750067 100644 --- a/Visitors/MultipleOf_Visitor.py +++ b/Visitors/MultipleOf_Visitor.py @@ -1,32 +1,34 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class MultipleOf_Visitor(Visitor): """! @brief Visitor to count the multipleOf keyword in the Schema - This visitor counts all appearances of the multipleOf keyword in the schema. + This visitor counts all appearances of the multipleOf keyword in the schema. """ + def __init__(self): """! @brief Constructor of MultipleOf_Visitor. - It sets the multipleOf count result value to zero. + It sets the multipleOf count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a multipleOf keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a multipleOf keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "multipleOf"): + if node.getName() == "multipleOf": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of multipleOf keyword in the Schema + @return The amount of appearances of multipleOf keyword in the Schema """ return self.cnt diff --git a/Visitors/Not_Visitor.py b/Visitors/Not_Visitor.py index 84eaa05..89613e3 100644 --- a/Visitors/Not_Visitor.py +++ b/Visitors/Not_Visitor.py @@ -1,32 +1,34 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Not_Visitor(Visitor): """! @brief Visitor to count the not keyword in the Schema - This visitor counts all appearances of the not keyword in the schema. + This visitor counts all appearances of the not keyword in the schema. """ + def __init__(self): """! @brief Constructor of Not_Visitor. - It sets the required count result value to zero. + It sets the required count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a not keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a not keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (str(node.getName()) == "not"): + if str(node.getName()) == "not": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of not keyword in the Schema + @return The amount of appearances of not keyword in the Schema """ return self.cnt diff --git a/Visitors/NullType_Visitor.py b/Visitors/NullType_Visitor.py index a3526d5..674af50 100644 --- a/Visitors/NullType_Visitor.py +++ b/Visitors/NullType_Visitor.py @@ -1,37 +1,38 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class NullType_Visitor(Visitor): """! @brief Visitor to count the null type keyword in the Schema - This visitor counts all appearances of the null type keyword in the schema. - A null type in a JSON Schema document looks as follows: - @code{.json} - "type" : "null" - @endcode + This visitor counts all appearances of the null type keyword in the schema. + A null type in a JSON Schema document looks as follows: + @code{.json} + "type" : "null" + @endcode """ def __init__(self): """! @brief Constructor of NullType_Visitor. - It sets the required count result value to zero. + It sets the required count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a null type keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a null type keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "type" and str(node.getValue()) == "null"): + if node.getName() == "type" and str(node.getValue()) == "null": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of null type keyword in the Schema + @return The amount of appearances of null type keyword in the Schema """ return self.cnt diff --git a/Visitors/Number_Visitor.py b/Visitors/Number_Visitor.py index 4177bb1..50258f7 100644 --- a/Visitors/Number_Visitor.py +++ b/Visitors/Number_Visitor.py @@ -1,38 +1,43 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Number_Visitor(Visitor): """! @brief Visitor to count the number or integer type keyword in the Schema - This visitor counts all appearances of the keywords number or integer in the schema. - A number or integer type keyword in a JSON Schema document looks as follows - @code{.json} - "type" : "number" - "type" : "integer" - @endcode + This visitor counts all appearances of the keywords number or integer in the schema. + A number or integer type keyword in a JSON Schema document looks as follows + @code{.json} + "type" : "number" + "type" : "integer" + @endcode """ - + def __init__(self): """! @brief Constructor of Number_Visitor. - It sets the number count result value to zero. + It sets the number count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a number or integer type keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a number or integer type keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "type" and (str(node.getValue()) == "number") or (str(node.getValue()) == "integer")): + if ( + node.getName() == "type" + and (str(node.getValue()) == "number") + or (str(node.getValue()) == "integer") + ): self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of integer or number type keyword in the Schema + @return The amount of appearances of integer or number type keyword in the Schema """ return self.cnt diff --git a/Visitors/Object_Visitor.py b/Visitors/Object_Visitor.py index 1409395..1a8871d 100644 --- a/Visitors/Object_Visitor.py +++ b/Visitors/Object_Visitor.py @@ -1,36 +1,37 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Object_Visitor(Visitor): """! @brief Visitor to count object types in Schema - This visitor counts all appearances of the object type keyword in the schema. - A object type in a Schema looks as follows: + This visitor counts all appearances of the object type keyword in the schema. + A object type in a Schema looks as follows: - @code{.json} "type" : "object" @endcode + @code{.json} "type" : "object" @endcode """ def __init__(self): """! @brief Constructor of Object_Visitor. - It sets the object type count result value to zero. + It sets the object type count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a object type keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a object type keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "type" and str(node.getValue()) == "object"): + if node.getName() == "type" and str(node.getValue()) == "object": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of object type keyword in the Schema + @return The amount of appearances of object type keyword in the Schema """ return self.cnt diff --git a/Visitors/Pattern_Visitor.py b/Visitors/Pattern_Visitor.py index c94a1ac..b233cd6 100644 --- a/Visitors/Pattern_Visitor.py +++ b/Visitors/Pattern_Visitor.py @@ -1,33 +1,35 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Pattern_Visitor(Visitor): """! @brief Visitor to count pattern keywords in Schema. - The visitor counts all appearances of the "pattern" keyword and the "patternProperties" - keyword. + The visitor counts all appearances of the "pattern" keyword and the "patternProperties" + keyword. """ + def __init__(self): """! @brief Constructor of Pattern_Visitor - Sets the counter result value to zero + Sets the counter result value to zero """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a pattern keywords in the schema like defined in the class decsription. + This function visits a node and increments the counter if the node is a representation + of a pattern keywords in the schema like defined in the class decsription. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ param_list = ["pattern", "patternProperties"] - if (node.getName() in param_list): + if node.getName() in param_list: self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of pattern keywords in the Schema as - defined in the class decsription + @return The amount of appearances of pattern keywords in the Schema as + defined in the class decsription """ return self.cnt diff --git a/Visitors/Required_Visitor.py b/Visitors/Required_Visitor.py index b672df9..b438de9 100644 --- a/Visitors/Required_Visitor.py +++ b/Visitors/Required_Visitor.py @@ -1,32 +1,34 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class Required_Visitor(Visitor): """! @brief Visitor to count the required keyword in the Schema - This visitor counts all appearances of the required keyword in the schema. + This visitor counts all appearances of the required keyword in the schema. """ + def __init__(self): """! @brief Constructor of Required_Visitor. - It sets the required count result value to zero. + It sets the required count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node and increments the counter if the node is a representation - of a required keyword in the schema. + This function visits a node and increments the counter if the node is a representation + of a required keyword in the schema. - @param node Node to visit. This has to be a inherited type from SchemaNode. + @param node Node to visit. This has to be a inherited type from SchemaNode. """ - if (node.getName() == "required"): + if node.getName() == "required": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - @return The amount of appearances of required keyword in the Schema + @return The amount of appearances of required keyword in the Schema """ return self.cnt diff --git a/Visitors/UniqueItems_Visitor.py b/Visitors/UniqueItems_Visitor.py index 68a3586..15acc4c 100644 --- a/Visitors/UniqueItems_Visitor.py +++ b/Visitors/UniqueItems_Visitor.py @@ -1,31 +1,33 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class UniqueItems_Visitor(Visitor): """! @brief Visitor to count uniqueItems key word in a Schema. - - This visitor counts the appearances of the uniqueItems keyword of JSON Schema. + + This visitor counts the appearances of the uniqueItems keyword of JSON Schema. """ + def __init__(self): """! @brief Constructor of the UniqueItems_Visitor. - It sets the unique items count result value to zero. + It sets the unique items count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation of UniqueItems_Visitor. - - This function visits a node and increments the counter if the node represents - a uniqueItems keyword in the JSON Schema. - @param node Node to visit. This node has be a inherited type of SchemaNode + This function visits a node and increments the counter if the node represents + a uniqueItems keyword in the JSON Schema. + + @param node Node to visit. This node has be a inherited type of SchemaNode """ - if (node.getName() == "uniqueItems"): + if node.getName() == "uniqueItems": self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the result value implementation - - @return The amount of uniqueItems appearances in the Schema + + @return The amount of uniqueItems appearances in the Schema """ return self.cnt diff --git a/Visitors/ValueRestriction_Visitor.py b/Visitors/ValueRestriction_Visitor.py index 00af360..0ba324a 100644 --- a/Visitors/ValueRestriction_Visitor.py +++ b/Visitors/ValueRestriction_Visitor.py @@ -1,39 +1,48 @@ -from Visitor import Visitor from KeyValueNode import KeyValueNode +from Visitor import Visitor + class ValueRestriction_Visitor(Visitor): """! @brief Visitor to count all value restrictions for numbers and integers or arrays in the schema. - - A value restriction can be upper and lower limits for integers and numbers or arrays in the schema. - The key words searched for are: - - minimum - - maximum - - minLength - - maxLength - - exclusiveMinimum - - exclusiveMaximum + + A value restriction can be upper and lower limits for integers and numbers or arrays in the schema. + The key words searched for are: + - minimum + - maximum + - minLength + - maxLength + - exclusiveMinimum + - exclusiveMaximum """ + def __init__(self): """! @brief Constructor of the ValueRestriction_Visitor. - Sets initial count result value to zero. + Sets initial count result value to zero. """ self.cnt = 0 def visit(self, node): """! @brief Basic visit method implementation. - This function visits a node in the graph and increments the count if the node's name - is a value restriction keyword as specified by the class description. + This function visits a node in the graph and increments the count if the node's name + is a value restriction keyword as specified by the class description. - @param node The node to visit. It has to be a inherited class of SchemaNode. + @param node The node to visit. It has to be a inherited class of SchemaNode. """ - param_list = ["minimum", "maximum", "minLength", "maxLength", "exclusiveMinimum", "exclusiveMaximum"] - if (node.getName() in param_list): + param_list = [ + "minimum", + "maximum", + "minLength", + "maxLength", + "exclusiveMinimum", + "exclusiveMaximum", + ] + if node.getName() in param_list: self.cnt = self.cnt + 1 def getCount(self): """! @brief Basic getter for the visit result implementation - - @return The amount of value restriction keywords in the Schema. + + @return The amount of value restriction keywords in the Schema. """ return self.cnt diff --git a/Visitors/Visitor.py b/Visitors/Visitor.py index 147467d..c4c53be 100644 --- a/Visitors/Visitor.py +++ b/Visitors/Visitor.py @@ -1,22 +1,22 @@ import abc + class Visitor(abc.ABC): """! @brief This is an abstract parent class to implement the visitor pattern - - Different visitors are used in this project to analyse a JSON Schema Document. - Basically, a visitor counts special elements or nodes in the Schema. All visitors - have to implement the methods visit(self, node) and getCount(self). + + Different visitors are used in this project to analyse a JSON Schema Document. + Basically, a visitor counts special elements or nodes in the Schema. All visitors + have to implement the methods visit(self, node) and getCount(self). """ + @abc.abstractmethod def visit(self, node): - """! @brief This is the basic visit method. Every visitor has to implement it. - """ + """! @brief This is the basic visit method. Every visitor has to implement it.""" pass @abc.abstractmethod def getCount(self): """! @brief This is the basic getter for the results of the visit. Every visitor has - to implement it. + to implement it. """ pass - diff --git a/get_schemas_from_store.py b/get_schemas_from_store.py index 9a23484..a5cd3f9 100644 --- a/get_schemas_from_store.py +++ b/get_schemas_from_store.py @@ -2,11 +2,12 @@ ## This file downloads and renames and stores the available JSON Schemas from schemastore.org -import urllib3 as url +import csv import json import os import shutil -import csv + +import urllib3 as url # specifiy path where a folder with all JSONs will be created path_schema = "./" @@ -19,13 +20,13 @@ else: shutil.rmtree(path) os.makedirs(path) - + path = path + "/" -url.disable_warnings(url.exceptions.InsecureRequestWarning) +url.disable_warnings(url.exceptions.InsecureRequestWarning) http = url.PoolManager() -schema_catalog_req = http.request('GET', 'http://schemastore.org/api/json/catalog.json') +schema_catalog_req = http.request("GET", "http://schemastore.org/api/json/catalog.json") schema_catalog_raw = schema_catalog_req.data # converts a json file into python data structures, e.g. Object --> dict @@ -33,11 +34,11 @@ itrtr = 0 print("Fetching Schemas...", end="") -log = open(path_schema + "logfile.log", 'w+') -url_file = open(path_schema + "non_descripted_urls.log", 'w+') -bad_names_file = open(path_schema + "bad_names.log", 'w+') -csv_file = open(path_schema + "filename_spec.csv",'w+', newline='') -csv_writer = csv.DictWriter(csv_file, delimiter=',', fieldnames=["name", "filename"]) +log = open(path_schema + "logfile.log", "w+") +url_file = open(path_schema + "non_descripted_urls.log", "w+") +bad_names_file = open(path_schema + "bad_names.log", "w+") +csv_file = open(path_schema + "filename_spec.csv", "w+", newline="") +csv_writer = csv.DictWriter(csv_file, delimiter=",", fieldnames=["name", "filename"]) csv_writer.writeheader() no_schemas_available = len(schema_catalog_json["schemas"]) @@ -46,9 +47,9 @@ duplicate_iterators = [] for schema in schema_catalog_json["schemas"]: - valid_json = True + valid_json = True schema_url = schema["url"] - schema_raw = http.request('GET', schema_url).data + schema_raw = http.request("GET", schema_url).data if schema_raw is None: log.write("Could not load from " + schema_url + "\n") print("NONE") @@ -58,7 +59,7 @@ except: log.write("Fucked up JSON at " + schema_url + "\n") valid_json = False - + try: filename = schema_json["title"].replace(" ", "_") + ".json" except KeyError: @@ -71,9 +72,9 @@ filename = filename.replace("/", "_") filename = filename.replace(",", "_") filename = filename.replace(":", "") - - if valid_json: - if os.path.isfile(path+filename): + + if valid_json: + if os.path.isfile(path + filename): if filename in duplicate_list: idx = duplicate_list.index(filename) duplicate_iterators[idx] = duplicate_iterators[idx] + 1 @@ -81,10 +82,10 @@ duplicate_list.append(filename) duplicate_iterators.append(1) idx = duplicate_list.index(filename) - + filename = filename[:-5] + "_" + str(duplicate_iterators[idx]) + ".json" try: - f = open(path + filename, 'w+') + f = open(path + filename, "w+") f.write(json.dumps(schema_json, indent=4)) f.close() itrtr = itrtr + 1 @@ -92,7 +93,7 @@ old_filename = filename filename = "Schema_" + str(itrtr) + ".json" try: - f = open(path + filename, 'w+') + f = open(path + filename, "w+") f.write(json.dumps(schema_json, indent=4)) f.close() bad_names_file.write(old_filename + ": " + schema_url + "\n") @@ -100,13 +101,13 @@ except OSError: log.write("File " + old_filename + " could not be created\n") log.write("File " + old_filename + " renamed to " + filename + "\n") - csv_writer.writerow({'name' : schema["name"], 'filename' : filename}) + csv_writer.writerow({"name": schema["name"], "filename": filename}) else: - csv_writer.writerow({'name' : schema["name"], 'filename' : "None"}) - + csv_writer.writerow({"name": schema["name"], "filename": "None"}) + print(".", end="") - - #endif valid_json + + # endif valid_json log.close() url_file.close() bad_names_file.close() diff --git a/load_schema_from_web.py b/load_schema_from_web.py index 6d7bcbf..ed729db 100644 --- a/load_schema_from_web.py +++ b/load_schema_from_web.py @@ -1,22 +1,24 @@ -import urllib3 as url -import json import io, os +import json + +import urllib3 as url + def load_schema(url_str, logfile): - """! @brief This function loads a schema from given web address and returns a JSON schema dictionary representation - - The function is capable of returning sub-object of the schema by requesting e.g. http://example.com/#def1 - - @note For the UnitTest of schema_graph.py, this function can additionaly load two files from local storage - when one of the following urls is requested: http://loopback/address or http://loopback/ext + """! @brief This function loads a schema from given web address and returns a JSON schema dictionary representation + + The function is capable of returning sub-object of the schema by requesting e.g. http://example.com/#def1 - @param url_str url to download file from - @param logfile file pointer to logfile + @note For the UnitTest of schema_graph.py, this function can additionaly load two files from local storage + when one of the following urls is requested: http://loopback/address or http://loopback/ext - @return Dictionary representation of requested Schema, part of Schema or None if failed + @param url_str url to download file from + @param logfile file pointer to logfile + + @return Dictionary representation of requested Schema, part of Schema or None if failed """ partly_referenced = False - valid_json = True + valid_json = True schema_url = url_str # change working directory to satisfy relative pathes old_path = os.getcwd() @@ -33,15 +35,15 @@ def load_schema(url_str, logfile): fp.close() else: if "#" in schema_url: - #A definition or part of the whole file is referenced + # A definition or part of the whole file is referenced partly_referenced = True # extract url part - definition_part = schema_url[(schema_url.find("#")+1):] - schema_url = schema_url[:(schema_url.find("#"))] + definition_part = schema_url[(schema_url.find("#") + 1) :] + schema_url = schema_url[: (schema_url.find("#"))] # download whole file - url.disable_warnings(url.exceptions.InsecureRequestWarning) + url.disable_warnings(url.exceptions.InsecureRequestWarning) http = url.PoolManager() - schema_raw = http.request('GET', schema_url).data + schema_raw = http.request("GET", schema_url).data if schema_raw is None: logfile.write("Could not load from " + schema_url + "\n") else: @@ -54,7 +56,7 @@ def load_schema(url_str, logfile): # step-by-step deeper into dictionaries until right and last one in definition parts is reached for part in definition_list: - if part == '': + if part == "": continue else: schema_json = schema_json[part] @@ -65,8 +67,8 @@ def load_schema(url_str, logfile): # restore working directory os.chdir(old_path) - - if valid_json: + + if valid_json: return schema_json else: return None diff --git a/schema_checker.py b/schema_checker.py index d19735f..80f85ec 100644 --- a/schema_checker.py +++ b/schema_checker.py @@ -1,18 +1,19 @@ import json + import jsonschema class schema_checker(object): """! @brief This class implements a validity check for JSON Schema documents - - This class gets a schema as dictionary parsed by json module and checks whether it is - a valid JSON Schema document by using the jsonschema validator module. + + This class gets a schema as dictionary parsed by json module and checks whether it is + a valid JSON Schema document by using the jsonschema validator module. """ def __init__(self): """! @brief Constructor for schema_checker - Initialize the internal variables self.schema, self.draft and self.validator + Initialize the internal variables self.schema, self.draft and self.validator """ self.schema = None self.draft = "" @@ -21,55 +22,55 @@ def __init__(self): def set_new_schema(self, schema): """! @brief This function introduces a new schema to check its validity. - This function takes a schema dictionary and determines the draft version by checking - the Schemas $schema property + This function takes a schema dictionary and determines the draft version by checking + the Schemas $schema property - @param schema a dictionary representation of the JSON Schema document parsed by json module + @param schema a dictionary representation of the JSON Schema document parsed by json module """ self.schema = schema try: schema_string = schema["$schema"] idx = schema_string.find("draft-0") - if(idx == -1): - if(schema_string == "http://json-schema.org/schema"): - self.draft = "draft-07" #latest draft - else: - self.draft = schema_string[idx:(idx+8)] + if idx == -1: + if schema_string == "http://json-schema.org/schema": + self.draft = "draft-07" # latest draft + else: + self.draft = schema_string[idx : (idx + 8)] except: - # if $schema tag is missing, the draft verstion can't be determined and the schema is + # if $schema tag is missing, the draft verstion can't be determined and the schema is # treaten as invalid print("$schema-Tag missing!") self.draft = None def check_schema(self, schema=None): """! @brief This function determines the validity of the given JSON Schema document. - The jsonschema module is a validator that can check the given schema against the draft specification - which is also a JSON Schema document. + The jsonschema module is a validator that can check the given schema against the draft specification + which is also a JSON Schema document. - @param schema a dictionary representation of a Schema produced by json module - If not given or set to None, the method returns False i.e. invalid - @return Validity of schemas as boolean value. True => Valid; False => Invalid + @param schema a dictionary representation of a Schema produced by json module + If not given or set to None, the method returns False i.e. invalid + @return Validity of schemas as boolean value. True => Valid; False => Invalid """ ret_val = True if not schema is None: self.set_new_schema(schema) # switch validator according to draft version - if ('draft-07' == self.draft): + if "draft-07" == self.draft: self.validator = jsonschema.Draft7Validator - elif ('draft-06' == self.draft): + elif "draft-06" == self.draft: self.validator = jsonschema.Draft6Validator - elif ('draft-05' == self.draft): - #draft-05 uses old metas form draft-04 + elif "draft-05" == self.draft: + # draft-05 uses old metas form draft-04 self.validator = jsonschema.Draft4Validator - elif ('draft-04' == self.draft): + elif "draft-04" == self.draft: self.validator = jsonschema.Draft4Validator - elif ('draft-03' == self.draft): + elif "draft-03" == self.draft: self.validator = jsonschema.Draft3Validator elif self.draft is None: print("Non-valid JSON Schema document!") return False - + # check validity. if invalid an exception is raised by jsonschema validators try: self.validator.check_schema(self.schema) @@ -80,5 +81,3 @@ def check_schema(self, schema=None): ret_val = False return ret_val - - \ No newline at end of file diff --git a/schema_graph.py b/schema_graph.py index 0e53d86..6b89aea 100644 --- a/schema_graph.py +++ b/schema_graph.py @@ -1,39 +1,41 @@ import sys + sys.path.append("./NodeTypes") -import networkx as nx +from copy import copy, deepcopy + +import load_schema_from_web as web import matplotlib.pyplot as plt -from ObjectNode import ObjectNode +import networkx as nx from ArrayNode import ArrayNode -from KeyValueNode import KeyValueNode -from graphviz import render from Check_Ref_Visitor import Check_Ref_Visitor -import load_schema_from_web as web -from copy import deepcopy -from copy import copy +from graphviz import render +from KeyValueNode import KeyValueNode +from ObjectNode import ObjectNode class schema_graph(nx.DiGraph): """! @brief This class implements a graph representing an JSON Schema document structure. - - The schema graph is a directed graph inherited from NetworkX's DiGraph. All Elements in a - JSON Schema are represented as one of three different nodes which inherit from SchemaNode. - See the node classes' documentation for details. - After setting up a schema graph, one can obtain different analysis tasks with internal methods - and by using an implemented visitor pattern. - Keyword counts are implemented by using different visitors. - Other analysis tasks like the depth of a schema are implemented as internal methods of schema graphs. - A schema graph is also capable of representing itself in DOT-Format and PDF-Format. + + The schema graph is a directed graph inherited from NetworkX's DiGraph. All Elements in a + JSON Schema are represented as one of three different nodes which inherit from SchemaNode. + See the node classes' documentation for details. + After setting up a schema graph, one can obtain different analysis tasks with internal methods + and by using an implemented visitor pattern. + Keyword counts are implemented by using different visitors. + Other analysis tasks like the depth of a schema are implemented as internal methods of schema graphs. + A schema graph is also capable of representing itself in DOT-Format and PDF-Format. """ + ## maximum number of resolving definitions rounds (see getSolvedGraph()) max_count = 10 def __init__(self, filename=""): """! @brief Constructor for a schema_graph. - This function sets up different lists and properties that are used in internal anlysis methods. - For details on the properties, please refered to the documentation of the method that uses them. + This function sets up different lists and properties that are used in internal anlysis methods. + For details on the properties, please refered to the documentation of the method that uses them. - @param filename Path to JSON Schema document to be loaded into a schema_graph + @param filename Path to JSON Schema document to be loaded into a schema_graph """ super().__init__() ## JSON Schema document to be loaded into schema_graph @@ -66,7 +68,7 @@ def __init__(self, filename=""): ## multiplied) resolved references self.ext_solved_graph = None ## Flag indicating whether an invalid reference was detected during - self.invalid_reference_detected = False + self.invalid_reference_detected = False ## Set to determine subgraphs (see getSuccessorSubgraph) self.sub_node_set = set() ## Node counter used to determine unique ID for every node @@ -75,9 +77,9 @@ def __init__(self, filename=""): def load_schema(self, schema_dict): """! @brief This function loads a dictionary representation of a schema into a schema_graph - This includes converting the elements to the specific nodes. + This includes converting the elements to the specific nodes. - @param schema_dict a dictionary representation of a JSON Schema document loaded by json module + @param schema_dict a dictionary representation of a JSON Schema document loaded by json module """ self.schema_dict = schema_dict @@ -90,21 +92,20 @@ def load_schema(self, schema_dict): sg = self.load_subgraph(schema_dict, None) super().add_nodes_from(sg) super().add_edges_from(sg.edges) - def load_subgraph(self, schema_pattern, name): """! @brief This function loads a subgraph from a so called schema_pattern. - The function takes different types of elements as schema_patterns and produces the - corresponding nodes according to the type of schema_pattern. For example, an JSON - Schema object is represented as a dictionary in schema_pattern and will be added - to the schema_graph as ObjecNode. - This function operates recursively until all leafes of the tree are reached. + The function takes different types of elements as schema_patterns and produces the + corresponding nodes according to the type of schema_pattern. For example, an JSON + Schema object is represented as a dictionary in schema_pattern and will be added + to the schema_graph as ObjecNode. + This function operates recursively until all leafes of the tree are reached. - @param schema_pattern JSON Schema Element of various typed - @param name name of the resulting node of the schema_pattern + @param schema_pattern JSON Schema Element of various typed + @param name name of the resulting node of the schema_pattern - @return The generated subgraph (nx.DiGraph), that results out of schema_pattern + @return The generated subgraph (nx.DiGraph), that results out of schema_pattern """ # if no name is given, root node is asumed @@ -112,9 +113,9 @@ def load_subgraph(self, schema_pattern, name): name = "root" subgraph = nx.DiGraph() - + if isinstance(schema_pattern, dict): - #Schema Objects + # Schema Objects oNode = ObjectNode(name) subgraph.add_node(oNode, name=name) @@ -130,12 +131,12 @@ def load_subgraph(self, schema_pattern, name): self.logmessage("Failed to load subgraph for Object " + name) elif isinstance(schema_pattern, list): - #Schema Arrays + # Schema Arrays arrNode = ArrayNode(name) subgraph.add_node(arrNode, name=name) - + for it in schema_pattern: - #step into array nodes recursively + # step into array nodes recursively h_graph = self.load_subgraph(it, str(it)) if h_graph is not None: subgraph.add_nodes_from(h_graph) @@ -145,19 +146,25 @@ def load_subgraph(self, schema_pattern, name): else: self.logmessage("Failed to load subgraph for list " + name) - elif isinstance(schema_pattern, str) or isinstance(schema_pattern, int) or isinstance(schema_pattern, float): - #Schema "properties" - - if (name == "$ref"): - #$ref are shared ressources and shall be represented as such + elif ( + isinstance(schema_pattern, str) + or isinstance(schema_pattern, int) + or isinstance(schema_pattern, float) + ): + # Schema "properties" + + if name == "$ref": + # $ref are shared ressources and shall be represented as such ref_name = name + schema_pattern if ref_name in self.ref_name_list: - #insert an edge from the previous node to the existing $ref - #node + # insert an edge from the previous node to the existing $ref + # node index = self.ref_name_list.index(ref_name) - kvNode = self.ref_node_list[index] #adding an existing node to the graph is ignored by networkx + kvNode = self.ref_node_list[ + index + ] # adding an existing node to the graph is ignored by networkx else: - #insert new $ref node + # insert new $ref node self.ref_name_list.append(ref_name) kvNode = KeyValueNode(name, schema_pattern) self.ref_node_list.append(kvNode) @@ -167,59 +174,59 @@ def load_subgraph(self, schema_pattern, name): subgraph.add_node(kvNode, name=name) elif schema_pattern is None: - #null type is parsed to None by json library + # null type is parsed to None by json library subgraph.add_node(KeyValueNode(name, "null")) else: - #non-valid Schema document - subgraph = None + # non-valid Schema document + subgraph = None return subgraph - + def show(self): - """! @brief Shows a dirty version of the graph structure in a interactive window - @deprecated Use visualize(path) + """! @brief Shows a dirty version of the graph structure in a interactive window + @deprecated Use visualize(path) """ - nx.draw_shell(self, with_labels=False, font_weight='bold') + nx.draw_shell(self, with_labels=False, font_weight="bold") plt.show() def visualize(self, path): """! @brief Creates a pdf and a DOT-format file with a proper visualisatzion of the graph. - - @param path path to the dot-format file and the pdf + + @param path path to the dot-format file and the pdf """ vis_graph = self.getVisGraph() - + nx.drawing.nx_pydot.write_dot(vis_graph, path + ".gv") - render('dot', 'pdf', path + ".gv") + render("dot", "pdf", path + ".gv") def getVisGraph(self): """! @brief This function returns a DiGraph representation of the schema_graph for visualisation. - This is necessary because Node can't be graphically represented as they get represented as Python - Objects with adresses. Therefor a DiGraph containing only the names of the nodes is generated - with this method. + This is necessary because Node can't be graphically represented as they get represented as Python + Objects with adresses. Therefor a DiGraph containing only the names of the nodes is generated + with this method. - @return A nx.DiGraph with names of original nodes as nodes + @return A nx.DiGraph with names of original nodes as nodes """ vis_graph = nx.DiGraph() node_list = list(self.nodes) edge_list = list(self.edges) name_list = [] - + # DiGraph nodes work with unique elements, names of nodes cant be the # same # if a node name already appeared, an integer is added to the name itrtr = 0 for node in node_list: - if (node is None): + if node is None: name = "None" - elif node.getName() == "graph": #problem with gv file parsing + elif node.getName() == "graph": # problem with gv file parsing name = "graf" else: name = node.getName() if name in name_list: name = name + str(itrtr) - itrtr = itrtr + 1 + itrtr = itrtr + 1 name_list.append(name) @@ -237,46 +244,46 @@ def getVisGraph(self): return vis_graph def getFilename(self): - """! @brief Getter for the filename - @return self.filename + """! @brief Getter for the filename + @return self.filename """ return self.filename def depth(self, graph): """! @brief Determine the depth of graph by checking all path lengths to all leaf nodes. - This function is using the simple paths method of NetworkX module. It gets all pathes - to all leaf nodes of the graph and stores their length in a list. The maximum of this list - is returned as depth. - @param graph schema_graph to determine depth of - @return The depth of the given graph - """ + This function is using the simple paths method of NetworkX module. It gets all pathes + to all leaf nodes of the graph and stores their length in a list. The maximum of this list + is returned as depth. + @param graph schema_graph to determine depth of + @return The depth of the given graph + """ kvNodeList = list() path_length_list = list() root_node = list(graph.nodes)[0] - + for node in graph.nodes: # get all leaf nodes which have to be kvNodes and vice versa if isinstance(node, KeyValueNode): for path in nx.all_simple_paths(graph, root_node, node): path_length_list.append(len(path)) - + return max(path_length_list) def depth_schema(self): """! @brief Return the depth of the JSON Schema document. - - This is equivalent to the depth of the schema_graph itself. - @return Depth of the JSON Schema document represented by this schema_graph. + This is equivalent to the depth of the schema_graph itself. + + @return Depth of the JSON Schema document represented by this schema_graph. """ return self.depth(self) def depth_resolvedReferenceGraph(self): """! @brief Determine the depth of the resolved reference graph of the Schema document. - This means to solve the $refs and inserting the linked (sub) graph. - If recursion are in the graph, the length of the bigest cycle is returned. - - @return Depth or max cycle length of the resolved reference graph + This means to solve the $refs and inserting the linked (sub) graph. + If recursion are in the graph, the length of the bigest cycle is returned. + + @return Depth or max cycle length of the resolved reference graph """ self.solved_graph = self.getResolvedReferenceGraph() @@ -288,7 +295,7 @@ def depth_resolvedReferenceGraph(self): def shortest_cycle(self): """! @brief Return the shortest cycle in the resolved graph or 0 if schema is not recursive - @return Shortest Cycle in a recursive graph or 0 for non-recursive graphs + @return Shortest Cycle in a recursive graph or 0 for non-recursive graphs """ self.solved_graph = self.getResolvedReferenceGraph() @@ -297,13 +304,13 @@ def shortest_cycle(self): return self.min_cycle_length(self.solved_graph) else: return 0 - + def updateRefNameList(self): """! @brief A function to update schema_graph's reference name and node lists. - This function clears the internal ref_name_list, ref_node_list, res_name_list and - res_node_list. They contain reference names and nodes and already resolved names and nodes. - The ref_name_list and ref_node_list are filled with all references in the graph. + This function clears the internal ref_name_list, ref_node_list, res_name_list and + res_node_list. They contain reference names and nodes and already resolved names and nodes. + The ref_name_list and ref_node_list are filled with all references in the graph. """ self.ref_name_list = list() self.ref_node_list = list() @@ -311,107 +318,125 @@ def updateRefNameList(self): self.res_node_list = list() for node in self.nodes: name = node.getName() - if (name == "$ref"): - #$ref are shared ressources and shall be represented as such + if name == "$ref": + # $ref are shared ressources and shall be represented as such ref_name = name + node.getValue() if ref_name not in self.ref_name_list: self.ref_name_list.append(ref_name) self.ref_node_list.append(node) def getResolvedReferenceGraph(self): - """! @brief This function creates the resolved reference graph. - - The resolved reference graph is created by the method getSolvedGraph(..). + """! @brief This function creates the resolved reference graph. - @return The resolved reference graph. + The resolved reference graph is created by the method getSolvedGraph(..). + + @return The resolved reference graph. """ if self.solved_graph is None: self.solved_graph = self.getSolvedGraph(0) - + return self.solved_graph - def getSolvedGraph(self, count=0): - """! @brief This function creates the resolved reference graph. - - The resolved reference graph is a version of the original schema_graph, where all reference - nodes are replaced by the sub-graph they referenced. Internal definitions are loaded from - definitions sections and copied to the reference. This can happen only once as $ref-nodes - are unique and treaten as "shared resource". External references are loaded from the web. - If a sub-graph contains references either, the algorithm searches for equal references in - the original schema_graph. These can be already resolved and therefor in res_name_list and - res_node_list or they are to be resolved later and stored in ref_name_list and ref_node_list. - If there are such references, the algorithm treats them as same unique references. - This is used to detect recursive structures in the schema documents. + def getSolvedGraph(self, count=0): + """! @brief This function creates the resolved reference graph. + + The resolved reference graph is a version of the original schema_graph, where all reference + nodes are replaced by the sub-graph they referenced. Internal definitions are loaded from + definitions sections and copied to the reference. This can happen only once as $ref-nodes + are unique and treaten as "shared resource". External references are loaded from the web. + If a sub-graph contains references either, the algorithm searches for equal references in + the original schema_graph. These can be already resolved and therefor in res_name_list and + res_node_list or they are to be resolved later and stored in ref_name_list and ref_node_list. + If there are such references, the algorithm treats them as same unique references. + This is used to detect recursive structures in the schema documents. - @param count Integer value to determine how often this method was called from itself. - Multiple rounds can be necessary to resolve all references of subgraphs. - For the initial call of this function, always use count = 0. + @param count Integer value to determine how often this method was called from itself. + Multiple rounds can be necessary to resolve all references of subgraphs. + For the initial call of this function, always use count = 0. - @return The resolved reference graph. + @return The resolved reference graph. """ - + if self.solved_graph is not None: return self.solved_graph - new_ref_round = False #determine whether this procedure has to be re-run - count = count + 1 #count iterations of this method to newly created graphs - + new_ref_round = False # determine whether this procedure has to be re-run + count = count + 1 # count iterations of this method to newly created graphs + if count == 1: self.updateRefNameList() - if(len(self.ref_name_list) != 0): + if len(self.ref_name_list) != 0: # Depth of JSON only differs from Schema's Depth if Schema contains # $refs - solved_graph = deepcopy(self) # 'real' copy, no connection between objects + solved_graph = deepcopy(self) # 'real' copy, no connection between objects it_ref_node_list = copy(solved_graph.ref_node_list) - for it_node in it_ref_node_list: - node = it_node #adding in lists --> capability to change iterating node + for it_node in it_ref_node_list: + node = ( + it_node # adding in lists --> capability to change iterating node + ) if isinstance(node, KeyValueNode): - #only KeyValue - Nodes can be $ref Nodes + # only KeyValue - Nodes can be $ref Nodes if node.getName() == "$ref": if node.getValue()[0] == "#": ref_name = node.getValue() - defsec_name = self.store_defsecname(node.getValue()) #add section to list of known definition sections - #local reference + defsec_name = self.store_defsecname( + node.getValue() + ) # add section to list of known definition sections + # local reference internal_valid_ref_flag = True - #find referenced node in solved_graph + # find referenced node in solved_graph def_node = solved_graph.getNodeByPath(ref_name) if not def_node is None: - predecs = solved_graph.predecessors(node) - + predecs = solved_graph.predecessors(node) + for pred in predecs: solved_graph.add_edge(pred, def_node) solved_graph.remove_node(node) - node = def_node # neccessary for adding it to res_node_list + node = def_node # neccessary for adding it to res_node_list - internal_valid_ref_flag = False + internal_valid_ref_flag = False else: internal_valid_ref_flag = True - - self.invalid_reference_detected = self.invalid_reference_detected | internal_valid_ref_flag + + self.invalid_reference_detected = ( + self.invalid_reference_detected + | internal_valid_ref_flag + ) elif node.getValue()[:4] == "http": if node.getValue() == self.id_tag: - #recursive to self + # recursive to self predecs = solved_graph.predecessors(node) solved_graph.remove_node(node) for pred in predecs: - solved_graph.add_edge(pred, list(solved_graph.nodes)[0]) - node = list(solved_graph.nodes)[0] #neccessary for adding it to res_node_list - - else:#external reference, recursions possible - schema_dict = web.load_schema(node.getValue(), open("../../schema_graph.log", 'a+')) + solved_graph.add_edge( + pred, list(solved_graph.nodes)[0] + ) + node = list(solved_graph.nodes)[ + 0 + ] # neccessary for adding it to res_node_list + + else: # external reference, recursions possible + schema_dict = web.load_schema( + node.getValue(), + open("../../schema_graph.log", "a+"), + ) if not schema_dict is None: subgraph = schema_graph(self.getFilename()) - subgraph.load_schema(schema_dict) - - subgraph = subgraph.resolveInternalReferences(node.getValue()) + subgraph.load_schema(schema_dict) + + subgraph = subgraph.resolveInternalReferences( + node.getValue() + ) if subgraph.invalid_reference_detected == True: self.invalid_reference_detected = True - self.logmessage("Invalid internal reference in externaly referenced file!") + self.logmessage( + "Invalid internal reference in externaly referenced file!" + ) # include references as same node for ref_name in subgraph.ref_name_list: @@ -420,54 +445,90 @@ def getSolvedGraph(self, count=0): # recursive reference to this # subgraph --> replace $ref # with root - idx_rec_ref = subgraph.ref_name_list.index(ref_name) - predecs_rec_ref = subgraph.predecessors(subgraph.ref_node_list[idx_rec_ref]) - subgraph.remove_node(subgraph.ref_node_list[idx_rec_ref]) + idx_rec_ref = ( + subgraph.ref_name_list.index( + ref_name + ) + ) + predecs_rec_ref = subgraph.predecessors( + subgraph.ref_node_list[idx_rec_ref] + ) + subgraph.remove_node( + subgraph.ref_node_list[idx_rec_ref] + ) sub_top_node = list(subgraph.nodes)[0] for pred_rec_ref in predecs_rec_ref: - subgraph.add_edge(pred_rec_ref, sub_top_node) + subgraph.add_edge( + pred_rec_ref, sub_top_node + ) else: # reference to another party # already in graph - idx_top = solved_graph.ref_name_list.index(ref_name) - idx_sub = subgraph.ref_name_list.index(ref_name) - sub_node = subgraph.ref_node_list[idx_sub] - top_node = solved_graph.ref_node_list[idx_top] + idx_top = ( + solved_graph.ref_name_list.index( + ref_name + ) + ) + idx_sub = subgraph.ref_name_list.index( + ref_name + ) + sub_node = subgraph.ref_node_list[ + idx_sub + ] + top_node = solved_graph.ref_node_list[ + idx_top + ] # refs are leaves in subgraph, # so no sucessors available - predecs_sub = subgraph.predecessors(sub_node) + predecs_sub = subgraph.predecessors( + sub_node + ) subgraph.remove_node(sub_node) subgraph.add_node(top_node) for pred_node in predecs_sub: - subgraph.add_edge(pred_node, top_node) + subgraph.add_edge( + pred_node, top_node + ) elif ref_name in solved_graph.res_name_list: # reference was already solved --> # connect to its subgraph's root - idx_top = solved_graph.res_name_list.index(ref_name) - idx_sub = subgraph.ref_name_list.index(ref_name) + idx_top = solved_graph.res_name_list.index( + ref_name + ) + idx_sub = subgraph.ref_name_list.index( + ref_name + ) sub_node = subgraph.ref_node_list[idx_sub] - top_node = solved_graph.res_node_list[idx_top] + top_node = solved_graph.res_node_list[ + idx_top + ] # refs are leaves in subgraph, so # no sucessors available - predecs_sub = subgraph.predecessors(sub_node) - subgraph.remove_node(sub_node) + predecs_sub = subgraph.predecessors( + sub_node + ) + subgraph.remove_node(sub_node) subgraph.add_node(top_node) for pred_node in predecs_sub: subgraph.add_edge(pred_node, top_node) - - else: #ref_name never occured + + else: # ref_name never occured new_ref_round = True ##currently unknown reference --> ##recurse into subgraph to resolve ##it - ref_node = subgraph.ref_node_list[subgraph.ref_name_list.index(ref_name)] + ref_node = subgraph.ref_node_list[ + subgraph.ref_name_list.index(ref_name) + ] solved_graph.ref_name_list.append(ref_name) solved_graph.ref_node_list.append(ref_node) - - #end for ref_name in - #subgraph.ref_name_list - idx_rep = solved_graph.ref_name_list.index("$ref" + it_node.getValue()) + + # end for ref_name in + # subgraph.ref_name_list + idx_rep = solved_graph.ref_name_list.index( + "$ref" + it_node.getValue() + ) rep_node = solved_graph.ref_node_list[idx_rep] predecs = solved_graph.predecessors(rep_node) solved_graph.remove_node(rep_node) @@ -476,94 +537,122 @@ def getSolvedGraph(self, count=0): sub_top_node = list(subgraph.nodes)[0] for pred_node in predecs: solved_graph.add_edge(pred_node, sub_top_node) - node = sub_top_node #neccesary for adding it to res_node_list + node = sub_top_node # neccesary for adding it to res_node_list else: self.invalid_reference_detected = True self.logmessage("Invalid external reference") else: - #undefined reference detected + # undefined reference detected self.invalid_reference_detected = True self.logmessage("Undefined internal reference") - #end if isinstance(node, KeyValueNode) - #reference was solved --> remove from reference list and add it - #to resolved references list - ref_idx = solved_graph.ref_name_list.index(it_node.getName() + it_node.getValue()) - solved_graph.ref_name_list.remove(it_node.getName() + it_node.getValue()) #original iterated node + # end if isinstance(node, KeyValueNode) + # reference was solved --> remove from reference list and add it + # to resolved references list + ref_idx = solved_graph.ref_name_list.index( + it_node.getName() + it_node.getValue() + ) + solved_graph.ref_name_list.remove( + it_node.getName() + it_node.getValue() + ) # original iterated node solved_graph.ref_node_list.remove(solved_graph.ref_node_list[ref_idx]) - solved_graph.res_name_list.append(it_node.getName() + it_node.getValue()) #sub_top_node reference + solved_graph.res_name_list.append( + it_node.getName() + it_node.getValue() + ) # sub_top_node reference solved_graph.res_node_list.append(node) - #end for it_node in solved_graph.ref_node_list + # end for it_node in solved_graph.ref_node_list if new_ref_round and (count <= schema_graph.max_count): self.solved_graph = solved_graph.getSolvedGraph(count) return self.solved_graph - else: #no new references added + else: # no new references added self.solved_graph = solved_graph return solved_graph - else: #no refs in graph + else: # no refs in graph self.solved_graph = self return self def resolveInternalReferences(self, webaddress): """! @brief This private function is resolves internal references only - This is used to resolve internal references of externaly included files. + This is used to resolve internal references of externaly included files. """ self.updateRefNameList() - webaddress = webaddress.split('#')[0] - solved_graph = self #if no internal references, return original + webaddress = webaddress.split("#")[0] + solved_graph = self # if no internal references, return original - if(len(self.ref_name_list) != 0): + if len(self.ref_name_list) != 0: # Depth of JSON only differs from Schema's Depth if Schema contains # $refs - solved_graph = deepcopy(self) # 'real' copy, no connection between objects + solved_graph = deepcopy(self) # 'real' copy, no connection between objects it_ref_node_list = copy(solved_graph.ref_node_list) - for it_node in it_ref_node_list: - node = it_node #adding in lists --> capability to change iterating node + for it_node in it_ref_node_list: + node = ( + it_node # adding in lists --> capability to change iterating node + ) if isinstance(node, KeyValueNode): - #only KeyValue - Nodes can be $ref Nodes + # only KeyValue - Nodes can be $ref Nodes if node.getName() == "$ref": if node.getValue()[0] == "#": ref_name = node.getValue() - defsec_name = self.store_defsecname(node.getValue()) #add section to list of known definition sections - #local reference + defsec_name = self.store_defsecname( + node.getValue() + ) # add section to list of known definition sections + # local reference internal_valid_ref_flag = True - #find referenced node in solved_graph + # find referenced node in solved_graph def_node = solved_graph.getNodeByPath(ref_name) if def_node is None: - #not found, maybe in complete document - schema_dict = web.load_schema(webaddress + ref_name, open("../../schema_graph.log", 'a+')) + # not found, maybe in complete document + schema_dict = web.load_schema( + webaddress + ref_name, + open("../../schema_graph.log", "a+"), + ) if not schema_dict is None: - #convert to external address + # convert to external address old_name = it_node.getName() + it_node.getValue() node.setValue(webaddress + ref_name) ref_idx = solved_graph.ref_name_list.index(old_name) - solved_graph.ref_name_list.remove(old_name) #original iterated node - solved_graph.ref_name_list.insert(ref_idx, node.getName() + node.getValue()) + solved_graph.ref_name_list.remove( + old_name + ) # original iterated node + solved_graph.ref_name_list.insert( + ref_idx, node.getName() + node.getValue() + ) continue if not def_node is None: - predecs = solved_graph.predecessors(node) - + predecs = solved_graph.predecessors(node) + for pred in predecs: solved_graph.add_edge(pred, def_node) solved_graph.remove_node(node) - #reference was solved --> remove from - #reference list and add it to resolved - #references list - ref_idx = solved_graph.ref_name_list.index(it_node.getName() + it_node.getValue()) - solved_graph.ref_name_list.remove(it_node.getName() + it_node.getValue()) #original iterated node - solved_graph.ref_node_list.remove(solved_graph.ref_node_list[ref_idx]) - solved_graph.res_name_list.append(it_node.getName() + it_node.getValue()) #sub_top_node reference + # reference was solved --> remove from + # reference list and add it to resolved + # references list + ref_idx = solved_graph.ref_name_list.index( + it_node.getName() + it_node.getValue() + ) + solved_graph.ref_name_list.remove( + it_node.getName() + it_node.getValue() + ) # original iterated node + solved_graph.ref_node_list.remove( + solved_graph.ref_node_list[ref_idx] + ) + solved_graph.res_name_list.append( + it_node.getName() + it_node.getValue() + ) # sub_top_node reference solved_graph.res_node_list.append(def_node) - internal_valid_ref_flag = False + internal_valid_ref_flag = False else: internal_valid_ref_flag = True - solved_graph.invalid_reference_detected = solved_graph.invalid_reference_detected | internal_valid_ref_flag + solved_graph.invalid_reference_detected = ( + solved_graph.invalid_reference_detected + | internal_valid_ref_flag + ) else: # external reference, nothing to do pass @@ -571,16 +660,16 @@ def resolveInternalReferences(self, webaddress): def visit_tree(self, visitor): """! @brief Traverse the tree using visitor pattern. - - @param visitor A vistitor to visit the schema graph. It has to be a instance inherited of Visitor.py + + @param visitor A vistitor to visit the schema graph. It has to be a instance inherited of Visitor.py """ for node in self.nodes: node.accept(visitor) def visit_ext_graph(self, visitor): """! @brief Traverse extanded reference graph using visitor pattern. - - @param visitor A vistitor to visit the schema graph. It has to be a instance inherited of Visitor.py + + @param visitor A vistitor to visit the schema graph. It has to be a instance inherited of Visitor.py """ if self.ext_solved_graph is None: self.ext_solved_graph = self.getExtendedRefGraph() @@ -591,7 +680,7 @@ def visit_ext_graph(self, visitor): def visit_res_graph(self, visitor): """! @brief Traverse resolved reference graph using the visitor pattern. - @param visitor A visitor to visit the schema graph. It has to be a instance inherited of Visitor.py + @param visitor A visitor to visit the schema graph. It has to be a instance inherited of Visitor.py """ if self.solved_graph is None: @@ -599,66 +688,64 @@ def visit_res_graph(self, visitor): for node in self.solved_graph: node.accept(visitor) - def getFanInList(self): - """! @brief This function returns a list of all element's fan-in values - - @return List of Fan-in values of all elements in the original schema graph + """! @brief This function returns a list of all element's fan-in values + + @return List of Fan-in values of all elements in the original schema graph """ fan_in_list = [] - for node in self.nodes: + for node in self.nodes: fan_in_list.append(len(list(self.predecessors(node)))) return fan_in_list - def getMaxFanIn(self): """! @brief Get the maximum fan in of any node in the graph. - - @return Maximum fan-in value of any node in the graph. - """ + + @return Maximum fan-in value of any node in the graph. + """ return max(self.getFanInList()) def getFanOutList(self): - """! @brief This function returns a list of all element's fan-out values excluding root - - @return List of Fan-out values of all elements in the original schema graph excluding root node. + """! @brief This function returns a list of all element's fan-out values excluding root + + @return List of Fan-out values of all elements in the original schema graph excluding root node. """ fan_out_list = [] for node in self.nodes: - if (node.getName() != "root"): + if node.getName() != "root": fan_out_list.append(len(list(self.successors(node)))) return fan_out_list def getMaxFanOut(self): """! @brief Get the maximum fan out of any node in the graph excluding root. - - @return Maximum fan-out value of any node in the graph excluding root + + @return Maximum fan-out value of any node in the graph excluding root """ return max(self.getFanOutList()) def check_recursion(self, *args): """! @brief Checks whether the schema document contains recursions. - This function loads the resolved reference graph by using the method getResolvedReferenceGraph() - and converts it to a clean nx.DiGraph() to use the class's internal cycle detection method. - By providing an schema_graph in args[0] the user can check args[0] for recursions. + This function loads the resolved reference graph by using the method getResolvedReferenceGraph() + and converts it to a clean nx.DiGraph() to use the class's internal cycle detection method. + By providing an schema_graph in args[0] the user can check args[0] for recursions. - @param *args Optional list of arguments. If provided args[0] has to be a schema graph + @param *args Optional list of arguments. If provided args[0] has to be a schema graph - @return Boolean value to determine whether the schema document contains recursions. + @return Boolean value to determine whether the schema document contains recursions. """ - #without converting it to a clean DiGraph, the - #generator returned by simple_cycles doesn't work + # without converting it to a clean DiGraph, the + # generator returned by simple_cycles doesn't work - if(len(args) == 0): + if len(args) == 0: g = nx.DiGraph(self.getResolvedReferenceGraph().edges) else: g = nx.DiGraph(args[0].edges) - + if len(list(nx.simple_cycles(g))) != 0: self.has_recursions = True else: @@ -668,12 +755,12 @@ def check_recursion(self, *args): def max_cycle_length(self, recursive_graph): """! @brief Returns the length of the longest cycle in a given recursive graph. - @param recursive_graph A schema_graph that contains recursions - @return The lenght of the longest cycle in the recursive graph. + @param recursive_graph A schema_graph that contains recursions + @return The lenght of the longest cycle in the recursive graph. """ g = nx.DiGraph(recursive_graph.edges) len_list = list() - + for cycle in nx.simple_cycles(g): len_list.append(len(list(cycle))) @@ -681,35 +768,35 @@ def max_cycle_length(self, recursive_graph): def min_cycle_length(self, recursive_graph): """! @brief Returns the length of the shortest cycle in a given recursive graph. - @param recursive_graph A schema_graph that contains recursions - @return The lenght of the shortest cycle in the recursive graph. + @param recursive_graph A schema_graph that contains recursions + @return The lenght of the shortest cycle in the recursive graph. """ g = nx.DiGraph(recursive_graph.edges) len_list = list() - + for cycle in nx.simple_cycles(g): len_list.append(len(list(cycle))) return min(len_list) def getNumberCycles(self): - """! @brief Return the number of cycles in the resolved reference graph of self. - The function creates the resolved reference graph and returns the number of cycles - in the resolved reference graph. + """! @brief Return the number of cycles in the resolved reference graph of self. + The function creates the resolved reference graph and returns the number of cycles + in the resolved reference graph. - @return Number of cycles in the resolved reference graph of self. + @return Number of cycles in the resolved reference graph of self. """ g = nx.DiGraph(self.getResolvedReferenceGraph().edges) return len(list(nx.simple_cycles(g))) def getNumberPathes(self): - """! @brief Return the number of simple pathes included in the resolved reference graph. + """! @brief Return the number of simple pathes included in the resolved reference graph. - This is equivalent to the number of leafes in the tree. Thats why this function counts - the number of KeyValueNodes in the graph. KeyValueNodes are leafes and vice versa. + This is equivalent to the number of leafes in the tree. Thats why this function counts + the number of KeyValueNodes in the graph. KeyValueNodes are leafes and vice versa. - @return The number of pathes in the resolved refernce graph of self + @return The number of pathes in the resolved refernce graph of self """ count = 0 solved_graph = self.getResolvedReferenceGraph() @@ -720,12 +807,12 @@ def getNumberPathes(self): return count def getWidth(self): - """! @brief Return the width of the schema_graph which is equivalent to the number of - leafes of the graph - - @return The width of the schema graph defined as number of leafes. - - """ + """! @brief Return the width of the schema_graph which is equivalent to the number of + leafes of the graph + + @return The width of the schema graph defined as number of leafes. + + """ count = 0 for node in self.nodes: @@ -733,36 +820,40 @@ def getWidth(self): count += 1 return count - def check_reachability(self): - """! @brief This function checks if the graph is fully reachable. - - Reachability is defined as usage of defintions. Reachability is given if all defined defintions - in the schema are referenced at least once. Reachability is not given if at least one defined - defintions is not at least referenced once. - This function uses the internal set self.def_secs_name_set which contains the names of all defintions - sections. It has to be set before using this method. It is created in getResolvedReferenceGraph(). + """! @brief This function checks if the graph is fully reachable. + + Reachability is defined as usage of defintions. Reachability is given if all defined defintions + in the schema are referenced at least once. Reachability is not given if at least one defined + defintions is not at least referenced once. + This function uses the internal set self.def_secs_name_set which contains the names of all defintions + sections. It has to be set before using this method. It is created in getResolvedReferenceGraph(). - @return Reachability of the graph as defined above. + @return Reachability of the graph as defined above. """ self.solved_graph = self.getResolvedReferenceGraph() reachability = True - + if not self.invalid_reference_detected: for def_name in self.def_secs_name_set: def_sec_node = self.getNodeByName(def_name) if not def_sec_node is None: defs_in_section = self.successors(def_sec_node) for def_node in defs_in_section: - if not (("$ref#/" + def_name + "/" + def_node.getName()) in self.solved_graph.res_name_list): + if not ( + ("$ref#/" + def_name + "/" + def_node.getName()) + in self.solved_graph.res_name_list + ): reachability = False return reachability else: - if None == self.solved_graph.getNodeByName(def_node.getName()): #refs in definition sections get resolved entries even if not used elsewhere + if None == self.solved_graph.getNodeByName( + def_node.getName() + ): # refs in definition sections get resolved entries even if not used elsewhere reachability = False - return reachability + return reachability else: reachability = False break @@ -774,25 +865,25 @@ def check_reachability(self): def getNoReferences(self): """! @brief This function counts all references in the JSON Schema document. - The method iterates over the raw dictionary of the Schema document to find all references. - This has to be done, because the schema_graph itself interprets equal references as one node. - That would not lead to the intended result. + The method iterates over the raw dictionary of the Schema document to find all references. + This has to be done, because the schema_graph itself interprets equal references as one node. + That would not lead to the intended result. - @return An integer value representing the number of references in the JSON schema document + @return An integer value representing the number of references in the JSON schema document """ return self.search_references(self.schema_dict) def search_references(self, schema_pattern, parentName="none"): - """! @brief This private function is used to find all references in the JSON Schema document in a - recursive manner. + """! @brief This private function is used to find all references in the JSON Schema document in a + recursive manner. - This method shall only be used by self.getNoReferences(self). Beginning with the original schema dictionary - the method goes recursively into the schema and finds all occurences of references. + This method shall only be used by self.getNoReferences(self). Beginning with the original schema dictionary + the method goes recursively into the schema and finds all occurences of references. - @param schema_pattern Part of the schema_dictionary to step into - @param parentName Name of the parent "node" to identify $ref + @param schema_pattern Part of the schema_dictionary to step into + @param parentName Name of the parent "node" to identify $ref - @return Number of references in the currrent observed part of the schema_dictionary + @return Number of references in the currrent observed part of the schema_dictionary """ ## return value @@ -805,8 +896,8 @@ def search_references(self, schema_pattern, parentName="none"): elif isinstance(schema_pattern, list): for item in schema_pattern: ref_count += self.search_references(item, str(item)) - - elif (isinstance(schema_pattern, str) and (parentName == "$ref")): + + elif isinstance(schema_pattern, str) and (parentName == "$ref"): ref_count += 1 else: # schema_pattern is either int, float, or None (null in JSON @@ -816,26 +907,25 @@ def search_references(self, schema_pattern, parentName="none"): return ref_count - def getInvalidReferenceFlag(self): """! @brief Getter for invalid reference detection flag. - - @return Invalid reference detection flag - Set if invalid references were detected + + @return Invalid reference detection flag - Set if invalid references were detected """ return self.invalid_reference_detected def store_defsecname(self, ref_name): """! @brief This function stores the name of the definition section in self.def_secs_name_set. - It returns the stored name. - - @return The stored definition section name + It returns the stored name. + + @return The stored definition section name """ - str_part_list = ref_name.split('/') + str_part_list = ref_name.split("/") if len(str_part_list) > 1: - #ref_name come in the form of "#/defname/refname", so second entry - #in list is the defname - self.def_secs_name_set.add(str_part_list[1]) #sets store entries unique - + # ref_name come in the form of "#/defname/refname", so second entry + # in list is the defname + self.def_secs_name_set.add(str_part_list[1]) # sets store entries unique + return str_part_list[1] else: # root referenced by "#" @@ -844,40 +934,39 @@ def store_defsecname(self, ref_name): def getNodeByName(self, name): """! @brief This function searches the given name in all nodes and returns the first node with the given name. - - @param name Node's name to search for. - @return First node found with the given name. + @param name Node's name to search for. + + @return First node found with the given name. """ for node in self.nodes: if node.getName() == name: return node - return None # in case name was not found, return None - + return None # in case name was not found, return None def getNodeByPath(self, path): """! @brief This function returns the node located at the end of path - @param path Path to node as string, e.g. #/defintions/foo, when Node "foo" is searched + @param path Path to node as string, e.g. #/defintions/foo, when Node "foo" is searched - @return Searched node in self or None if not found + @return Searched node in self or None if not found """ - path_parts = path.split('/') + path_parts = path.split("/") node = None valid_path = True for part in path_parts: - if part == '#': + if part == "#": node = list(self.nodes)[0] else: - valid_path = False #set true if successor found + valid_path = False # set true if successor found if not node is None: for suc in self.successors(node): if suc.getName() == part: node = suc valid_path = True - break # successor found, stop searching + break # successor found, stop searching else: # empty reference, return None # this is treaten as invalid reference later on @@ -890,22 +979,22 @@ def getNodeByPath(self, path): def logmessage(self, message): """! @brief This function writes a message to the logfile. - The function write the filename and the given message to the logfile "../../schema_graph.log". - - @param message Message to write to the logfile + The function write the filename and the given message to the logfile "../../schema_graph.log". + + @param message Message to write to the logfile """ - logfile = open("../../schema_graph.log", 'a+') + logfile = open("../../schema_graph.log", "a+") logfile.write(self.filename + ": " + message + "\n") logfile.close() def getExtendedRefGraph(self): """! @brief This function is used to create a graph with extended references. - - This is done by multiplying the every reference to generate multiple reference node with one predecessor only. - Then, a resolved graph is generated for this extended graph. The result is stored in self.ext_solved_graph. - @return The extended resolved reference graph + This is done by multiplying the every reference to generate multiple reference node with one predecessor only. + Then, a resolved graph is generated for this extended graph. The result is stored in self.ext_solved_graph. + + @return The extended resolved reference graph """ if self.ext_solved_graph is None: @@ -913,11 +1002,10 @@ def getExtendedRefGraph(self): ext_graph.setNodeIDs - ext_orig_nodes = list(ext_graph.nodes) for node in ext_orig_nodes: predecs = list(ext_graph.predecessors(node)) - if ((len(predecs)) > 1): + if (len(predecs)) > 1: subgraph = ext_graph.getSuccessorSubgraph(node) # cycles in subgraphs can lead to _unconnected_ cliques in # the extendend graph @@ -926,18 +1014,20 @@ def getExtendedRefGraph(self): predec_copy = predecs for pred_node in predec_copy: if pred_node in subgraph.nodes: - predecs.remove(pred_node) + predecs.remove(pred_node) - for pred in predecs[1:]: #let the first edge lead to the originial subgraph + for pred in predecs[ + 1: + ]: # let the first edge lead to the originial subgraph subgraph_copy = deepcopy(subgraph) # search sub_root in subgraph sub_root = None for sub_node in subgraph_copy.nodes: if sub_node.getID() is node.getID(): sub_root = sub_node - break # root found # no checking for None necessary because node with right ID - # surely exists - + break # root found # no checking for None necessary because node with right ID + # surely exists + # copy the new subgraph into the extended graph, remove # the edge to shared reference and add new edge # to the subgraph @@ -950,28 +1040,28 @@ def getExtendedRefGraph(self): def getSuccessorSubgraph(self, ref_node): """! @brief This function returns a subgraph with ref_node as root. - All successors and successors of successors (and so on) of ref_node are - determined using the recursive function successor_list and the resulting - subgraph is returned. + All successors and successors of successors (and so on) of ref_node are + determined using the recursive function successor_list and the resulting + subgraph is returned. - @param ref_node Root node of subgraph + @param ref_node Root node of subgraph - @return The resulting subgraph + @return The resulting subgraph """ self.sub_node_set = set() self.successor_list(ref_node) - + return self.subgraph(self.sub_node_set) - def successor_list(self, ref_node): + def successor_list(self, ref_node): """! @brief This private function fills the successor list self.sub_node_set in a recursive manner. - It is called by getSuccessorSubgraph(..). This function ensures that self.sub_node_set is empty in advance. + It is called by getSuccessorSubgraph(..). This function ensures that self.sub_node_set is empty in advance. - @param ref_node Node to find and add successors + @param ref_node Node to find and add successors - @return None + @return None """ if not ref_node in self.sub_node_set: @@ -985,10 +1075,10 @@ def successor_list(self, ref_node): def setNodeIDs(self): """! @brief This functions sets unique IDs for each node in the graph - This is necessary to find the correct sub roots while expanding the graph and generating subgraphs. + This is necessary to find the correct sub roots while expanding the graph and generating subgraphs. - @param void - @return Nothing + @param void + @return Nothing """ i = 0 for node in self.nodes: @@ -996,25 +1086,27 @@ def setNodeIDs(self): i += 1 def getBlowUpFactor(self): - """! @brief This function calculates a blow-up factor as proxy-metrics for how compact the schema is designed. - - When authors of schemas make use of references, they can create very tight schemas. The blow-up factor is a proxy-metrics - for the "tightness" of a schema. It is calculated as the number of nodes after expanding the graph (see getExtendedRefGraph() ) - divided by the number of nodes in the original schema graph. + """! @brief This function calculates a blow-up factor as proxy-metrics for how compact the schema is designed. - @return A Blow-Up Factor as proxy-metrics for the "tightness" of a schema - """ + When authors of schemas make use of references, they can create very tight schemas. The blow-up factor is a proxy-metrics + for the "tightness" of a schema. It is calculated as the number of nodes after expanding the graph (see getExtendedRefGraph() ) + divided by the number of nodes in the original schema graph. - if not self.check_recursion(): - if self.ext_solved_graph is None: - self.ext_solved_graph = self.getExtendedRefGraph() - - if self.solved_graph is None: - self.solved_graph = self.getResGraph() + @return A Blow-Up Factor as proxy-metrics for the "tightness" of a schema + """ + + if not self.check_recursion(): + if self.ext_solved_graph is None: + self.ext_solved_graph = self.getExtendedRefGraph() - ret_val = (len(list(self.ext_solved_graph.nodes)) / len(list(self.solved_graph.nodes))) + if self.solved_graph is None: + self.solved_graph = self.getResGraph() - else: - ret_val = 0 + ret_val = len(list(self.ext_solved_graph.nodes)) / len( + list(self.solved_graph.nodes) + ) + + else: + ret_val = 0 - return ret_val + return ret_val diff --git a/validity_constants.py b/validity_constants.py index fd41315..81a0340 100644 --- a/validity_constants.py +++ b/validity_constants.py @@ -5,5 +5,3 @@ SCHEMA_VALIDATOR_EXCEPTION = 1 SCHEMA_REFERENCE_EXCEPTION = 2 SCHEMA_VALID = 0 - -