From f7c5242d7ef1181d6122ad26d4de18b9d5bee3b5 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Thu, 4 May 2023 18:09:29 -0400 Subject: [PATCH 01/39] adding support for verbosity test output to a user specified file --- configs/default_filter_config.json | 4 ++ src/diagnose_github_repo.py | 7 +- src/diagnose_npm_package.py | 7 +- src/test_JS_repo_lib.py | 108 +++++++++++++++++++++++++++-- 4 files changed, 120 insertions(+), 6 deletions(-) diff --git a/configs/default_filter_config.json b/configs/default_filter_config.json index 872b2bb..14fdabb 100644 --- a/configs/default_filter_config.json +++ b/configs/default_filter_config.json @@ -14,6 +14,10 @@ }, "test": { "track_tests": true, + "test_verbose_all_output": { + "do_verbose_tracking": false, + "verbose_json_output_file": "verbose_test_report.json" + }, "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"], "timeout": 1000 diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py index 040db10..7e7b44c 100644 --- a/src/diagnose_github_repo.py +++ b/src/diagnose_github_repo.py @@ -33,6 +33,8 @@ class RepoWalker(): COMPUTE_DEP_LISTS = False TRACK_BUILD = True TRACK_TESTS = True + TEST_VERBOSE_ALL_OUTPUT = False + TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json" TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"] @@ -42,7 +44,7 @@ class RepoWalker(): # timeouts for stages, in seconds INSTALL_TIMEOUT = 1000 - # note: these are timeouts pers *script* in the stage of the process + # note: these are timeouts per *script* in the stage of the process BUILD_TIMEOUT = 1000 TEST_TIMEOUT = 1000 @@ -97,6 +99,9 @@ def set_up_config( self, config_file): self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT) self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS) self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS) + test_verbose_config = cf_dict.get("test_verbose_all_output", {}) + self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT) + self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON) cf_dict = config_json.get("QL_output", {}) self.QL_CUTOFF = cf_dict.get("QL_cutoff", self.QL_CUTOFF) diff --git a/src/diagnose_npm_package.py b/src/diagnose_npm_package.py index 4e4cd78..cc09f8a 100644 --- a/src/diagnose_npm_package.py +++ b/src/diagnose_npm_package.py @@ -26,6 +26,8 @@ class NPMSpider(scrapy.Spider): COMPUTE_DEP_LISTS = False TRACK_BUILD = True TRACK_TESTS = True + TEST_VERBOSE_ALL_OUTPUT = False + TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json" TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"] @@ -35,7 +37,7 @@ class NPMSpider(scrapy.Spider): # timeouts for stages, in seconds INSTALL_TIMEOUT = 1000 - # note: these are timeouts pers *script* in the stage of the process + # note: these are timeouts per *script* in the stage of the process BUILD_TIMEOUT = 1000 TEST_TIMEOUT = 1000 @@ -89,6 +91,9 @@ def set_up_config( self, config_file): self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT) self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS) self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS) + test_verbose_config = 
cf_dict.get("test_verbose_all_output", {}) + self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT) + self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON) def parse(self, response): # TODO should we handle specific response codes? diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index f93d3af..2719b0d 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -116,11 +116,90 @@ def run_tests( manager, pkg_json, crawler): test_info.compute_test_infras() test_info.compute_nested_test_commands( test_scripts) test_info.compute_test_stats() - # print( test_info[t]) - # print( get_test_info(error, output)) + # if we're in verbose testing mode (i.e. getting all timing info for each test, etc) + # then, we rerun the test commands with all the commands for adding verbose_mode to + # each of the test infras involved (individually) + if crawler.TEST_VERBOSE_ALL_OUTPUT: + # we're gonna be adding our new custom scripts for verbosity testing + run_command( "mv package.json TEMP_package.json_TEMP") + verbosity_index = 0 + test_verbosity_output = {} + for test_infra in test_info.test_infras: + verbose_test_json = ("" if verbosity_index == 0 else "infra_" + str(verbosity_index) + "_") + crawler.TEST_VERBOSE_OUTPUT_JSON + infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS[test_infra] + if not infra_verbosity_config: # checks if it's an empty object + print("TEST VERBOSE MODE: unsupported test infra " + test_infra) + test_verbosity_output[test_infra] = { "error": True } + continue + infra_verbosity_args = infra_verbosity_config.get("args", "") + infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end + infra_verbosity_command = instrument_test_command_for_verbose(test_info.test_command, test_infra, infra_verbosity_args, + verbose_test_json, infra_verbosity_args_pos) + verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index) + pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command + with open("package.json", 'w') as f: + json.dump( pkg_json, f) + print("Running verbosity: " + manager + infra_verbosity_command) + verb_error, verb_output, verb_retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) + verbosity_index += 1 + # get the output + test_verbosity_infra = {} + test_verbosity_infra["command"] = infra_verbosity_command + test_verbosity_infra["output_files"] = verbose_test_json + if crawler.VERBOSE_MODE: + test_verbosity_infra["test_debug"] = "\nError output: " + verb_error.decode('utf-8') \ + + "\nOutput stream: " + verb_output.decode('utf-8') + test_verbosity_output[test_infra] = test_verbosity_infra + test_info.set_test_verbosity_output(test_verbosity_output) + # put the package.json back + run_command( "mv TEMP_package.json_TEMP package.json") test_json_summary[t] = test_info.get_json_rep() return( retcode, test_json_summary) +def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos): + # replace the output file name with the custom output filename + # add an index to the filename for the 2nd,+ time the filename shows up + # so as to avoid overwriting the files + num_files = 0 + new_infra_verbosity_args = "" + for i, sub in enumerate(infra_verbosity_args.split("$PLACEHOLDER_OUTPUT_FILE_NAME$")): + # not the file name + if sub != "": + new_infra_verbosity_args += sub + else: + 
new_infra_verbosity_args += ("" if num_files == 0 else ("out_" + str(num_files) + "_")) + verbose_test_json + num_files += 1 + infra_verbosity_args = new_infra_verbosity_args + # split into sub-commands + command_split_chars = [ "&&", ";"] + infra_calls = test_script.split(test_infra) + instrumented_test_command = [] + for i, infra_call in enumerate(infra_calls): + # if the current call is empty string and the next is non-empty + # then this is the call to the testing infra and the next is the arguments + # so, skip this one + # if there are no args (i.e. no next non-empty string), then just instrument this one + if infra_call == "" and i < len(infra_calls) - 1 and infra_calls[i + 1] != "": + instrumented_test_command += [ "" ] + continue + # if the first call is non-empty, then it's pre-test-infra and we skip it too + elif infra_call != "" and i == 0: + instrumented_test_command += [ "" ] + continue + # get the arguments, splitting off from any other non-test commands that might be + # in this command (note: we know all the commands started with test_infra) + end_command_pos = re.search(r'|'.join(command_split_chars), infra_call) + end_command_pos = end_command_pos.start() if not end_command_pos is None else -1 + sub_command_args = (infra_call[0:end_command_pos] if end_command_pos > -1 else infra_call).split(" ") + if infra_verbosity_args_pos != -1: + sub_command_args.insert(infra_verbosity_args_pos, infra_verbosity_args) + else: + sub_command_args.append(infra_verbosity_args) + # rebuild the command, re-attaching any extra sub-commands + instrumented_test_command += [ " ".join(sub_command_args) + (infra_call[end_command_pos:] if end_command_pos > -1 else "") ] + return(test_infra.join(instrumented_test_command)) + + def called_in_command( str_comm, command, manager): # command ends with command terminator (this list includes \0 end-of-string, # but this is not available to check in Python so we use endswith) @@ -195,14 +274,29 @@ class TestInfo: "failing": ("failed", -1) }, } + # extra args, their position in the arg list, and any post-processing required + VERBOSE_TESTS_EXTRA_ARGS = { + "jest": { + "args": " --verbose --json --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$", + "position": -1, + "post_processing": None + }, + "mocha": { + "args": " -- --reporter xunit --reporter-option output=$PLACEHOLDER_OUTPUT_FILE_NAME$", + "position": -1, + "post_processing": None #TODO change this to the xml-to-json parser + } + } TRACKED_INFRAS = { "mocha": { "name": "mocha", - "output_checkers": [ "mocha", "tap" ] + "output_checkers": [ "mocha", "tap" ], + "verbose_tests_extra_args": [ "mocha" ] }, "jest": { "name": "jest", - "output_checkers": [ "jest" ] + "output_checkers": [ "jest" ], + "verbose_tests_extra_args": [ "jest" ] }, "jasmine": { "name": "jasmine", @@ -256,10 +350,14 @@ def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE): self.num_failing = None self.timed_out = False self.VERBOSE_MODE = VERBOSE_MODE + self.test_verbosity_output = None def set_test_command( self, test_command): self.test_command = test_command + def set_test_verbosity_output( self, verbose_output): + self.test_verbosity_output = verbose_output + def compute_test_infras( self): self.test_infras = [] self.test_covs = [] @@ -323,6 +421,8 @@ def get_json_rep( self): json_rep["nested_test_commands"] = self.nested_test_commands if "test_infras" not in json_rep: json_rep["RUNS_NEW_USER_TESTS"] = False + if self.test_verbosity_output: + json_rep["test_verbosity_output"] = self.test_verbosity_output 
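+            # test_verbosity_output: per-infra dict holding the instrumented command, its verbose report file, and (in VERBOSE_MODE) the raw error/output streams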
json_rep["timed_out"] = self.timed_out return( json_rep) From 38df6aa9a66e035056e18f7db70b3834253c2cf3 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 5 May 2023 15:36:14 -0400 Subject: [PATCH 02/39] rename output file so no overwrite with multiple test scripts; move test reports to specified output dir; add parser for mocha output post-processing (xml to json) --- .gitignore | 1 + src/TestInfo.py | 251 +++++++++++++++++++++ src/diagnose_github_repo.py | 2 +- src/diagnose_npm_package.py | 2 +- src/output_parsing/test_output_proc.py | 13 ++ src/test_JS_repo_lib.py | 293 +++---------------------- 6 files changed, 298 insertions(+), 264 deletions(-) create mode 100644 src/TestInfo.py create mode 100644 src/output_parsing/test_output_proc.py diff --git a/.gitignore b/.gitignore index 0679a00..34f1701 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ QLDBs/* items.json *__page_data.html *__results.json +*_verbose_test_report.json local_mount/* **/node_modules diff --git a/src/TestInfo.py b/src/TestInfo.py new file mode 100644 index 0000000..47a832f --- /dev/null +++ b/src/TestInfo.py @@ -0,0 +1,251 @@ +import re +import output_parsing.test_output_proc as TestOutputProc + +class TestInfo: + OUTPUT_CHECKERS = { + "mocha": + { + "output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '.*', + "passing": ("passing", -1), + "failing": ("failing", -1) + }, + "jest": + { + "output_regex_fct" : lambda condition: r'Tests:.*\d+ ' + condition, + "passing": ("passed", -1), + "failing": ("failed", -1) + }, + "tap": { + "output_regex_fct" : lambda condition: r'# ' + condition + '.*\d+', + "passing": ("pass", 1), + "failing": ("fail", 1) + }, + "tap_raw": { + "output_regex_fct" : lambda condition: r'' + condition + ' \d+ - (?!.*time=).*$', + "passing": (r'^.*(?!not )ok', None), # this "passing" is a regex: count "ok" but not "not ok" + "failing": (r'^.*not ok', None) + }, + "ava": + { + "output_regex_fct": lambda condition: r'.*\d+ tests? 
' + condition, + "passing": ("passed", -2), + "failing": ("failed", -2) + }, + "ava_2": + { + "output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '$', + "passing": ("passed", -1), + "failing": ("failed", -1) + }, + } + # extra args, their position in the arg list, and any post-processing required + # post-processing is a function that takes 2 arguments: input file and output file + VERBOSE_TESTS_EXTRA_ARGS = { + "jest": { + "args": " --verbose --json --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$", + "position": -1, + "post_processing": None + }, + "mocha": { + "args": " -- --reporter xunit --reporter-option output=$PLACEHOLDER_OUTPUT_FILE_NAME$", + "position": -1, + "post_processing": TestOutputProc.xml_to_json + } + } + TRACKED_INFRAS = { + "mocha": { + "name": "mocha", + "output_checkers": [ "mocha", "tap" ], + "verbose_tests_extra_args": [ "mocha" ] + }, + "jest": { + "name": "jest", + "output_checkers": [ "jest" ], + "verbose_tests_extra_args": [ "jest" ] + }, + "jasmine": { + "name": "jasmine", + "output_checkers": [ "mocha" ] + }, + "tap": { + "name": "tap", + "output_checkers": [ "tap", "tap_raw" ] + }, + "lab": { + "name": "lab", + "output_checkers": [] + }, + "ava": { + "name": "ava", + "output_checkers": [ "ava", "ava_2" ] + }, + "gulp": { + "name": "gulp", + "output_checkers": [ "mocha" ] + }, + } + TRACKED_COVERAGE = { + "istanbul": "istanbul -- coverage testing", + "nyc": "nyc -- coverage testing", + "coveralls": "coveralls -- coverage testing", + "c8": "c8 -- coverage testing" + } + TRACKED_LINTERS = { + "eslint": "eslint -- linter", + "tslint": "tslint -- linter", + "xx": "xx -- linter", + "standard": "standard -- linter", + "prettier": "prettier -- linter", + "gulp lint": "gulp lint -- linter" + } + + TRACKED_RUNNERS = [ "node", "babel-node", "grunt" ] + + def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE): + self.success = success + self.error_stream = error_stream + self.output_stream = output_stream + self.manager = manager + # start all other fields as None + self.test_infras = None + self.test_covs = None + self.test_lints = None + self.nested_test_commands = None + self.num_passing = None + self.num_failing = None + self.timed_out = False + self.VERBOSE_MODE = VERBOSE_MODE + self.test_verbosity_output = None + + def set_test_command( self, test_command): + self.test_command = test_command + + def set_test_verbosity_output( self, verbose_output): + self.test_verbosity_output = verbose_output + + def compute_test_infras( self): + self.test_infras = [] + self.test_covs = [] + self.test_lints = [] + self.nested_test_commands = [] + if self.test_command: + self.test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, self.test_command, self.manager) ] + self.test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, self.test_command, self.manager) ] + self.test_covs += [ TestInfo.TRACKED_COVERAGE[ti] for ti in TestInfo.TRACKED_COVERAGE if called_in_command(ti, self.test_command, self.manager) ] + self.test_lints += [ TestInfo.TRACKED_LINTERS[ti] for ti in TestInfo.TRACKED_LINTERS if called_in_command(ti, self.test_command, self.manager) ] + self.test_infras = list(set(self.test_infras)) + self.test_covs = list(set(self.test_covs)) + self.test_lints = list(set(self.test_lints)) + # TODO: maybe we can also figure it out from the output stream + + def compute_nested_test_commands( self, test_commands): + # one might think that we should only check the package's own manager + # however, it's 
common to mix and match (esp. to run commands with "npm run" even if the package manager is yarn) + self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "npm run " + tc, self.test_command, self.manager) ] + self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "yarn " + tc, self.test_command, self.manager) ] + + def compute_test_stats( self): + if not self.test_infras or self.test_infras == []: + return + test_output = self.output_stream.decode('utf-8') + self.error_stream.decode('utf-8') + ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') + test_output = ansi_escape.sub('', test_output) + self.num_passing = 0 + self.num_failing = 0 + self.timed_out = (self.error_stream.decode('utf-8') == "TIMEOUT ERROR") + for infra in self.test_infras: + output_checker_names = TestInfo.TRACKED_INFRAS.get(infra, {}).get("output_checkers", []) + if infra in TestInfo.TRACKED_RUNNERS and output_checker_names == []: + output_checker_names = self.OUTPUT_CHECKERS.keys() # all the checkers + for checker_name in output_checker_names: + div_factor = 2 if checker_name == "ava_2" else 1 + checker = self.OUTPUT_CHECKERS[ checker_name] + self.num_passing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["passing"][0], checker["passing"][1]) / div_factor) + self.num_failing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["failing"][0], checker["failing"][1]) / div_factor) + + def get_json_rep( self): + json_rep = {} + if self.VERBOSE_MODE: + json_rep["test_debug"] = "" + if not self.success: + json_rep["ERROR"] = True + if self.VERBOSE_MODE: + json_rep["test_debug"] += "\nError output: " + self.error_stream.decode('utf-8') + if self.num_passing is not None and self.num_failing is not None: + json_rep["num_passing"] = self.num_passing + json_rep["num_failing"] = self.num_failing + if self.VERBOSE_MODE: + json_rep["test_debug"] += "\nOutput stream: " + self.output_stream.decode('utf-8') + if self.test_infras and self.test_infras != []: + json_rep["test_infras"] = [TestInfo.TRACKED_INFRAS.get(infra, {}).get("name", "Custom Testing: " + infra) for infra in self.test_infras] + if self.test_covs and self.test_covs != []: + json_rep["test_coverage_tools"] = self.test_covs + if self.test_lints and self.test_lints != []: + json_rep["test_linters"] = self.test_lints + if self.nested_test_commands and self.nested_test_commands != []: + json_rep["nested_test_commands"] = self.nested_test_commands + if "test_infras" not in json_rep: + json_rep["RUNS_NEW_USER_TESTS"] = False + if self.test_verbosity_output: + json_rep["test_verbosity_output"] = self.test_verbosity_output + json_rep["timed_out"] = self.timed_out + return( json_rep) + + def __str__(self): + to_ret = "" + if not self.success: + to_ret += "ERROR" + if self.VERBOSE_MODE: + to_ret += "\nError output: " + self.error_stream.decode('utf-8') + else: + to_ret += "SUCCESS" + if self.num_passing is not None and self.num_failing is not None: + to_ret += "\nPassing tests: " + str(self.num_passing) + "\nFailing tests: " + str(self.num_failing) + if self.VERBOSE_MODE: + to_ret += "\nOutput stream: " + self.output_stream.decode('utf-8') + if self.test_infras and self.test_infras != []: + to_ret += "\nTest infras: " + str([TestInfo.TRACKED_INFRAS[infra]["name"] for infra in self.test_infras]) + if self.test_covs and self.test_covs != []: + to_ret += "\nCoverage testing: " + str(self.test_covs) + if self.test_lints and self.test_lints != []: + to_ret += "\nLinter: " + 
str(self.test_lints) + if self.nested_test_commands and self.nested_test_commands != []: + to_ret += "\nNested test commands: " + str(self.nested_test_commands) + to_ret += "\nTimed out: " + str(self.timed_out) + return( to_ret) + +def called_in_command( str_comm, command, manager): + # command ends with command terminator (this list includes \0 end-of-string, + # but this is not available to check in Python so we use endswith) + post_command_chars = [ "" ] if command.endswith(str_comm) else [ " ", "\t", ";"] + for pcc in post_command_chars: + check_comm = str_comm + pcc + if command.find( check_comm) == 0: + return( True) + if command.find( "&&" + check_comm) > -1 or command.find( "&& " + check_comm) > -1: + return( True) + if command.find( "cross-env NODE_ENV=test " + check_comm) > -1 or command.find( "cross-env NODE_ENV=production " + check_comm) > -1: + return( True) + if command.find( "cross-env CI=true " + check_comm) > -1: + return( True) + if command.find( "opener " + check_comm) > -1: + return( True) + if command.find( "gulp " + check_comm) > -1: + return( True) + if command.find( "nyc " + check_comm) > -1: + return( True) + return( False) + +def test_cond_count( test_output, regex_fct, condition, offset): + ptrn = re.compile( regex_fct(condition), re.MULTILINE) + results = ptrn.findall( test_output) + if offset is None: + return( len( results)) # just count the number of hits, each hit is an individual test (example: tap "ok" vs "not ok") + num_cond = 0 + for r in results: + temp = r.split() + try: + num_cond += int( temp[temp.index(condition) + offset]) + except ValueError: + num_cond += 0 + return( num_cond) \ No newline at end of file diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py index 7e7b44c..5d65c3a 100644 --- a/src/diagnose_github_repo.py +++ b/src/diagnose_github_repo.py @@ -52,7 +52,7 @@ class RepoWalker(): def __init__(self, config_file="", output_dir = "."): self.set_up_config( config_file) - self.output_dir = output_dir + self.output_dir = os.path.abspath(output_dir) def set_repo_links(self, repo_links): self.repo_links = repo_links diff --git a/src/diagnose_npm_package.py b/src/diagnose_npm_package.py index cc09f8a..aa5dcf1 100644 --- a/src/diagnose_npm_package.py +++ b/src/diagnose_npm_package.py @@ -46,7 +46,7 @@ def __init__(self, packages=None, config_file="", output_dir=".", *args, **kwarg self.packages = packages self.start_urls = ['https://www.npmjs.com/package/' + pkg for pkg in self.packages] self.set_up_config( config_file) - self.output_dir = output_dir + self.output_dir = os.path.abspath(output_dir) super(NPMSpider, self).__init__(*args, **kwargs) def set_up_config( self, config_file): diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py new file mode 100644 index 0000000..c2ff451 --- /dev/null +++ b/src/output_parsing/test_output_proc.py @@ -0,0 +1,13 @@ +import json +import xmltodict + +# convert an xml file to json +# used to convert the xunit reporter output from mocha into json +# note: this overwrites the existing file +# code from https://www.geeksforgeeks.org/python-xml-to-json/ +def xml_to_json(output_file): + with open(output_file) as xml_file: + data_dict = xmltodict.parse(xml_file.read()) + json_data = json.dumps(data_dict) + with open(output_file, 'w') as json_file: + json_file.write(json_data) \ No newline at end of file diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index 2719b0d..a8797d0 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ 
-2,6 +2,7 @@ import subprocess import json import os +from TestInfo import * def run_command( commands, timeout=None): for command in commands.split(";"): @@ -100,7 +101,7 @@ def run_build( manager, pkg_json, crawler): build_script_list += [b] return( retcode, build_script_list, build_debug) -def run_tests( manager, pkg_json, crawler): +def run_tests( manager, pkg_json, crawler, cur_dir="."): test_json_summary = {} retcode = 0 if len(crawler.TRACKED_TEST_COMMANDS) == 0: @@ -108,7 +109,7 @@ def run_tests( manager, pkg_json, crawler): test_scripts = [t for t in pkg_json.get("scripts", {}).keys() if not set([ t.find(t_com) for t_com in crawler.TRACKED_TEST_COMMANDS]) == {-1}] test_scripts = [t for t in test_scripts if set([t.find(ig_com) for ig_com in crawler.IGNORED_COMMANDS]) == {-1}] test_scripts = [t for t in test_scripts if set([pkg_json.get("scripts", {})[t].find(ig_sub) for ig_sub in crawler.IGNORED_SUBSTRINGS]) == {-1}] - for t in test_scripts: + for test_index, t in enumerate(test_scripts): print("Running: " + manager + t) error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT) test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE) @@ -122,10 +123,12 @@ def run_tests( manager, pkg_json, crawler): if crawler.TEST_VERBOSE_ALL_OUTPUT: # we're gonna be adding our new custom scripts for verbosity testing run_command( "mv package.json TEMP_package.json_TEMP") - verbosity_index = 0 test_verbosity_output = {} - for test_infra in test_info.test_infras: - verbose_test_json = ("" if verbosity_index == 0 else "infra_" + str(verbosity_index) + "_") + crawler.TEST_VERBOSE_OUTPUT_JSON + for verbosity_index, test_infra in enumerate(test_info.test_infras): + verbose_test_json = crawler.output_dir + "/" \ + + "test_" + str(test_index) + "_"\ + + "infra_" + str(verbosity_index) + "_" \ + + crawler.TEST_VERBOSE_OUTPUT_JSON infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS[test_infra] if not infra_verbosity_config: # checks if it's an empty object print("TEST VERBOSE MODE: unsupported test infra " + test_infra) @@ -133,7 +136,8 @@ def run_tests( manager, pkg_json, crawler): continue infra_verbosity_args = infra_verbosity_config.get("args", "") infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end - infra_verbosity_command = instrument_test_command_for_verbose(test_info.test_command, test_infra, infra_verbosity_args, + infra_verbosity_post_proc = infra_verbosity_config.get("post_processing", None) + infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_info.test_command, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos) verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index) pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command @@ -141,6 +145,9 @@ def run_tests( manager, pkg_json, crawler): json.dump( pkg_json, f) print("Running verbosity: " + manager + infra_verbosity_command) verb_error, verb_output, verb_retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) + if not infra_verbosity_post_proc is None: + for out_file in out_files: + infra_verbosity_post_proc(out_file) verbosity_index += 1 # get the output test_verbosity_infra = {} @@ -162,12 +169,21 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity # so as to avoid overwriting the files num_files = 0 new_infra_verbosity_args = "" + output_files = [] for i, sub in 
enumerate(infra_verbosity_args.split("$PLACEHOLDER_OUTPUT_FILE_NAME$")): # not the file name if sub != "": new_infra_verbosity_args += sub else: - new_infra_verbosity_args += ("" if num_files == 0 else ("out_" + str(num_files) + "_")) + verbose_test_json + path_index = verbose_test_json.rfind("/") + if path_index == -1: + output_file = "out_" + str(num_files) + "_" + verbose_test_json + new_infra_verbosity_args += output_file + output_files += [ output_file ] + else: + output_file = verbose_test_json[:path_index] + "/out_" + str(num_files) + "_" + verbose_test_json[path_index + 1:] + new_infra_verbosity_args += output_file + output_files += [ output_file ] num_files += 1 infra_verbosity_args = new_infra_verbosity_args # split into sub-commands @@ -175,11 +191,11 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity infra_calls = test_script.split(test_infra) instrumented_test_command = [] for i, infra_call in enumerate(infra_calls): - # if the current call is empty string and the next is non-empty + # if the current call is empty string # then this is the call to the testing infra and the next is the arguments # so, skip this one - # if there are no args (i.e. no next non-empty string), then just instrument this one - if infra_call == "" and i < len(infra_calls) - 1 and infra_calls[i + 1] != "": + # if there are no args (i.e. no next string), then just instrument this one + if infra_call == "" and i < len(infra_calls) - 1: instrumented_test_command += [ "" ] continue # if the first call is non-empty, then it's pre-test-infra and we skip it too @@ -197,259 +213,12 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity sub_command_args.append(infra_verbosity_args) # rebuild the command, re-attaching any extra sub-commands instrumented_test_command += [ " ".join(sub_command_args) + (infra_call[end_command_pos:] if end_command_pos > -1 else "") ] - return(test_infra.join(instrumented_test_command)) - - -def called_in_command( str_comm, command, manager): - # command ends with command terminator (this list includes \0 end-of-string, - # but this is not available to check in Python so we use endswith) - post_command_chars = [ "" ] if command.endswith(str_comm) else [ " ", "\t", ";"] - for pcc in post_command_chars: - check_comm = str_comm + pcc - if command.find( check_comm) == 0: - return( True) - if command.find( "&&" + check_comm) > -1 or command.find( "&& " + check_comm) > -1: - return( True) - if command.find( "cross-env NODE_ENV=test " + check_comm) > -1 or command.find( "cross-env NODE_ENV=production " + check_comm) > -1: - return( True) - if command.find( "cross-env CI=true " + check_comm) > -1: - return( True) - if command.find( "opener " + check_comm) > -1: - return( True) - if command.find( "gulp " + check_comm) > -1: - return( True) - if command.find( "nyc " + check_comm) > -1: - return( True) - return( False) - -def test_cond_count( test_output, regex_fct, condition, offset): - ptrn = re.compile( regex_fct(condition), re.MULTILINE) - results = ptrn.findall( test_output) - if offset is None: - return( len( results)) # just count the number of hits, each hit is an individual test (example: tap "ok" vs "not ok") - num_cond = 0 - for r in results: - temp = r.split() - try: - num_cond += int( temp[temp.index(condition) + offset]) - except ValueError: - num_cond += 0 - return( num_cond) - - -class TestInfo: - OUTPUT_CHECKERS = { - "mocha": - { - "output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '.*', - "passing": 
("passing", -1), - "failing": ("failing", -1) - }, - "jest": - { - "output_regex_fct" : lambda condition: r'Tests:.*\d+ ' + condition, - "passing": ("passed", -1), - "failing": ("failed", -1) - }, - "tap": { - "output_regex_fct" : lambda condition: r'# ' + condition + '.*\d+', - "passing": ("pass", 1), - "failing": ("fail", 1) - }, - "tap_raw": { - "output_regex_fct" : lambda condition: r'' + condition + ' \d+ - (?!.*time=).*$', - "passing": (r'^.*(?!not )ok', None), # this "passing" is a regex: count "ok" but not "not ok" - "failing": (r'^.*not ok', None) - }, - "ava": - { - "output_regex_fct": lambda condition: r'.*\d+ tests? ' + condition, - "passing": ("passed", -2), - "failing": ("failed", -2) - }, - "ava_2": - { - "output_regex_fct" : lambda condition: r'.*\d+ ' + condition + '$', - "passing": ("passed", -1), - "failing": ("failed", -1) - }, - } - # extra args, their position in the arg list, and any post-processing required - VERBOSE_TESTS_EXTRA_ARGS = { - "jest": { - "args": " --verbose --json --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$", - "position": -1, - "post_processing": None - }, - "mocha": { - "args": " -- --reporter xunit --reporter-option output=$PLACEHOLDER_OUTPUT_FILE_NAME$", - "position": -1, - "post_processing": None #TODO change this to the xml-to-json parser - } - } - TRACKED_INFRAS = { - "mocha": { - "name": "mocha", - "output_checkers": [ "mocha", "tap" ], - "verbose_tests_extra_args": [ "mocha" ] - }, - "jest": { - "name": "jest", - "output_checkers": [ "jest" ], - "verbose_tests_extra_args": [ "jest" ] - }, - "jasmine": { - "name": "jasmine", - "output_checkers": [ "mocha" ] - }, - "tap": { - "name": "tap", - "output_checkers": [ "tap", "tap_raw" ] - }, - "lab": { - "name": "lab", - "output_checkers": [] - }, - "ava": { - "name": "ava", - "output_checkers": [ "ava", "ava_2" ] - }, - "gulp": { - "name": "gulp", - "output_checkers": [ "mocha" ] - }, - } - TRACKED_COVERAGE = { - "istanbul": "istanbul -- coverage testing", - "nyc": "nyc -- coverage testing", - "coveralls": "coveralls -- coverage testing", - "c8": "c8 -- coverage testing" - } - TRACKED_LINTERS = { - "eslint": "eslint -- linter", - "tslint": "tslint -- linter", - "xx": "xx -- linter", - "standard": "standard -- linter", - "prettier": "prettier -- linter", - "gulp lint": "gulp lint -- linter" - } - - TRACKED_RUNNERS = [ "node", "babel-node", "grunt" ] - - def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE): - self.success = success - self.error_stream = error_stream - self.output_stream = output_stream - self.manager = manager - # start all other fields as None - self.test_infras = None - self.test_covs = None - self.test_lints = None - self.nested_test_commands = None - self.num_passing = None - self.num_failing = None - self.timed_out = False - self.VERBOSE_MODE = VERBOSE_MODE - self.test_verbosity_output = None - - def set_test_command( self, test_command): - self.test_command = test_command - - def set_test_verbosity_output( self, verbose_output): - self.test_verbosity_output = verbose_output - - def compute_test_infras( self): - self.test_infras = [] - self.test_covs = [] - self.test_lints = [] - self.nested_test_commands = [] - if self.test_command: - self.test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, self.test_command, self.manager) ] - self.test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, self.test_command, self.manager) ] - self.test_covs += [ TestInfo.TRACKED_COVERAGE[ti] for ti in 
TestInfo.TRACKED_COVERAGE if called_in_command(ti, self.test_command, self.manager) ] - self.test_lints += [ TestInfo.TRACKED_LINTERS[ti] for ti in TestInfo.TRACKED_LINTERS if called_in_command(ti, self.test_command, self.manager) ] - self.test_infras = list(set(self.test_infras)) - self.test_covs = list(set(self.test_covs)) - self.test_lints = list(set(self.test_lints)) - # TODO: maybe we can also figure it out from the output stream - - def compute_nested_test_commands( self, test_commands): - # one might think that we should only check the package's own manager - # however, it's common to mix and match (esp. to run commands with "npm run" even if the package manager is yarn) - self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "npm run " + tc, self.test_command, self.manager) ] - self.nested_test_commands += [ tc for tc in test_commands if called_in_command( "yarn " + tc, self.test_command, self.manager) ] - - def compute_test_stats( self): - if not self.test_infras or self.test_infras == []: - return - test_output = self.output_stream.decode('utf-8') + self.error_stream.decode('utf-8') - ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') - test_output = ansi_escape.sub('', test_output) - self.num_passing = 0 - self.num_failing = 0 - self.timed_out = (self.error_stream.decode('utf-8') == "TIMEOUT ERROR") - for infra in self.test_infras: - output_checker_names = TestInfo.TRACKED_INFRAS.get(infra, {}).get("output_checkers", []) - if infra in TestInfo.TRACKED_RUNNERS and output_checker_names == []: - output_checker_names = self.OUTPUT_CHECKERS.keys() # all the checkers - for checker_name in output_checker_names: - div_factor = 2 if checker_name == "ava_2" else 1 - checker = self.OUTPUT_CHECKERS[ checker_name] - self.num_passing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["passing"][0], checker["passing"][1]) / div_factor) - self.num_failing += int(test_cond_count( test_output, checker["output_regex_fct"], checker["failing"][0], checker["failing"][1]) / div_factor) - - def get_json_rep( self): - json_rep = {} - if self.VERBOSE_MODE: - json_rep["test_debug"] = "" - if not self.success: - json_rep["ERROR"] = True - if self.VERBOSE_MODE: - json_rep["test_debug"] += "\nError output: " + self.error_stream.decode('utf-8') - if self.num_passing is not None and self.num_failing is not None: - json_rep["num_passing"] = self.num_passing - json_rep["num_failing"] = self.num_failing - if self.VERBOSE_MODE: - json_rep["test_debug"] += "\nOutput stream: " + self.output_stream.decode('utf-8') - if self.test_infras and self.test_infras != []: - json_rep["test_infras"] = [TestInfo.TRACKED_INFRAS.get(infra, {}).get("name", "Custom Testing: " + infra) for infra in self.test_infras] - if self.test_covs and self.test_covs != []: - json_rep["test_coverage_tools"] = self.test_covs - if self.test_lints and self.test_lints != []: - json_rep["test_linters"] = self.test_lints - if self.nested_test_commands and self.nested_test_commands != []: - json_rep["nested_test_commands"] = self.nested_test_commands - if "test_infras" not in json_rep: - json_rep["RUNS_NEW_USER_TESTS"] = False - if self.test_verbosity_output: - json_rep["test_verbosity_output"] = self.test_verbosity_output - json_rep["timed_out"] = self.timed_out - return( json_rep) - - def __str__(self): - to_ret = "" - if not self.success: - to_ret += "ERROR" - if self.VERBOSE_MODE: - to_ret += "\nError output: " + self.error_stream.decode('utf-8') - else: - to_ret += "SUCCESS" - if 
self.num_passing is not None and self.num_failing is not None: - to_ret += "\nPassing tests: " + str(self.num_passing) + "\nFailing tests: " + str(self.num_failing) - if self.VERBOSE_MODE: - to_ret += "\nOutput stream: " + self.output_stream.decode('utf-8') - if self.test_infras and self.test_infras != []: - to_ret += "\nTest infras: " + str([TestInfo.TRACKED_INFRAS[infra]["name"] for infra in self.test_infras]) - if self.test_covs and self.test_covs != []: - to_ret += "\nCoverage testing: " + str(self.test_covs) - if self.test_lints and self.test_lints != []: - to_ret += "\nLinter: " + str(self.test_lints) - if self.nested_test_commands and self.nested_test_commands != []: - to_ret += "\nNested test commands: " + str(self.nested_test_commands) - to_ret += "\nTimed out: " + str(self.timed_out) - return( to_ret) + return(test_infra.join(instrumented_test_command), output_files) def on_diagnose_exit( json_out, crawler, cur_dir, repo_name): + # if we still have the temp package.json, restore it + if os.path.isfile("TEMP_package.json_TEMP"): + run_command( "mv TEMP_package.json_TEMP package.json") # move back to the original working directory if repo_name != "": os.chdir( cur_dir) @@ -562,7 +331,7 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): if not crawler.DO_INSTALL: print("Can't run tests without installing (do_install: false) -- skipping") else: - (retcode, test_json_summary) = run_tests( manager, pkg_json, crawler) + (retcode, test_json_summary) = run_tests( manager, pkg_json, crawler, cur_dir) json_out["testing"] = test_json_summary else: json_out["testing"] = { "track_tests": False } From 49bb029a281411c2ff2daccc3d698e36c2f71bc0 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 5 May 2023 15:37:28 -0400 Subject: [PATCH 03/39] update: add option for different output file for test postproc (defaults to overwriting original) --- src/output_parsing/test_output_proc.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py index c2ff451..2c1022b 100644 --- a/src/output_parsing/test_output_proc.py +++ b/src/output_parsing/test_output_proc.py @@ -5,9 +5,11 @@ # used to convert the xunit reporter output from mocha into json # note: this overwrites the existing file # code from https://www.geeksforgeeks.org/python-xml-to-json/ -def xml_to_json(output_file): +def xml_to_json(output_file, new_output_file=None): + if new_output_file is None: + new_output_file = output_file with open(output_file) as xml_file: data_dict = xmltodict.parse(xml_file.read()) json_data = json.dumps(data_dict) - with open(output_file, 'w') as json_file: + with open(new_output_file, 'w') as json_file: json_file.write(json_data) \ No newline at end of file From 89e22ef8b800f99290a73f07bb6d71495b9252e6 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 5 May 2023 15:41:06 -0400 Subject: [PATCH 04/39] output file for verbosity tests now has repo name --- src/test_JS_repo_lib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index a8797d0..87bf2f1 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -101,7 +101,7 @@ def run_build( manager, pkg_json, crawler): build_script_list += [b] return( retcode, build_script_list, build_debug) -def run_tests( manager, pkg_json, crawler, cur_dir="."): +def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): test_json_summary = {} retcode = 0 if 
len(crawler.TRACKED_TEST_COMMANDS) == 0: @@ -126,6 +126,7 @@ def run_tests( manager, pkg_json, crawler, cur_dir="."): test_verbosity_output = {} for verbosity_index, test_infra in enumerate(test_info.test_infras): verbose_test_json = crawler.output_dir + "/" \ + + "repo_" + repo_name + "_" \ + "test_" + str(test_index) + "_"\ + "infra_" + str(verbosity_index) + "_" \ + crawler.TEST_VERBOSE_OUTPUT_JSON @@ -331,7 +332,7 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): if not crawler.DO_INSTALL: print("Can't run tests without installing (do_install: false) -- skipping") else: - (retcode, test_json_summary) = run_tests( manager, pkg_json, crawler, cur_dir) + (retcode, test_json_summary) = run_tests( manager, pkg_json, crawler, repo_name, cur_dir) json_out["testing"] = test_json_summary else: json_out["testing"] = { "track_tests": False } From 8991afd858a89c127becfa1b204b00e9be18d6fd Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 5 May 2023 20:29:21 -0400 Subject: [PATCH 05/39] add more info on the output files to the json output, for easy post-processing --- src/test_JS_repo_lib.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index 87bf2f1..a3f9c4f 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -146,14 +146,15 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): json.dump( pkg_json, f) print("Running verbosity: " + manager + infra_verbosity_command) verb_error, verb_output, verb_retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) + # if there's post-processing to be done if not infra_verbosity_post_proc is None: - for out_file in out_files: - infra_verbosity_post_proc(out_file) + for out_file_obj in out_files: + infra_verbosity_post_proc(out_file_obj["output_file"]) verbosity_index += 1 # get the output test_verbosity_infra = {} test_verbosity_infra["command"] = infra_verbosity_command - test_verbosity_infra["output_files"] = verbose_test_json + test_verbosity_infra["output_files"] = out_files if crawler.VERBOSE_MODE: test_verbosity_infra["test_debug"] = "\nError output: " + verb_error.decode('utf-8') \ + "\nOutput stream: " + verb_output.decode('utf-8') @@ -172,6 +173,7 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity new_infra_verbosity_args = "" output_files = [] for i, sub in enumerate(infra_verbosity_args.split("$PLACEHOLDER_OUTPUT_FILE_NAME$")): + out_file_object = { "test_script": test_script, "test_infra": test_infra } # not the file name if sub != "": new_infra_verbosity_args += sub @@ -180,11 +182,12 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity if path_index == -1: output_file = "out_" + str(num_files) + "_" + verbose_test_json new_infra_verbosity_args += output_file - output_files += [ output_file ] + out_file_object["output_file"] = output_file else: output_file = verbose_test_json[:path_index] + "/out_" + str(num_files) + "_" + verbose_test_json[path_index + 1:] new_infra_verbosity_args += output_file - output_files += [ output_file ] + out_file_object["output_file"] = output_file + output_files += [ out_file_object ] num_files += 1 infra_verbosity_args = new_infra_verbosity_args # split into sub-commands From 13b1be81b9362642b0e1b4277e51b6f0faeed49f Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Wed, 31 May 2023 15:41:41 -0400 Subject: [PATCH 06/39] script to get the required versions of node and npm for a project; to allow docker 
build --- get_rel_project_reqs.js | 166 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 get_rel_project_reqs.js diff --git a/get_rel_project_reqs.js b/get_rel_project_reqs.js new file mode 100644 index 0000000..9391e69 --- /dev/null +++ b/get_rel_project_reqs.js @@ -0,0 +1,166 @@ +// get the build requirements for the project, if they're present +// these are: +// - npm version +// - node version +// - OS +// +// some notes: +// - devs can specify a range of engines (npm, node) that their project works on. +// If a range is specified we just get one version in the valid range +// - if the project specifically doesn't work on linux, then we're bailing -- this +// only makes linux docker containers + +// also this is in JS instead of python bc the python semver library is garbage + +const semver = require('semver'); +const subproc = require('child_process'); +const fs = require('fs').promises; + +// can specify OS version: https://docs.npmjs.com/cli/v9/configuring-npm/package-json#os +// can specify node/npm version: https://docs.npmjs.com/cli/v9/configuring-npm/package-json#engines +async function get_reqs_from_pkg_json(pkg_json) { + let reqs = {} + + let engines = pkg_json["engines"] || {}; + // if not specified, "*" any version + let npm_req = engines["npm"] || "*"; + let node_req = engines["node"] || "*"; + + // if a range is specified, get a version in the valid range + let { node_version, npm_version } = await get_versions_in_range(node_req, npm_req); + reqs["node"] = node_version; + reqs["npm"] = npm_version; + + + oss = engines["os"] || []; + // explicit versions and linux is not listed + if (oss.length > 0 && oss.indexOf("linux") == -1) + reqs["linux"] = false + // explicitly excluding linux :'( + else if (oss.indexOf("!linux") != -1) + reqs["linux"] = false + else + reqs["linux"] = true + + return reqs +} + +const BANNED_VERSION_SUBSTRINGS = ["beta", "alpha", "pre"] + +// using semver, let's get a version that matches our specs +async function get_versions_in_range(node_version, npm_version) { + let node_npm_version_pairs = []; + try { + node_npm_version_pairs = await get_node_npm_version_pairs(); + } catch(e) { + console.log("Error getting npm/node pairs -- proceeding blind: " + e); + } + + // normal route: we have the data. 
+ // now just need to find a pair that matches + if (node_npm_version_pairs.length > 0) { + for (const pair of node_npm_version_pairs) { + if (is_banned(pair["npm"]) || is_banned(pair["node"])) { + continue; + } + if (semver.satisfies(pair["npm"], npm_version) && semver.satisfies(pair["node"], node_version)) { + return { "node_version": pair["node"], "npm_version": pair["npm"] } + } + } + } + + // if we get here we didn't return in the if above + // we don't have the data: get the list of all node versions from nvm: `nvm ls-remote` + // and all npm versions from npm itself: `npm view npm versions` + // NOTE: node version takes precedence over the npm version bc it's more commonly specified, + // and because it's more important + if (node_version !== "*" ) { + // then we care about the node version + subproc.exec('nvm ls-remote', { shell: '/bin/bash'}, (err, stdout, stderr) => { + let versions = stdout.split("\n").map(v => v.trim().split(" ")[0]); // strip formatting and any space-delimited labels (LTS, etc) + for (vers of versions) { + if (is_banned(vers)) { + continue; + } + if (semver.satisfies(vers, node_version)) { + return { "node_version": vers, "npm_version": "*" } + } + } + }) + } + + // if we get here, then we didn't have the version pair data, and we also didn't care about the node version + // so let's get an npm version + if (npm_version !== "*") { + // then we care about the npm version + subproc.exec('npm view npm versions --json', { shell: '/bin/bash'}, (err, stdout, stderr) => { + let versions = JSON.parse(stdout); + for (vers of versions) { + if (is_banned(vers)) { + continue; + } + if (semver.satisfies(vers, npm_version)) { + return { "node_version": "*", "npm_version": vers } + } + } + }) + } + + // no matching pairs: we're flying blind folks + return { "node_version": "*", "npm_version": "*" } +} + +// versions of node and the versions of npm they are bundled with +// see: https://stackoverflow.com/questions/51238643/which-versions-of-npm-came-with-which-versions-of-node +// read this file in -- from it we can get all the valid versions of npm and node +// for fetch usage: https://stackoverflow.com/questions/2499567/how-to-make-a-json-call-to-an-url/2499647#2499647 +const NODE_NPM_VERSIONS_URL = 'https://nodejs.org/dist/index.json'; +async function get_node_npm_version_pairs() { + let resp = await fetch(NODE_NPM_VERSIONS_URL); + // look for errors: + if (!resp.ok) { + throw new Error("Uh oh: error reaching npm/node version pairs"); + } + let all_data = await resp.json(); + let node_npm_pairs = []; + for (const vers_data of all_data) { + let node_version = vers_data["version"]; + let npm_version = vers_data["npm"]; + // if both were in the version data + if (node_version && npm_version) + node_npm_pairs.push({node: node_version, npm: npm_version}) + } + return node_npm_pairs; +} + +// check if a version is banned +function is_banned(vers) { + for (const banned of BANNED_VERSION_SUBSTRINGS) { + if (vers.indexOf(banned) > -1) { + return true; + } + } + return false; +} + +async function main(proj_dir) { + let pkg_json = {}; + try { + pkg_json = JSON.parse(await fs.readFile(proj_dir + "/package.json", 'utf8')); + } catch(e) { + console.error(e);//"Error, bailing out: " + proj_dir + " invalid directory, could not load package.json"); + process.exit(); + } + // get the node and npm versions + let reqs = await get_reqs_from_pkg_json(pkg_json); + console.log(reqs); +} + +if (process.argv.length != 3) { + console.error("Usage: node get_rel_project_req.js path_to_project_dir") + 
process.exit() +} + +let proj_dir = process.argv[2]; +console.log(proj_dir); +main(proj_dir); From 7f4f935858ed02cb95e91bf09a167380e929959c Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Wed, 31 May 2023 22:12:51 -0400 Subject: [PATCH 07/39] support for custom docker based on node/npm config of a project; and, installs deps of a project --- Dockerfile | 12 +++++--- build.sh | 62 ++++++++++++++++++++++++++++++++++++++--- get_rel_project_reqs.js | 15 ++++++---- 3 files changed, 76 insertions(+), 13 deletions(-) diff --git a/Dockerfile b/Dockerfile index 983305e..8e080cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,11 @@ FROM ubuntu:latest ARG DEBIAN_FRONTEND=noninteractive +# build arg: setting up for a specific repo? +ARG REPO_LINK + RUN apt-get update \ - && apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg nodejs npm xz-utils parallel + && apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel RUN apt update RUN apt -y install python3-pip @@ -10,10 +13,11 @@ RUN pip3 install bs4 scrapy RUN mkdir -p /home/npm-filter/results -COPY . /home/npm-filter +COPY src /home/npm-filter/ +COPY *.sh /home/npm-filter/ +COPY get_rel_project_reqs.js /home/npm-filter WORKDIR /home/npm-filter RUN git config --global http.sslVerify "false" -RUN npm config set strict-ssl false -RUN ./build.sh +RUN ./build.sh $REPO_LINK diff --git a/build.sh b/build.sh index 58b83af..e99329a 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,15 @@ #!/bin/bash +# can be building for one specific repo +repo_link=$1 + +# install nvm, so we can then use specific versions of node and npm +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | /usr/bin/bash +export NVM_DIR="$HOME/.nvm" +[ -s "$NVM_DIR/nvm.sh" ] && \. 
"$NVM_DIR/nvm.sh" # this loads nvm + + rm build.sh -rm Dockerfile rm runDocker.sh if [ -d local_mount ]; then rm -r local_mount @@ -16,7 +24,6 @@ unzip codeql-linux64.zip git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential -curl -sL https://deb.nodesource.com/setup_12.x | bash - apt-get update curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - @@ -28,11 +35,58 @@ source $HOME/.cargo/env pip3 install --upgrade setuptools setuptools_rust wheel -npm install -g jest mocha tap ava nyc yarn next - echo "export PATH=/home/codeql_home/codeql:$PATH" >> /root/.bashrc echo "alias python=python3" >> /root/.bashrc echo "alias ipython=ipython3" >> /root/.bashrc echo "alias vi=vim" >> /root/.bashrc cd /home/npm-filter + +repo_dir_name=SPEC_REPO_DIR +node_version='node' # default to just the latest version +npm_version='*' +# if there's a repo_link specified +if [ -n $repo_link ]; then + git clone $repo_link $repo_dir_name + # this will make the node_version and npm_version variables + set_req_vars=`node get_rel_project_reqs.js $repo_dir_name 2>/dev/null` + `$set_req_vars` + + if [[ $node_version == "*" ]]; then + node_version=node + fi +fi + +# set up node and npm, and also add this node/npm config to the bashrc +# so that it runs on docker startup too + +nvm install $node_version +nvm use $node_version +echo "nvm use $node_version" >> /root/.bashrc + +if [[ $npm_version == "*" ]]; then + nvm install-latest-npm + echo "nvm install-latest-npm" >> /root/.bashrc +else + npm install -g npm@${npm_version} + echo "npm install -g npm@${npm_version}" >> /root/.bashrc +fi + + +# permissive +npm config set strict-ssl false + +# install the dependencies: but use the current version of npm +npm install -g jest mocha tap ava nyc yarn next semver + +if [ -n $repo_link ]; then + cd $repo_dir_name + # setup the project + if [ -f "yarn.lock" ]; then + yarn > /dev/null + else + npm install > /dev/null + fi + cd .. 
+fi + diff --git a/get_rel_project_reqs.js b/get_rel_project_reqs.js index 9391e69..314b233 100644 --- a/get_rel_project_reqs.js +++ b/get_rel_project_reqs.js @@ -28,8 +28,8 @@ async function get_reqs_from_pkg_json(pkg_json) { // if a range is specified, get a version in the valid range let { node_version, npm_version } = await get_versions_in_range(node_req, npm_req); - reqs["node"] = node_version; - reqs["npm"] = npm_version; + reqs["node_version"] = node_version; + reqs["npm_version"] = npm_version; oss = engines["os"] || []; @@ -142,18 +142,24 @@ function is_banned(vers) { } return false; } + +function print_as_bash_vars(reqs) { + for ( key in reqs) { + console.log(key + "=" + reqs[key]); + } +} async function main(proj_dir) { let pkg_json = {}; try { pkg_json = JSON.parse(await fs.readFile(proj_dir + "/package.json", 'utf8')); } catch(e) { - console.error(e);//"Error, bailing out: " + proj_dir + " invalid directory, could not load package.json"); + console.error("Error, bailing out: " + proj_dir + " invalid directory, could not load package.json"); process.exit(); } // get the node and npm versions let reqs = await get_reqs_from_pkg_json(pkg_json); - console.log(reqs); + print_as_bash_vars(reqs); } if (process.argv.length != 3) { @@ -162,5 +168,4 @@ if (process.argv.length != 3) { } let proj_dir = process.argv[2]; -console.log(proj_dir); main(proj_dir); From de485ea114988b90632ed1577d96f5e8d30d3c20 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Wed, 14 Jun 2023 00:11:38 -0400 Subject: [PATCH 08/39] add build to docker, and fix a few dumb bugs --- Dockerfile | 7 +++++-- build.sh | 46 +++++++++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8e080cc..3ad85f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,11 +9,14 @@ RUN apt-get update \ RUN apt update RUN apt -y install python3-pip -RUN pip3 install bs4 scrapy +RUN pip3 install bs4 scrapy xmltodict RUN mkdir -p /home/npm-filter/results +RUN mkdir /home/npm-filter/src +RUN mkdir /home/npm-filter/configs -COPY src /home/npm-filter/ +COPY src /home/npm-filter/src +COPY configs /home/npm-filter/configs COPY *.sh /home/npm-filter/ COPY get_rel_project_reqs.js /home/npm-filter diff --git a/build.sh b/build.sh index e99329a..559869d 100755 --- a/build.sh +++ b/build.sh @@ -17,11 +17,11 @@ fi mkdir -p /home/codeql_home -cd /home/codeql_home -curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip -unzip codeql-linux64.zip -# clone stable version -git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo +# cd /home/codeql_home +# curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip +# unzip codeql-linux64.zip +# # clone stable version +# git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential apt-get update @@ -35,19 +35,25 @@ source $HOME/.cargo/env pip3 install --upgrade setuptools setuptools_rust wheel -echo "export PATH=/home/codeql_home/codeql:$PATH" >> /root/.bashrc echo "alias python=python3" >> /root/.bashrc echo "alias ipython=ipython3" >> /root/.bashrc echo "alias vi=vim" >> /root/.bashrc cd /home/npm-filter -repo_dir_name=SPEC_REPO_DIR +if [ -d TESTING_REPOS ]; then + rm -rf TESTING_REPOS +fi +mkdir TESTING_REPOS + node_version='node' # default to just the latest version npm_version='*' 
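+# 'node' is nvm's alias for the newest release; npm_version '*' falls through to nvm install-latest-npm below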
# if there's a repo_link specified if [ -n $repo_link ]; then - git clone $repo_link $repo_dir_name + cd TESTING_REPOS + git clone $repo_link + # repo dir will be the only thing in TESTING_REPOS + repo_dir_name=`ls` # this will make the node_version and npm_version variables set_req_vars=`node get_rel_project_reqs.js $repo_dir_name 2>/dev/null` `$set_req_vars` @@ -62,14 +68,21 @@ fi nvm install $node_version nvm use $node_version -echo "nvm use $node_version" >> /root/.bashrc + +NVM_DIR=/root/.nvm +NODE_VERSION=`node --version` + +echo "export NODE_VERSION=\"$NODE_VERSION\"" >> /root/.bashrc +echo "export NVM_DIR=$NVM_DIR" >> /root/.bashrc +echo "export NODE_PATH=$NVM_DIR/$NODE_VERSION/lib/node_modules" >> /root/.bashrc +echo "export PATH=$NVM_DIR/$NODE_VERSION/bin:/home/codeql_home/codeql:$PATH" >> /root/.bashrc + +# echo "nvm use $node_version" >> /root/.bashrc if [[ $npm_version == "*" ]]; then nvm install-latest-npm - echo "nvm install-latest-npm" >> /root/.bashrc else npm install -g npm@${npm_version} - echo "npm install -g npm@${npm_version}" >> /root/.bashrc fi @@ -80,13 +93,8 @@ npm config set strict-ssl false npm install -g jest mocha tap ava nyc yarn next semver if [ -n $repo_link ]; then - cd $repo_dir_name - # setup the project - if [ -f "yarn.lock" ]; then - yarn > /dev/null - else - npm install > /dev/null - fi - cd .. + cd /home/npm-filter + # do the install and build + python3 src/diagnose_github_repo.py --repo_link $repo_link --config configs/build_only_config.json --output_dir results fi From e7a0e0eef74a00b9095f10ca2e5536e8eb60e294 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Wed, 14 Jun 2023 01:41:16 -0400 Subject: [PATCH 09/39] output parsing to a csv --- Dockerfile | 2 +- src/TestInfo.py | 2 +- src/output_parsing/test_output_proc.py | 37 +++++++++++++++++++------- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3ad85f7..6f32d98 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get update \ RUN apt update RUN apt -y install python3-pip -RUN pip3 install bs4 scrapy xmltodict +RUN pip3 install bs4 scrapy xmltodict pandas RUN mkdir -p /home/npm-filter/results RUN mkdir /home/npm-filter/src diff --git a/src/TestInfo.py b/src/TestInfo.py index 47a832f..6cf20af 100644 --- a/src/TestInfo.py +++ b/src/TestInfo.py @@ -49,7 +49,7 @@ class TestInfo: "mocha": { "args": " -- --reporter xunit --reporter-option output=$PLACEHOLDER_OUTPUT_FILE_NAME$", "position": -1, - "post_processing": TestOutputProc.xml_to_json + "post_processing": TestOutputProc.parse_mocha_json_to_csv } } TRACKED_INFRAS = { diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py index 2c1022b..fcf98ec 100644 --- a/src/output_parsing/test_output_proc.py +++ b/src/output_parsing/test_output_proc.py @@ -1,15 +1,32 @@ import json import xmltodict +import pandas as pd -# convert an xml file to json -# used to convert the xunit reporter output from mocha into json -# note: this overwrites the existing file -# code from https://www.geeksforgeeks.org/python-xml-to-json/ -def xml_to_json(output_file, new_output_file=None): +def parse_mocha_json_to_csv(output_file, new_output_file=None): if new_output_file is None: - new_output_file = output_file + new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension + # convert an xml file to json + # used to convert the xunit reporter output from mocha into json + # code from https://www.geeksforgeeks.org/python-xml-to-json/ with open(output_file) as 
xml_file: - data_dict = xmltodict.parse(xml_file.read()) - json_data = json.dumps(data_dict) - with open(new_output_file, 'w') as json_file: - json_file.write(json_data) \ No newline at end of file + data_dict = xmltodict.parse(xml_file.read()).get("testsuite", {}) + # the format: all the tests are in a top-level list called "testcase" + test_suites = [] + test_names = [] + test_runtimes = [] + test_stdout = [] + test_pass_fail = [] + for test in data_dict.get("testcase", []): + test_suites += [test.get("@classname", "").strip()] + test_names += [test.get("@name", "").strip()] + test_runtimes += [float(test.get("@time", "NaN"))] + if test.get("failure", False): + test_stdout += [test["failure"]] + test_pass_fail += ["Fail"] + else: + test_stdout += [""] + test_pass_fail += ["Pass"] + res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail))) + res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] + with open(new_output_file, 'w') as csv_file: + csv_file.write(res_df.to_csv()) \ No newline at end of file From ecd7b9b10b3ee794d3eda80097b946eebf4d37ef Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Wed, 14 Jun 2023 15:56:48 +0000 Subject: [PATCH 10/39] Adds CI workflow for end-to-end testing of NPM Filter --- .github/workflows/barbosa23.yml | 285 +++++++++++++++++++++++++ .github/workflows/barbosa23flaky.yml | 36 ++++ .github/workflows/end2end.yml | 99 +++++++++ .github/workflows/smoketest.yml | 31 +++ output_proc_scripts/count_tests_run.py | 71 ++++++ runDocker.sh | 2 +- 6 files changed, 523 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/barbosa23.yml create mode 100644 .github/workflows/barbosa23flaky.yml create mode 100644 .github/workflows/end2end.yml create mode 100644 .github/workflows/smoketest.yml create mode 100644 output_proc_scripts/count_tests_run.py diff --git a/.github/workflows/barbosa23.yml b/.github/workflows/barbosa23.yml new file mode 100644 index 0000000..df065d9 --- /dev/null +++ b/.github/workflows/barbosa23.yml @@ -0,0 +1,285 @@ +name: Test on all of Barbosa23 JS (well, first 250ish) + +on: + workflow_dispatch + +env: + PROJECTS_JSON: | + { projects: [ + {"project_url": "adriantoine/enzyme-to-json", "project_sha": "7d90cdf5f1878815a46b3a53f4e1e1b63418b38f"}, + {"project_url": "agenda/agenda", "project_sha": "41a2b3793400073f564c37f7d2d0ec2d7e237bf2"}, + {"project_url": "airbnb/mocha-wrap", "project_sha": "e6bf4f6cff6d40425b2af323186cc1e69d05a270"}, + {"project_url": "allenmyao/canvas-graph-creator", "project_sha": "fadcd223a82ff665ee34685a1845d8087b997ee3"}, + {"project_url": "americanexpress/one-app-cli", "project_sha": "23a992558cc32cdc8a51c11e4fe80c2e2924aaf9"}, + {"project_url": "amireh/happypack", "project_sha": "e45926e9754f42098d882ff129269b15907ef00e"}, + {"project_url": "andreypopov/node-red-contrib-deconz", "project_sha": "7a7cdb10e4c9430a10dfe28fc9295abeaf107af5"}, + {"project_url": "andyholmes/gnome-shell-extension-gsconnect", "project_sha": "370493b76ab4ee7f30ba154b1e5b554a02413703"}, + {"project_url": "angular-translate/angular-translate", "project_sha": "1114534c064eddfb77fc4243b0deb61c37f5f41f"}, + {"project_url": "angular-ui/ui-sortable", "project_sha": "e763b5765eea87743c8463ddf045a53015193c20"}, + {"project_url": "apache/cordova-lib", "project_sha": "797286963eb526a2f5ad673291ff5733d6fb275b"}, + {"project_url": "apache/incubator-ponymail-foal", "project_sha": "f5addb5824e0c4d08474b22840ce556deade48f6"}, + {"project_url": "apiaryio/dredd", "project_sha": 
"5ab7b162afbbd8881cd716f27627dc2d05213eb7"}, + {"project_url": "apiaryio/dredd-transactions", "project_sha": "57477169b82a2980cb279c80a9caae5825754826"}, + {"project_url": "appium/appium", "project_sha": "2d124323c5973ef9d3e190f7401e67106886ffd4"}, + {"project_url": "appium/appium-desktop", "project_sha": "12a988aa08b9822e97056a09486c9bebb3aad8fe"}, + {"project_url": "atom-community/atom", "project_sha": "0f7c5c14eaad9643bdc16cf80579b457baa2dd8a"}, + {"project_url": "atom/atom", "project_sha": "1c3bd35ce238dc0491def9e1780d04748d8e18af"}, + {"project_url": "atom/find-and-replace", "project_sha": "7871ad213e2c09f99e003c8f97cd7d4b7f9f2d82"}, + {"project_url": "aurelia/cli", "project_sha": "82091bbeebcc4b08c9929e37a8cd91c5b5025791"}, + {"project_url": "Automattic/kue", "project_sha": "c5647b1a8890319169fa4ce2cf4ed4122c1c704a"}, + {"project_url": "avajs/ava", "project_sha": "568fe40c987dd6c593dfbcf4144d1d1627955d46"}, + {"project_url": "axa-ch-webhub-cloud/pattern-library", "project_sha": "04d7e0f227f85d7b39eb0a6bfa9911076027e924"}, + {"project_url": "axa-ch/patterns-library", "project_sha": "04d7e0f227f85d7b39eb0a6bfa9911076027e924"}, + {"project_url": "azachar/protractor-screenshoter-plugin", "project_sha": "989f8e0b52b986f7ddb07831b5b92dca6dceeb07"}, + {"project_url": "Azure/azure-iot-sdk-node", "project_sha": "450c672001eb96d99587eaeae5fe75ab0912e5d6"}, + {"project_url": "babel/babel-eslint", "project_sha": "b5b9a09edbac4350e4e51033a4608dd95dad1f67"}, + {"project_url": "badges/shields", "project_sha": "14892e3943a4677332618d8b9f584766f7940ee7"}, + {"project_url": "bbc/simorgh", "project_sha": "4c7e7d1ecc525dd62fb14bd98035a5e739c14290"}, + {"project_url": "bcgov/name-examination", "project_sha": "b55fc1127e0db98dc4fe780ad80831f4b1a2872e"}, + {"project_url": "bcoin-org/bcoin", "project_sha": "b0058696cc10c8f9b17190b31fd2cd907d85d047"}, + {"project_url": "beakerbrowser/beaker", "project_sha": "764bdefeeed9558dbf10aec77df262a896f57236"}, + {"project_url": "bee-queue/bee-queue", "project_sha": "f6d901308f3b6433f2531edc4a9ac354aab434e5"}, + {"project_url": "bkimminich/juice-shop", "project_sha": "b156c969d7bc8f24544f162f482c6285f58b4285"}, + {"project_url": "blocknative/assist", "project_sha": "3fb619e3994752eacbddba4078d2bf0cbc7e2c9c"}, + {"project_url": "bmazzarol/atom-plsql-linter", "project_sha": "02f6a1d48c4b5dbaa375dfb13d52703fc14b90a0"}, + {"project_url": "BookMyComics-Developers/BookMyComics", "project_sha": "1efe6adb3490d7f62e7b31e3d75ac15b3b981875"}, + {"project_url": "brave/brave-browser", "project_sha": "870d381ff8b08cb70d2b9fdea4b320d17bfe68f7"}, + {"project_url": "brion/ogv.js", "project_sha": "5ce404a6aa8f53b7cef220916b89e613ac58fd17"}, + {"project_url": "busterjs/buster", "project_sha": "5e20f3e23aeb7ea996be7a669e520c054b8f1035"}, + {"project_url": "CalebMorris/react-moment-proptypes", "project_sha": "89a61c17250ea7b71d55d2855f6739ae4071529a"}, + {"project_url": "CartoDB/cartodb", "project_sha": "9518ec6917e4091a56dc7b9d5fbf089bcb003271"}, + {"project_url": "cerner/terra-core", "project_sha": "15458289ff022f302144932e047a6669b6c461a5"}, + {"project_url": "cerner/terra-dev-site", "project_sha": "80a0e471548f553b7e58e30a2a0b6e8c0e7682fc"}, + {"project_url": "cloudfoundry-attic/cf-abacus", "project_sha": "68aad9e2d497335d3a2e0da736bb9f01fe54dfb3"}, + {"project_url": "cncf/landscapeapp", "project_sha": "62fa27892cd9e9095567c0c7e5d84fd514149cd9"}, + {"project_url": "codeceptjs/CodeceptJS", "project_sha": "3fb39ae1d4f9b00438b1398cefba0dc677260aeb"}, + {"project_url": 
"codetheweb/tuyapi", "project_sha": "905670c7cf7a8ad5756ea08eeca115178121423b"}, + {"project_url": "covidwatchorg/portal", "project_sha": "95e36eeb777fca76318b5b0680c82f43f502fee3"}, + {"project_url": "cryptee/web-client", "project_sha": "10f96daff7214a0e5afb71e56eed7256e59e17b0"}, + {"project_url": "ctrlplusb/react-universally", "project_sha": "83d533a9c780716d18f034f7fb52dbd3a1c4051b"}, + {"project_url": "cypress-io/cypress-example-recipes", "project_sha": "292325e6638bb4626861bc2f6df99d26ab8e7bff"}, + {"project_url": "DataDog/dd-trace-js", "project_sha": "71a5288dea5df31c6a492ce22ff8169552548d47"}, + {"project_url": "DeComX/pantheon", "project_sha": "deepforge-dev - deepforge"}, + {"project_url": "deepforge-dev/deepforge", "project_sha": "f9cb1ff12644f64c01ca4d71ca66e6e22506b084"}, + {"project_url": "dhis2/ui", "project_sha": "625c9c9391cdc6f625c927d20a39eef37f550a4a"}, + {"project_url": "digidem/mapeo-core", "project_sha": "fd782a55cebb5f54a45f2f042287218c849b5f35"}, + {"project_url": "dmitriz/min-karma", "project_sha": "8f1bcd25315d34a304d0d358166b9cb95a8a7871"}, + {"project_url": "Dogfalo/materialize", "project_sha": "824e78248b3de81e383445e76ffb04cc3264fe7d"}, + {"project_url": "domvm/domvm", "project_sha": "67de1a0cdf1879ad87926dafde0b8961f660c906"}, + {"project_url": "duckduckgo/tracker-radar-collector", "project_sha": "3e9f49e46e5051e9f3d26bcd3be054447af887e4"}, + {"project_url": "dukecon/dukecon_pwa", "project_sha": "127e8425ccff201a394448864407403c4e80d691"}, + {"project_url": "dustinspecker/generator-ng-poly", "project_sha": "53f0beec9ad9a33a9f6b47649ca34a4d6bae95f8"}, + {"project_url": "EFForg/privacybadger", "project_sha": "6f81b217e5717c46867cfec9e9b378da9354a84a"}, + {"project_url": "elastic/apm-agent-nodejs", "project_sha": "9f13472d69523109d69315c6bb212957e46809cb"}, + {"project_url": "elifesciences/elife-xpub", "project_sha": "bccea1e199bd213eef8ad03fca33d66727e34ccd"}, + {"project_url": "ember-app-scheduler/ember-app-scheduler", "project_sha": "fb0b4e0075cf8847664e5459cd59bf74a0a1d379"}, + {"project_url": "ember-batcher/ember-batcher", "project_sha": "231fb12ae51fde5e42704fa0e1daece8dd371532"}, + {"project_url": "ember-cli/ember-cli", "project_sha": "b851c0edcae99701335e3e90efe0c225951c4f0b"}, + {"project_url": "emberjs/ember.js", "project_sha": "3fa9068831b1e3cf8e594647a880adc0809861f3"}, + {"project_url": "eobrain/bajel", "project_sha": "ecbfe18a990e97f677e522a7240617df29d47cd6"}, + {"project_url": "eslint/eslint", "project_sha": "9e3d77cba65d0e38e07996e57961fb04f30d9303"}, + {"project_url": "ether/etherpad-lite", "project_sha": "7656c6b9f195a79bb07bd3b77b55de1393ab71f4"}, + {"project_url": "ethereum/web3.js", "project_sha": "f8a2533c2b09ce0a62f8414f2f6eed83ab78ca1f"}, + {"project_url": "ExpressGateway/express-gateway", "project_sha": "a294cac39c98d66f5750c424a24e0bb8ce351c1c"}, + {"project_url": "facebook/metro", "project_sha": "c6a94bc170cf95a6bb21b5638929ec3311a9a5b7"}, + {"project_url": "facebook/prepack", "project_sha": "5beedbe85bd5b9d2de1264abafbb3b76f8584297"}, + {"project_url": "facebook/react-native", "project_sha": "af99a6890b84713d002fbbd872f10fe2e6304861"}, + {"project_url": "fastify/fastify", "project_sha": "d1ad6c17ce9731f1bc28377318b010966ca339cd"}, + {"project_url": "flow-typed/flow-typed", "project_sha": "0e28de5e8a69def522d61f06ddffb624d465bceb"}, + {"project_url": "FlowCrypt/flowcrypt-browser", "project_sha": "92d0188c66572d2c14ef4ed24602b8a58445630c"}, + {"project_url": "FormidableLabs/nodejs-dashboard", "project_sha": 
"885fc96fec262b668da9282f57374966f7512b76"}, + {"project_url": "freeboardgames/FreeBoardGames.org", "project_sha": "b11dbaa3715d71605bced4c8f04a40a79bd7cfef"}, + {"project_url": "freedomjs/freedom", "project_sha": "9638e840aec9598c4d60383ed22444c525aefbf5"}, + {"project_url": "freedomjs/freedom-for-chrome", "project_sha": "0154d345e99ac781460a790a31772c4352cb41b6"}, + {"project_url": "freedomjs/freedom-for-firefox", "project_sha": "3a2922f378a9dbbb58f302b0216b56ec23cf17b3"}, + {"project_url": "getgauge/taiko", "project_sha": "532c62c69da79852ef3cf8abd2325d2fff903a15"}, + {"project_url": "GioBonvi/GoogleContactsEventsNotifier", "project_sha": "7e657a9e606f449fef22feae68d448d11083122b"}, + {"project_url": "google/shaka-player", "project_sha": "a543b80648f429524c522295b0f4f60039c2e0ea"}, + {"project_url": "googleads/videojs-ima", "project_sha": "11ecbefa37fbdbd6877fece63c38c11338b9e913"}, + {"project_url": "GoogleChrome/lighthouse", "project_sha": "b981a38e7b3becc512f0a7985b1d2a64320da235"}, + {"project_url": "GoogleChrome/workbox", "project_sha": "ee62b5b5b9ed321af457a2d962b2a34196a80263"}, + {"project_url": "hack4impact-uiuc/life-after-hate", "project_sha": "9cad8555b52ff6bd98c7d15fae456e2f8b7a2a8a"}, + {"project_url": "hapijs/lab", "project_sha": "aaaebb95108d3fdcb264a56e836c3459380844b1"}, + {"project_url": "hapijs/nes", "project_sha": "977750a158e0b0105c719e0e2d4bd354154bf0a8"}, + {"project_url": "hapijs/subtext", "project_sha": "ae0a2dd48ab8c6e2b8ebdebbc31baddb6b4c49b7"}, + {"project_url": "hapipal/hpal", "project_sha": "4661f17ac8bdb1d3915695b2f819ff2336730131"}, + {"project_url": "hapipal/schwifty", "project_sha": "088088572e7aac82b77a78d9c8ed05e7f1d5e957"}, + {"project_url": "Haufe-Lexware/wicked.haufe.io", "project_sha": "1efadeabae7b7ccb4b17473e9aa5d0af60796adb"}, + {"project_url": "hden/node-serf", "project_sha": "d176dede5c87e0285c383f7bbda3848584d6a2ad"}, + {"project_url": "HSLdevcom/transitlog-ui", "project_sha": "316a7843c2a8e6d66db7f4c9181f775f95f926ed"}, + {"project_url": "html-next/vertical-collection", "project_sha": "fd928512a33d44155a724ed65c5ba21cf7950d86"}, + {"project_url": "Human-Connection/Human-Connection", "project_sha": "72a8f3d7f567442ca5e191672abfb47ea1b825a6"}, + {"project_url": "hyperledger/cactus", "project_sha": "334612d251c56811a844b3308dc1561dcd6fc460"}, + {"project_url": "IBM-Cloud/gp-js-client", "project_sha": "8ac9e9b0ebee3264d446d68ff487ef995173bff0"}, + {"project_url": "ikydd/blackat", "project_sha": "26a8ba8dac8be027978b5fc046131936aadb76ec"}, + {"project_url": "IMA-WorldHealth/bhima", "project_sha": "f76ac0085b2566d249cdd6ab135950faf0e10da3"}, + {"project_url": "ing-bank/lion", "project_sha": "02e61285ddc83e4cb2ec7d2acc6d6a6620a94924"}, + {"project_url": "iodide-project/iodide", "project_sha": "f9dd78a725ce1a2aa96784a46b527b740605431b"}, + {"project_url": "ipfs-inactive/js-ipfs-http-client", "project_sha": "995abb41b83c8345b16cba67151e9ccb9cbea4de"}, + {"project_url": "israelroldan/grunt-ssh", "project_sha": "7175b5548291bb2105a33a45d772573cb888430d"}, + {"project_url": "istanbuljs/nyc", "project_sha": "ab7c53b2f340b458789a746dff2abd3e2e4790c3"}, + {"project_url": "jaggedsoft/node-binance-api", "project_sha": "950d773a5f2c3a61c6e29b53e3af57594921a239"}, + {"project_url": "jamesshore/quixote", "project_sha": "6b5c07b4d202d44e0ee6ecd99c22df4547558c17"}, + {"project_url": "jamhall/s3rver", "project_sha": "f834192dbb07da4548b48c95066bae50cfaac819"}, + {"project_url": "JeroenDeDauw/Maps", "project_sha": "f9bec919e77d671c4e96f9aa16d0452d17f700c7"}, + 
{"project_url": "jivid/akobi", "project_sha": "ccd8d4de55b2066db9c11f9f00ffeed36ea33673"}, + {"project_url": "jorgebucaran/hyperapp", "project_sha": "c3717e3ff78b6fa8663575d34d330d68929a0974"}, + {"project_url": "jrcasso/mean-demo", "project_sha": "31f3e21420fd5ef13cc7555a56e3106a31dd4a36"}, + {"project_url": "json-schema-faker/json-schema-faker", "project_sha": "9bbe0e895cc9ebce939d5f358385f151d72c739c"}, + {"project_url": "jwplayer/jwplayer", "project_sha": "30353cd1e1f3017a96ef2854ef758fb4f479cd7a"}, + {"project_url": "kaliber5/ember-bootstrap", "project_sha": "c92d1898b715da0ebd534a813a4ce592d1ed115c"}, + {"project_url": "kategengler/ember-cli-code-coverage", "project_sha": "46dc079ab518bddc325fb305790d58adf2c28aae"}, + {"project_url": "keystonejs/keystone", "project_sha": "67f0f2ce7fa58288cf06d198e4b1a5c51d265bcf"}, + {"project_url": "kgiszewski/Archetype", "project_sha": "2e0bce99b9f386aa24a56be02fca8cd7388b39bd"}, + {"project_url": "kiwicom/smart-faq", "project_sha": "2131be6290020a11dc6ad236eb82c5bde75945d8"}, + {"project_url": "Lambda-School-Labs/labs-spa-starter", "project_sha": "2d1bbf41db2a97574c62cc3d6745cc0b2e644ead"}, + {"project_url": "lekoder/consul-kv-object", "project_sha": "5cf3c44f416d28d11c567c9caab86b27e3e0f0a0"}, + {"project_url": "liferay/senna.js", "project_sha": "fd89ca02de0ad57e7697c5088f4e490f8d181958"}, + {"project_url": "linkeddata/dokieli", "project_sha": "52f9c3cc8519d45339996f2a926bae18c37bf5d8"}, + {"project_url": "LLK/scratch-vm", "project_sha": "e4bb21f1817a2b7bbca9be19da6eba529291ed0c"}, + {"project_url": "magda-io/magda", "project_sha": "754ec4cf2aff491549007cd82f676da4c3759061"}, + {"project_url": "magento/pwa-studio", "project_sha": "836aa40608465ccc28066d4fbdddee3a6a560b75"}, + {"project_url": "marcos8896/nutrition-care-node-api", "project_sha": "20b08a443d4d7714dc8ea137b3ffcce51f5524c0"}, + {"project_url": "marionettejs/backbone.marionette", "project_sha": "85936fc518dd7bb0934faf231123172e3eee0169"}, + {"project_url": "marklogic-community/marklogic-samplestack", "project_sha": "5449924fe9abd1712d3ef20ca2f25f2e291578e0"}, + {"project_url": "material-components/material-components-web", "project_sha": "a9ff9866f237fbeebe94e655ae578b68ce675a04"}, + {"project_url": "mbland/custom-links", "project_sha": "3e58bb2b4ea335451489d9b81226a414d7352c3f"}, + {"project_url": "mcollina/autocannon", "project_sha": "ba3a2124fa68be6f263e860001be419d71de39d9"}, + {"project_url": "meteor/meteor", "project_sha": "dc38e4325dcd88fb3c6d1be1639680c6ff6f5e80"}, + {"project_url": "microsoft/ChakraCore", "project_sha": "c3ead3f8a6e0bb8e32e043adc091c68cba5935e9"}, + {"project_url": "mikakaraila/node-red-contrib-opcua", "project_sha": "aec7272f4f7554a7473daf19136e6fa8c9dfc681"}, + {"project_url": "milieuinfo/webcomponent-vl-ui-wizard", "project_sha": "efecc0c4f3659ac1348ae456604534d42e6b90b7"}, + {"project_url": "mishoo/UglifyJS", "project_sha": "f0ca9cfbe65efc919149e7cd74cedd186d6413ee"}, + {"project_url": "mitodl/open-discussions", "project_sha": "462c242eab04f68552e80a6f416c18c4b0b57cb0"}, + {"project_url": "mocha-parallel/mocha-parallel-tests", "project_sha": "d1b2e88fa6bad71d0a5d7487809fcb4be4030b9d"}, + {"project_url": "mohsen1/yawn-yaml", "project_sha": "aab6ee95ead9da9b7f1b1bbfb7325b2e90d7d3f5"}, + {"project_url": "moorara/microservices-demo", "project_sha": "bc16c5eeb6091392e62d0c260d2acfe48aef4b06"}, + {"project_url": "mozilla/blok", "project_sha": "faac2281c48cd226b4fb8c4e22de588a02328c31"}, + {"project_url": "mui-org/material-ui", "project_sha": 
"6e8b99d133025c9e785a778a183fa81383998a42"}, + {"project_url": "n5ro/aframe-extras", "project_sha": "5c20172a159aba54e7b6f7f243a864f76905448e"}, + {"project_url": "nasa-gibs/worldview", "project_sha": "c4769a03394676dd4ec7126cc14a7c67dc7e4eaf"}, + {"project_url": "NativeScript/nativescript-cli", "project_sha": "eb918011d6f0be9a8ccb6b569628e3960fd4f8b9"}, + {"project_url": "nccgroup/tracy", "project_sha": "6ce4714a3b3b407503cecd8c9842132fe4dc37e4"}, + {"project_url": "neffo/earth-view-wallpaper-gnome-extension", "project_sha": "016c982dccd9e7b454b84e9f50b4accc1b4348d6"}, + {"project_url": "NetsBlox/NetsBlox", "project_sha": "419ca83482c562a0cfa5af1d2dd9907b7387f7ef"}, + {"project_url": "nightwatchjs/nightwatch", "project_sha": "4b09cb57c8a9fb29d6b6795e59c64b4942bddf67"}, + {"project_url": "noble/bleno", "project_sha": "72028bc995d55cb9dcf223f9b0ffce563d091212"}, + {"project_url": "nock/nock", "project_sha": "8a38f41a28b36fef50d5723daa94cf21a6490fc5"}, + {"project_url": "node-alarm-dot-com/homebridge-node-alarm-dot-com", "project_sha": "26516177a2324aa53b0cfbb8af52fb1354be78be"}, + {"project_url": "nodejs/citgm", "project_sha": "460c3a008f1c33bda2e136631d0162479419ed36"}, + {"project_url": "nodejs/node-chakracore", "project_sha": "770c8dcd1bc3e0fce2d4497b4eec3fe49d829d43"}, + {"project_url": "nodejs/undici", "project_sha": "c415fbbb59e2b898c5db6a681265cf3da865d02c"}, + {"project_url": "npm/cli", "project_sha": "29622c1349b38173924058a1fb0ede9edf8a5f6f"}, + {"project_url": "NSWSESMembers/availability-poc", "project_sha": "7ebc17b6005a3c1573e6c68bd5411b0657c98f71"}, + {"project_url": "nwjs-community/nw-builder", "project_sha": "a1d4fb5148255e2b6fa5bce4a2167c9be8cc71d6"}, + {"project_url": "observablehq/plot", "project_sha": "4d3cd1586e7412b95687157d12c792fde84a2229"}, + {"project_url": "ocadotechnology/rapid-router", "project_sha": "38adf70a3e76a05fa814a7d3c0e1c61e4ba125c2"}, + {"project_url": "ONSdigital/eq-author-app", "project_sha": "8bb1621cd4973281730a38378765b1718b08ca54"}, + {"project_url": "Ontotext-AD/graphdb.js", "project_sha": "d0880dabf966e82def44537a720bf620d6d29f5e"}, + {"project_url": "open-wc/open-wc", "project_sha": "57ddb3ccfff6b00468d3a7ebabbc15cfe966f7a9"}, + {"project_url": "OpenEnergyPlatform/oeplatform", "project_sha": "1ce978f8faade3effe4cf7d3eec7522e990df910"}, + {"project_url": "openseadragon/openseadragon", "project_sha": "ebab356c207e626b6622f88ffcb0cd28b918f85d"}, + {"project_url": "openstyles/stylus", "project_sha": "50a0a115d1c6587d221f3253feeb4cb88b6f5336"}, + {"project_url": "Opentrons/opentrons", "project_sha": "f8f7e699d512f59e1a2f4a9969428744e86a6a22"}, + {"project_url": "OpenZeppelin/openzeppelin-contracts", "project_sha": "604025400f9be5c32581bb6ab03a46bbc09c5562"}, + {"project_url": "OriginProtocol/origin", "project_sha": "57c55c023188e3a53cb9ee3dfafe0bf3210e0cf8"}, + {"project_url": "owncloud/contacts", "project_sha": "efb06fef530dbf1812cbb98d651ec87680de97a1"}, + {"project_url": "palantir/eclipse-typescript", "project_sha": "007579ba58d2979a5989caf04733a9d5dfcc56de"}, + {"project_url": "particle-iot/particle-cli", "project_sha": "07dfa4e7d928d9641be368881b2216c6fb017c6c"}, + {"project_url": "perfsonar/toolkit", "project_sha": "d4c8906acdf7d8be49cf37b59939748945e526d9"}, + {"project_url": "pingyhq/pingy-cli", "project_sha": "53721434b698f53ba195c4824ca8d1f87ea8b60c"}, + {"project_url": "poanetwork/tokenbridge", "project_sha": "961b12b9f3545830a04044e109762277efcea6ef"}, + {"project_url": "postmanlabs/newman", "project_sha": 
"89941554304362d0cfec2914d134f738348b27c5"}, + {"project_url": "postmanlabs/postman-runtime", "project_sha": "7855b3ae5858734bfb6f0c5985592d8b2957f4d1"}, + {"project_url": "pouchdb/pouchdb", "project_sha": "546c8bb696872f86816574d02d47131ace0b4d18"}, + {"project_url": "PowerlineApp/powerline-mobile", "project_sha": "2030817dc80a07f3cfc2129bd830ce33ab50373d"}, + {"project_url": "premasagar/sqwidget", "project_sha": "7edc6d21997bb18da7daa59068926a082028d6f0"}, + {"project_url": "PrismJS/prism", "project_sha": "59e5a3471377057de1f401ba38337aca27b80e03"}, + {"project_url": "probcomp/metaprob", "project_sha": "43c4bea80772ed8b2baa51cd5ac6c593a34a3a8b"}, + {"project_url": "ProjectMirador/mirador", "project_sha": "3c121dbe99bae4eab910cb2df00e93904bc123ea"}, + {"project_url": "Quicksaver/Tab-Groups", "project_sha": "29ea6517e73eb5d58b2f0b9fc2d65d589d910e8a"}, + {"project_url": "regl-project/regl", "project_sha": "3d90d57d473b5dee6680dc97897f4a9fba465501"}, + {"project_url": "reportportal/service-ui", "project_sha": "049abcb8fc70ee131625914e9da4a748e23d2230"}, + {"project_url": "restify/node-restify", "project_sha": "89e7ac81a4cc885d153df6f07d5cf35ed75fd4d0"}, + {"project_url": "rtfeldman/node-test-runner", "project_sha": "16cd4b9c8e5dab3ce297039f5d72d372bdd63de9"}, + {"project_url": "ruiquelhas/blaine", "project_sha": "a69cdad6e59ebb19493018eacb7b7602f2225ce1"}, + {"project_url": "ruiquelhas/copperfield", "project_sha": "fe5629ed8f5edea740ca4917dfac6a779e644b45"}, + {"project_url": "ruiquelhas/electron-recipes", "project_sha": "9bcfc2520ad383c1e5bebe9c427214cab1d0a0da"}, + {"project_url": "ruiquelhas/fischbacher", "project_sha": "35eb4dcf0225a8899e13a3ab63c3e878d9d434ca"}, + {"project_url": "ruiquelhas/henning", "project_sha": "ac75e0b1cebdbb123eccb05277bc5c663f8e6696"}, + {"project_url": "ruiquelhas/houdin", "project_sha": "4a700f66748b3a57a1c1ab6ee7bbe425ce20c526"}, + {"project_url": "ruiquelhas/lafayette", "project_sha": "038578c360b22ff846daa7b3e6e0aeb712b145b2"}, + {"project_url": "ruiquelhas/thurston", "project_sha": "071f9ee5265f64f47335b428a498df22895e549c"}, + {"project_url": "sampotts/plyr", "project_sha": "0c9759455cbfcce888c66925c3b457ce06cee31e"}, + {"project_url": "scalableminds/webknossos", "project_sha": "b91b15ff4180b2288c40ad9e3a86678258dcd5c9"}, + {"project_url": "scality/Arsenal", "project_sha": "96cbaeb821d8045cbe8eabd00092290e13e46784"}, + {"project_url": "SeleniumBuilder/se-builder", "project_sha": "8230ad58a526d3eb905d32a780daeaea1fb56a55"}, + {"project_url": "serverless/serverless", "project_sha": "17d64e6c94b88a5daf36f28a4fa192c231052cfb"}, + {"project_url": "SGrondin/bottleneck", "project_sha": "b83528333ba4d27cf70b81cc2be12e09d7ff692f"}, + {"project_url": "signalapp/Signal-Desktop", "project_sha": "bd14b74e638dce03928e08ffbe2a83a6c047406e"}, + {"project_url": "sindresorhus/npm-name", "project_sha": "7aef07b69ed35f584e0a8bf6cece96750becaf00"}, + {"project_url": "sindresorhus/serialize-error", "project_sha": "a212a8c3902fa1ff1fdef8f7625dd0cc6d6e89a1"}, + {"project_url": "skarfacegc/FlowTrack2", "project_sha": "990a2566f30b8dd84a61ea1ff6f58076016a7796"}, + {"project_url": "solid/node-solid-server", "project_sha": "bbb8d78df7e8908e20e3052ae6655722aa6fa6de"}, + {"project_url": "SolidarityEconomyAssociation/sea-map", "project_sha": "17fa76b9b4070354c31faae81ba0162b8f27bf1b"}, + {"project_url": "soscripted/sox", "project_sha": "4be396373c06bb8340d740089018e364729bec70"}, + {"project_url": "sourcecred/sourcecred", "project_sha": "3da222ebe44c110f265063cfa99316ed5c1fa0b3"}, + 
{"project_url": "spark-notebook/spark-notebook", "project_sha": "69174f3923d0564d2078c0e0c70125245157d5b5"}, + {"project_url": "stanford-oval/thingengine-core", "project_sha": "b69f7b0166d256428a08ba2dac3fc3ca8dddf611"}, + {"project_url": "stealjs/steal-tools", "project_sha": "05f60d58e3ee56dbb8428c83121fdb6ee2b1825c"}, + {"project_url": "stellar/js-stellar-sdk", "project_sha": "52947e81e487edf179a6003efb40a1425a4f7ff2"}, + {"project_url": "stimulusreflex/stimulus_reflex", "project_sha": "52aa993165a656eccbe2cefaca9f5388509d014d"}, + {"project_url": "streamr-dev/network", "project_sha": "4cdabba71db0a6c531c63368d1a78361fff01dce"}, + {"project_url": "strongloop/loopback", "project_sha": "13371fd2a138a6f39db77e5a455b3170e5d4a0f5"}, + {"project_url": "studentinsights/studentinsights", "project_sha": "4bb09f97eb9c0473a9ac6ee076171de12855e721"}, + {"project_url": "sumup-oss/circuit-ui", "project_sha": "00ceacbd82b6cd3a71592ea9d2da5b95892f965b"}, + {"project_url": "superscriptjs/superscript", "project_sha": "5e3e1b51654a54518dfada17c0cd9dc146c8e48a"}, + {"project_url": "sveltejs/kit", "project_sha": "c4476c6d106b41dd8e6badbbdd0128b78be49d5c"}, + {"project_url": "tarantool/graphql.0", "project_sha": "05f39946299cb2f35a97be326b992aace0205eaf"}, + {"project_url": "testem/testem", "project_sha": "42fe29451b187bd1cd1e546228fa1bfbe11084f3"}, + {"project_url": "thaliproject/jxcore", "project_sha": "d3ccd242a592416b6537dfea8ce539bd6208dd54"}, + {"project_url": "thejoshwolfe/snakefall", "project_sha": "62bdfe3718f86ef85fc8c11e600bf621fa2a586c"}, + {"project_url": "themgoncalves/react-loadable-ssr-addon", "project_sha": "2036a6f12e9048d8a6e3eb0a8097455fa0fe1ebc"}, + {"project_url": "TheScienceMuseum/collectionsonline", "project_sha": "ef486c650bce9f2dccf25b7188dbe986d4b63c3c"}, + {"project_url": "tmijs/tmi.js", "project_sha": "3904ae743a12b984aa1a175740e8b5bae08a03e4"}, + {"project_url": "transloadit/uppy", "project_sha": "f07697e7f45e471ca16bac8751fa7221d9445605"}, + {"project_url": "tristanHessell/mess-around", "project_sha": "19cdf7aa58eaf165a88ac7a3954fc7a33e5685bc"}, + {"project_url": "trufflesuite/truffle", "project_sha": "0f17cf9680ac0dc7aa6a314ad3b78ad569daa896"}, + {"project_url": "TryGhost/Ghost", "project_sha": "4da658e72ad42cf251e4fb100ca651a7d4dca79e"}, + {"project_url": "tubbo/openrct2-benchwarmer", "project_sha": "504d75bfaf1b158dbe23e4bbfb926502189a0ff6"}, + {"project_url": "tulios/kafkajs", "project_sha": "ff3b1117f316d527ae170b550bc0f772614338e9"}, + {"project_url": "TypeStrong/ts-loader", "project_sha": "cf5326d9b5f1b804ff8d817f88fb127bc45ad9d1"}, + {"project_url": "uber/baseweb", "project_sha": "65c791a6b5ac50722f34e2a7b1282b08c539f58a"}, + {"project_url": "usdigitalresponse/neighbor-express", "project_sha": "130d9edd9ac09f2a8aa947b0d21f054d4dfc0462"}, + {"project_url": "vega/vega", "project_sha": "b45cf431cd6c0d0c0e1567f087f9b3b55bc236fa"}, + {"project_url": "video-dev/hls.js", "project_sha": "59d421479b5002993a5f3b36d4505adff3209fb5"}, + {"project_url": "visgl/luma.gl", "project_sha": "044c0ef5f767cd56974e30475a30dd3f24305983"}, + {"project_url": "w3c/aria-practices", "project_sha": "4adb78ea96b22db559577aa6ed64c9059596ab4a"}, + {"project_url": "waiterio/api", "project_sha": "9948b542f5da1957c3f656d959c4f5957d364eb1"}, + {"project_url": "web-animations/web-animations-js-legacy", "project_sha": "6a1c45473f9ba2db1ccad34f879bca829f77264d"}, + {"project_url": "webdriverio/cucumber-boilerplate", "project_sha": "f91d34ff0bf9112d02830dc474f1a97ff6e8d9d3"}, + {"project_url": 
"webex/webex-js-sdk", "project_sha": "cc743f187c646290dab21322431cbf8f1ce771a2"}, + {"project_url": "webpack/webpack", "project_sha": "16143f5fa835ad8c7181b8aeedc52f9cdd0fd39d"}, + {"project_url": "webpack/webpack-cli", "project_sha": "4e1c45ad8de888dea13247855c78848632475653"}, + ]} + +jobs: + build-matrix: + runs-on: ubuntu-latest + outputs: + matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }} + steps: + - id: set-matrix + run: | + echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT + echo $PROJECTS_JSON >> $GITHUB_OUTPUT + echo "__EOF__" >> $GITHUB_OUTPUT + execute: + needs: [build-matrix] + strategy: + matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} + fail-fast: false + uses: ./.github/workflows/end2end.yml + with: + project_url: ${{ matrix.projects.project_url }} + project_sha: ${{ matrix.projects.project_sha }} diff --git a/.github/workflows/barbosa23flaky.yml b/.github/workflows/barbosa23flaky.yml new file mode 100644 index 0000000..e16758a --- /dev/null +++ b/.github/workflows/barbosa23flaky.yml @@ -0,0 +1,36 @@ +name: Test on Barbosa23 JS projects with >=5 flaky tests + +on: + push + +env: + PROJECTS_JSON: | + { projects: [ + {"project_url": "appium/appium", "project_sha": "2d124323c5973ef9d3e190f7401e67106886ffd4"}, + {"project_url": "badges/shields", "project_sha": "14892e3943a4677332618d8b9f584766f7940ee7"}, + {"project_url": "facebook/react-native", "project_sha": "af99a6890b84713d002fbbd872f10fe2e6304861"}, + {"project_url": "FlowCrypt/flowcrypt-browser", "project_sha": "92d0188c66572d2c14ef4ed24602b8a58445630c"}, + {"project_url": "meteor/meteor", "project_sha": "dc38e4325dcd88fb3c6d1be1639680c6ff6f5e80"}, + {"project_url": "yui/yui3", "project_sha": "25264e3629b1c07fb779d203c4a25c0879ec862c"} + ]} + +jobs: + build-matrix: + runs-on: ubuntu-latest + outputs: + matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }} + steps: + - id: set-matrix + run: | + echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT + echo $PROJECTS_JSON >> $GITHUB_OUTPUT + echo "__EOF__" >> $GITHUB_OUTPUT + execute: + needs: [build-matrix] + strategy: + matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} + fail-fast: false + uses: ./.github/workflows/end2end.yml + with: + project_url: ${{ matrix.projects.project_url }} + project_sha: ${{ matrix.projects.project_sha }} diff --git a/.github/workflows/end2end.yml b/.github/workflows/end2end.yml new file mode 100644 index 0000000..7238098 --- /dev/null +++ b/.github/workflows/end2end.yml @@ -0,0 +1,99 @@ +name: Test NPMFilter End to End on a Project + +on: + workflow_dispatch: + inputs: + project_url: + description: 'GitHub suffix of project to test (username/project)' + required: true + type: string + project_sha: + description: 'SHA of project to test' + required: true + type: string + workflow_call: + inputs: + project_url: + description: 'GitHub suffix of project to test (username/project)' + required: true + type: string + project_sha: + description: 'SHA of project to test' + required: true + type: string +jobs: + execute: + runs-on: self-hosted + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Run NPMFilter + id: run-npm-filter + env: + SHA: ${{ inputs.project_sha }} + URL: ${{ inputs.project_url }} + run: | + IFS="/" read -r -a projectArray <<< "$URL" + OrgName=${projectArray[0]} + ProjectName=${projectArray[1]} + LogDir=${URL//\//-} + echo "LogDir=$LogDir" >> $GITHUB_OUTPUT + + echo "Running NPMFilter on $OrgName/$ProjectName@$SHA" + + mkdir -p docker_configs/ + cat 
>docker_configs/debug_filter_config.json < tests-overview.csv + + # Check if tests were found + TestData=$(cat tests-overview.csv) + IFS="," read -r -a testCount <<< $(python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/) + TestsRun=${testCount[0]} + if [ $TestsRun -le 2 ]; then + echo "ERROR: No tests found." + exit -1 + else + echo "OK: ${TestsRun} tests found!" + fi + - name: Upload output + uses: actions/upload-artifact@v2 + with: + name: npm_filter_docker_results + path: npm_filter_docker_results \ No newline at end of file diff --git a/.github/workflows/smoketest.yml b/.github/workflows/smoketest.yml new file mode 100644 index 0000000..d1fa45e --- /dev/null +++ b/.github/workflows/smoketest.yml @@ -0,0 +1,31 @@ +name: Test NPMFilter End to End on a toy project + +on: + push: + +env: + PROJECTS_JSON: | + { projects: [ + {"project_url": "mtiller/ts-jest-sample", "project_sha": "6739c576d4590c53296f3e4fcdf3074e582ae297"}, + ]} + +jobs: + build-matrix: + runs-on: ubuntu-latest + outputs: + matrix-projects: ${{ steps.set-matrix.outputs.matrix-projects }} + steps: + - id: set-matrix + run: | + echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT + echo $PROJECTS_JSON >> $GITHUB_OUTPUT + echo "__EOF__" >> $GITHUB_OUTPUT + execute: + needs: [build-matrix] + strategy: + matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} + fail-fast: false + uses: ./.github/workflows/end2end.yml + with: + project_url: ${{ matrix.projects.project_url }} + project_sha: ${{ matrix.projects.project_sha }} diff --git a/output_proc_scripts/count_tests_run.py b/output_proc_scripts/count_tests_run.py new file mode 100644 index 0000000..03cba8d --- /dev/null +++ b/output_proc_scripts/count_tests_run.py @@ -0,0 +1,71 @@ +import json +import os +import sys + +# simple, unrefined script for parsing npm-filter output files +# for the current directory, get all files named *__results.json +# (wildcard represents the project name) +# prints out (Number of tests passing),(Number of tests failing) + + +# JSON specifying possible errors +# that should be avoided if an input JSON will pass the filter check + +JSON_filter = { + "setup": { + "repo_cloning_ERROR": True, + "pkg_json_ERROR": True + }, + "installation": { + "ERROR": True + }, +} + +# input to the function is a JSON of undesirable elements +# return true if the JSON to be filtered has any of the filter elements +def json_contains_issues(json_check, json_filter): + contains_issues = False + for filter_key, filter_val in json_filter.items(): + # recursive case + if isinstance( filter_val, dict): + contains_issues = contains_issues or json_contains_issues( json_check.get(filter_key, {}), filter_val) + # base case + contains_issues = contains_issues or (json_check.get(filter_key, {}) == filter_val) + return( contains_issues) + +# by default, there needs to be at least one passing test +def get_num_tests_run(json_check): + test_dict = json_check.get("testing", {}) + num_passing = 0 + num_failing = 0 + passing_commands = [] + for test_com, test_out in test_dict.items(): + if test_out.get("timed_out", False) or (not test_out.get("RUNS_NEW_USER_TESTS", True)) or test_out.get("ERROR", False): + continue + num_passing += test_out.get("num_passing") + num_failing += test_out.get("num_failing") + return [num_passing, num_failing] + +output_proc_dir = "." 
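# Illustrative worked example, not part of the patch: for a hypothetical
# results file pkg__results.json containing
#   {"setup": {}, "installation": {}, "testing": {"test": {"num_passing": 3, "num_failing": 1}}}
# json_contains_issues() returns False (none of the error keys in JSON_filter
# are set), get_num_tests_run() returns [3, 1], and the script prints "3,1".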
+if len(sys.argv) == 2:
+    output_proc_dir = sys.argv[1]
+else:
+    print("No output directory specified: looking at current directory")
+
+# get all relevant files
+all_files = [ output_proc_dir + "/" + fname for fname in os.listdir(output_proc_dir) if fname.find("__results.json") != -1]
+passing_files = []
+total_passing_tests = 0
+total_failing_tests = 0
+for file in all_files:
+    with open(file) as f:
+        json_check = json.load(f)
+    proj_name = file[ : file.index("__results.json")]
+    if json_contains_issues( json_check, JSON_filter):
+        # print(proj_name + " has setup/install errors")
+        continue
+    num_tests = get_num_tests_run( json_check)
+    total_passing_tests += num_tests[0]
+    total_failing_tests += num_tests[1]
+
+print(f"{total_passing_tests},{total_failing_tests}")
\ No newline at end of file
diff --git a/runDocker.sh b/runDocker.sh
index e4ec817..4d3f8a6 100755
--- a/runDocker.sh
+++ b/runDocker.sh
@@ -15,6 +15,6 @@ docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \
 	--volume `pwd`/npm_filter_docker_results:/home/npm-filter/results \
 	--volume `pwd`/docker_configs:/home/npm-filter/docker_configs\
 	-w /home/npm-filter \
-	-it emarteca/npm-filter:latest \
+	emarteca/npm-filter:latest \
 	bash -c "PATH=/home/codeql_home/codeql:$PATH; $npm_filter_command --output_dir results"
 rm -r local_mount

From 1671e31d70449299fa4d00c3fd7cf38248097249 Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Wed, 14 Jun 2023 13:04:25 -0400
Subject: [PATCH 11/39] jest parsing; also making default node latest LTS so
 it doesn't break yarn

---
 build.sh                               |  2 +-
 src/TestInfo.py                        |  2 +-
 src/output_parsing/test_output_proc.py | 38 ++++++++++++++++++++++++--
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/build.sh b/build.sh
index 559869d..3d8aa8f 100755
--- a/build.sh
+++ b/build.sh
@@ -46,7 +46,7 @@ if [ -d TESTING_REPOS ]; then
 fi
 mkdir TESTING_REPOS
 
-node_version='node' # default to just the latest version
+node_version='v18.16.0' # default to just the latest LTS version
 npm_version='*'
 # if there's a repo_link specified
 if [ -n $repo_link ]; then
diff --git a/src/TestInfo.py b/src/TestInfo.py
index 6cf20af..61f08f0 100644
--- a/src/TestInfo.py
+++ b/src/TestInfo.py
@@ -44,7 +44,7 @@ class TestInfo:
 		"jest": {
 			"args": " --verbose --json --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$",
 			"position": -1,
-			"post_processing": None
+			"post_processing": TestOutputProc.parse_jest_json_to_csv
 		},
 		"mocha": {
 			"args": " -- --reporter xunit --reporter-option output=$PLACEHOLDER_OUTPUT_FILE_NAME$",
 			"position": -1,
diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py
index fcf98ec..bd52da5 100644
--- a/src/output_parsing/test_output_proc.py
+++ b/src/output_parsing/test_output_proc.py
@@ -22,10 +22,44 @@ def parse_mocha_json_to_csv(output_file, new_output_file=None):
 		test_runtimes += [float(test.get("@time", "NaN"))]
 		if test.get("failure", False):
 			test_stdout += [test["failure"]]
-			test_pass_fail += ["Fail"]
+			test_pass_fail += ["failed"]
 		else:
 			test_stdout += [""]
-			test_pass_fail += ["Pass"]
+			test_pass_fail += ["passed"]
+	res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail)))
+	res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"]
+	with open(new_output_file, 'w') as csv_file:
+		csv_file.write(res_df.to_csv())
+
+def parse_jest_json_to_csv(output_file, new_output_file=None):
+	if new_output_file is None:
+		new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension
+	with open(output_file) as json_file:
+		data_dict = json.loads(json_file.read())
+	# the format: all tests are in a top level list called "testResults"
+	# this is a list of objects that have "assertionResults" representing the test suites
+	# "assertionResults" is a list of objects that have the test data
+	test_suites = []
+	test_names = []
+	test_runtimes = []
+	test_stdout = []
+	test_pass_fail = []
+	for test_suite in data_dict.get("testResults", []):
+		test_suite_results = test_suite.get("assertionResults", [])
+		test_suite_name = test_suite.get("name", "")
+		for test_results in test_suite_results:
+			test_status = test_results.get("status", "failed")
+			test_duration = test_results.get("duration")
+			# if it can't convert to a float, it could be missing/nonetype (None duration for pending tests)
+			try:
+				test_duration = float(test_duration)
+			except:
+				test_duration = float("NaN")
+			test_suites += [test_suite_name]
+			test_names += [test_results.get("fullName", "")]
+			test_runtimes += [test_duration]
+			test_stdout += [";".join(test_results.get("failureMessages", []))]
+			test_pass_fail += [test_status] # passed/failed/pending -- if not present assume failed
 	res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail)))
 	res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"]
 	with open(new_output_file, 'w') as csv_file:
 		csv_file.write(res_df.to_csv())

From 2ef737f3c78abfb074be77eb032c344ab07e8116 Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Wed, 14 Jun 2023 13:10:46 -0400
Subject: [PATCH 12/39] verbose config file

---
 configs/verbose.json | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 configs/verbose.json

diff --git a/configs/verbose.json b/configs/verbose.json
new file mode 100644
index 0000000..b29a4f1
--- /dev/null
+++ b/configs/verbose.json
@@ -0,0 +1,5 @@
+{
+	"test": {
+		"test_verbose_all_output": { "do_verbose_tracking": true }
+	}
+}

From 528dc055fad85bc4bad01cc9576334bbc53f74d1 Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Wed, 14 Jun 2023 17:00:52 -0400
Subject: [PATCH 13/39] more bug fixes :')

---
 Dockerfile              |  5 ++--
 build.sh                | 56 ++++++++++++++++++++++++++++++-----------
 get_rel_project_reqs.js |  2 +-
 3 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 6f32d98..8508639 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,9 @@
 FROM ubuntu:latest
 ARG DEBIAN_FRONTEND=noninteractive
 
-# build arg: setting up for a specific repo?
+# build arg: setting up for a specific repo? at a specific commit?
 ARG REPO_LINK
+ARG REPO_COMMIT
 
 RUN apt-get update \
 	&& apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel
@@ -23,4 +24,4 @@ COPY get_rel_project_reqs.js /home/npm-filter
 WORKDIR /home/npm-filter
 
 RUN git config --global http.sslVerify "false"
-RUN ./build.sh $REPO_LINK
+RUN ./build.sh $REPO_LINK $REPO_COMMIT
diff --git a/build.sh b/build.sh
index 3d8aa8f..0c6260b 100755
--- a/build.sh
+++ b/build.sh
@@ -1,7 +1,9 @@
 #!/bin/bash
 
-# can be building for one specific repo
+# can be building for one specific repo, at a specific commit
+# (if they're not specified they're just empty string, that's fine)
 repo_link=$1
+repo_commit=$2
 
 # install nvm, so we can then use specific versions of node and npm
 curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.37.2/install.sh | /usr/bin/bash
@@ -49,14 +51,37 @@ mkdir TESTING_REPOS
 node_version='v18.16.0' # default to just the latest LTS version
 npm_version='*'
 # if there's a repo_link specified
-if [ -n $repo_link ]; then
+if [ ! 
-z "$repo_link" ]; then cd TESTING_REPOS git clone $repo_link # repo dir will be the only thing in TESTING_REPOS repo_dir_name=`ls` + if [ ! -z "$repo_commit" ]; then + cd $repo_dir_name + git checkout $repo_commit + fi + cd /home/npm-filter + # this will make the node_version and npm_version variables - set_req_vars=`node get_rel_project_reqs.js $repo_dir_name 2>/dev/null` + # it's ok to use the generic version here -- just using it for the vars + # need these dependencies for my get_rel_project_reqs.js script + nvm install $node_version + nvm use $node_version + nvm install-latest-npm + + npm install semver node-fetch + + # script to set the env variables for node_version etc + echo "#!/bin/bash" > req_vars.sh + node get_rel_project_reqs.js TESTING_REPOS/${repo_dir_name} >> req_vars.sh + chmod 700 req_vars.sh + # source in current shell: so we set the variables in the current shell + . req_vars.sh + rm req_vars.sh + + echo $node_version `$set_req_vars` + rm -r node_modules if [[ $node_version == "*" ]]; then node_version=node @@ -69,6 +94,12 @@ fi nvm install $node_version nvm use $node_version +if [[ $npm_version == "*" ]]; then + nvm install-latest-npm +else + npm install -g npm@${npm_version} +fi + NVM_DIR=/root/.nvm NODE_VERSION=`node --version` @@ -77,24 +108,19 @@ echo "export NVM_DIR=$NVM_DIR" >> /root/.bashrc echo "export NODE_PATH=$NVM_DIR/$NODE_VERSION/lib/node_modules" >> /root/.bashrc echo "export PATH=$NVM_DIR/$NODE_VERSION/bin:/home/codeql_home/codeql:$PATH" >> /root/.bashrc -# echo "nvm use $node_version" >> /root/.bashrc - -if [[ $npm_version == "*" ]]; then - nvm install-latest-npm -else - npm install -g npm@${npm_version} -fi - - # permissive npm config set strict-ssl false # install the dependencies: but use the current version of npm -npm install -g jest mocha tap ava nyc yarn next semver +npm install -g jest mocha tap ava nyc yarn next -if [ -n $repo_link ]; then +if [ ! -z "$repo_link" ]; then cd /home/npm-filter # do the install and build - python3 src/diagnose_github_repo.py --repo_link $repo_link --config configs/build_only_config.json --output_dir results + if [ ! 
-z "$repo_commit" ]; then + python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config configs/build_only_config.json --output_dir results + else + python3 src/diagnose_github_repo.py --repo_link $repo_link --config configs/build_only_config.json --output_dir results + fi fi diff --git a/get_rel_project_reqs.js b/get_rel_project_reqs.js index 314b233..5a4bd6d 100644 --- a/get_rel_project_reqs.js +++ b/get_rel_project_reqs.js @@ -145,7 +145,7 @@ function is_banned(vers) { function print_as_bash_vars(reqs) { for ( key in reqs) { - console.log(key + "=" + reqs[key]); + console.log("export " + key + "=" + reqs[key]); } } From c9de12410c078402177eb192f82b80a6d60bcc74 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Thu, 15 Jun 2023 21:40:16 -0400 Subject: [PATCH 14/39] adding docker default command (run tests for repo link and commit provided); adding option to still diagnose tests if we skip install as long as there is node_modules (prev install); and adding config option for pre-install scripts --- Dockerfile | 10 ++++++++ build.sh | 30 ++++++++++++----------- configs/build_only_config.json | 5 ++++ configs/verbose_only.json | 11 +++++++++ run_verbose_for_repo_and_config.sh | 18 ++++++++++++++ src/diagnose_github_repo.py | 5 +++- src/diagnose_npm_package.py | 3 +++ src/test_JS_repo_lib.py | 39 ++++++++++++++++++++++++------ 8 files changed, 99 insertions(+), 22 deletions(-) create mode 100644 configs/build_only_config.json create mode 100644 configs/verbose_only.json create mode 100755 run_verbose_for_repo_and_config.sh diff --git a/Dockerfile b/Dockerfile index 8508639..72c3292 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,3 +25,13 @@ WORKDIR /home/npm-filter RUN git config --global http.sslVerify "false" RUN ./build.sh $REPO_LINK $REPO_COMMIT +# source the env variables produced by the build script (node version, etc) +RUN . /envfile + +# add a default command for running the tests for repo_link and commit provided +# this runs in verbose mode +# need to use ENV instead of ARG in the CMD b/c docker is 10/10 +ENV ENV_REPO_COMMIT=$REPO_COMMIT +ENV ENV_REPO_LINK=$REPO_LINK +# gotta source our env vars so the command can run and use npm/node/etc :-) +CMD . 
/envfile; ./run_verbose_for_repo_and_config.sh $ENV_REPO_LINK $ENV_REPO_COMMIT
\ No newline at end of file
diff --git a/build.sh b/build.sh
index 0c6260b..85863e7 100755
--- a/build.sh
+++ b/build.sh
@@ -19,11 +19,11 @@ fi
 
 mkdir -p /home/codeql_home
 
-# cd /home/codeql_home
-# curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip
-# unzip codeql-linux64.zip
-# # clone stable version
-# git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo
+cd /home/codeql_home
+curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip
+unzip codeql-linux64.zip
+# clone stable version
+git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo
 
 apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential
 apt-get update
@@ -103,10 +103,12 @@ fi
 
 NVM_DIR=/root/.nvm
 NODE_VERSION=`node --version`
 
-echo "export NODE_VERSION=\"$NODE_VERSION\"" >> /root/.bashrc
-echo "export NVM_DIR=$NVM_DIR" >> /root/.bashrc
-echo "export NODE_PATH=$NVM_DIR/$NODE_VERSION/lib/node_modules" >> /root/.bashrc
-echo "export PATH=$NVM_DIR/$NODE_VERSION/bin:/home/codeql_home/codeql:$PATH" >> /root/.bashrc
+echo "export NODE_VERSION=\"$NODE_VERSION\"" >> /envfile
+echo "export NVM_DIR=$NVM_DIR" >> /envfile
+echo "export NODE_PATH=$NVM_DIR/$NODE_VERSION/lib/node_modules" >> /envfile
+echo "export PATH=$NVM_DIR/$NODE_VERSION/bin:/home/codeql_home/codeql:$PATH" >> /envfile
+
+cat /envfile >> /root/.bashrc
 
 # permissive
 npm config set strict-ssl false
@@ -116,11 +118,11 @@ npm install -g jest mocha tap ava nyc yarn next
 
 if [ ! -z "$repo_link" ]; then
 	cd /home/npm-filter
-	# do the install and build
+	# do the install and build only (build_only_config.json config file)
 	if [ ! -z "$repo_commit" ]; then
-		python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config configs/build_only_config.json --output_dir results
-	else
-		python3 src/diagnose_github_repo.py --repo_link $repo_link --config configs/build_only_config.json --output_dir results
-	fi
+		python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config configs/build_only_config.json --output_dir results
+	else
+		python3 src/diagnose_github_repo.py --repo_link $repo_link --config configs/build_only_config.json --output_dir results
+	fi
 fi
diff --git a/configs/build_only_config.json b/configs/build_only_config.json
new file mode 100644
index 0000000..aa7cce2
--- /dev/null
+++ b/configs/build_only_config.json
@@ -0,0 +1,5 @@
+{
+	"test": {
+		"track_tests": false
+	}
+}
\ No newline at end of file
diff --git a/configs/verbose_only.json b/configs/verbose_only.json
new file mode 100644
index 0000000..e307f7f
--- /dev/null
+++ b/configs/verbose_only.json
@@ -0,0 +1,11 @@
+{
+	"install": {
+		"do_install": false
+	},
+	"build": {
+		"track_build": false
+	},
+	"test": {
+		"test_verbose_all_output": { "do_verbose_tracking": true }
+	}
+}
diff --git a/run_verbose_for_repo_and_config.sh b/run_verbose_for_repo_and_config.sh
new file mode 100755
index 0000000..6ab7538
--- /dev/null
+++ b/run_verbose_for_repo_and_config.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# run npm-filter in verbose mode on a specified repo, at an optional commit
+# output to the "results" directory
+
+# usage: ./run_verbose_for_repo_and_config.sh repo_link repo_commit
+
+repo_link=$1
+config_file=configs/verbose_only.json
+repo_commit=$2
+
+if [ ! -z "$repo_link" ] && [ ! -z "$config_file" ]; then
+	if [ ! -z "$repo_commit" ]; then
+		python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config $config_file --output_dir results
+	else
+		python3 src/diagnose_github_repo.py --repo_link $repo_link --config $config_file --output_dir results
+	fi
+fi
\ No newline at end of file
diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py
index 5d65c3a..4f9af42 100644
--- a/src/diagnose_github_repo.py
+++ b/src/diagnose_github_repo.py
@@ -26,6 +26,7 @@ class RepoWalker():
 	VERBOSE_MODE = False
 	RM_AFTER_CLONING = False
 	SCRIPTS_OVER_CODE = []
+	CUSTOM_SETUP_SCRIPTS = []
 	QL_QUERIES = []
 
 	DO_INSTALL = True
@@ -76,11 +77,13 @@ def set_up_config( self, config_file):
 		self.IGNORED_COMMANDS = cf_dict.get( "ignored_commands", self.IGNORED_COMMANDS)
 		self.IGNORED_SUBSTRINGS = cf_dict.get( "ignored_substrings", self.IGNORED_SUBSTRINGS)
 		self.RM_AFTER_CLONING = cf_dict.get( "rm_after_cloning", self.RM_AFTER_CLONING)
-		# script and query file location is relative to the config file
+		# scripts and query file location is relative to the config file
 		self.SCRIPTS_OVER_CODE = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p
 									for p in cf_dict.get( "scripts_over_code", self.SCRIPTS_OVER_CODE)]
 		self.QL_QUERIES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p
 									for p in cf_dict.get( "QL_queries", self.QL_QUERIES)]
+		self.CUSTOM_SETUP_SCRIPTS = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p
+									for p in cf_dict.get( "custom_setup_scripts", self.CUSTOM_SETUP_SCRIPTS)]
 
 		cf_dict = config_json.get( "dependencies", {})
 		self.INCLUDE_DEV_DEPS = cf_dict.get("include_dev_deps", self.INCLUDE_DEV_DEPS)
diff --git a/src/diagnose_npm_package.py b/src/diagnose_npm_package.py
index aa5dcf1..bc59d57 100644
--- a/src/diagnose_npm_package.py
+++ b/src/diagnose_npm_package.py
@@ -19,6 +19,7 @@ class NPMSpider(scrapy.Spider):
 	VERBOSE_MODE = False
 	RM_AFTER_CLONING = False
 	SCRIPTS_OVER_CODE = []
+	CUSTOM_SETUP_SCRIPTS = []
 	QL_QUERIES = []
 
 	DO_INSTALL = True
@@ -73,6 +74,8 @@ def set_up_config( self, config_file):
 							for p in cf_dict.get( "scripts_over_code", self.SCRIPTS_OVER_CODE)]
 		self.QL_QUERIES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p
 							for p in cf_dict.get( "QL_queries", self.QL_QUERIES)]
+		self.CUSTOM_SETUP_SCRIPTS = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p
+							for p in cf_dict.get( "custom_setup_scripts", self.CUSTOM_SETUP_SCRIPTS)]
 
 		cf_dict = config_json.get( "dependencies", {})
 		self.INCLUDE_DEV_DEPS = cf_dict.get("include_dev_deps", self.INCLUDE_DEV_DEPS)
diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py
index a3f9c4f..a42cc2e 100644
--- a/src/test_JS_repo_lib.py
+++ b/src/test_JS_repo_lib.py
@@ -288,10 +288,31 @@ def diagnose_package( repo_link, crawler, commit_SHA=None):
 			json_out["setup"]["pkg_json_ERROR"] = True
 		return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
 
-	# first, the install
 	manager = ""
+	# first, check if there is a custom install
+	# this runs custom scripts the same way as the scripts_over_code below; only
+	# difference is it's before the npm-filter run
+	if crawler.CUSTOM_SETUP_SCRIPTS != []:
+		json_out["custom_setup_scripts"] = {}
+		for script in crawler.CUSTOM_SETUP_SCRIPTS:
+			print("Running custom setup script over code: " + script)
+			json_out["custom_setup_scripts"][script] = {}
+			error, output, retcode = 
run_command( script) + script_output = output.decode('utf-8') + error.decode('utf-8') + ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])') + script_output = ansi_escape.sub('', script_output) + json_out["custom_setup_scripts"][script]["output"] = script_output + if retcode != 0: + json_out["custom_setup_scripts"][script]["ERROR"] = True + + # check if the install is done (check if there is a node_modules folder) + already_installed = os.path.isdir("node_modules") + + # then, the install if crawler.DO_INSTALL: - (manager, retcode, installer_command, installer_debug) = run_installation( pkg_json, crawler) + (new_manager, retcode, installer_command, installer_debug) = run_installation( pkg_json, crawler) + if manager == "": + manager = new_manager json_out["installation"] = {} json_out["installation"]["installer_command"] = installer_command if crawler.VERBOSE_MODE: @@ -299,10 +320,14 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): if retcode != 0: print("ERROR -- installation failed") json_out["installation"]["ERROR"] = True - return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) + if not already_installed: + return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) else: json_out["installation"] = { "do_install": False } + if manager == "": # default the manager to npm if it wasn't already IDd + manager = "npm run " + if crawler.COMPUTE_DEP_LISTS: json_out["dependencies"] = {} if not crawler.DO_INSTALL: @@ -316,8 +341,8 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): # now, proceed with the build if crawler.TRACK_BUILD: json_out["build"] = {} - if not crawler.DO_INSTALL: - print("Can't do build without installing (do_install: false) -- skipping") + if not crawler.DO_INSTALL and not already_installed: + print("Can't do build without installing (do_install: false and not already installed) -- skipping") else: (retcode, build_script_list, build_debug) = run_build( manager, pkg_json, crawler) json_out["build"]["build_script_list"] = build_script_list @@ -332,8 +357,8 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): # then, the testing if crawler.TRACK_TESTS: json_out["testing"] = {} - if not crawler.DO_INSTALL: - print("Can't run tests without installing (do_install: false) -- skipping") + if not crawler.DO_INSTALL and not already_installed: + print("Can't run tests without installing (do_install: false and not already installed) -- skipping") else: (retcode, test_json_summary) = run_tests( manager, pkg_json, crawler, repo_name, cur_dir) json_out["testing"] = test_json_summary From b080de07162cd57c44c9bbf4ea53f3100e75446b Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Thu, 15 Jun 2023 22:59:44 -0400 Subject: [PATCH 15/39] docker build option for custom install script --- Dockerfile | 22 +++++++++++++--------- build.sh | 20 +++++++++++++------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index 72c3292..d84717b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,30 @@ FROM ubuntu:latest ARG DEBIAN_FRONTEND=noninteractive -# build arg: setting up for a specific repo? at a specific commit? +# build arg: setting up for a specific repo? at a specific commit? custom install script? 
ARG REPO_LINK ARG REPO_COMMIT - -RUN apt-get update \ - && apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel - -RUN apt update -RUN apt -y install python3-pip -RUN pip3 install bs4 scrapy xmltodict pandas +ARG CUSTOM_INSTALL_SCRIPT RUN mkdir -p /home/npm-filter/results RUN mkdir /home/npm-filter/src RUN mkdir /home/npm-filter/configs COPY src /home/npm-filter/src -COPY configs /home/npm-filter/configs +# copy the custom install script if it exists +COPY configs/* $CUSTOM_INSTALL_SCRIPT /home/npm-filter/configs/ +# and name it the custom_install_script +RUN if [ -f /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} ] ; then mv /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} /home/npm-filter/configs/custom_install_script ; fi COPY *.sh /home/npm-filter/ COPY get_rel_project_reqs.js /home/npm-filter +RUN apt-get update \ + && apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel + +RUN apt update +RUN apt -y install python3-pip +RUN pip3 install bs4 scrapy xmltodict pandas + WORKDIR /home/npm-filter RUN git config --global http.sslVerify "false" diff --git a/build.sh b/build.sh index 85863e7..a3fa31b 100755 --- a/build.sh +++ b/build.sh @@ -19,11 +19,11 @@ fi mkdir -p /home/codeql_home -cd /home/codeql_home -curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip -unzip codeql-linux64.zip -# clone stable version -git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo +# cd /home/codeql_home +# curl -L -o codeql-linux64.zip https://github.com/github/codeql-cli-binaries/releases/download/v2.3.4/codeql-linux64.zip +# unzip codeql-linux64.zip +# # clone stable version +# git clone https://github.com/github/codeql.git --branch v1.26.0 codeql-repo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates gnupg build-essential apt-get update @@ -116,13 +116,19 @@ npm config set strict-ssl false # install the dependencies: but use the current version of npm npm install -g jest mocha tap ava nyc yarn next +config_file=configs/build_only_config.json +if [ -f "/home/npm-filter/configs/custom_install_script" ]; then + chmod +x /home/npm-filter/configs/custom_install_script + config_file=configs/custom_install_only.json +fi + if [ ! -z "$repo_link" ]; then cd /home/npm-filter # do the install and build only (build_only_config.json config file) if [ ! -z "$repo_commit" ]; then - python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config configs/build_only_config.json --output_dir results + python3 src/diagnose_github_repo.py --repo_link_and_SHA $repo_link $repo_commit --config $config_file --output_dir results else - python3 src/diagnose_github_repo.py --repo_link $repo_link --config configs/build_only_config.json --output_dir results + python3 src/diagnose_github_repo.py --repo_link $repo_link --config $config_file --output_dir results fi fi From c2592626579e7ef4de8675fbf35656e3a2fa6ed7 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Thu, 15 Jun 2023 23:44:15 -0400 Subject: [PATCH 16/39] lil fix --- Dockerfile | 9 +++++++-- configs/custom_install_only.json | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 configs/custom_install_only.json diff --git a/Dockerfile b/Dockerfile index d84717b..3710a79 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,8 @@ ARG DEBIAN_FRONTEND=noninteractive # build arg: setting up for a specific repo? at a specific commit? 
custom install script? ARG REPO_LINK ARG REPO_COMMIT -ARG CUSTOM_INSTALL_SCRIPT +# placeholder: if this arg isn't specified, copy over the readme file in configs (can't copy no source, RIP) +ARG CUSTOM_INSTALL_SCRIPT=configs/README.md RUN mkdir -p /home/npm-filter/results RUN mkdir /home/npm-filter/src @@ -12,7 +13,11 @@ RUN mkdir /home/npm-filter/configs COPY src /home/npm-filter/src # copy the custom install script if it exists -COPY configs/* $CUSTOM_INSTALL_SCRIPT /home/npm-filter/configs/ +RUN echo $CUSTOM_INSTALL_SCRIPT +COPY ${CUSTOM_INSTALL_SCRIPT} configs/ /home/npm-filter/configs/ +# delete the config readme: we don't need this in the docker. and it's a flag for no-custom-install +# since the readme is the default for custom install +RUN rm /home/npm-filter/configs/README.md # and name it the custom_install_script RUN if [ -f /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} ] ; then mv /home/npm-filter/configs/${CUSTOM_INSTALL_SCRIPT} /home/npm-filter/configs/custom_install_script ; fi COPY *.sh /home/npm-filter/ diff --git a/configs/custom_install_only.json b/configs/custom_install_only.json new file mode 100644 index 0000000..fb3df09 --- /dev/null +++ b/configs/custom_install_only.json @@ -0,0 +1,8 @@ +{ + "test": { + "track_tests": false + }, + "meta_info": { + "custom_setup_scripts": [ "custom_install_script" ] + } +} From 70816c6d37bf492bcd28eeda807aac38f9577334 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Thu, 22 Jun 2023 22:06:06 -0400 Subject: [PATCH 17/39] adding option to repeat test command executions --- configs/default_filter_config.json | 1 + src/diagnose_github_repo.py | 2 + src/diagnose_npm_package.py | 2 + src/test_JS_repo_lib.py | 115 ++++++++++++++++------------- 4 files changed, 67 insertions(+), 53 deletions(-) diff --git a/configs/default_filter_config.json b/configs/default_filter_config.json index 14fdabb..56d0149 100644 --- a/configs/default_filter_config.json +++ b/configs/default_filter_config.json @@ -13,6 +13,7 @@ "timeout": 1000 }, "test": { + "test_command_repeats": 1, "track_tests": true, "test_verbose_all_output": { "do_verbose_tracking": false, diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py index 4f9af42..d2a5843 100644 --- a/src/diagnose_github_repo.py +++ b/src/diagnose_github_repo.py @@ -36,6 +36,7 @@ class RepoWalker(): TRACK_TESTS = True TEST_VERBOSE_ALL_OUTPUT = False TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json" + TEST_COMMAND_REPEATS = 1 TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"] @@ -102,6 +103,7 @@ def set_up_config( self, config_file): self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT) self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS) self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS) + self.TEST_COMMAND_REPEATS = cf_dict.get("test_command_repeats", self.TEST_COMMAND_REPEATS) test_verbose_config = cf_dict.get("test_verbose_all_output", {}) self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT) self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON) diff --git a/src/diagnose_npm_package.py b/src/diagnose_npm_package.py index bc59d57..59daa28 100644 --- a/src/diagnose_npm_package.py +++ b/src/diagnose_npm_package.py @@ -29,6 +29,7 @@ class NPMSpider(scrapy.Spider): TRACK_TESTS = True TEST_VERBOSE_ALL_OUTPUT = False 
TEST_VERBOSE_OUTPUT_JSON = "verbose_test_report.json" + TEST_COMMAND_REPEATS = 1 TRACKED_TEST_COMMANDS = ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"] @@ -94,6 +95,7 @@ def set_up_config( self, config_file): self.TEST_TIMEOUT = cf_dict.get("timeout", self.TEST_TIMEOUT) self.TRACKED_TEST_COMMANDS = cf_dict.get("tracked_test_commands", self.TRACKED_TEST_COMMANDS) self.TRACK_TESTS = cf_dict.get("track_tests", self.TRACK_TESTS) + self.TEST_COMMAND_REPEATS = cf_dict.get("test_command_repeats", self.TEST_COMMAND_REPEATS) test_verbose_config = cf_dict.get("test_verbose_all_output", {}) self.TEST_VERBOSE_ALL_OUTPUT = test_verbose_config.get("do_verbose_tracking", self.TEST_VERBOSE_ALL_OUTPUT) self.TEST_VERBOSE_OUTPUT_JSON = test_verbose_config.get("verbose_json_output_file", self.TEST_VERBOSE_OUTPUT_JSON) diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index a42cc2e..6815be4 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -110,59 +110,68 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): test_scripts = [t for t in test_scripts if set([t.find(ig_com) for ig_com in crawler.IGNORED_COMMANDS]) == {-1}] test_scripts = [t for t in test_scripts if set([pkg_json.get("scripts", {})[t].find(ig_sub) for ig_sub in crawler.IGNORED_SUBSTRINGS]) == {-1}] for test_index, t in enumerate(test_scripts): - print("Running: " + manager + t) - error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT) - test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE) - test_info.set_test_command( pkg_json.get("scripts", {})[t]) - test_info.compute_test_infras() - test_info.compute_nested_test_commands( test_scripts) - test_info.compute_test_stats() - # if we're in verbose testing mode (i.e. 
getting all timing info for each test, etc) - # then, we rerun the test commands with all the commands for adding verbose_mode to - # each of the test infras involved (individually) - if crawler.TEST_VERBOSE_ALL_OUTPUT: - # we're gonna be adding our new custom scripts for verbosity testing - run_command( "mv package.json TEMP_package.json_TEMP") - test_verbosity_output = {} - for verbosity_index, test_infra in enumerate(test_info.test_infras): - verbose_test_json = crawler.output_dir + "/" \ - + "repo_" + repo_name + "_" \ - + "test_" + str(test_index) + "_"\ - + "infra_" + str(verbosity_index) + "_" \ - + crawler.TEST_VERBOSE_OUTPUT_JSON - infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS[test_infra] - if not infra_verbosity_config: # checks if it's an empty object - print("TEST VERBOSE MODE: unsupported test infra " + test_infra) - test_verbosity_output[test_infra] = { "error": True } - continue - infra_verbosity_args = infra_verbosity_config.get("args", "") - infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end - infra_verbosity_post_proc = infra_verbosity_config.get("post_processing", None) - infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_info.test_command, test_infra, infra_verbosity_args, - verbose_test_json, infra_verbosity_args_pos) - verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index) - pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command - with open("package.json", 'w') as f: - json.dump( pkg_json, f) - print("Running verbosity: " + manager + infra_verbosity_command) - verb_error, verb_output, verb_retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) - # if there's post-processing to be done - if not infra_verbosity_post_proc is None: - for out_file_obj in out_files: - infra_verbosity_post_proc(out_file_obj["output_file"]) - verbosity_index += 1 - # get the output - test_verbosity_infra = {} - test_verbosity_infra["command"] = infra_verbosity_command - test_verbosity_infra["output_files"] = out_files - if crawler.VERBOSE_MODE: - test_verbosity_infra["test_debug"] = "\nError output: " + verb_error.decode('utf-8') \ - + "\nOutput stream: " + verb_output.decode('utf-8') - test_verbosity_output[test_infra] = test_verbosity_infra - test_info.set_test_verbosity_output(test_verbosity_output) - # put the package.json back - run_command( "mv TEMP_package.json_TEMP package.json") - test_json_summary[t] = test_info.get_json_rep() + test_output_rep = {} + for test_rep_index in range(crawler.TEST_COMMAND_REPEATS): + test_rep_id = "" if crawler.TEST_COMMAND_REPEATS == 1 else "testrep_" + str(test_rep_index) + print("Running rep " + str(test_rep_index) + " of " + str(crawler.TEST_COMMAND_REPEATS - 1) + ": " + manager + t) + error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT) + test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE) + test_info.set_test_command( pkg_json.get("scripts", {})[t]) + test_info.compute_test_infras() + test_info.compute_nested_test_commands( test_scripts) + test_info.compute_test_stats() + # if we're in verbose testing mode (i.e. 
getting all timing info for each test, etc) + # then, we rerun the test commands with all the commands for adding verbose_mode to + # each of the test infras involved (individually) + if crawler.TEST_VERBOSE_ALL_OUTPUT: + # we're gonna be adding our new custom scripts for verbosity testing + run_command( "mv package.json TEMP_package.json_TEMP") + test_verbosity_output = {} + for verbosity_index, test_infra in enumerate(test_info.test_infras): + verbose_test_json = crawler.output_dir + "/" \ + + "repo_" + repo_name + "_" \ + + "test_" + str(test_index) + "_"\ + + "infra_" + str(verbosity_index) + "_" \ + + "" if test_rep_id == "" else test_rep_id + "_" \ + + crawler.TEST_VERBOSE_OUTPUT_JSON + infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS[test_infra] + if not infra_verbosity_config: # checks if it's an empty object + print("TEST VERBOSE MODE: unsupported test infra " + test_infra) + test_verbosity_output[test_infra] = { "error": True } + continue + infra_verbosity_args = infra_verbosity_config.get("args", "") + infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end + infra_verbosity_post_proc = infra_verbosity_config.get("post_processing", None) + infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_info.test_command, test_infra, infra_verbosity_args, + verbose_test_json, infra_verbosity_args_pos) + verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index) + pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command + with open("package.json", 'w') as f: + json.dump( pkg_json, f) + print("Running verbosity: " + manager + infra_verbosity_command) + verb_error, verb_output, verb_retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) + # if there's post-processing to be done + if not infra_verbosity_post_proc is None: + for out_file_obj in out_files: + infra_verbosity_post_proc(out_file_obj["output_file"]) + verbosity_index += 1 + # get the output + test_verbosity_infra = {} + test_verbosity_infra["command"] = infra_verbosity_command + test_verbosity_infra["output_files"] = out_files + if crawler.VERBOSE_MODE: + test_verbosity_infra["test_debug"] = "\nError output: " + verb_error.decode('utf-8') \ + + "\nOutput stream: " + verb_output.decode('utf-8') + test_verbosity_output[test_infra] = test_verbosity_infra + test_info.set_test_verbosity_output(test_verbosity_output) + # put the package.json back + run_command( "mv TEMP_package.json_TEMP package.json") + # if we're not doing any repeats then don't make another layer of jsons + if crawler.TEST_COMMAND_REPEATS == 1: + test_output_rep = test_info.get_json_rep() + else: + test_output_rep[test_rep_id] = test_info.get_json_rep() + test_json_summary[t] = test_output_rep return( retcode, test_json_summary) def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos): From 796dddadccb3f4f17c418a8b46f25c52ee1374c3 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Thu, 22 Jun 2023 23:13:50 -0400 Subject: [PATCH 18/39] wow embarassing string concat FAIL --- src/test_JS_repo_lib.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index 6815be4..b231fd9 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -132,7 +132,7 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): + "repo_" + repo_name + "_" \ + "test_" + str(test_index) + "_"\ + "infra_" 
+ str(verbosity_index) + "_" \ - + "" if test_rep_id == "" else test_rep_id + "_" \ + + ("" if test_rep_id == "" else test_rep_id + "_") \ + crawler.TEST_VERBOSE_OUTPUT_JSON infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS[test_infra] if not infra_verbosity_config: # checks if it's an empty object @@ -194,6 +194,7 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity out_file_object["output_file"] = output_file else: output_file = verbose_test_json[:path_index] + "/out_" + str(num_files) + "_" + verbose_test_json[path_index + 1:] + print(output_file) new_infra_verbosity_args += output_file out_file_object["output_file"] = output_file output_files += [ out_file_object ] From bfa51daafec3a4ccaee63fff69749b73fae3d84f Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Thu, 22 Jun 2023 23:22:22 -0400 Subject: [PATCH 19/39] option for output dir in batch runner --- runParallelGitRepos.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/runParallelGitRepos.sh b/runParallelGitRepos.sh index 7b15acb..7a8240a 100755 --- a/runParallelGitRepos.sh +++ b/runParallelGitRepos.sh @@ -2,10 +2,15 @@ repo_link_file=$1 config_file=$2 +output_dir=$3 if [ ! -f $config_file ]; then config_file="configs/QL_output_config.json" fi +if [ ! -d $output_dir ]; then + output_dir=`pwd` +fi + # you'll probably want to bg this -nohup parallel -j 20 -a $repo_link_file --timeout 600 --joblog job.log python3 src/diagnose_github_repo.py --repo_link {} --config $config_file +nohup parallel -j 20 -a $repo_link_file --timeout 600 --joblog job.log python3 src/diagnose_github_repo.py --repo_link {} --config $config_file --output_dir $output_dir From 6b82cd91aecc5e4d06b220412eb1d99f4ef6bf58 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 26 Jun 2023 19:55:49 +0000 Subject: [PATCH 20/39] build docker container in CI --- .github/workflows/end2end.yml | 4 +++- runDocker.sh | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/end2end.yml b/.github/workflows/end2end.yml index 7238098..12171ea 100644 --- a/.github/workflows/end2end.yml +++ b/.github/workflows/end2end.yml @@ -28,12 +28,14 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v3 - + - name: Build NPMFilter container + run: docker build -t npmfilter . - name: Run NPMFilter id: run-npm-filter env: SHA: ${{ inputs.project_sha }} URL: ${{ inputs.project_url }} + DOCKER_IMAGE: npmfilter:latest run: | IFS="/" read -r -a projectArray <<< "$URL" OrgName=${projectArray[0]} diff --git a/runDocker.sh b/runDocker.sh index 4d3f8a6..eee9e76 100755 --- a/runDocker.sh +++ b/runDocker.sh @@ -11,10 +11,14 @@ if [ ! 
-d npm_filter_docker_results ]; then mkdir npm_filter_docker_results fi +if [ -v $DOCKER_IMAGE ]; then + DOCKER_IMAGE=emarteca/npm-filter:latest +fi + docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \ --volume `pwd`/npm_filter_docker_results:/home/npm-filter/results \ --volume `pwd`/docker_configs:/home/npm-filter/docker_configs\ -w /home/npm-filter \ - emarteca/npm-filter:latest \ + $DOCKER_IMAGE \ bash -c "PATH=/home/codeql_home/codeql:$PATH; $npm_filter_command --output_dir results" -rm -r local_mount +rm -r local_mount \ No newline at end of file From ee4a9192409824dcc1bb5080571f10e66b986a6f Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 26 Jun 2023 20:53:10 +0000 Subject: [PATCH 21/39] Change CI to build/run the project-specific containers --- .github/workflows/barbosa23flaky.yml | 2 +- .github/workflows/end2endCustomContainers.yml | 107 ++++++++++++++++++ .github/workflows/smoketest.yml | 2 +- 3 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/end2endCustomContainers.yml diff --git a/.github/workflows/barbosa23flaky.yml b/.github/workflows/barbosa23flaky.yml index e16758a..ff38578 100644 --- a/.github/workflows/barbosa23flaky.yml +++ b/.github/workflows/barbosa23flaky.yml @@ -30,7 +30,7 @@ jobs: strategy: matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} fail-fast: false - uses: ./.github/workflows/end2end.yml + uses: ./.github/workflows/end2endCustomContainers.yml with: project_url: ${{ matrix.projects.project_url }} project_sha: ${{ matrix.projects.project_sha }} diff --git a/.github/workflows/end2endCustomContainers.yml b/.github/workflows/end2endCustomContainers.yml new file mode 100644 index 0000000..8dc2c79 --- /dev/null +++ b/.github/workflows/end2endCustomContainers.yml @@ -0,0 +1,107 @@ +name: Test NPMFilter End to End on a Project with custom-built containers per-project + +on: + workflow_dispatch: + inputs: + project_url: + description: 'GitHub suffix of project to test (username/project)' + required: true + type: string + project_sha: + description: 'SHA of project to test' + required: true + type: string + workflow_call: + inputs: + project_url: + description: 'GitHub suffix of project to test (username/project)' + required: true + type: string + project_sha: + description: 'SHA of project to test' + required: true + type: string +jobs: + execute: + runs-on: self-hosted + + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: Build NPMFilter container + run: | + if [ -f "project-overrides/${OrgName}-${ProjectName}.sh" ]; then + CUSTOM_INSTALL_SCRIPT="--build-arg CUSTOM_INSTALL_SCRIPT=project-overrides/${OrgName}-${ProjectName}.sh" + fi + docker build -t npmfilter --build-arg REPO_LINK=${{ inputs.project_url }} --REPO_COMMIT=${{ inputs.project_sha }} $CUSTOM_INSTALL_SCRIPT . 
+ - name: Run NPMFilter + id: run-npm-filter + env: + SHA: ${{ inputs.project_sha }} + URL: ${{ inputs.project_url }} + DOCKER_IMAGE: npmfilter:latest + run: | + IFS="/" read -r -a projectArray <<< "$URL" + OrgName=${projectArray[0]} + ProjectName=${projectArray[1]} + LogDir=${URL//\//-} + echo "LogDir=$LogDir" >> $GITHUB_OUTPUT + + echo "Running NPMFilter on $OrgName/$ProjectName@$SHA" + + mkdir -p docker_configs/ + cat >docker_configs/debug_filter_config.json < tests-overview.csv + + # Check if tests were found + TestData=$(cat tests-overview.csv) + IFS="," read -r -a testCount <<< $(python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/) + TestsRun=${testCount[0]} + if [ $TestsRun -le 2 ]; then + echo "ERROR: No tests found." + exit -1 + else + echo "OK: ${TestsRun} tests found!" + fi + - name: Upload output + uses: actions/upload-artifact@v2 + with: + name: npm_filter_docker_results + path: npm_filter_docker_results \ No newline at end of file diff --git a/.github/workflows/smoketest.yml b/.github/workflows/smoketest.yml index d1fa45e..a9f84e8 100644 --- a/.github/workflows/smoketest.yml +++ b/.github/workflows/smoketest.yml @@ -25,7 +25,7 @@ jobs: strategy: matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} fail-fast: false - uses: ./.github/workflows/end2end.yml + uses: ./.github/workflows/end2endCustomContainers.yml with: project_url: ${{ matrix.projects.project_url }} project_sha: ${{ matrix.projects.project_sha }} From 51d8d0f2b6c66e5b6e42714bf9110956c370b802 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 26 Jun 2023 20:54:25 +0000 Subject: [PATCH 22/39] Change CI to build/run the project-specific containers --- .github/workflows/end2endCustomContainers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/end2endCustomContainers.yml b/.github/workflows/end2endCustomContainers.yml index 8dc2c79..5ca6227 100644 --- a/.github/workflows/end2endCustomContainers.yml +++ b/.github/workflows/end2endCustomContainers.yml @@ -33,7 +33,7 @@ jobs: if [ -f "project-overrides/${OrgName}-${ProjectName}.sh" ]; then CUSTOM_INSTALL_SCRIPT="--build-arg CUSTOM_INSTALL_SCRIPT=project-overrides/${OrgName}-${ProjectName}.sh" fi - docker build -t npmfilter --build-arg REPO_LINK=${{ inputs.project_url }} --REPO_COMMIT=${{ inputs.project_sha }} $CUSTOM_INSTALL_SCRIPT . + docker build -t npmfilter --build-arg REPO_LINK=${{ inputs.project_url }} --build-arg REPO_COMMIT=${{ inputs.project_sha }} $CUSTOM_INSTALL_SCRIPT . - name: Run NPMFilter id: run-npm-filter env: From 4f32ae0bf551825046feb3b8f073d28abdab166b Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 26 Jun 2023 20:57:58 +0000 Subject: [PATCH 23/39] Change CI to build/run the project-specific containers --- .github/workflows/end2endCustomContainers.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/end2endCustomContainers.yml b/.github/workflows/end2endCustomContainers.yml index 5ca6227..b4388a4 100644 --- a/.github/workflows/end2endCustomContainers.yml +++ b/.github/workflows/end2endCustomContainers.yml @@ -33,7 +33,7 @@ jobs: if [ -f "project-overrides/${OrgName}-${ProjectName}.sh" ]; then CUSTOM_INSTALL_SCRIPT="--build-arg CUSTOM_INSTALL_SCRIPT=project-overrides/${OrgName}-${ProjectName}.sh" fi - docker build -t npmfilter --build-arg REPO_LINK=${{ inputs.project_url }} --build-arg REPO_COMMIT=${{ inputs.project_sha }} $CUSTOM_INSTALL_SCRIPT . 
+ docker build -t npmfilter --build-arg REPO_LINK=https://github.com/${{ inputs.project_url }} --build-arg REPO_COMMIT=${{ inputs.project_sha }} $CUSTOM_INSTALL_SCRIPT . - name: Run NPMFilter id: run-npm-filter env: From c9bd3faf70f71c8560269956af07b6617db26733 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 26 Jun 2023 20:59:39 +0000 Subject: [PATCH 24/39] fix bash sourcing in runDocker --- runDocker.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runDocker.sh b/runDocker.sh index eee9e76..fb1a3cf 100755 --- a/runDocker.sh +++ b/runDocker.sh @@ -20,5 +20,5 @@ docker run --mount type=bind,source=`pwd`/local_mount,destination=/mount \ --volume `pwd`/docker_configs:/home/npm-filter/docker_configs\ -w /home/npm-filter \ $DOCKER_IMAGE \ - bash -c "PATH=/home/codeql_home/codeql:$PATH; $npm_filter_command --output_dir results" + bash -c "source /envfile; PATH=/home/codeql_home/codeql:\$PATH; $npm_filter_command --output_dir results" rm -r local_mount \ No newline at end of file From d76ac57c04c6c9c3ca38a9ce83f3b5b5361714b0 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Mon, 26 Jun 2023 21:06:48 +0000 Subject: [PATCH 25/39] back to runDocker.sh in CI --- .github/workflows/end2end.yml | 4 ++++ .github/workflows/end2endCustomContainers.yml | 7 +++++-- .github/workflows/smoketest.yml | 11 ++++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/workflows/end2end.yml b/.github/workflows/end2end.yml index 12171ea..d3e25f8 100644 --- a/.github/workflows/end2end.yml +++ b/.github/workflows/end2end.yml @@ -63,6 +63,10 @@ jobs: }, "test": { "track_tests": true, + "test_verbose_all_output": { + "do_verbose_tracking": false, + "verbose_json_output_file": "verbose_test_report.json" + }, "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"], "timeout": 1000 diff --git a/.github/workflows/end2endCustomContainers.yml b/.github/workflows/end2endCustomContainers.yml index b4388a4..fa0ad9a 100644 --- a/.github/workflows/end2endCustomContainers.yml +++ b/.github/workflows/end2endCustomContainers.yml @@ -69,6 +69,10 @@ jobs: "track_tests": true, "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"], + "test_verbose_all_output": { + "do_verbose_tracking": false, + "verbose_json_output_file": "verbose_test_report.json" + }, "timeout": 1000 }, "meta_info": { @@ -84,8 +88,7 @@ jobs: CUR_DIR=$(pwd) # Run NPMFilter - # ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/$URL $SHA --config docker_configs/debug_filter_config.json - docker run --volume $CUR_DIR/results/:/home/npm-filter/results -w /home/npm-filter npmfilter:latest + ./runDocker.sh python3 src/diagnose_github_repo.py --repo_link_and_SHA https://github.com/$URL $SHA --config docker_configs/debug_filter_config.json # Get tests overview python3 output_proc_scripts/count_tests_run.py npm_filter_docker_results/ > tests-overview.csv diff --git a/.github/workflows/smoketest.yml b/.github/workflows/smoketest.yml index a9f84e8..f6dd45b 100644 --- a/.github/workflows/smoketest.yml +++ b/.github/workflows/smoketest.yml @@ -20,7 +20,7 @@ jobs: echo "matrix-projects<<__EOF__" >> $GITHUB_OUTPUT echo $PROJECTS_JSON >> $GITHUB_OUTPUT echo "__EOF__" >> $GITHUB_OUTPUT - execute: + execute-specialized-container: needs: [build-matrix] strategy: matrix: ${{ 
fromJSON(needs.build-matrix.outputs.matrix-projects) }} @@ -29,3 +29,12 @@ jobs: with: project_url: ${{ matrix.projects.project_url }} project_sha: ${{ matrix.projects.project_sha }} + execute-generic-container: + needs: [build-matrix] + strategy: + matrix: ${{ fromJSON(needs.build-matrix.outputs.matrix-projects) }} + fail-fast: false + uses: ./.github/workflows/end2end.yml + with: + project_url: ${{ matrix.projects.project_url }} + project_sha: ${{ matrix.projects.project_sha }} \ No newline at end of file From e0c26e32359b0d2c6959711990a469be0f7df7a6 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Tue, 27 Jun 2023 00:46:57 +0000 Subject: [PATCH 26/39] try to fix CI output logging --- .github/workflows/end2end.yml | 4 ++-- .github/workflows/end2endCustomContainers.yml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/end2end.yml b/.github/workflows/end2end.yml index d3e25f8..3b6c3c8 100644 --- a/.github/workflows/end2end.yml +++ b/.github/workflows/end2end.yml @@ -64,7 +64,7 @@ jobs: "test": { "track_tests": true, "test_verbose_all_output": { - "do_verbose_tracking": false, + "do_verbose_tracking": true, "verbose_json_output_file": "verbose_test_report.json" }, "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", @@ -101,5 +101,5 @@ jobs: - name: Upload output uses: actions/upload-artifact@v2 with: - name: npm_filter_docker_results + name: results_${{ inputs.project_url }}_${{ inputs.project_sha }} path: npm_filter_docker_results \ No newline at end of file diff --git a/.github/workflows/end2endCustomContainers.yml b/.github/workflows/end2endCustomContainers.yml index fa0ad9a..3fc0dba 100644 --- a/.github/workflows/end2endCustomContainers.yml +++ b/.github/workflows/end2endCustomContainers.yml @@ -70,7 +70,7 @@ jobs: "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"], "test_verbose_all_output": { - "do_verbose_tracking": false, + "do_verbose_tracking": true, "verbose_json_output_file": "verbose_test_report.json" }, "timeout": 1000 @@ -106,5 +106,5 @@ jobs: - name: Upload output uses: actions/upload-artifact@v2 with: - name: npm_filter_docker_results - path: npm_filter_docker_results \ No newline at end of file + name: results_${{ inputs.project_url }}_${{ inputs.project_sha }} + path: npm_filter_docker_results \ No newline at end of file From 3cdf18bec565bb7dd92d2e5c11c8d0f986918d5d Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Tue, 27 Jun 2023 00:55:31 +0000 Subject: [PATCH 27/39] try to fix CI output logging --- .github/workflows/end2end.yml | 4 +++- .github/workflows/end2endCustomContainers.yml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/end2end.yml b/.github/workflows/end2end.yml index 3b6c3c8..e38c2bd 100644 --- a/.github/workflows/end2end.yml +++ b/.github/workflows/end2end.yml @@ -98,8 +98,10 @@ jobs: else echo "OK: ${TestsRun} tests found!" 
fi + + echo "LOGNAME=results-${OrgName}-${ProjectName}-${SHA}" >> "$GITHUB_OUTPUT" - name: Upload output uses: actions/upload-artifact@v2 with: - name: results_${{ inputs.project_url }}_${{ inputs.project_sha }} + name: ${{ steps.run-npm-filter.outputs.LOGNAME }} path: npm_filter_docker_results \ No newline at end of file diff --git a/.github/workflows/end2endCustomContainers.yml b/.github/workflows/end2endCustomContainers.yml index 3fc0dba..ba8432e 100644 --- a/.github/workflows/end2endCustomContainers.yml +++ b/.github/workflows/end2endCustomContainers.yml @@ -103,8 +103,10 @@ jobs: else echo "OK: ${TestsRun} tests found!" fi + + echo "LOGNAME=results-${OrgName}-${ProjectName}-${SHA}" >> "$GITHUB_OUTPUT" - name: Upload output uses: actions/upload-artifact@v2 with: - name: results_${{ inputs.project_url }}_${{ inputs.project_sha }} + name: ${{ steps.run-npm-filter.outputs.LOGNAME }} path: npm_filter_docker_results \ No newline at end of file From 7d2f03b2c1f301c93cdc60c775731f0874c8d71e Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Tue, 27 Jun 2023 17:16:39 -0400 Subject: [PATCH 28/39] fixing command instrumentation for strings that have test infras but arent a call to them; and a few minor tweaks --- configs/verbose.json | 1 + src/TestInfo.py | 2 +- src/output_parsing/test_output_proc.py | 16 ++++++++++++---- src/test_JS_repo_lib.py | 10 +++++++++- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/configs/verbose.json b/configs/verbose.json index b29a4f1..0a630aa 100644 --- a/configs/verbose.json +++ b/configs/verbose.json @@ -1,5 +1,6 @@ { "test": { + "test_command_repeats": 1, "test_verbose_all_output": { "do_verbose_tracking": true } } } diff --git a/src/TestInfo.py b/src/TestInfo.py index 61f08f0..0cb39a1 100644 --- a/src/TestInfo.py +++ b/src/TestInfo.py @@ -99,7 +99,7 @@ class TestInfo: "gulp lint": "gulp lint -- linter" } - TRACKED_RUNNERS = [ "node", "babel-node", "grunt" ] + TRACKED_RUNNERS = [ "node", "babel-node", "grunt", "lerna" ] def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE): self.success = success diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py index bd52da5..e1b6ee5 100644 --- a/src/output_parsing/test_output_proc.py +++ b/src/output_parsing/test_output_proc.py @@ -8,8 +8,12 @@ def parse_mocha_json_to_csv(output_file, new_output_file=None): # convert an xml file to json # used to convert the xunit reporter output from mocha into json # code from https://www.geeksforgeeks.org/python-xml-to-json/ - with open(output_file) as xml_file: - data_dict = xmltodict.parse(xml_file.read()).get("testsuite", {}) + data_dict = {} + try: + with open(output_file) as xml_file: + data_dict = xmltodict.parse(xml_file.read()).get("testsuite", {}) + except: + data_dict = {} # the format: all the tests are in a top-level list called "testcase" test_suites = [] test_names = [] @@ -34,8 +38,12 @@ def parse_mocha_json_to_csv(output_file, new_output_file=None): def parse_jest_json_to_csv(output_file, new_output_file=None): if new_output_file is None: new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension - with open(output_file) as json_file: - data_dict = json.loads(json_file.read()) + data_dict = {} + try: + with open(output_file) as json_file: + data_dict = json.loads(json_file.read()) + except: + data_dict = {} # the format: all tests are in a top level list called "testResults" # this is a list of objects that have "assertionResults" representing the test suites # 
"assertionResults" is a list of objects that have the test data diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index b231fd9..abf56cb 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -134,7 +134,7 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): + "infra_" + str(verbosity_index) + "_" \ + ("" if test_rep_id == "" else test_rep_id + "_") \ + crawler.TEST_VERBOSE_OUTPUT_JSON - infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS[test_infra] + infra_verbosity_config = TestInfo.VERBOSE_TESTS_EXTRA_ARGS.get(test_infra) if not infra_verbosity_config: # checks if it's an empty object print("TEST VERBOSE MODE: unsupported test infra " + test_infra) test_verbosity_output[test_infra] = { "error": True } @@ -205,6 +205,14 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity infra_calls = test_script.split(test_infra) instrumented_test_command = [] for i, infra_call in enumerate(infra_calls): + # if the last char in the string is not whitespace and not a command delimiter, + # and it's not the last string in the split + # then it's a string that is appended to the front of the name of the infra (e.g., "\"jest\"") + # and not a call + if i < len(infra_calls) - 1 and infra_call != "" and (not infra_call[-1].isspace()) and (not any([infra_call.endswith(s) for s in command_split_chars])): + instrumented_test_command += [ infra_call ] + continue + # if the current call is empty string # then this is the call to the testing infra and the next is the arguments # so, skip this one From 76be9c1e47959f87b90d42b08e5ef9e7bd418bff Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 7 Jul 2023 16:22:27 -0400 Subject: [PATCH 29/39] adding option to specify a custom lock file to be copied over pre-install --- src/diagnose_github_repo.py | 3 +++ src/diagnose_npm_package.py | 3 +++ src/test_JS_repo_lib.py | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py index d2a5843..ba2d938 100644 --- a/src/diagnose_github_repo.py +++ b/src/diagnose_github_repo.py @@ -27,6 +27,7 @@ class RepoWalker(): RM_AFTER_CLONING = False SCRIPTS_OVER_CODE = [] CUSTOM_SETUP_SCRIPTS = [] + CUSTOM_LOCK_FILES = [] QL_QUERIES = [] DO_INSTALL = True @@ -93,6 +94,8 @@ def set_up_config( self, config_file): cf_dict = config_json.get( "install", {}) self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL) self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT) + self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p + for p in cf_dict.get( "custom_lock_files", self.CUSTOM_LOCK_FILES)] cf_dict = config_json.get( "build", {}) self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD) diff --git a/src/diagnose_npm_package.py b/src/diagnose_npm_package.py index 59daa28..efcb4c1 100644 --- a/src/diagnose_npm_package.py +++ b/src/diagnose_npm_package.py @@ -20,6 +20,7 @@ class NPMSpider(scrapy.Spider): RM_AFTER_CLONING = False SCRIPTS_OVER_CODE = [] CUSTOM_SETUP_SCRIPTS = [] + CUSTOM_LOCK_FILES = [] QL_QUERIES = [] DO_INSTALL = True @@ -85,6 +86,8 @@ def set_up_config( self, config_file): cf_dict = config_json.get( "install", {}) self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL) self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT) + self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p + for p in cf_dict.get( "custom_lock_files", 
self.CUSTOM_LOCK_FILES)] cf_dict = config_json.get( "build", {}) self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD) diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index abf56cb..f65624e 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -307,6 +307,11 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) manager = "" + # if there's custom lock files, copy them into the repo (repo is "." since we're in the repo currently) + if crawler.CUSTOM_LOCK_FILES != []: + for custom_lock in crawler.CUSTOM_LOCK_FILES: + run_command("cp " + custom_lock + " .") + # first, check if there is a custom install # this runs custom scripts the same way as the scripts_over_code below; only # difference is it's before the npm-filter run From 83d2179a3f6a8e93a0c8ad182c81007a87d18e96 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 7 Jul 2023 17:59:49 -0400 Subject: [PATCH 30/39] adding option for diagnosing a local dir --- src/diagnose_github_repo.py | 22 +++++++++++++++++++++- src/test_JS_repo_lib.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py index ba2d938..948ebb8 100644 --- a/src/diagnose_github_repo.py +++ b/src/diagnose_github_repo.py @@ -20,6 +20,11 @@ def get_repo_and_SHA_from_repo_link(repo): commit_SHA = split_res[1] return(split_res[0], commit_SHA) +# same format as getting the name from the repo link: we want the name of the dir, +# so after the last slash (and if there's no slash the whole name is returned) +def get_name_from_path(repo_local_path): + return( repo_local_path.split("/")[-1]) + class RepoWalker(): name = "npm-pkgs" @@ -60,6 +65,9 @@ def __init__(self, config_file="", output_dir = "."): def set_repo_links(self, repo_links): self.repo_links = repo_links + def set_local_repo_path(self, repo_local_dir): + self.repo_local_dir = repo_local_dir + def set_up_config( self, config_file): if not os.path.exists(config_file): if config_file != "": @@ -126,22 +134,33 @@ def iterate_over_repos( self): json_results["metadata"]["repo_commit_SHA"] = commit_SHA with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f: json.dump( json_results, f, indent=4) + if self.repo_local_dir: + package_name = get_name_from_path( self.repo_local_dir) + json_results = diagnose_local_dir(self.repo_local_dir, self) + json_results["metadata"] = {} + json_results["metadata"]["repo_local_dir"] = repo_local_dir + with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f: + json.dump( json_results, f, indent=4) argparser = argparse.ArgumentParser(description="Diagnose github repos, from a variety of sources") argparser.add_argument("--repo_list_file", metavar="rlistfile", type=str, nargs='?', help="file with list of github repo links") argparser.add_argument("--repo_link", metavar="rlink", type=str, nargs='?', help="single repo link") +argparser.add_argument("--repo_local_dir", metavar="rlocallink", type=str, nargs='?', help="path to local directory that has the repo code") argparser.add_argument("--repo_link_and_SHA", metavar="rlink_and_SHA", type=str, nargs='*', help="single repo link, with optional commit SHA") argparser.add_argument("--config", metavar="config_file", type=str, nargs='?', help="path to config file") argparser.add_argument("--output_dir", metavar="output_dir", type=str, nargs='?', help="directory for results to be output to") args 
= argparser.parse_args() config = args.config if args.config else "" - output_dir = args.output_dir if args.output_dir else "." walker = RepoWalker(config_file=config, output_dir=output_dir) +repo_local_dir = None +if args.repo_local_dir: + repo_local_dir = os.path.abspath(args.repo_local_dir) + repo_links = [] if args.repo_list_file: try: @@ -159,6 +178,7 @@ def iterate_over_repos( self): # so we join all the repo_link args into a space-delimited string repo_links += [' '.join(args.repo_link_and_SHA)] walker.set_repo_links( repo_links) +walker.set_local_repo_path(repo_local_dir) walker.iterate_over_repos() diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index f65624e..a60eae9 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -281,7 +281,40 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): else: print( "Package repository already exists. Using existing directory: " + repo_name) + # diagnose the repo dir + return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=commit_SHA)) +def diagnose_local_dir(repo_dir, crawler): + json_out = {} + repo_name = "" + cur_dir = os.getcwd() + repo_name = repo_dir.split("/")[-1] + if not os.path.isdir(repo_dir): + print("ERROR using local directory: " + repo_dir + " invalid directory path") + json_out["setup"] = {} + json_out["setup"]["local_dir_ERROR"] = True + return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) + + print("Diagnosing: " + repo_name + " --- from: " + repo_dir) + if not os.path.isdir("TESTING_REPOS"): + os.mkdir("TESTING_REPOS") + os.chdir("TESTING_REPOS") + + # if the repo already exists, dont clone it + if not os.path.isdir( repo_name): + print( "Copying package directory") + error, output, retcode = run_command( "cp -r " + repo_dir + " " + repo_name) + if retcode != 0: + print("ERROR copying the directory. Exiting now.") + json_out["setup"] = {} + json_out["setup"]["local_dir_ERROR"] = True + return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) + else: + print( "Package directory already exists. 
Using existing directory: " + repo_name)
+	# diagnose the repo dir
+	return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir))
+
+def diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=None):
 	# move into the repo and begin testing
 	os.chdir( repo_name)
 

From a6bb11102273f165e7c37461cd70c1c22298ecc4 Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Mon, 10 Jul 2023 21:17:14 -0400
Subject: [PATCH 31/39] fixing bug in verbose test command instrumentation
 when there's args

---
 src/TestInfo.py         |  2 ++
 src/test_JS_repo_lib.py | 26 ++++++++++++++++++++------
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/TestInfo.py b/src/TestInfo.py
index 0cb39a1..c5bec7e 100644
--- a/src/TestInfo.py
+++ b/src/TestInfo.py
@@ -228,6 +228,8 @@ def called_in_command( str_comm, command, manager):
 		return( True)
 	if command.find( "cross-env CI=true " + check_comm) > -1:
 		return( True)
+	if command.find( "cross-env TZ=utc " + check_comm) > -1:
+		return( True)
 	if command.find( "opener " + check_comm) > -1:
 		return( True)
 	if command.find( "gulp " + check_comm) > -1:
diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py
index a60eae9..b1e94db 100644
--- a/src/test_JS_repo_lib.py
+++ b/src/test_JS_repo_lib.py
@@ -203,16 +203,30 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity
 	# split into sub-commands
 	command_split_chars = [ "&&", ";"]
 	infra_calls = test_script.split(test_infra)
-	instrumented_test_command = []
-	for i, infra_call in enumerate(infra_calls):
+	real_calls = []
+	for i, maybe_call in enumerate(infra_calls):
 		# if the last char in the string is not whitespace and not a command delimiter,
 		# and it's not the last string in the split
 		# then it's a string that is appended to the front of the name of the infra (e.g., "\"jest\"")
 		# and not a call
-		if i < len(infra_calls) - 1 and infra_call != "" and (not infra_call[-1].isspace()) and (not any([infra_call.endswith(s) for s in command_split_chars])):
-			instrumented_test_command += [ infra_call ]
-			continue
-
+		# rebuild it
+		if i < len(infra_calls) - 1 and maybe_call != "" and (not maybe_call[-1].isspace()) and (not any([maybe_call.endswith(s) for s in command_split_chars])):
+			if len(real_calls) > 0:
+				real_calls[-1] += test_infra + maybe_call
+			continue
+		# if the first char in the string is not whitespace and not a command delimiter,
+		# and it's not the first string in the split
+		# then it's a string that is appended to the back of the name of the infra (e.g., jest".config.js")
+		# and not a call either
+		# rebuild it
+		if i > 0 and maybe_call != "" and (not maybe_call[0].isspace()) and (not any([maybe_call.startswith(s) for s in command_split_chars])):
+			if len(real_calls) > 0:
+				real_calls[-1] += test_infra + maybe_call
+			continue
+		real_calls += [ maybe_call ]
+	infra_calls = real_calls
+	instrumented_test_command = []
+	for i, infra_call in enumerate(infra_calls):
 		# if the current call is empty string
 		# then this is the call to the testing infra and the next is the arguments
 		# so, skip this one

From 123544e937327da1761a236cacdb874fb8d718cc Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Tue, 11 Jul 2023 00:21:55 -0400
Subject: [PATCH 32/39] another lil bug fix in the verbose test mode
 instrumentation

---
 src/output_parsing/test_output_proc.py | 9 ++++++---
 src/test_JS_repo_lib.py                | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py
index e1b6ee5..e757291 100644
--- 
a/src/output_parsing/test_output_proc.py +++ b/src/output_parsing/test_output_proc.py @@ -69,6 +69,9 @@ def parse_jest_json_to_csv(output_file, new_output_file=None): test_stdout += [";".join(test_results.get("failureMessages", []))] test_pass_fail += [test_status] # passed/failed/pending -- if not present assume failed res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail))) - res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] - with open(new_output_file, 'w') as csv_file: - csv_file.write(res_df.to_csv()) \ No newline at end of file + try: + res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] + with open(new_output_file, 'w') as csv_file: + csv_file.write(res_df.to_csv()) + except: + print("ERROR in data for file " + new_output_file + " -- no output printed. skipping to next step...") \ No newline at end of file diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index b1e94db..abb7fbd 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -234,8 +234,8 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity if infra_call == "" and i < len(infra_calls) - 1: instrumented_test_command += [ "" ] continue - # if the first call is non-empty, then it's pre-test-infra and we skip it too - elif infra_call != "" and i == 0: + # if the first call is non-empty and there's more than one call, then it's pre-test-infra and we skip it too + elif len(infra_calls) > 1 and infra_call != "" and i == 0: instrumented_test_command += [ "" ] continue # get the arguments, splitting off from any other non-test commands that might be From ca0a8a2bc202836a9c1ee469bcb2e1fa427bd275 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Fri, 4 Aug 2023 13:04:14 +0000 Subject: [PATCH 33/39] Increase timeout from 15 minutes to 3 hours. Running on throttled configs results in a need for a greater timeout... 
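
The 10800-second ceilings below are baked-in defaults; a run that wants different limits can still override them per-run through a custom config file, since the `install`, `build`, and `test` sections each accept a `timeout` field (in seconds, per the comments in the source). A minimal sketch of such an override config:

```json
{
	"install": { "timeout": 10800 },
	"build": { "timeout": 10800 },
	"test": { "timeout": 10800 }
}
```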
--- Dockerfile | 1 - src/diagnose_github_repo.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3710a79..f9427b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,6 @@ COPY get_rel_project_reqs.js /home/npm-filter RUN apt-get update \ && apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel -RUN apt update RUN apt -y install python3-pip RUN pip3 install bs4 scrapy xmltodict pandas diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py index 948ebb8..ee7f09b 100644 --- a/src/diagnose_github_repo.py +++ b/src/diagnose_github_repo.py @@ -51,10 +51,10 @@ class RepoWalker(): TRACKED_BUILD_COMMANDS = ["build", "compile", "init"] # timeouts for stages, in seconds - INSTALL_TIMEOUT = 1000 + INSTALL_TIMEOUT = 10800 # 3 hours # note: these are timeouts per *script* in the stage of the process - BUILD_TIMEOUT = 1000 - TEST_TIMEOUT = 1000 + BUILD_TIMEOUT = 10800 # 3 hours + TEST_TIMEOUT = 10800 # 3 hours QL_CUTOFF = 5 # ignore if there are < 5 results From db4382e140b065aa237536dd15d087785c65765c Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 4 Aug 2023 18:55:00 -0400 Subject: [PATCH 34/39] docs --- README.md | 27 ++++++++++++++++++++++++-- configs/README.md | 17 ++++++++++++++-- configs/default_filter_config.json | 3 ++- src/output_parsing/test_output_proc.py | 17 ++++++++++++++++ src/test_JS_repo_lib.py | 1 + 5 files changed, 60 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 84de04d..11c31da 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ python src/diagnose_github_repo.py [--repo_list_file [rlistfile]] [--repo_link [rlink]] [--repo_link_and_SHA [rlink_and_SHA]] + [--repo_local_dir [path_to_local_dir]] [--config [config_file]] [--output_dir [output_dir]] ``` @@ -35,6 +36,7 @@ All arguments are optional, although the tool will not do anything if no repo li ``` * `--repo_link [rlink]`: a link to a single GitHub repo to be analyzed, e.g., `https://github.com/expressjs/body-parser` * `--repo_link_and_SHA [rlink_and_SHA]`: a link to a single GitHub repo to be analyzed, followed by a space-delimited commit SHA to analyze the repo at, e.g., `https://github.com/expressjs/body-parser d0a214b3beded8a9cd2dcb51d355f92c9ead81d4` +* `repo_local_dir`: path to a local directory containing the source code of a repo/package to be diagnosed * `--config [config_file]`: path to a configuration file for the tool (config options explained in [the config file section](#configuration-file)) * `--output_dir [output_dir]`: path to a directory in which to output the tool's results files (shape of results are explained in [the output section](#output)) @@ -73,6 +75,7 @@ The output is organized into the following top-level fields in the JSON, in orde * if it runs other test commands, then a list of these commands are included (`nested_test_commands`) * whether or not it timed out (`timed_out`) * if it does run new user tests, then the number of passing and number of failing tests (`num_passing`, `num_failing`) + * if verbose testing is specified as an option, then there will be an additional file of extra test output produced * `scripts_over_code`: an object with fields for each of the scripts run over the package source code. For each script, the tool lists its output and if there was an error. * `QL_queries`: an object with fields for each of the QL queries run over the package source code. 
For each script, the tool lists the output (if running in verbose mode), and if there was an error. * `metadata`: an object with fields for some metadata about the package: repository link, commit SHA if one was specified @@ -132,9 +135,29 @@ The output of each QL query is saved to a CSV file in the same directory as the ### Running with docker To be safe, you should probably run any untrusted code in a sandbox. Since the entire point of this tool is to run code from a set of packages/projects you didn't write, we assume most of this code will fall into the untrusted category. -We host the docker container [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter); if you edit the package source code and want to run your version in a docker container, we have included the docker build command below. -#### Building docker (if you've updated the npm-filter source code) +We host the generic docker container [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter); if you edit the package source code and want to run your version in a docker container, we have included the docker build command below. + +The generic docker container runs on any package or repo specified. +However, it is pre-built with default versions of node and npm. +There is also the option to build a _repo-specific_ docker container. +In this case, the container is built with the particular version of node and npm specified in the repo's `package.json` configuration file. +The container is also pre-built with the install and build phases of `npm-filter` run, so that you can then run the tests in the container without waiting for any setup. + +#### Building a container-specific docker +If you want to build a container specific to a particular repo, use the following command: +``` +# general use +docker build -t emarteca/npm-filter --build-arg REPO_LINK=[github link to repo] [--build-arg REPO_COMMIT=[specific commit SHA]] + +# specific example for memfs +docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs + +# another example, for memfs at a specific commit +docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs --build-arg REPO_COMMIT=863f373185837141504c05ed19f7a253232e0905 +``` + +#### Building generic docker (if you've updated the npm-filter source code) Note: you don't need to do this if you're using npm-filter out of the box. In that case, you'll pull directly from DockerHub. ``` diff --git a/configs/README.md b/configs/README.md index b387231..a79313e 100644 --- a/configs/README.md +++ b/configs/README.md @@ -3,6 +3,7 @@ The configuration file is a JSON, organized by stages of npm-filter analysis. The stages are as follows: * `install`: package installation. Users can specify: * `timeout`: number of millisections after which, if the install is not complete, the process bails and is considered timed out + * `do_install`: if false, skip the install stage * `dependencies`: package dependency tracking (this is the libraries the current package depends on, both directly and transitively). Users can specify: * `track_deps`: if true, this specifies to compute the package dependencies * `include_dev_deps`: if true, this specifies to include the `devDependencies` in the dependency computation @@ -10,10 +11,14 @@ The stages are as follows: * `build`: package compile/build stage. Users can specify: * `tracked_build_commands`: a list of build commands to test (any npm script with one of these commands as a substring will be tested). 
   * `tracked_build_commands`: a list of build commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the build stage.
   * `timeout`: timeout in seconds, per build command
+  * `track_build`: if false, skip the build stage
 * `test`: package test stage. Users can specify:
   * `track_tests`: if true, then the tool will run this testing diagnostic stage
   * `tracked_test_commands`: a list of test commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the test stage.
   * `timeout`: timeout in seconds, per test command
+  * `test_verbose_all_output`: an object with two fields to configure the "verbose" test tracking option: here, the output and some metrics (runtime, pass/fail status, etc.) for each test are written to a specified file. Note that currently we only support this option for the `jest` and `mocha` test infras.
+    * `do_verbose_tracking`: if true, do this verbose test tracking
+    * `verbose_json_output_file`: name of the file to which to save this verbose output
 * `meta_info`: any analysis-level configurations. Users can specify:
   * `VERBOSE_MODE`: if true, then the output JSON file will include the full output of all the commands run. Mainly for debugging.
   * `ignored_commands`: commands to ignore: if these are present in the npm script name, then they are not run even if they otherwise fall into a category of commands to run (mainly used to exclude any interactive-mode commands, such as tests with `watch`)
@@ -21,6 +26,7 @@ The stages are as follows:
   * `rm_after_cloning`: if true, delete the package source code after the tool is done running. Strongly recommended if running over a large batch of packages.
   * `scripts_over_code`: list of paths to script files to run over the package source code. Note that these paths are relative to the location of **the config file**.
   * `QL_queries`: list of paths to QL query files to run over the package source code. Like the scripts, these paths are relative to the location of the config file.
+  * `custom_setup_scripts`: list of paths to script files to run over the package code after cloning, but before any of the stages of `npm-filter` are actually run. Commonly used to replace the default install stage (i.e., set `do_install` to `false`). Like all the other scripts, these paths are relative to the location of the config file.
 
 Users can customize any of the configuration fields by providing a JSON file with the desired fields modified. Default values are used for any fields not specified.
 
@@ -29,18 +35,24 @@ As a demonstrative example, the default configuration is included below.
 ```
 {
     "install": {
-        "timeout": 1000
+        "timeout": 1000,
+        "do_install": true
     },
     "dependencies": {
         "track_deps": false,
         "include_dev_deps": false
     },
     "build": {
+        "track_build": true,
         "tracked_build_commands": ["build", "compile", "init"],
         "timeout": 1000
     },
     "test": {
         "track_tests": true,
+        "test_verbose_all_output": {
+            "do_verbose_tracking": false,
+            "verbose_json_output_file": "verbose_test_report.json"
+        },
         "tracked_test_commands": ["test", "unit", "cov", "ci", "integration",
             "lint", "travis", "e2e", "bench",
             "mocha", "jest", "ava", "tap", "jasmine"],
         "timeout": 1000
@@ -51,7 +63,8 @@ As a demonstrative example, the default configuration is included below.
     "ignored_substrings": ["--watch", "nodemon"],
     "rm_after_cloning": false,
     "scripts_over_code": [ ],
-    "QL_queries": [ ]
+    "QL_queries": [ ],
+    "custom_setup_scripts": [ ]
     }
 }
 ```
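A minimal custom config exercising the new options documented above might look like the following sketch (not part of the patch; the script and file names are hypothetical). Only fields that differ from the defaults need to appear, since unspecified fields keep their default values:

```
# Hypothetical example: write a custom npm-filter config that skips the
# default install stage, enables the new verbose test tracking, and runs a
# custom setup script instead. File/script names here are made up.
import json

custom_config = {
    "install": { "do_install": False },
    "test": {
        "test_verbose_all_output": {
            "do_verbose_tracking": True,
            "verbose_json_output_file": "verbose_test_report.json"
        }
    },
    "meta_info": {
        # assumed location of custom_setup_scripts, per the docs above
        "custom_setup_scripts": [ "./my_install_setup.sh" ]
    }
}

with open("my_verbose_config.json", 'w') as f:
    json.dump(custom_config, f, indent=4)
```

The resulting file would then be passed to the tool via `--config my_verbose_config.json`.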
"ignored_substrings": ["--watch", "nodemon"], "rm_after_cloning": false, "scripts_over_code": [ ], - "QL_queries": [ ] + "QL_queries": [ ], + "custom_setup_scripts": [ ] } } ``` diff --git a/configs/default_filter_config.json b/configs/default_filter_config.json index 56d0149..d0d8fa4 100644 --- a/configs/default_filter_config.json +++ b/configs/default_filter_config.json @@ -29,6 +29,7 @@ "ignored_substrings": ["--watch", "nodemon"], "rm_after_cloning": false, "scripts_over_code": [ ], - "QL_queries": [ ] + "QL_queries": [ ], + "custom_setup_scripts": [ ] } } \ No newline at end of file diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py index e757291..5d4bc3b 100644 --- a/src/output_parsing/test_output_proc.py +++ b/src/output_parsing/test_output_proc.py @@ -2,6 +2,14 @@ import xmltodict import pandas as pd +# parse the output of mocha xunit reporter to a csv +# does not delete the original xunit output file +# outputs include, per test (in this order): +# - test suite it's a part of +# - name of the test itself +# - runtime of the test +# - stdout of the test (if any) +# - pass/fail status (could also be "pending") def parse_mocha_json_to_csv(output_file, new_output_file=None): if new_output_file is None: new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension @@ -35,6 +43,15 @@ def parse_mocha_json_to_csv(output_file, new_output_file=None): with open(new_output_file, 'w') as csv_file: csv_file.write(res_df.to_csv()) +# parse the output of jest xunit reporter to a csv +# this does the same thing as for mocha, to produce the same data fields +# does not delete the original xunit output file +# outputs include, per test (in this order): +# - test suite it's a part of +# - name of the test itself +# - runtime of the test +# - stdout of the test (if any) +# - pass/fail status (could also be "pending") def parse_jest_json_to_csv(output_file, new_output_file=None): if new_output_file is None: new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index abb7fbd..cd19dd1 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -174,6 +174,7 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): test_json_summary[t] = test_output_rep return( retcode, test_json_summary) +# instrument the test command specified to make it produce verbose output to a file def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos): # replace the output file name with the custom output filename # add an index to the filename for the 2nd,+ time the filename shows up From 265471ad5c953e282d8904782b077c68987c93f1 Mon Sep 17 00:00:00 2001 From: Ellen Arteca Date: Fri, 4 Aug 2023 18:55:48 -0400 Subject: [PATCH 35/39] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 11c31da..f801837 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ python src/diagnose_github_repo.py [--repo_list_file [rlistfile]] [--repo_link [rlink]] [--repo_link_and_SHA [rlink_and_SHA]] - [--repo_local_dir [path_to_local_dir]] + [--repo_local_dir [path_to_local_dir]] [--config [config_file]] [--output_dir [output_dir]] ``` From 27783bbb71dd88e1ee84c9158612c9d5c514aefe Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Wed, 16 Aug 2023 20:42:52 +0000 Subject: [PATCH 36/39] Add logging for timing of each test 
From 27783bbb71dd88e1ee84c9158612c9d5c514aefe Mon Sep 17 00:00:00 2001
From: Jonathan Bell
Date: Wed, 16 Aug 2023 20:42:52 +0000
Subject: [PATCH 36/39] Add logging for timing of each test target + attempt
 to force jest to run tests in band

---
 src/TestInfo.py         | 8 +++++++-
 src/test_JS_repo_lib.py | 6 ++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/TestInfo.py b/src/TestInfo.py
index c5bec7e..7d99c9f 100644
--- a/src/TestInfo.py
+++ b/src/TestInfo.py
@@ -40,9 +40,11 @@ class TestInfo:
     }
     # extra args, their position in the arg list, and any post-processing required
     # post-processing is a function that takes 2 arguments: input file and output file
+    # CAUTION: DO NOT PUT ANY MORE ARGS AFTER PLACEHOLDER_OUTPUT_FILE_NAME. THE CODE THAT
+    # PARSES THE OUTPUT RELIES ON THIS BEING THE *LAST* ARGUMENT
     VERBOSE_TESTS_EXTRA_ARGS = {
         "jest": {
-            "args": " --verbose --json --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$",
+            "args": " --verbose --json -i --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$",
             "position": -1,
             "post_processing": TestOutputProc.parse_jest_json_to_csv
         },
@@ -116,6 +118,8 @@ def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE):
         self.timed_out = False
         self.VERBOSE_MODE = VERBOSE_MODE
         self.test_verbosity_output = None
+        self.startTime = 0
+        self.endTime = 0
 
     def set_test_command( self, test_command):
         self.test_command = test_command
@@ -189,6 +193,8 @@ def get_json_rep( self):
         if self.test_verbosity_output:
             json_rep["test_verbosity_output"] = self.test_verbosity_output
         json_rep["timed_out"] = self.timed_out
+        json_rep["start_time"] = self.startTime
+        json_rep["end_time"] = self.endTime
         return( json_rep)
 
     def __str__(self):
diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py
index abb7fbd..8113f1e 100644
--- a/src/test_JS_repo_lib.py
+++ b/src/test_JS_repo_lib.py
@@ -2,6 +2,7 @@
 import subprocess
 import json
 import os
+import time
 from TestInfo import *
 
 def run_command( commands, timeout=None):
@@ -114,8 +115,13 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."):
         for test_rep_index in range(crawler.TEST_COMMAND_REPEATS):
             test_rep_id = "" if crawler.TEST_COMMAND_REPEATS == 1 else "testrep_" + str(test_rep_index)
             print("Running rep " + str(test_rep_index) + " of " + str(crawler.TEST_COMMAND_REPEATS - 1) + ": " + manager + t)
+            # time how long the next line takes
+            startTime = time.time()
             error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT)
+            endTime = time.time()
             test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE)
+            test_info.startTime = startTime
+            test_info.endTime = endTime
             test_info.set_test_command( pkg_json.get("scripts", {})[t])
             test_info.compute_test_infras()
             test_info.compute_nested_test_commands( test_scripts)
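Two things happen in this patch: jest's `-i` flag (an alias of `--runInBand`) forces tests to run serially in a single process, which makes per-test timings more stable, and the test run itself is bracketed with wall-clock timestamps. The timing pattern, pulled out as a hypothetical standalone helper (not code from the patch):

```
# Hypothetical helper illustrating the timing pattern the patch adds around
# run_command: record wall-clock time before and after the blocking call.
import subprocess
import time

def timed_run(command, timeout=None):
    start_time = time.time()
    proc = subprocess.run(command, shell=True, capture_output=True, timeout=timeout)
    end_time = time.time()
    # the patch stores these on the TestInfo object, and they surface in the
    # output JSON as "start_time" and "end_time" (epoch seconds)
    return proc.returncode, start_time, end_time
```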
From 98a8bda517d73fd9009fed8e32ab1a29c435e4b2 Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Mon, 21 Aug 2023 19:37:32 -0400
Subject: [PATCH 37/39] stop running the non-verbose-instrumented version of
 test commands if we are also running the verbose-instrumented version

---
 src/TestInfo.py         |  9 +++++++--
 src/test_JS_repo_lib.py | 34 +++++++++++++++++++++-------------
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/src/TestInfo.py b/src/TestInfo.py
index c5bec7e..0c91d2c 100644
--- a/src/TestInfo.py
+++ b/src/TestInfo.py
@@ -123,14 +123,19 @@ def set_test_command( self, test_command):
     def set_test_verbosity_output( self, verbose_output):
         self.test_verbosity_output = verbose_output
 
+    def get_test_infras_list( test_command, manager):
+        test_infras = []
+        test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, test_command, manager) ]
+        test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, test_command, manager) ]
+        return( test_infras)
+
     def compute_test_infras( self):
         self.test_infras = []
         self.test_covs = []
         self.test_lints = []
         self.nested_test_commands = []
         if self.test_command:
-            self.test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, self.test_command, self.manager) ]
-            self.test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, self.test_command, self.manager) ]
+            self.test_infras += TestInfo.get_test_infras_list(self.test_command, self.manager)
             self.test_covs += [ TestInfo.TRACKED_COVERAGE[ti] for ti in TestInfo.TRACKED_COVERAGE if called_in_command(ti, self.test_command, self.manager) ]
             self.test_lints += [ TestInfo.TRACKED_LINTERS[ti] for ti in TestInfo.TRACKED_LINTERS if called_in_command(ti, self.test_command, self.manager) ]
         self.test_infras = list(set(self.test_infras))
diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py
index cd19dd1..4623306 100644
--- a/src/test_JS_repo_lib.py
+++ b/src/test_JS_repo_lib.py
@@ -114,20 +114,16 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."):
         for test_rep_index in range(crawler.TEST_COMMAND_REPEATS):
             test_rep_id = "" if crawler.TEST_COMMAND_REPEATS == 1 else "testrep_" + str(test_rep_index)
             print("Running rep " + str(test_rep_index) + " of " + str(crawler.TEST_COMMAND_REPEATS - 1) + ": " + manager + t)
-            error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT)
-            test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE)
-            test_info.set_test_command( pkg_json.get("scripts", {})[t])
-            test_info.compute_test_infras()
-            test_info.compute_nested_test_commands( test_scripts)
-            test_info.compute_test_stats()
+            test_command = pkg_json.get("scripts", {})[t]
+            test_infras = TestInfo.get_test_infras_list(test_command, manager)
+            test_verbosity_output = {}
             # if we're in verbose testing mode (i.e. getting all timing info for each test, etc)
             # then, we rerun the test commands with all the commands for adding verbose_mode to
             # each of the test infras involved (individually)
             if crawler.TEST_VERBOSE_ALL_OUTPUT:
                 # we're gonna be adding our new custom scripts for verbosity testing
                 run_command( "mv package.json TEMP_package.json_TEMP")
-                test_verbosity_output = {}
-                for verbosity_index, test_infra in enumerate(test_info.test_infras):
+                for verbosity_index, test_infra in enumerate(test_infras):
                     verbose_test_json = crawler.output_dir + "/" \
                         + "repo_" + repo_name + "_" \
                         + "test_" + str(test_index) + "_"\
@@ -142,14 +138,14 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."):
                     infra_verbosity_args = infra_verbosity_config.get("args", "")
                     infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end
                     infra_verbosity_post_proc = infra_verbosity_config.get("post_processing", None)
-                    infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_info.test_command, test_infra, infra_verbosity_args,
+                    infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_command, test_infra, infra_verbosity_args,
                         verbose_test_json, infra_verbosity_args_pos)
                     verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index)
                     pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command
                     with open("package.json", 'w') as f:
                         json.dump( pkg_json, f)
                     print("Running verbosity: " + manager + infra_verbosity_command)
-                    verb_error, verb_output, verb_retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT)
+                    error, output, retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT)
                     # if there's post-processing to be done
                     if not infra_verbosity_post_proc is None:
                         for out_file_obj in out_files:
@@ -160,12 +156,24 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."):
                     test_verbosity_infra["command"] = infra_verbosity_command
                     test_verbosity_infra["output_files"] = out_files
                     if crawler.VERBOSE_MODE:
-                        test_verbosity_infra["test_debug"] = "\nError output: " + verb_error.decode('utf-8') \
-                            + "\nOutput stream: " + verb_output.decode('utf-8')
+                        test_verbosity_infra["test_debug"] = "\nError output: " + error.decode('utf-8') \
+                            + "\nOutput stream: " + output.decode('utf-8')
                     test_verbosity_output[test_infra] = test_verbosity_infra
-                test_info.set_test_verbosity_output(test_verbosity_output)
                 # put the package.json back
                 run_command( "mv TEMP_package.json_TEMP package.json")
+            # not verbose test mode -- just run the normal test command
+            else:
+                error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT)
+            test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE)
+            # the below info on the test infras etc is independent of verbose mode: just based on the command itself
+            test_info.set_test_command( test_command)
+            test_info.compute_test_infras()
+            test_info.compute_nested_test_commands( test_scripts)
+            # note: if we're running in verbose mode, then the stats will be that of the last executed verbose mode
+            # instrumented version of the test command
+            test_info.compute_test_stats()
+            if crawler.TEST_VERBOSE_ALL_OUTPUT:
+                test_info.set_test_verbosity_output(test_verbosity_output)
             # if we're not doing any repeats then don't make another layer of jsons
             if crawler.TEST_COMMAND_REPEATS == 1:
                 test_output_rep = test_info.get_json_rep()
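For context on the `out_files` values threaded through the code above: `instrument_test_command_for_verbose` substitutes the output-file placeholder in the infra's extra args, and (per its comments) indexes repeated file names from the second occurrence on so they don't overwrite each other. A simplified sketch of that substitution, as a hypothetical standalone helper with an assumed naming scheme (the real function also splices the args into the test command at the configured position):

```
# Simplified sketch of the placeholder substitution: each occurrence of the
# placeholder becomes a concrete output file name, indexed from the second
# occurrence on so repeated matches do not clobber each other's files.
PLACEHOLDER = "$PLACEHOLDER_OUTPUT_FILE_NAME$"

def fill_output_file_placeholders(args_template, output_file):
    pieces = args_template.split(PLACEHOLDER)
    filled = pieces[0]
    out_files = []
    for i, piece in enumerate(pieces[1:]):
        if i == 0:
            name = output_file
        elif "." in output_file:
            root, ext = output_file.rsplit(".", 1)
            name = root + "_" + str(i) + "." + ext
        else:
            name = output_file + "_" + str(i)
        out_files.append(name)
        filled += name + piece
    return filled, out_files

# e.g., with the jest args above:
# fill_output_file_placeholders(" --verbose --json -i --outputFile=" + PLACEHOLDER,
#                               "verbose_test_report.json")
# returns (" --verbose --json -i --outputFile=verbose_test_report.json",
#          ["verbose_test_report.json"])
```

This is also why the CAUTION comment in patch 36 insists the placeholder stay the last argument: the output parsing depends on it.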
From a875faa9c74593d8204e13ba399becf864822625 Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Mon, 21 Aug 2023 23:10:25 -0400
Subject: [PATCH 38/39] lil fix

---
 src/test_JS_repo_lib.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py
index 73ae479..81b415d 100644
--- a/src/test_JS_repo_lib.py
+++ b/src/test_JS_repo_lib.py
@@ -169,7 +169,9 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."):
                 # put the package.json back
                 run_command( "mv TEMP_package.json_TEMP package.json")
             # not verbose test mode -- just run the normal test command
-            else:
+            # if start and end time are both still zero then no instrumented test commands ran
+            # and so we also rerun here
+            if (not crawler.TEST_VERBOSE_ALL_OUTPUT) or (start_time == 0 and end_time == 0):
                 start_time = time.time()
                 error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT)
                 end_time = time.time()

From 7dc635f59678d1959346b6483569d61badab270b Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Thu, 24 Aug 2023 22:50:00 -0400
Subject: [PATCH 39/39] catch errors in mocha output format (same as jest)

---
 src/output_parsing/test_output_proc.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py
index 5d4bc3b..9ab742d 100644
--- a/src/output_parsing/test_output_proc.py
+++ b/src/output_parsing/test_output_proc.py
@@ -39,9 +39,12 @@ def parse_mocha_json_to_csv(output_file, new_output_file=None):
             test_stdout += [""]
             test_pass_fail += ["passed"]
     res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail)))
-    res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"]
-    with open(new_output_file, 'w') as csv_file:
-        csv_file.write(res_df.to_csv())
+    try:
+        res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"]
+        with open(new_output_file, 'w') as csv_file:
+            csv_file.write(res_df.to_csv())
+    except:
+        print("ERROR in data for file " + new_output_file + " -- no output printed. skipping to next step...")
 
 # parse the output of jest xunit reporter to a csv
 # this does the same thing as for mocha, to produce the same data fields
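The `try`/`except` in this last patch guards the column assignment: if the report is empty or malformed, the zipped lists produce a DataFrame without exactly five columns, and pandas raises `ValueError` on the assignment. A minimal illustration of the failure mode (not from the tool):

```
# Minimal reproduction of the failure the patch guards against: an empty
# report yields a DataFrame with zero columns, so assigning five column
# names raises ValueError ("Length mismatch: Expected axis has 0 elements...").
import pandas as pd

res_df = pd.DataFrame(list(zip()))  # what the parser builds from an empty report
try:
    res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"]
except ValueError as err:
    print("column mismatch: " + str(err))
```

Catching `ValueError` specifically, rather than the patch's bare `except:`, would keep unrelated errors (e.g., an unwritable output path) visible instead of silently skipping them.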