diff --git a/Dockerfile b/Dockerfile index 3710a79..f9427b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,6 @@ COPY get_rel_project_reqs.js /home/npm-filter RUN apt-get update \ && apt-get -y install --no-install-recommends python3 git unzip vim curl gnupg xz-utils parallel -RUN apt update RUN apt -y install python3-pip RUN pip3 install bs4 scrapy xmltodict pandas diff --git a/README.md b/README.md index 84de04d..f801837 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ python src/diagnose_github_repo.py [--repo_list_file [rlistfile]] [--repo_link [rlink]] [--repo_link_and_SHA [rlink_and_SHA]] + [--repo_local_dir [path_to_local_dir]] [--config [config_file]] [--output_dir [output_dir]] ``` @@ -35,6 +36,7 @@ All arguments are optional, although the tool will not do anything if no repo li ``` * `--repo_link [rlink]`: a link to a single GitHub repo to be analyzed, e.g., `https://github.com/expressjs/body-parser` * `--repo_link_and_SHA [rlink_and_SHA]`: a link to a single GitHub repo to be analyzed, followed by a space-delimited commit SHA to analyze the repo at, e.g., `https://github.com/expressjs/body-parser d0a214b3beded8a9cd2dcb51d355f92c9ead81d4` +* `repo_local_dir`: path to a local directory containing the source code of a repo/package to be diagnosed * `--config [config_file]`: path to a configuration file for the tool (config options explained in [the config file section](#configuration-file)) * `--output_dir [output_dir]`: path to a directory in which to output the tool's results files (shape of results are explained in [the output section](#output)) @@ -73,6 +75,7 @@ The output is organized into the following top-level fields in the JSON, in orde * if it runs other test commands, then a list of these commands are included (`nested_test_commands`) * whether or not it timed out (`timed_out`) * if it does run new user tests, then the number of passing and number of failing tests (`num_passing`, `num_failing`) + * if verbose testing is specified as an option, then there will be an additional file of extra test output produced * `scripts_over_code`: an object with fields for each of the scripts run over the package source code. For each script, the tool lists its output and if there was an error. * `QL_queries`: an object with fields for each of the QL queries run over the package source code. For each script, the tool lists the output (if running in verbose mode), and if there was an error. * `metadata`: an object with fields for some metadata about the package: repository link, commit SHA if one was specified @@ -132,9 +135,29 @@ The output of each QL query is saved to a CSV file in the same directory as the ### Running with docker To be safe, you should probably run any untrusted code in a sandbox. Since the entire point of this tool is to run code from a set of packages/projects you didn't write, we assume most of this code will fall into the untrusted category. -We host the docker container [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter); if you edit the package source code and want to run your version in a docker container, we have included the docker build command below. -#### Building docker (if you've updated the npm-filter source code) +We host the generic docker container [on DockerHub](https://hub.docker.com/r/emarteca/npm-filter); if you edit the package source code and want to run your version in a docker container, we have included the docker build command below. + +The generic docker container runs on any package or repo specified. 
+However, it is pre-built with default versions of node and npm. +There is also the option to build a _repo-specific_ docker container. +In this case, the container is built with the particular version of node and npm specified in the repo's `package.json` configuration file. +The container is also pre-built with the install and build phases of `npm-filter` run, so that you can then run the tests in the container without waiting for any setup. + +#### Building a container-specific docker +If you want to build a container specific to a particular repo, use the following command: +``` +# general use +docker build -t emarteca/npm-filter --build-arg REPO_LINK=[github link to repo] [--build-arg REPO_COMMIT=[specific commit SHA]] + +# specific example for memfs +docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs + +# another example, for memfs at a specific commit +docker build -t emarteca/npm-filter --build-arg REPO_LINK=https://github.com/streamich/memfs --build-arg REPO_COMMIT=863f373185837141504c05ed19f7a253232e0905 +``` + +#### Building generic docker (if you've updated the npm-filter source code) Note: you don't need to do this if you're using npm-filter out of the box. In that case, you'll pull directly from DockerHub. ``` diff --git a/configs/README.md b/configs/README.md index b387231..a79313e 100644 --- a/configs/README.md +++ b/configs/README.md @@ -3,6 +3,7 @@ The configuration file is a JSON, organized by stages of npm-filter analysis. The stages are as follows: * `install`: package installation. Users can specify: * `timeout`: number of millisections after which, if the install is not complete, the process bails and is considered timed out + * `do_install`: if false, skip the install stage * `dependencies`: package dependency tracking (this is the libraries the current package depends on, both directly and transitively). Users can specify: * `track_deps`: if true, this specifies to compute the package dependencies * `include_dev_deps`: if true, this specifies to include the `devDependencies` in the dependency computation @@ -10,10 +11,14 @@ The stages are as follows: * `build`: package compile/build stage. Users can specify: * `tracked_build_commands`: a list of build commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the build stage. * `timeout`: timeout in milliseconds, per build command + * `track_build`: if false, skip the build stage * `test`: package test stage. Users can specify: * `track_tests`: if true, then the tool will run this testing diagnostic stage * `tracked_test_commands`: a list of test commands to test (any npm script with one of these commands as a substring will be tested). Any command not in this list will not be tested for the test stage. * `timeout`: timeout in milliseconds, per test command + * `test_verbose_all_output`: an object with two fields to configure the "verbose" test tracking option: here, output and some metrics (runtime, pass/fail, etc) for each test is output to a specified file. Note that currently we only support this option for the `jest` and `mocha` test infras. + * `do_verbose_tracking`: if true, do this verbose test tracking + * `verbose_json_output_file`: name of the file to which to save this verbose output * `meta_info`: any analysis-level configurations. Users can specify: * `VERBOSE_MODE`: if true, then the output JSON file will include the full output of all the commands run. Mainly for debugging. 
* `ignored_commands`: commands to ignore: if these are present in the npm script name, then they are not run even if they otherwise fall into a category of commands to run (mainly used to exclude any interactive-mode commands, such as tests with `watch`) @@ -21,6 +26,7 @@ The stages are as follows: * `rm_after_cloning`: if true, delete the package source code after the tool is done running. Strongly recommended if running over a large batch of packages. * `scripts_over_code`: list of paths to script files to run over the package source code. Note that these paths are relative to the location of **the config file**. * `QL_queries`: list of paths to QL query files to run over the package source code. Like the scripts, these paths are relative to the location of the config file. + * `custom_setup_scripts`: list of paths to script files to run over the package code after cloning, but before any of the stages of `npm-filter` are actually run. Commonly used to replace the default install stage (i.e., set `do_install` to `false`). Like all the other scripts, these paths are relative to the location of the config file. Users can customize any of the configuration fields, by providing a JSON file with the desired fields modified. Default values are used for any fields not specified. @@ -29,18 +35,24 @@ As a demonstrative example, the default configuration is included below. ``` { "install": { - "timeout": 1000 + "timeout": 1000, + "do_install": true }, "dependencies": { "track_deps": false, "include_dev_deps": false }, "build": { + "track_build": true, "tracked_build_commands": ["build", "compile", "init"], "timeout": 1000 }, "test": { "track_tests": true, + "test_verbose_all_output": { + "do_verbose_tracking": false, + "verbose_json_output_file": "verbose_test_report.json" + }, "tracked_test_commands": ["test", "unit", "cov", "ci", "integration", "lint", "travis", "e2e", "bench", "mocha", "jest", "ava", "tap", "jasmine"], "timeout": 1000 @@ -51,7 +63,8 @@ As a demonstrative example, the default configuration is included below. "ignored_substrings": ["--watch", "nodemon"], "rm_after_cloning": false, "scripts_over_code": [ ], - "QL_queries": [ ] + "QL_queries": [ ], + "custom_setup_scripts": [ ] } } ``` diff --git a/configs/default_filter_config.json b/configs/default_filter_config.json index 56d0149..d0d8fa4 100644 --- a/configs/default_filter_config.json +++ b/configs/default_filter_config.json @@ -29,6 +29,7 @@ "ignored_substrings": ["--watch", "nodemon"], "rm_after_cloning": false, "scripts_over_code": [ ], - "QL_queries": [ ] + "QL_queries": [ ], + "custom_setup_scripts": [ ] } } \ No newline at end of file diff --git a/src/TestInfo.py b/src/TestInfo.py index 0cb39a1..9e89972 100644 --- a/src/TestInfo.py +++ b/src/TestInfo.py @@ -40,9 +40,11 @@ class TestInfo: } # extra args, their position in the arg list, and any post-processing required # post-processing is a function that takes 2 arguments: input file and output file + # CAUTION: DO NOT PUT ANY MORE ARGS AFTER PLACEHOLDER_OUTPUT_FILE_NAME. 
THE CODE THAT + # PARSES THE OUTPUT RELIES ON THIS BEING THE *LAST* ARGUMENT VERBOSE_TESTS_EXTRA_ARGS = { "jest": { - "args": " --verbose --json --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$", + "args": " --verbose --json -i --outputFile=$PLACEHOLDER_OUTPUT_FILE_NAME$", "position": -1, "post_processing": TestOutputProc.parse_jest_json_to_csv }, @@ -116,6 +118,8 @@ def __init__(self, success, error_stream, output_stream, manager, VERBOSE_MODE): self.timed_out = False self.VERBOSE_MODE = VERBOSE_MODE self.test_verbosity_output = None + self.startTime = 0 + self.endTime = 0 def set_test_command( self, test_command): self.test_command = test_command @@ -123,14 +127,19 @@ def set_test_command( self, test_command): def set_test_verbosity_output( self, verbose_output): self.test_verbosity_output = verbose_output + def get_test_infras_list( test_command, manager): + test_infras = [] + test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, test_command, manager) ] + test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, test_command, manager) ] + return( test_infras) + def compute_test_infras( self): self.test_infras = [] self.test_covs = [] self.test_lints = [] self.nested_test_commands = [] if self.test_command: - self.test_infras += [ ti for ti in TestInfo.TRACKED_INFRAS if called_in_command(ti, self.test_command, self.manager) ] - self.test_infras += [ ri for ri in TestInfo.TRACKED_RUNNERS if called_in_command(ri, self.test_command, self.manager) ] + self.test_infras += TestInfo.get_test_infras_list(self.test_command, self.manager) self.test_covs += [ TestInfo.TRACKED_COVERAGE[ti] for ti in TestInfo.TRACKED_COVERAGE if called_in_command(ti, self.test_command, self.manager) ] self.test_lints += [ TestInfo.TRACKED_LINTERS[ti] for ti in TestInfo.TRACKED_LINTERS if called_in_command(ti, self.test_command, self.manager) ] self.test_infras = list(set(self.test_infras)) @@ -189,6 +198,8 @@ def get_json_rep( self): if self.test_verbosity_output: json_rep["test_verbosity_output"] = self.test_verbosity_output json_rep["timed_out"] = self.timed_out + json_rep["start_time"] = self.start_time + json_rep["end_time"] = self.end_time return( json_rep) def __str__(self): @@ -228,6 +239,8 @@ def called_in_command( str_comm, command, manager): return( True) if command.find( "cross-env CI=true " + check_comm) > -1: return( True) + if command.find( "cross-env TZ=utc " + check_comm) > -1: + return( True) if command.find( "opener " + check_comm) > -1: return( True) if command.find( "gulp " + check_comm) > -1: diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py index d2a5843..ee7f09b 100644 --- a/src/diagnose_github_repo.py +++ b/src/diagnose_github_repo.py @@ -20,6 +20,11 @@ def get_repo_and_SHA_from_repo_link(repo): commit_SHA = split_res[1] return(split_res[0], commit_SHA) +# same format as getting the name from the repo link: we want the name of the dir, +# so after the last slash (and if there's no slash the whole name is returned) +def get_name_from_path(repo_local_path): + return( repo_local_path.split("/")[-1]) + class RepoWalker(): name = "npm-pkgs" @@ -27,6 +32,7 @@ class RepoWalker(): RM_AFTER_CLONING = False SCRIPTS_OVER_CODE = [] CUSTOM_SETUP_SCRIPTS = [] + CUSTOM_LOCK_FILES = [] QL_QUERIES = [] DO_INSTALL = True @@ -45,10 +51,10 @@ class RepoWalker(): TRACKED_BUILD_COMMANDS = ["build", "compile", "init"] # timeouts for stages, in seconds - INSTALL_TIMEOUT = 1000 + INSTALL_TIMEOUT = 10800 # 3 hours # note: these are timeouts per *script* in 
the stage of the process - BUILD_TIMEOUT = 1000 - TEST_TIMEOUT = 1000 + BUILD_TIMEOUT = 10800 # 3 hours + TEST_TIMEOUT = 10800 # 3 hours QL_CUTOFF = 5 # ignore if there are < 5 results @@ -59,6 +65,9 @@ def __init__(self, config_file="", output_dir = "."): def set_repo_links(self, repo_links): self.repo_links = repo_links + def set_local_repo_path(self, repo_local_dir): + self.repo_local_dir = repo_local_dir + def set_up_config( self, config_file): if not os.path.exists(config_file): if config_file != "": @@ -93,6 +102,8 @@ def set_up_config( self, config_file): cf_dict = config_json.get( "install", {}) self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL) self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT) + self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p + for p in cf_dict.get( "custom_lock_files", self.CUSTOM_LOCK_FILES)] cf_dict = config_json.get( "build", {}) self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD) @@ -123,22 +134,33 @@ def iterate_over_repos( self): json_results["metadata"]["repo_commit_SHA"] = commit_SHA with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f: json.dump( json_results, f, indent=4) + if self.repo_local_dir: + package_name = get_name_from_path( self.repo_local_dir) + json_results = diagnose_local_dir(self.repo_local_dir, self) + json_results["metadata"] = {} + json_results["metadata"]["repo_local_dir"] = repo_local_dir + with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f: + json.dump( json_results, f, indent=4) argparser = argparse.ArgumentParser(description="Diagnose github repos, from a variety of sources") argparser.add_argument("--repo_list_file", metavar="rlistfile", type=str, nargs='?', help="file with list of github repo links") argparser.add_argument("--repo_link", metavar="rlink", type=str, nargs='?', help="single repo link") +argparser.add_argument("--repo_local_dir", metavar="rlocallink", type=str, nargs='?', help="path to local directory that has the repo code") argparser.add_argument("--repo_link_and_SHA", metavar="rlink_and_SHA", type=str, nargs='*', help="single repo link, with optional commit SHA") argparser.add_argument("--config", metavar="config_file", type=str, nargs='?', help="path to config file") argparser.add_argument("--output_dir", metavar="output_dir", type=str, nargs='?', help="directory for results to be output to") args = argparser.parse_args() config = args.config if args.config else "" - output_dir = args.output_dir if args.output_dir else "." 
walker = RepoWalker(config_file=config, output_dir=output_dir) +repo_local_dir = None +if args.repo_local_dir: + repo_local_dir = os.path.abspath(args.repo_local_dir) + repo_links = [] if args.repo_list_file: try: @@ -156,6 +178,7 @@ def iterate_over_repos( self): # so we join all the repo_link args into a space-delimited string repo_links += [' '.join(args.repo_link_and_SHA)] walker.set_repo_links( repo_links) +walker.set_local_repo_path(repo_local_dir) walker.iterate_over_repos() diff --git a/src/diagnose_npm_package.py b/src/diagnose_npm_package.py index 59daa28..efcb4c1 100644 --- a/src/diagnose_npm_package.py +++ b/src/diagnose_npm_package.py @@ -20,6 +20,7 @@ class NPMSpider(scrapy.Spider): RM_AFTER_CLONING = False SCRIPTS_OVER_CODE = [] CUSTOM_SETUP_SCRIPTS = [] + CUSTOM_LOCK_FILES = [] QL_QUERIES = [] DO_INSTALL = True @@ -85,6 +86,8 @@ def set_up_config( self, config_file): cf_dict = config_json.get( "install", {}) self.DO_INSTALL = cf_dict.get("do_install", self.DO_INSTALL) self.INSTALL_TIMEOUT = cf_dict.get("timeout", self.INSTALL_TIMEOUT) + self.CUSTOM_LOCK_FILES = [ os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p + for p in cf_dict.get( "custom_lock_files", self.CUSTOM_LOCK_FILES)] cf_dict = config_json.get( "build", {}) self.TRACK_BUILD = cf_dict.get("track_build", self.TRACK_BUILD) diff --git a/src/output_parsing/test_output_proc.py b/src/output_parsing/test_output_proc.py index e1b6ee5..9ab742d 100644 --- a/src/output_parsing/test_output_proc.py +++ b/src/output_parsing/test_output_proc.py @@ -2,6 +2,14 @@ import xmltodict import pandas as pd +# parse the output of mocha xunit reporter to a csv +# does not delete the original xunit output file +# outputs include, per test (in this order): +# - test suite it's a part of +# - name of the test itself +# - runtime of the test +# - stdout of the test (if any) +# - pass/fail status (could also be "pending") def parse_mocha_json_to_csv(output_file, new_output_file=None): if new_output_file is None: new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension @@ -31,10 +39,22 @@ def parse_mocha_json_to_csv(output_file, new_output_file=None): test_stdout += [""] test_pass_fail += ["passed"] res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail))) - res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] - with open(new_output_file, 'w') as csv_file: - csv_file.write(res_df.to_csv()) + try: + res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] + with open(new_output_file, 'w') as csv_file: + csv_file.write(res_df.to_csv()) + except: + print("ERROR in data for file " + new_output_file + " -- no output printed. 
skipping to next step...") +# parse the output of jest xunit reporter to a csv +# this does the same thing as for mocha, to produce the same data fields +# does not delete the original xunit output file +# outputs include, per test (in this order): +# - test suite it's a part of +# - name of the test itself +# - runtime of the test +# - stdout of the test (if any) +# - pass/fail status (could also be "pending") def parse_jest_json_to_csv(output_file, new_output_file=None): if new_output_file is None: new_output_file = output_file.split(".")[0] + ".csv" # same name, csv file extension @@ -69,6 +89,9 @@ def parse_jest_json_to_csv(output_file, new_output_file=None): test_stdout += [";".join(test_results.get("failureMessages", []))] test_pass_fail += [test_status] # passed/failed/pending -- if not present assume failed res_df = pd.DataFrame(list(zip(test_suites, test_names, test_runtimes, test_stdout, test_pass_fail))) - res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] - with open(new_output_file, 'w') as csv_file: - csv_file.write(res_df.to_csv()) \ No newline at end of file + try: + res_df.columns = ["test_suite", "name", "runtime", "stdout", "pass_fail"] + with open(new_output_file, 'w') as csv_file: + csv_file.write(res_df.to_csv()) + except: + print("ERROR in data for file " + new_output_file + " -- no output printed. skipping to next step...") \ No newline at end of file diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py index abf56cb..81b415d 100644 --- a/src/test_JS_repo_lib.py +++ b/src/test_JS_repo_lib.py @@ -2,6 +2,7 @@ import subprocess import json import os +import time from TestInfo import * def run_command( commands, timeout=None): @@ -114,20 +115,19 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): for test_rep_index in range(crawler.TEST_COMMAND_REPEATS): test_rep_id = "" if crawler.TEST_COMMAND_REPEATS == 1 else "testrep_" + str(test_rep_index) print("Running rep " + str(test_rep_index) + " of " + str(crawler.TEST_COMMAND_REPEATS - 1) + ": " + manager + t) - error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT) - test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE) - test_info.set_test_command( pkg_json.get("scripts", {})[t]) - test_info.compute_test_infras() - test_info.compute_nested_test_commands( test_scripts) - test_info.compute_test_stats() + test_command = pkg_json.get("scripts", {})[t] + test_infras = TestInfo.get_test_infras_list(test_command, manager) + test_verbosity_output = {} + # initialize these variables for timing; they'll be set before/after running test commands (resp) + start_time = 0 + end_time = 0 # if we're in verbose testing mode (i.e. 
getting all timing info for each test, etc) # then, we rerun the test commands with all the commands for adding verbose_mode to # each of the test infras involved (individually) if crawler.TEST_VERBOSE_ALL_OUTPUT: # we're gonna be adding our new custom scripts for verbosity testing run_command( "mv package.json TEMP_package.json_TEMP") - test_verbosity_output = {} - for verbosity_index, test_infra in enumerate(test_info.test_infras): + for verbosity_index, test_infra in enumerate(test_infras): verbose_test_json = crawler.output_dir + "/" \ + "repo_" + repo_name + "_" \ + "test_" + str(test_index) + "_"\ @@ -142,14 +142,17 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): infra_verbosity_args = infra_verbosity_config.get("args", "") infra_verbosity_args_pos = infra_verbosity_config.get("position", -1) # default position is at the end infra_verbosity_post_proc = infra_verbosity_config.get("post_processing", None) - infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_info.test_command, test_infra, infra_verbosity_args, + infra_verbosity_command, out_files = instrument_test_command_for_verbose(test_command, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos) verbosity_script_name = "instrumented_verbosity_command_" + str(verbosity_index) pkg_json["scripts"][verbosity_script_name] = infra_verbosity_command with open("package.json", 'w') as f: json.dump( pkg_json, f) print("Running verbosity: " + manager + infra_verbosity_command) - verb_error, verb_output, verb_retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) + # time how long the next line takes + start_time = time.time() + error, output, retcode = run_command( manager + verbosity_script_name, crawler.TEST_TIMEOUT) + end_time = time.time() # if there's post-processing to be done if not infra_verbosity_post_proc is None: for out_file_obj in out_files: @@ -160,12 +163,30 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): test_verbosity_infra["command"] = infra_verbosity_command test_verbosity_infra["output_files"] = out_files if crawler.VERBOSE_MODE: - test_verbosity_infra["test_debug"] = "\nError output: " + verb_error.decode('utf-8') \ - + "\nOutput stream: " + verb_output.decode('utf-8') + test_verbosity_infra["test_debug"] = "\nError output: " + error.decode('utf-8') \ + + "\nOutput stream: " + output.decode('utf-8') test_verbosity_output[test_infra] = test_verbosity_infra - test_info.set_test_verbosity_output(test_verbosity_output) # put the package.json back run_command( "mv TEMP_package.json_TEMP package.json") + # not verbose test mode -- just run the normal test command + # if start and end time are both still zero then no instrumented test commands ran + # and so we also rerun here + if (not crawler.TEST_VERBOSE_ALL_OUTPUT) or (start_time == 0 and end_time == 0): + start_time = time.time() + error, output, retcode = run_command( manager + t, crawler.TEST_TIMEOUT) + end_time = time.time() + test_info = TestInfo( (retcode == 0), error, output, manager, crawler.VERBOSE_MODE) + # the below info on the test infras etc is independent of verbose mode: just based on the command itself + test_info.set_test_command( test_command) + test_info.compute_test_infras() + test_info.compute_nested_test_commands( test_scripts) + test_info.start_time = start_time + test_info.end_time = end_time + # note: if we're running in verbose mode, then the stats will be that of the last executed verbose mode + # instrumented version of the test 
command + test_info.compute_test_stats() + if crawler.TEST_VERBOSE_ALL_OUTPUT: + test_info.set_test_verbosity_output(test_verbosity_output) # if we're not doing any repeats then don't make another layer of jsons if crawler.TEST_COMMAND_REPEATS == 1: test_output_rep = test_info.get_json_rep() @@ -174,6 +195,7 @@ def run_tests( manager, pkg_json, crawler, repo_name, cur_dir="."): test_json_summary[t] = test_output_rep return( retcode, test_json_summary) +# instrument the test command specified to make it produce verbose output to a file def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity_args, verbose_test_json, infra_verbosity_args_pos): # replace the output file name with the custom output filename # add an index to the filename for the 2nd,+ time the filename shows up @@ -203,16 +225,30 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity # split into sub-commands command_split_chars = [ "&&", ";"] infra_calls = test_script.split(test_infra) - instrumented_test_command = [] - for i, infra_call in enumerate(infra_calls): + real_calls = [] + for maybe_call in infra_calls: # if the last char in the string is not whitespace and not a command delimiter, # and it's not the last string in the split # then it's a string that is appended to the front of the name of the infra (e.g., "\"jest\"") # and not a call - if i < len(infra_calls) - 1 and infra_call != "" and (not infra_call[-1].isspace()) and (not any([infra_call.endswith(s) for s in command_split_chars])): - instrumented_test_command += [ infra_call ] - continue - + # rebuild it + if i < len(infra_calls) - 1 and maybe_call != "" and (not maybe_call[-1].isspace()) and (not any([maybe_call.endswith(s) for s in command_split_chars])): + if len(real_calls) > 0: + real_calls[-1] += test_infra + maybe_call + continue + # if the first char in the string is not whitespace and not a command delimiter, + # and it's not the first string in the split + # then it's a string that is appended to the back of the name of the infra (e.g., jest".config.js") + # and not a call either + # rebuild it + if i > 0 and maybe_call != "" and (not maybe_call[0].isspace()) and (not any([maybe_call.startswith(s) for s in command_split_chars])): + if len(real_calls) > 0: + real_calls[-1] += test_infra + maybe_call + continue + real_calls += [ maybe_call ] + infra_calls = real_calls + instrumented_test_command = [] + for i, infra_call in enumerate(infra_calls): # if the current call is empty string # then this is the call to the testing infra and the next is the arguments # so, skip this one @@ -220,8 +256,8 @@ def instrument_test_command_for_verbose(test_script, test_infra, infra_verbosity if infra_call == "" and i < len(infra_calls) - 1: instrumented_test_command += [ "" ] continue - # if the first call is non-empty, then it's pre-test-infra and we skip it too - elif infra_call != "" and i == 0: + # if the first call is non-empty and there's more than one call, then it's pre-test-infra and we skip it too + elif len(infra_calls) > 1 and infra_call != "" and i == 0: instrumented_test_command += [ "" ] continue # get the arguments, splitting off from any other non-test commands that might be @@ -281,7 +317,40 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): else: print( "Package repository already exists. 
Using existing directory: " + repo_name) + # diagnose the repo dir + return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=commit_SHA)) +def diagnose_local_dir(repo_dir, crawler): + json_out = {} + repo_name = "" + cur_dir = os.getcwd() + repo_name = repo_dir.split("/")[-1] + if not os.path.isdir(repo_dir): + print("ERROR using local directory: " + repo_dir + " invalid directory path") + json_out["setup"] = {} + json_out["setup"]["local_dir_ERROR"] = True + return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) + + print("Diagnosing: " + repo_name + " --- from: " + repo_dir) + if not os.path.isdir("TESTING_REPOS"): + os.mkdir("TESTING_REPOS") + os.chdir("TESTING_REPOS") + + # if the repo already exists, dont clone it + if not os.path.isdir( repo_name): + print( "Copying package directory") + error, output, retcode = run_command( "cp -r " + repo_dir + " " + repo_name) + if retcode != 0: + print("ERROR copying the directory. Exiting now.") + json_out["setup"] = {} + json_out["setup"]["local_dir_ERROR"] = True + return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) + else: + print( "Package directory already exists. Using existing directory: " + repo_name) + # diagnose the repo dir + return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir)) + +def diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=None): # move into the repo and begin testing os.chdir( repo_name) @@ -307,6 +376,11 @@ def diagnose_package( repo_link, crawler, commit_SHA=None): return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name)) manager = "" + # if there's custom lock files, copy them into the repo (repo is "." since we're in the repo currently) + if crawler.CUSTOM_LOCK_FILES != []: + for custom_lock in crawler.CUSTOM_LOCK_FILES: + run_command("cp " + custom_lock + " .") + # first, check if there is a custom install # this runs custom scripts the same way as the scripts_over_code below; only # difference is it's before the npm-filter run
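
An example invocation of the new `--repo_local_dir` option added above (the directory and config paths are illustrative):
```
python src/diagnose_github_repo.py --repo_local_dir /home/user/my-package --config configs/default_filter_config.json
```
As with the other modes, the results land in `<package_name>__results.json` in the output directory, where the package name is taken from the last component of the local path.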
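
A sketch of a config file exercising the options added in this change: skip the default install in favour of a custom setup script, copy in a pre-generated lock file, skip the build stage, and turn on verbose per-test tracking. The script and lock-file names here are hypothetical, and any fields left out keep their defaults.
```
{
    "install": {
        "do_install": false,
        "custom_lock_files": [ "lock_files/package-lock.json" ]
    },
    "build": {
        "track_build": false
    },
    "test": {
        "track_tests": true,
        "test_verbose_all_output": {
            "do_verbose_tracking": true,
            "verbose_json_output_file": "verbose_test_report.json"
        }
    },
    "meta_info": {
        "custom_setup_scripts": [ "setup_scripts/local_install.sh" ]
    }
}
```
Like `scripts_over_code` and `QL_queries`, the `custom_setup_scripts` and `custom_lock_files` paths are resolved relative to the location of the config file.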
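
For reference, a minimal sketch of how those `custom_lock_files` entries are resolved in `set_up_config` (the config path and lock-file name below are hypothetical):
```python
import os

config_file = "configs/my_config.json"          # hypothetical config path
lock_files = ["lock_files/package-lock.json"]   # hypothetical lock file entry

# same expression as in set_up_config: each entry is resolved against the directory
# holding the config file (falling back to this source file's directory if none was given)
resolved = [os.path.abspath(os.path.dirname(config_file if config_file else __file__)) + "/" + p
            for p in lock_files]
```
Each resolved file is later copied into the repo root (`cp <lock file> .`) before the custom setup scripts and install stage run.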
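
A simplified sketch of what the verbosity instrumentation is meant to produce for a jest script; this is not the actual `instrument_test_command_for_verbose` implementation, which also handles `&&`/`;`-separated sub-commands, quoted infra names, and the configured argument position. The report file name is illustrative.
```python
PLACEHOLDER = "$PLACEHOLDER_OUTPUT_FILE_NAME$"
JEST_VERBOSE_ARGS = " --verbose --json -i --outputFile=" + PLACEHOLDER

def add_verbose_args(test_script, infra, extra_args, report_file):
    # splice the verbosity args in right after the first call to the test infra,
    # then point the output-file placeholder at the real report path
    instrumented = test_script.replace(infra, infra + extra_args, 1)
    return instrumented.replace(PLACEHOLDER, report_file)

print(add_verbose_args("jest --coverage", "jest", JEST_VERBOSE_ARGS, "verbose_test_report.json"))
# jest --verbose --json -i --outputFile=verbose_test_report.json --coverage
```
The instrumented command is written into `package.json` as a temporary script (the original `package.json` is stashed and restored afterwards), run with the test timeout, and timed with `time.time()`.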
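
Hypothetical usage of the report parsers in `src/output_parsing/test_output_proc.py` once a verbose run has produced its JSON report (file names are illustrative, and the import assumes `src/output_parsing` is on the import path):
```python
import test_output_proc as top

# writes a CSV with the same base name (verbose_test_report.csv), with columns:
# test_suite, name, runtime, stdout, pass_fail
top.parse_jest_json_to_csv("verbose_test_report.json")

# mocha reports go through the analogous parser and produce the same columns
top.parse_mocha_json_to_csv("mocha_verbose_report.json", "mocha_verbose_report.csv")
```
Both parsers leave the original report file in place, and on malformed data they now print an error and move on instead of raising.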