From 83d2179a3f6a8e93a0c8ad182c81007a87d18e96 Mon Sep 17 00:00:00 2001
From: Ellen Arteca
Date: Fri, 7 Jul 2023 17:59:49 -0400
Subject: [PATCH] adding option for diagnosing a local dir

---
NOTE(review): this patch was rebuilt from a copy in which all line breaks
and indentation had been collapsed. The number of context/added/removed
lines in every hunk matches its @@ header, but two things are assumptions
to confirm against the target tree before applying:
  * indentation is assumed to be one tab per level;
  * the exact position of blank context lines inside each hunk.
Changes relative to the recorded commit (so the "index" blob ids below no
longer describe the post-image):
  * iterate_over_repos records self.repo_local_dir instead of reading the
    script-level global `repo_local_dir`, and tolerates the attribute
    never having been set via set_local_repo_path;
  * get_name_from_path / diagnose_local_dir ignore trailing slashes when
    deriving the directory name (previously "a/b/" produced "").

 src/diagnose_github_repo.py | 22 +++++++++++++++++++++-
 src/test_JS_repo_lib.py     | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/diagnose_github_repo.py b/src/diagnose_github_repo.py
index ba2d938..948ebb8 100644
--- a/src/diagnose_github_repo.py
+++ b/src/diagnose_github_repo.py
@@ -20,6 +20,11 @@ def get_repo_and_SHA_from_repo_link(repo):
 		commit_SHA = split_res[1]
 	return(split_res[0], commit_SHA)
 
+# same format as getting the name from the repo link: we want the name of the dir,
+# so after the last slash (trailing slashes are ignored; if there's no slash the whole name is returned)
+def get_name_from_path(repo_local_path):
+	return( repo_local_path.rstrip("/").split("/")[-1])
+
 class RepoWalker():
 	name = "npm-pkgs"
 
@@ -60,6 +65,9 @@ def __init__(self, config_file="", output_dir = "."):
 	def set_repo_links(self, repo_links):
 		self.repo_links = repo_links
 
+	def set_local_repo_path(self, repo_local_dir):
+		self.repo_local_dir = repo_local_dir
+
 	def set_up_config( self, config_file):
 		if not os.path.exists(config_file):
 			if config_file != "":
@@ -126,22 +134,33 @@ def iterate_over_repos( self):
 			json_results["metadata"]["repo_commit_SHA"] = commit_SHA
 			with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f:
 				json.dump( json_results, f, indent=4)
+		if getattr(self, "repo_local_dir", None):  # unset or None: no local dir to diagnose
+			package_name = get_name_from_path( self.repo_local_dir)
+			json_results = diagnose_local_dir(self.repo_local_dir, self)
+			json_results["metadata"] = {}
+			json_results["metadata"]["repo_local_dir"] = self.repo_local_dir
+			with open(self.output_dir + "/" + package_name + '__results.json', 'w') as f:
+				json.dump( json_results, f, indent=4)
 
 
 argparser = argparse.ArgumentParser(description="Diagnose github repos, from a variety of sources")
 argparser.add_argument("--repo_list_file", metavar="rlistfile", type=str, nargs='?', help="file with list of github repo links")
 argparser.add_argument("--repo_link", metavar="rlink", type=str, nargs='?', help="single repo link")
+argparser.add_argument("--repo_local_dir", metavar="rlocallink", type=str, nargs='?', help="path to local directory that has the repo code")
 argparser.add_argument("--repo_link_and_SHA", metavar="rlink_and_SHA", type=str, nargs='*', help="single repo link, with optional commit SHA")
 argparser.add_argument("--config", metavar="config_file", type=str, nargs='?', help="path to config file")
 argparser.add_argument("--output_dir", metavar="output_dir", type=str, nargs='?', help="directory for results to be output to")
 args = argparser.parse_args()
 
 config = args.config if args.config else ""
-
 output_dir = args.output_dir if args.output_dir else "."
 
 walker = RepoWalker(config_file=config, output_dir=output_dir)
 
+repo_local_dir = None
+if args.repo_local_dir:
+	repo_local_dir = os.path.abspath(args.repo_local_dir)
+
 repo_links = []
 if args.repo_list_file:
 	try:
@@ -159,6 +178,7 @@ def iterate_over_repos( self):
 	# so we join all the repo_link args into a space-delimited string
 	repo_links += [' '.join(args.repo_link_and_SHA)]
 walker.set_repo_links( repo_links)
+walker.set_local_repo_path(repo_local_dir)
 walker.iterate_over_repos()
 
 
diff --git a/src/test_JS_repo_lib.py b/src/test_JS_repo_lib.py
index f65624e..a60eae9 100644
--- a/src/test_JS_repo_lib.py
+++ b/src/test_JS_repo_lib.py
@@ -281,7 +281,40 @@ def diagnose_package( repo_link, crawler, commit_SHA=None):
 	else:
 		print( "Package repository already exists. Using existing directory: " + repo_name)
 
+	# diagnose the repo dir
+	return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=commit_SHA))
 
+def diagnose_local_dir(repo_dir, crawler):
+	json_out = {}
+	cur_dir = os.getcwd()
+	# name of the working copy: last path component, ignoring any trailing slash
+	repo_name = repo_dir.rstrip("/").split("/")[-1]
+	if not os.path.isdir(repo_dir):
+		print("ERROR using local directory: " + repo_dir + " invalid directory path")
+		json_out["setup"] = {}
+		json_out["setup"]["local_dir_ERROR"] = True
+		return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
+
+	print("Diagnosing: " + repo_name + " --- from: " + repo_dir)
+	if not os.path.isdir("TESTING_REPOS"):
+		os.mkdir("TESTING_REPOS")
+	os.chdir("TESTING_REPOS")
+
+	# if a copy of the directory already exists, don't copy it again
+	if not os.path.isdir( repo_name):
+		print( "Copying package directory")
+		error, output, retcode = run_command( "cp -r " + repo_dir + " " + repo_name)
+		if retcode != 0:
+			print("ERROR copying the directory. Exiting now.")
+			json_out["setup"] = {}
+			json_out["setup"]["local_dir_ERROR"] = True
+			return( on_diagnose_exit( json_out, crawler, cur_dir, repo_name))
+	else:
+		print( "Package directory already exists. Using existing directory: " + repo_name)
+	# diagnose the repo dir
+	return( diagnose_repo_name(repo_name, crawler, json_out, cur_dir))
+
+def diagnose_repo_name(repo_name, crawler, json_out, cur_dir, commit_SHA=None):
 	# move into the repo and begin testing
 	os.chdir( repo_name)
 