Upload scripts

tylergu · tylergu · commit 817e1c1b98d7 · 2024-10-13T21:50:49.000-05:00
Signed-off-by: Tyler Gu <jiaweig3@illinois.edu>
diff --git a/scripts/collect_statistics.py b/scripts/collect_statistics.py
@@ -0,0 +1,72 @@
+import argparse
+import json
+import os
+
+
+def count_recovery_deletion_tests(folder_path: str):
+    """Tally deletion and recovery tests found under the testrun folder.
+
+    Every directory reached by walking ``folder_path`` (at any depth) counts
+    as one deletion test. A directory additionally counts as one recovery
+    test when it directly contains an entry named ``mutated--01.yaml``.
+
+    Returns:
+        A ``(deletion_tests, recovery_tests)`` tuple of integers.
+    """
+    deletion_total = 0
+    recovery_total = 0
+
+    for parent, subdirs, _files in os.walk(folder_path):
+        # Each sub-directory is a deletion test, whatever it contains.
+        deletion_total += len(subdirs)
+        # Booleans sum as 0/1, one per directory holding the marker file.
+        recovery_total += sum(
+            "mutated--01.yaml" in os.listdir(os.path.join(parent, name))
+            for name in subdirs
+        )
+
+    return deletion_total, recovery_total
+
+
+def count_post_diff_tests(folder_path: str) -> int:
+    """Tally post-diff tests found under the testrun folder.
+
+    For every directory reached by walking ``folder_path``, its
+    ``post_diff_test`` child (if present) is walked in turn; each entry whose
+    name starts with ``mutated`` inside any directory below that child counts
+    as one post-diff test.
+    """
+    total = 0
+
+    for parent, _subdirs, _files in os.walk(folder_path):
+        # os.walk yields nothing for a missing directory, so parents without
+        # a "post_diff_test" child simply contribute zero.
+        post_diff_root = os.path.join(parent, "post_diff_test")
+        for case_root, case_dirs, _ in os.walk(post_diff_root):
+            for case_dir in case_dirs:
+                entries = os.listdir(os.path.join(case_root, case_dir))
+                total += sum(
+                    entry.startswith("mutated") for entry in entries
+                )
+
+    return total
+
+
+def read_normal_tests(folder_path) -> int:
+    """Return the normal-test count recorded in ``testrun_info.json``.
+
+    The file is read from ``folder_path`` and the value stored under
+    ``num_total_testcases`` -> ``total_number_of_test_cases`` is returned.
+    """
+    info_path = os.path.join(folder_path, "testrun_info.json")
+    with open(info_path, "r", encoding="utf-8") as info_file:
+        summary = json.load(info_file)
+
+    return summary["num_total_testcases"]["total_number_of_test_cases"]
+
+
+def main() -> None:
+    """Print a per-category breakdown of the tests in a testrun folder.
+
+    Reads ``--path`` (testrun folder, mandatory) and ``--operator`` (label
+    echoed in the report) from the command line, gathers the four test
+    counts and prints them together with their sum.
+    """
+    parser = argparse.ArgumentParser()
+    # Every counter needs the folder; without it os.walk/os.path.join would
+    # fail with a TypeError, so let argparse report the missing argument.
+    parser.add_argument(
+        "--path",
+        type=str,
+        required=True,
+        help="Path to the testrun folder",
+    )
+    parser.add_argument("--operator", type=str, help="Name of the operator")
+    args = parser.parse_args()
+
+    deletion_tests, recovery_tests = count_recovery_deletion_tests(args.path)
+    normal_tests = read_normal_tests(args.path)
+    post_diff_tests = count_post_diff_tests(args.path)
+
+    total = deletion_tests + recovery_tests + post_diff_tests + normal_tests
+
+    print(f"Operator: {args.operator}")
+    print(f"Total tests: {total}")
+    print(f"    Normal tests: {normal_tests}")
+    print(f"    Deletion tests: {deletion_tests}")
+    print(f"    Recovery tests: {recovery_tests}")
+    print(f"    Post-diff tests: {post_diff_tests}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/gen_missing_examples.py b/scripts/gen_missing_examples.py
@@ -0,0 +1,104 @@
+import argparse
+import glob
+import json
+import os
+
+import openai
+import yaml
+
+
+def read_missing_properties(path):
+    """Load and return the parsed content of ``missing_fields.json``.
+
+    ``path`` is the directory that holds the JSON file.
+    """
+    json_path = os.path.join(path, "missing_fields.json")
+    with open(json_path, "r", encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def _build_value_prompt(p):
+    """Assemble the user prompt asking for example values of one property."""
+    prop = f"- {p[0]}\n description: {p[1]}\n type: {p[2]}\n structure: {p[3]}\n"
+
+    # Sample multi-document answer. The "---" separator must sit on its own
+    # line, otherwise the sample shown to the model is not valid YAML.
+    doc_format = (
+        "spec:\n"
+        f"  {p[0]}: value\n"
+        "---\n"
+        "spec:\n"
+        f"  {p[0]}: value\n"
+    )
+    array_format = (
+        "spec:\n"
+        "  property:\n"
+        "   - subproperty: value\n"
+        "   - subproperty: value\n"
+        "   - subproperty: value\n"
+    )
+
+    return "".join(
+        [
+            "Here is the property that need values:\n",
+            f"{prop}\n",
+            "\nThe property has a datatype and description provided above, please make sure the generated value satisfies the datatype and description.\n",
+            "\n If the property has structure that indicating subfields, make sure to generate all the subfields for the property as a whole.\n",
+            "\nProvide three values for the property and please follow the cr yaml format. Directly give me the yaml file without any other message, for example\n",
+            doc_format,
+            'If the property has `ITEM` in the property path, that means the property should be an item in an array. For example, for "spec.pdms.ITEM.config:" the format should be:\n',
+            array_format,
+        ]
+    )
+
+
+def _extract_yaml(reply):
+    """Return the body of the first ```yaml fence, or the reply if unfenced."""
+    fence = "```yaml\n"
+    if fence not in reply:
+        # The prompt asks for bare YAML, so an unfenced reply is legitimate.
+        return reply
+    return reply.split(fence, 1)[1].split("```", 1)[0]
+
+
+def gen_values(missing_values, path, api_key, operator):
+    """Ask the OpenAI chat API for example values of each missing property.
+
+    Args:
+        missing_values: sequence of records indexed as ``p[0]`` property
+            path, ``p[1]`` description, ``p[2]`` type, ``p[3]`` structure.
+        path: directory receiving one ``value_<property path>.yaml`` file
+            per property.
+        api_key: key assigned to ``openai.api_key`` before any request.
+        operator: operator name interpolated into the system prompt.
+
+    Properties whose path ends with ``ITEM`` are skipped, and so are
+    properties for which the API returns no text.
+    """
+    openai.api_key = api_key
+
+    context = f"You are a expert of the {operator} of the Kubernetes ecosystem. You are tasked with providing values for properties of the {operator} CRD"
+
+    for p in missing_values:
+        if p[0].endswith("ITEM"):
+            continue
+
+        completion = openai.chat.completions.create(
+            model="gpt-4o",
+            messages=[
+                {"role": "system", "content": context},
+                {"role": "user", "content": _build_value_prompt(p)},
+            ],
+        )
+
+        result_text = completion.choices[0].message.content
+        if not result_text:
+            # Message content can be empty/None; nothing to store then.
+            continue
+
+        output_file = os.path.join(path, f"value_{p[0]}.yaml")
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(_extract_yaml(result_text))
+
+
+def store_to_examples(path):
+    """Merge every YAML document found in ``path`` into ``examples.yaml``.
+
+    All ``*.yaml`` files directly inside ``path`` are parsed with
+    ``yaml.safe_load_all`` and their documents are written, in sorted file
+    order, to ``<path>/examples.yaml``. An ``examples.yaml`` left by an
+    earlier run is not read back, so reruns do not duplicate documents.
+    Empty documents are dropped.
+    """
+    examples_name = "examples.yaml"
+    main_results = []
+
+    for file in sorted(glob.glob(os.path.join(path, "*.yaml"))):
+        # glob returns paths joined with ``path``; compare the file name
+        # only, otherwise the output file is never recognised.
+        if os.path.basename(file) == examples_name:
+            continue
+        with open(file, "r", encoding="utf-8") as f:
+            main_results.extend(
+                doc for doc in yaml.safe_load_all(f) if doc is not None
+            )
+
+    with open(
+        os.path.join(path, examples_name), "w", encoding="utf-8"
+    ) as f:
+        yaml.dump_all(main_results, f)
+
+
+def main() -> None:
+    """Generate example values for the missing properties of an operator.
+
+    Reads ``--path`` (folder holding ``missing_fields.json``, mandatory),
+    ``--api_key`` and ``--operator`` from the command line, requests values
+    for each missing property and merges the results into ``examples.yaml``.
+    """
+    parser = argparse.ArgumentParser()
+    # The folder is joined into file paths right away; without it
+    # os.path.join would fail with a TypeError, so let argparse report it.
+    parser.add_argument(
+        "--path",
+        type=str,
+        required=True,
+        help="Path to the file containing the missing properties",
+    )
+    parser.add_argument(
+        "--api_key", type=str, help="API key for the OpenAI API"
+    )
+    parser.add_argument("--operator", type=str, help="Name of the operator")
+    args = parser.parse_args()
+
+    missing_properties = read_missing_properties(args.path)
+
+    gen_values(missing_properties, args.path, args.api_key, args.operator)
+    store_to_examples(args.path)
+
+
+if __name__ == "__main__":
+    main()