Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
terryyz committed Aug 2, 2024
2 parents 1289d53 + 617b5bd commit 4d05ba9
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 9 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,11 @@ Here are some tips to speed up the evaluation:
You can inspect the failed samples by using the following command:
```bash
bigcodebench.inspect --eval-results sample-sanitized-calibrated_eval_results.json --in-place
# Inspect the failed samples and save the results to `inspect/`
bigcodebench.inspect --eval_results sample-sanitized-calibrated_eval_results.json --split complete --subset hard
# Re-run the inspection in place
bigcodebench.inspect --eval_results sample-sanitized-calibrated_eval_results.json --split complete --subset hard --in_place
```
## 🚀 Full Script
Expand Down
20 changes: 18 additions & 2 deletions bigcodebench/eval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import tempfile
import subprocess
import multiprocessing
import time
from typing import Optional

TIMEOUT_LIMIT=240.0
Expand Down Expand Up @@ -141,7 +142,7 @@ def safe_kill(pid, sig):
else:
print(f"Prevented attempt to kill PID {pid} with signal {sig}")
except ProcessLookupError:
print(f"Process {pid} does not exist.")
pass

def safe_killpg(pgid, sig):
if pgid == current_pgid or pgid in {os.getpgid(pid) for pid in child_pids}:
Expand Down Expand Up @@ -221,7 +222,22 @@ def safe_exec(*args, **kwargs):
try:
yield
finally:
# Restore original functions after the block
for pid in child_pids:
try:
os.kill(pid, signal.SIGTERM)
for _ in range(10):
time.sleep(0.1)
try:
os.kill(pid, 0)
except ProcessLookupError:
break
else:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
pass
except Exception as e:
print(f"Error handling process {pid}: {e}")

os.kill = original_kill
os.killpg = original_killpg
os.system = original_system
Expand Down
18 changes: 12 additions & 6 deletions bigcodebench/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,25 @@ def inspection(args):
-- completion.py: prompt + completion
-- execution_trace.txt: execution trace
"""
path = os.path.join("inspect", args.eval_results.split("/")[-1].replace(".json", ""))
path = os.path.join(args.save_path, args.eval_results.split("/")[-1].replace(".json", ""))
if args.in_place:
shutil.rmtree(path, ignore_errors=True)
if not os.path.exists(path):
os.makedirs(path)
problems = get_bigcodebench()
problems = get_bigcodebench(subset=args.subset)

eval_results = json.load(open(args.eval_results, "r"))
for task_id, results in eval_results["eval"].items():
if task_id not in problems:
continue
if all(result["status"] == "pass" for result in results):
continue
task_path = os.path.join(path, task_id)
if not os.path.exists(task_path):
os.makedirs(task_path)
task_id_data = problems[task_id]
with open(os.path.join(task_path, "ground_truth.py"), "w") as f:
f.write(task_id_data[f"{args.subset}_prompt"] + "\n\n" + task_id_data["canonical_solution"])
f.write(task_id_data[f"{args.split}_prompt"] + "\n\n" + task_id_data["canonical_solution"])

# write test
with open(os.path.join(task_path, "test_case.py"), "w") as f:
Expand All @@ -48,9 +50,13 @@ def inspection(args):
f.write("="*50 + "\n")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--eval-results", required=True, type=str)
parser.add_argument("--subset", required=True, type=str)
parser.add_argument("--in-place", action="store_true")
parser.add_argument("--eval_results", required=True, type=str)
parser.add_argument(
"--split", required=True, type=str, choices=["complete", "instruct"]
)
parser.add_argument("--subset", default="hard", type=str, choices=["full", "hard"])
parser.add_argument("--save_path", default="inspect", type=str)
parser.add_argument("--in_place", action="store_true")
args = parser.parse_args()

inspection(args)
Expand Down

0 comments on commit 4d05ba9

Please sign in to comment.