diff --git a/README.ja.md b/README.ja.md
index e2f27a3e..2096735a 100644
--- a/README.ja.md
+++ b/README.ja.md
@@ -52,6 +52,7 @@ $ oj test [-c COMMAND] [TEST...]
 $ oj test-reactive [-c COMMAND] JUDGE_COMMAND
 $ oj generate-input GENERATOR_COMMAND
 $ oj generate-output [-c COMMAND] [TEST...]
+$ oj generate-reactive [-g GENERATOR_COMMAND] HACKED_COMMAND JUDGE_COMMAND
 ```
 
 詳細は `$ oj --help` を見てください。
diff --git a/README.md b/README.md
index d478632f..6c4dce5b 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,7 @@ $ oj test [-c COMMAND] [TEST...]
 $ oj test-reactive [-c COMMAND] JUDGE_COMMAND
 $ oj generate-input GENERATOR_COMMAND
 $ oj generate-output [-c COMMAND] [TEST...]
+$ oj generate-reactive [-g GENERATOR_COMMAND] HACKED_COMMAND JUDGE_COMMAND
 ```
 
 For details, see `$ oj --help`.
diff --git a/onlinejudge_command/main.py b/onlinejudge_command/main.py
index 1bd14509..0d3b7e10 100644
--- a/onlinejudge_command/main.py
+++ b/onlinejudge_command/main.py
@@ -12,6 +12,7 @@
 import onlinejudge_command.subcommand.download as subcommand_download
 import onlinejudge_command.subcommand.generate_input as subcommand_generate_input
 import onlinejudge_command.subcommand.generate_output as subcommand_generate_output
+import onlinejudge_command.subcommand.generate_reactive as subcommand_generate_reactive
 import onlinejudge_command.subcommand.login as subcommand_login
 import onlinejudge_command.subcommand.submit as subcommand_submit
 import onlinejudge_command.subcommand.test as subcommand_test
@@ -42,6 +43,7 @@ def get_parser() -> argparse.ArgumentParser:
     subcommand_test.add_subparser(subparsers)
     subcommand_generate_output.add_subparser(subparsers)
     subcommand_generate_input.add_subparser(subparsers)
+    subcommand_generate_reactive.add_subparser(subparsers)
     subcommand_test_reactive.add_subparser(subparsers)
 
     return parser
@@ -76,6 +78,8 @@ def run_program(args: argparse.Namespace, parser: argparse.ArgumentParser) -> in
         subcommand_generate_output.run(args)
     elif args.subcommand in ['generate-input', 'g/i']:
         subcommand_generate_input.run(args)
+    elif args.subcommand in ['generate-reactive', 'g/r']:
+        subcommand_generate_reactive.run(args)
     else:
         parser.print_help(file=sys.stderr)
         return 1
diff --git a/onlinejudge_command/subcommand/generate_reactive.py b/onlinejudge_command/subcommand/generate_reactive.py
new file mode 100644
index 00000000..c5e319cf
--- /dev/null
+++ b/onlinejudge_command/subcommand/generate_reactive.py
@@ -0,0 +1,294 @@
+import argparse
+import concurrent.futures
+import contextlib
+import enum
+import hashlib
+import itertools
+import os
+import pathlib
+import subprocess
+import sys
+import tempfile
+import threading
+from logging import getLogger
+from typing import *
+
+import onlinejudge_command.format_utils as fmtutils
+from onlinejudge_command import pretty_printers, utils
+
+logger = getLogger(__name__)
+
+
+def add_subparser(subparsers: argparse.Action) -> None:
+    """Register the `generate-reactive` subcommand (alias: `g/r`) and its options on the given subparsers action."""
+    subparsers_add_parser: Callable[..., argparse.ArgumentParser] = subparsers.add_parser  # type: ignore
+    subparser = subparsers_add_parser('generate-reactive', aliases=['g/r'], help='generate input files for reactive problems from given generator', formatter_class=argparse.RawTextHelpFormatter, epilog='''\
+format string for --format:
+  %s                    name
+  %e                    extension: "in" or "out"
+  (both %s and %e are required.)
+
+tips:
+  For the random testing, you can read a tutorial: https://github.com/online-judge-tools/oj/blob/master/docs/getting-started.md#random-testing
+
+  There is a command to automatically generate a input generator, `oj-template` command. See https://github.com/online-judge-tools/template-generator .
+
+  This subcommand has also the feature to find a hack case.
+    e.g. for a target program `a.out`, and a reactive judge program `judge.py` which generates a random input-case by itself, run $ oj g/r ./a.out 'python3 judge.py'
+    For this case, `judge.py` should print debugging information.
+
+    e.g. for a target program `a.out`, a random input-case generator `generate.py`, and a reactive judge program `judge.py`, run $ oj g/r -g 'python3 generate.py' ./a.out 'python3 judge.py'
+
+  You can do similar things with shell
+    e.g. $ for i in `seq 100` ; do [[ -e a.pipe ]] && rm -f a.pipe ; mkfifo a.pipe && ./a.out < a.pipe | python3 judge.py > a.pipe; done
+    e.g. $ for i in `seq 100` ; do python3 generate.py > test/random-$i.in ; [[ -e a.pipe ]] && rm -f a.pipe ; mkfifo a.pipe && ./a.out < a.pipe | python3 judge.py test/random-$i.in > a.pipe; done
+''')
+    subparser.add_argument('-f', '--format', default='%s.%e', help='a format string to recognize the relationship of test cases. (default: "%%s.%%e")')
+    subparser.add_argument('-d', '--directory', type=pathlib.Path, default=pathlib.Path('test'), help='a directory name for test cases (default: test/)')
+    subparser.add_argument('-t', '--tle', type=float, help='set the time limit (in second) (default: inf)')
+    subparser.add_argument('-j', '--jobs', type=int, help='run tests in parallel')
+    subparser.add_argument('--width', type=int, default=3, help='specify the width of indices of cases. (default: 3)')
+    subparser.add_argument('--name', help='specify the base name of cases. (default: "hack")')
+    subparser.add_argument('-g', '--generator', type=str, help='your program to generate test cases')
+    subparser.add_argument('hack', help='specify your wrong solution to be judged with the reactive program')
+    subparser.add_argument('judge', type=str, help='judge program using standard I/O')
+    subparser.add_argument('count', nargs='?', type=int, help='the number of cases to generate (default: 1)')
+
+
+@contextlib.contextmanager
+def BufferedExecutor(lock: Optional[threading.Lock]):
+    """Yield a `submit(f, *args, **kwargs)` function that runs or buffers the given calls.
+
+    If `lock` is None, submitted calls run immediately. Otherwise they are buffered and replayed in order while holding `lock` when the context exits, so the logs of one attempt are not mixed with the logs of other threads.
+    """
+
+    buf: List[Tuple[Callable, Tuple[Any, ...], Dict[str, Any]]] = []
+
+    def submit(f, *args, **kwargs):
+        if lock is None:
+            f(*args, **kwargs)
+        else:
+            buf.append((f, args, kwargs))
+
+    try:
+        yield submit
+    finally:
+        # flush even if the body raised; otherwise the buffered logs explaining the failure are lost
+        if lock is not None:
+            with lock:
+                for f, args, kwargs in buf:
+                    f(*args, **kwargs)
+
+
+def write_result(input_data: bytes, *, input_path: pathlib.Path, lock: Optional[threading.Lock] = None) -> None:
+    """Write `input_data` to `input_path`, creating the parent directory if needed, and log the saved path."""
+    # acquire lock to print logs properly, if in parallel
+    nullcontext = contextlib.ExitStack()  # TODO: use contextlib.nullcontext after Python 3.7
+    with lock or nullcontext:
+        input_path.parent.mkdir(parents=True, exist_ok=True)
+        with input_path.open('wb') as fh:
+            fh.write(input_data)
+        logger.info(utils.SUCCESS + 'saved to: %s', input_path)
+
+
+def check_status(info: Dict[str, Any], proc: subprocess.Popen, *, submit: Callable[..., None], input_data: Optional[bytes]) -> bool:
+    """Log the elapsed time and any TLE/RE of the executed command; return True iff it has a return code of zero."""
+    submit(logger.info, 'time: %f sec', info['elapsed'])
+    if proc.returncode is None:
+        submit(logger.info, utils.FAILURE + utils.red('TLE'))
+        if input_data is not None:
+            submit(logger.info, utils.NO_HEADER + 'input:')
+            submit(logger.info, utils.NO_HEADER + '%s', pretty_printers.make_pretty_large_file_content(input_data, limit=40, head=20, tail=10))
+        submit(logger.info, 'skipped.')
+        return False
+    elif proc.returncode != 0:
+        submit(logger.info, utils.FAILURE + utils.red('RE') + ': return code %d', proc.returncode)
+        if input_data is not None:
+            submit(logger.info, utils.NO_HEADER + 'input:')
+            submit(logger.info, utils.NO_HEADER + '%s', pretty_printers.make_pretty_large_file_content(input_data, limit=40, head=20, tail=10))
+        submit(logger.info, 'skipped.')
+        return False
+    assert info['answer'] is not None
+    return True
+
+
+def check_randomness_of_generator(input_data: bytes, *, name: str, lock: Optional[threading.Lock], generated_input_hashes: Dict[bytes, str]) -> Optional[str]:
+    """check_randomness_of_generator() checks the generated inputs. This adds some overheads but is needed for foolproof. Many users forget to initialize their library and use fixed seeds.
+
+    :returns: a previous name of the input when it was already once generated. None if it's a new input.
+    """
+
+    # To prevent consuming unlimited memories, do nothing if the user's generator is properly implemented.
+    limit = 1000
+    if len(generated_input_hashes) >= limit:
+        return None
+
+    input_digest = hashlib.sha1(input_data).digest()
+    nullcontext = contextlib.ExitStack()  # TODO: use contextlib.nullcontext after Python 3.7
+    with lock or nullcontext:
+        if len(generated_input_hashes) < limit:
+            if input_digest in generated_input_hashes:
+                return generated_input_hashes[input_digest]
+            else:
+                generated_input_hashes[input_digest] = name
+                if len(generated_input_hashes) == limit:
+                    logger.info('Conflict checking of generated inputs is disabled now because it seems the given input generator has enough randomness.')  # This prints a log line but it's safe because here is in a lock.
+    return None
+
+
+class JudgeStatus(enum.Enum):
+    """The verdict of one run of the hacked program against the reactive judge."""
+    AC = 'AC'
+    WA = 'WA'
+    RE = 'RE'
+
+
+@contextlib.contextmanager
+def fifo() -> Generator[Tuple[Any, Any], None, None]:
+    """Yield the (read, write) text file objects of a fresh OS pipe and close both of them on exit."""
+    fdr, fdw = os.pipe()
+    fhr = os.fdopen(fdr, 'r')
+    fhw = os.fdopen(fdw, 'w')
+    try:
+        yield fhr, fhw
+    finally:
+        fhw.close()
+        fhr.close()
+        # os.close(fdw), os.close(fdr) are unnecessary
+
+
+def run_reactive(hack: str, judge: str, generated_file: Optional[pathlib.Path]) -> JudgeStatus:
+    """Run the hacked program and the judge with their standard I/O connected to each other via pipes.
+
+    :param generated_file: if given, its resolved path is appended to the judge command as an argument.
+    :returns: RE if the hacked program exits with non-zero, otherwise AC if the judge exits with zero, otherwise WA.
+    """
+    with fifo() as (fhr1, fhw1):
+        with fifo() as (fhr2, fhw2):
+            with subprocess.Popen(hack, shell=True, stdin=fhr2, stdout=fhw1, stderr=sys.stderr) as proc1:
+                if generated_file is not None:
+                    judge_command = ' '.join([judge, str(generated_file.resolve())])
+                else:
+                    judge_command = judge
+
+                with subprocess.Popen(judge_command, shell=True, stdin=fhr1, stdout=fhw2, stderr=sys.stderr) as proc2:
+                    proc1.communicate()
+                    proc2.communicate()
+
+    if proc1.returncode != 0:
+        return JudgeStatus.RE
+    elif proc2.returncode == 0:
+        return JudgeStatus.AC
+    else:
+        return JudgeStatus.WA
+
+
+def try_hack_once(generator: Optional[str], hack: str, judge: str, *, tle: Optional[float], attempt: int, lock: Optional[threading.Lock] = None, generated_input_hashes: Dict[bytes, str]) -> Tuple[bool, Optional[bytes]]:
+    """Make one attempt to hack: generate an input (if a generator is given) and run the hacked program against the judge.
+
+    :returns: a pair (hacked, input_data). `hacked` is True iff the verdict is not AC. `input_data` is the generated input when hacked with a generator, and None otherwise.
+    """
+    with BufferedExecutor(lock) as submit:
+
+        # print the header
+        submit(logger.info, '')
+        submit(logger.info, '%d-th attempt', attempt)
+
+        # generate input if generator is given
+        input_data: Optional[bytes] = None
+        if generator is not None:
+            submit(logger.info, 'generate input...')
+            info, proc = utils.exec_command(generator, stdin=None, timeout=tle)
+            input_data = info['answer']
+            if not check_status(info, proc, submit=submit, input_data=input_data):
+                return (False, None)
+            assert input_data is not None
+
+        # check the randomness of generator
+        name = '{}-th attempt'.format(attempt)
+        if input_data is not None:
+            conflicted_name = check_randomness_of_generator(input_data, name=name, lock=lock, generated_input_hashes=generated_input_hashes)
+            if conflicted_name is not None:
+                submit(logger.warning, 'The same input is already generated at %s. Please use a random input generator.', conflicted_name)
+                submit(logger.info, utils.NO_HEADER + 'input:')
+                submit(logger.info, utils.NO_HEADER + '%s', pretty_printers.make_pretty_large_file_content(input_data, limit=40, head=20, tail=10))
+
+        # hack
+        submit(logger.info, 'hack...')
+        if input_data is not None:
+            with tempfile.NamedTemporaryFile(delete=True) as fh:
+                # write via the handle we already hold; re-opening the file by name while it is still open fails on Windows
+                fh.write(input_data)
+                fh.flush()
+                status = run_reactive(hack, judge, pathlib.Path(fh.name))
+        else:
+            status = run_reactive(hack, judge, None)
+
+        # report via submit() so that the verdict is printed after the header of this attempt even if in parallel
+        if status != JudgeStatus.AC:
+            submit(logger.info, utils.FAILURE + '' + utils.red(status.value))
+            if input_data is not None:
+                submit(logger.info, utils.NO_HEADER + 'input:\n%s', pretty_printers.make_pretty_large_file_content(input_data, limit=40, head=20, tail=10))
+
+        # return the result
+        if status == JudgeStatus.AC:
+            return (False, None)
+        elif generator is None:
+            return (True, None)
+        else:
+            return (True, input_data)
+
+
+def run(args: argparse.Namespace) -> None:
+    """Repeat hack attempts, serially or with `--jobs` worker threads, until `count` hacks are found, saving the generated input of each hack."""
+    if not args.generator:
+        logger.info('--generator is not given. It will assume the reactive judge generates a random input-case each time.')
+
+    if args.name is None:
+        args.name = 'hack'
+
+    if args.count is None:
+        args.count = 1
+
+    def iterate_path():
+        for i in itertools.count():
+            name = '{}-{}'.format(args.name, str(i).zfill(args.width))
+            input_path = fmtutils.path_from_format(args.directory, args.format, name=name, ext='in')
+            if not input_path.exists():
+                yield (name, input_path)
+
+    # generate cases
+    generated_input_hashes: Dict[bytes, str] = {}
+    if args.jobs is None:
+        for _, input_path in itertools.islice(iterate_path(), args.count):
+            # hack serially
+            for attempt in itertools.count(1):
+                (hacked, input_data) = try_hack_once(args.generator, hack=args.hack, judge=args.judge, tle=args.tle, attempt=attempt, generated_input_hashes=generated_input_hashes)
+                if hacked:
+                    if input_data is not None:
+                        write_result(input_data, input_path=input_path)
+                    break
+    else:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=args.jobs) as executor:
+            lock = threading.Lock()
+            futures: List[concurrent.futures.Future] = []
+
+            # hack concurrently
+            attempt = 0
+            for _ in range(args.jobs):
+                attempt += 1
+                futures += [executor.submit(try_hack_once, args.generator, hack=args.hack, judge=args.judge, tle=args.tle, attempt=attempt, lock=lock, generated_input_hashes=generated_input_hashes)]
+            for _, input_path in itertools.islice(iterate_path(), args.count):
+                hacked = False
+                while not hacked:
+                    concurrent.futures.wait(futures, return_when=concurrent.futures.FIRST_COMPLETED)
+                    for i in range(len(futures)):
+                        if not futures[i].done():
+                            continue
+                        hacked, input_data = futures[i].result()
+                        attempt += 1
+                        futures[i] = executor.submit(try_hack_once, args.generator, hack=args.hack, judge=args.judge, tle=args.tle, attempt=attempt, lock=lock, generated_input_hashes=generated_input_hashes)
+                        if hacked:
+                            break
+                if input_data is not None:
+                    write_result(input_data, input_path=input_path, lock=lock)
diff --git a/onlinejudge_command/subcommand/test_reactive.py b/onlinejudge_command/subcommand/test_reactive.py
index c31a9a64..bd78b17c 100644
--- a/onlinejudge_command/subcommand/test_reactive.py
+++ b/onlinejudge_command/subcommand/test_reactive.py
@@ -1,12 +1,17 @@
 import argparse
+import concurrent.futures
 import contextlib
+import enum
 import os
+import pathlib
 import subprocess
 import sys
+import threading
 from logging import getLogger
 from typing import *
 
-import onlinejudge_command.utils as utils
+import onlinejudge_command.format_utils as fmtutils
+from onlinejudge_command import pretty_printers, utils
 
 logger = getLogger(__name__)
 
@@ -21,7 +26,14 @@ def add_subparser(subparsers: argparse.Action) -> None:
     e.g. $ while oj t/i 'python3 judge.py' ; do : ; done
 ''')
     subparser.add_argument('-c', '--command', default=utils.get_default_command(), help='your solution to be tested. (default: "{}")'.format(utils.get_default_command()))
+    subparser.add_argument('-f', '--format', default='%s.%e', help='a format string to recognize the relationship of test cases. (default: "%%s.%%e")')
+    subparser.add_argument('-d', '--directory', type=pathlib.Path, default=pathlib.Path('test'), help='a directory name for test cases (default: test/)')
+    subparser.add_argument('-j', '--jobs', metavar='N', type=int, help='specifies the number of jobs to run simultaneously (default: no parallelization)')
+    subparser.add_argument('-i', '--from-file', action='store_true', help='tests each case (specified by -d option or test argument), whose filename is passed to the judge as sys.argv[1]')
+    subparser.add_argument('--no-ignore-backup', action='store_false', dest='ignore_backup')
+    subparser.add_argument('--ignore-backup', action='store_true', help='ignore backup files and hidden files (i.e. files like "*~", "\\#*\\#" and ".*") (default)')
     subparser.add_argument('judge', help='judge program using standard I/O')
+    subparser.add_argument('test', nargs='*', type=pathlib.Path, help='paths of test cases. (if empty: globbed from --format)')
 
 
 @contextlib.contextmanager
@@ -36,6 +48,13 @@ def fifo() -> Generator[Tuple[Any, Any], None, None]:
 
 
 def run(args: argparse.Namespace) -> bool:
+    if not args.from_file:
+        return run_single(args)
+    else:
+        return run_from_file(args)
+
+
+def run_single(args: argparse.Namespace) -> bool:
     with fifo() as (fhr1, fhw1):
         with fifo() as (fhr2, fhw2):
             with subprocess.Popen(args.command, shell=True, stdin=fhr2, stdout=fhw1, stderr=sys.stderr) as proc1:
@@ -53,3 +72,106 @@ def run(args: argparse.Namespace) -> bool:
                         logger.info(utils.FAILURE + 'WA: judge returns %d', proc2.returncode)
                         result = False
     return result
+
+
+class JudgeStatus(enum.Enum):
+    """The verdict of one run of the solution against the reactive judge."""
+    AC = 'AC'
+    WA = 'WA'
+    RE = 'RE'
+
+
+def run_from_file(args: argparse.Namespace) -> bool:
+    """Run every test case (given as arguments, or globbed from the directory) and return True iff all of them are AC."""
+    # list tests
+    if not args.test:
+        args.test = fmtutils.glob_with_format(args.directory, args.format)  # by default
+    if args.ignore_backup:
+        args.test = fmtutils.drop_backup_or_hidden_files(args.test)
+    tests = fmtutils.construct_relationship_of_files(args.test, args.directory, args.format)
+
+    # run tests
+    history: List[Dict[str, Any]] = []
+    if args.jobs is None:
+        for name, paths in sorted(tests.items()):
+            history += [test_single_case(name, paths['in'], args=args)]
+    else:
+        if os.name == 'nt':
+            logger.warning("-j/--jobs option is unstable on Windows environment")
+        with concurrent.futures.ThreadPoolExecutor(max_workers=args.jobs) as executor:
+            lock = threading.Lock()
+            futures: List[concurrent.futures.Future] = []
+            for name, paths in sorted(tests.items()):
+                futures += [executor.submit(test_single_case, name, paths['in'], lock=lock, args=args)]
+            for future in futures:
+                history += [future.result()]
+
+    # summarize
+    ac_count = 0
+    for result in history:
+        if result['status'] == 'AC':
+            ac_count += 1
+
+    # print the summary
+    logger.info('')
+    if ac_count == len(tests):
+        logger.info(utils.SUCCESS + 'test ' + utils.green('success') + ': %d cases', len(tests))
+    else:
+        logger.info(utils.FAILURE + 'test ' + utils.red('failed') + ': %d AC / %d cases', ac_count, len(tests))
+
+    # return the result
+    return ac_count == len(tests)
+
+
+def test_single_case(test_name: str, test_input_path: pathlib.Path, *, lock: Optional[threading.Lock] = None, args: argparse.Namespace) -> Dict[str, Any]:
+    """Run the solution against the judge for one test case and log the verdict.
+
+    The resolved path of the input file is appended to the judge command as an argument.
+
+    :returns: a dict with 'status' ('AC', 'WA' or 'RE') and 'testcase' (the name and the resolved input path).
+    """
+    # print the header earlier if not in parallel
+    if lock is None:
+        logger.info('')
+        logger.info('%s', test_name)
+
+    with fifo() as (fhr1, fhw1):
+        with fifo() as (fhr2, fhw2):
+            with subprocess.Popen(args.command, shell=True, stdin=fhr2, stdout=fhw1, stderr=sys.stderr) as proc1:
+                judge_command = ' '.join([
+                    args.judge,
+                    str(test_input_path.resolve()),
+                ])
+                with subprocess.Popen(judge_command, shell=True, stdin=fhr1, stdout=fhw2, stderr=sys.stderr) as proc2:
+                    proc1.communicate()
+                    proc2.communicate()
+
+    # lock is required to avoid mixing logs if in parallel
+    nullcontext = contextlib.ExitStack()  # TODO: use contextlib.nullcontext() after updating Python to 3.7
+    with lock or nullcontext:
+        if lock is not None:
+            logger.info('')
+            logger.info('%s', test_name)
+
+        status = JudgeStatus.AC
+        if proc1.returncode != 0:
+            logger.info(utils.FAILURE + 'RE: solution returns %d', proc1.returncode)
+            status = JudgeStatus.RE
+        elif proc2.returncode == 0:
+            logger.info(utils.SUCCESS + 'AC')
+        else:
+            logger.info(utils.FAILURE + 'WA: judge returns %d', proc2.returncode)
+            status = JudgeStatus.WA
+
+        if status != JudgeStatus.AC:
+            with test_input_path.open('rb') as inf:
+                logger.info(utils.NO_HEADER + 'input:\n%s', pretty_printers.make_pretty_large_file_content(inf.read(), limit=40, head=20, tail=10))
+
+    testcase = {
+        'name': test_name,
+        'input': str(test_input_path.resolve()),
+    }
+    return {
+        'status': status.value,
+        'testcase': testcase,
+    }