From 92ac899e712c51ed1362ec3d16e47de063019a7c Mon Sep 17 00:00:00 2001 From: Gary Benson Date: Tue, 4 Jun 2024 08:36:41 +0100 Subject: [PATCH] *** update runner.py --- runner.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/runner.py b/runner.py index 24297e6..ee8ae1f 100644 --- a/runner.py +++ b/runner.py @@ -3,7 +3,6 @@ from itertools import chain -from dom_tokenizers.internal import json from dom_tokenizers.pre_tokenizers.shared_oracle import SharedOracle DEFAULT_TESTCASES = [ @@ -38,9 +37,14 @@ def main(): lines = DEFAULT_TESTCASES else: lines = chain.from_iterable( - (json.loads(line)["text"] - for line in open(filename).readlines()) - for filename in sys.argv[1:]) + (line.rstrip() + for code, line in ( + line.split(maxsplit=1) + for line in open(filename).readlines() + ) + if code == "data") + for filename in sys.argv[1:] + ) for line in lines: print("input:", line)