-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoco_splits.py
73 lines (60 loc) · 1.82 KB
/
coco_splits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import json
import argparse
import os
import random
# Seed the global RNG with a fixed value so that any random sampling done
# through the `random` module is repeatable from one run to the next.
random.seed(42)

# One module-level list per partition name (three distinct list objects).
train, val, test = [], [], []
def load_file(file_name: str) -> dict:
    """Read a JSON document from disk and return the parsed object.

    The file is opened in binary mode so that ``json.load`` detects the
    encoding (UTF-8, UTF-16 or UTF-32, with or without a BOM) on its own,
    instead of depending on the platform's default text encoding.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The deserialized JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_name, "rb") as f:
        return json.load(f)
def main(file_name, output_location, percentage):
    """Write a reduced copy of a dataset JSON with a subsampled training split.

    Every entry of ``data["images"]`` is bucketed by its ``"split"`` value:
    ``"val"`` and ``"test"`` entries are kept in full, and every other value
    is treated as training data. A random ``percentage`` of the training
    entries is drawn with ``random.sample`` and written, together with all
    val/test entries and the original ``"dataset"`` field, to
    ``dataset_coco_<percent>.json`` inside ``output_location``.

    Args:
        file_name: Path of the source dataset JSON file.
        output_location: Directory that receives the new file; it is created
            if it does not exist yet.
        percentage: Fraction of the training entries to keep, in ``[0, 1]``.

    Raises:
        ValueError: If ``percentage`` is outside ``[0, 1]``.
        KeyError: If the JSON lacks ``"images"``/``"dataset"`` or an image
            entry has no ``"split"`` key.
    """
    # Fail fast with a clear message, before the (possibly large) file is
    # loaded; random.sample would otherwise reject the value much later.
    if not 0 <= percentage <= 1:
        raise ValueError(
            f"percentage must be between 0 and 1, got {percentage!r}"
        )

    data = load_file(file_name)

    # Per-call lists: accumulating into shared module-level lists would make
    # a second call in the same process also include the first call's images.
    train_images = []
    val_images = []
    test_images = []
    for img in data["images"]:
        split = img["split"]
        if split == "val":
            val_images.append(img)
        elif split == "test":
            test_images.append(img)
        else:
            # Any other split value counts as training data.
            train_images.append(img)

    # The sample size is truncated towards zero, so at most `percentage` of
    # the training entries are kept.
    mini_train = random.sample(
        train_images, int(len(train_images) * percentage)
    )

    new_data = {
        "images": mini_train + val_images + test_images,
        "dataset": data["dataset"],
    }

    print(f"Number of image in train: {len(mini_train)}")
    print(f"Number of image in val: {len(val_images)}")
    print(f"Number of image in test: {len(test_images)}")
    print(f"Number of image in new dataset split: {len(new_data['images'])}")

    # Binary floating point makes e.g. 0.29 * 100 == 28.999999999999996, which
    # a bare int() would turn into 28. Rounding off that noise first keeps the
    # label correct while still truncating genuine fractions (15.5 -> 15).
    percent_label = int(round(percentage * 100, 6))

    # Output file
    if output_location:
        os.makedirs(output_location, exist_ok=True)
    out_file = os.path.join(
        output_location, f"dataset_coco_{percent_label}.json"
    )
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(new_data, f)
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, and pass the
    # three resulting values straight to main().
    parser = argparse.ArgumentParser()

    # (flag, keyword arguments for add_argument), registered in this order.
    option_table = (
        (
            "--dataset_json",
            {
                "type": str,
                "help": "Location of the dataset json file",
                "required": True,
            },
        ),
        (
            "--output_location",
            {
                "type": str,
                "help": "The directory to dump the file",
                "required": True,
            },
        ),
        (
            "--percentage",
            {
                "type": float,
                "help": "Percentage of full dataset in the new split (between 0 and 1)",
                "default": 0.1,
            },
        ),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    parsed = parser.parse_args()
    main(parsed.dataset_json, parsed.output_location, parsed.percentage)