-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathimage2numpy_imagenet_val.py
89 lines (69 loc) · 2.68 KB
/
image2numpy_imagenet_val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# http://stackoverflow.com/questions/35032675/how-to-create-dataset-similar-to-cifar-10/35034287
from argparse import ArgumentParser
import os
import pickle

import numpy as np
from scipy import misc

from utils import *
def parse_arguments():
    """Parse the command line and return the input and output directories.

    Both options are mandatory.  Without them the script would receive
    ``None`` and either list the current working directory by accident
    (``os.listdir(None)``) or fail only when it tries to create the output
    directory, after all images have already been processed.

    Returns:
        tuple: ``(in_dir, out_dir)`` exactly as given on the command line.

    Raises:
        SystemExit: raised by ``argparse`` when an option is missing or the
            command line cannot be parsed.
    """
    parser = ArgumentParser(
        description="Pack a directory of validation images into a pickled numpy dataset"
    )
    parser.add_argument('-i', '--in_dir', required=True,
                        help="Input directory with source images")
    parser.add_argument('-o', '--out_dir', required=True,
                        help="Output directory for pickle files")
    args = parser.parse_args()
    return args.in_dir, args.out_dir
def process_folder(in_dir, out_dir):
    """Pack the images found in *in_dir* into a single pickled dataset.

    Each file in ``in_dir`` is looked up in the validation ground-truth table
    (keyed by the file name without its extension), read as an image and
    flattened channel by channel -- all red values, then all green, then all
    blue -- into one ``uint8`` row.  Rows and labels are pickled to
    ``<out_dir>/val_data`` as ``{'data': ndarray, 'labels': list}`` and a
    per-label histogram is printed afterwards.

    Files that cannot be read, or that do not have three colour channels,
    are reported on stdout, appended to ``log_img2np_val.txt`` in the current
    working directory, and skipped.

    Args:
        in_dir: Directory containing the images to convert.
        out_dir: Directory that receives the ``val_data`` pickle; it is
            created when it does not exist.

    Raises:
        KeyError: propagated from the ground-truth lookup when a file in
            ``in_dir`` has no entry (assuming the table is a plain dict).
        ValueError: if no image in ``in_dir`` could be processed.
    """
    # Helpers come from the project's `utils` module; their exact return
    # types are not visible here -- presumably dicts / an ordered list.
    label_dict = get_label_dict()
    folders = get_ordered_folders()
    val_ground_dict = get_val_ground_dict()

    # Subsampling folders is useful for building a smaller dataset, e.g.
    # every 10th class (folders[0::10]) or only the first 100 classes
    # (folders[:100]).  Apply such a slice to `folders` right here.

    # Labels associated with the selected folders.  A set keeps the
    # per-image membership test O(1) instead of scanning a list.
    labels_searched = {label_dict[folder] for folder in folders}

    print("Processing folder %s" % in_dir)
    labels_list = []
    images = []
    # Sorted so the row order of the dataset does not depend on the order in
    # which the file system happens to list the directory.
    for image_name in sorted(os.listdir(in_dir)):
        # Ground truth is keyed by the file name without extension (images
        # resized with 'image_resizer_imagenet.py' carry a '.png' extension).
        label = val_ground_dict[os.path.splitext(image_name)[0]]

        # Ignore if it's not one of the subsampled classes
        if label not in labels_searched:
            continue

        image_path = os.path.join(in_dir, image_name)
        try:
            # NOTE(review): scipy.misc.imread was removed in SciPy 1.2; this
            # call needs an older SciPy (with Pillow) -- confirm the pinned
            # version or migrate to another image reader.
            img = misc.imread(image_path)
            # Indexing the third axis fails for grayscale (2-D) images,
            # which therefore end up in the except branch as well.
            channels = [img[:, :, channel].ravel() for channel in range(3)]
        except Exception:
            print('Cant process image %s' % image_name)
            with open("log_img2np_val.txt", "a") as log_file:
                # Trailing newline keeps one entry per line in the log.
                log_file.write("Couldn't read: %s\n" % image_path)
            continue

        # Channel-major layout: R plane, then G plane, then B plane.
        images.append(np.concatenate(channels).astype(np.uint8, copy=False))
        labels_list.append(label)

    if not images:
        raise ValueError("No images could be processed in %s" % in_dir)
    data_val = np.vstack(images)

    # Can add some kind of data splitting
    d_val = {
        'data': data_val,
        'labels': labels_list,
    }

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, 'val_data'), 'wb') as out_file:
        pickle.dump(d_val, out_file)

    # Per-label histogram as a sanity check.  `label - 1` is used as the
    # slot index, so labels are presumably 1-based (1..1000) -- TODO confirm;
    # the printed index is the 0-based slot, not the label itself.
    y_test = d_val['labels']
    num_classes = 1000
    count = np.zeros(num_classes, dtype=np.int64)
    for label in y_test:
        count[label - 1] += 1
    for slot, occurrences in enumerate(count):
        print('%d : %d' % (slot, occurrences))
    print('SUM: %d' % len(y_test))
if __name__ == '__main__':
    # Script entry point: read the two directories from the command line,
    # then convert the whole input folder in one pass.
    source_dir, target_dir = parse_arguments()
    print("Start program ...")
    process_folder(out_dir=target_dir, in_dir=source_dir)
    print("Finished.")