import glob
import json
import os
import pathlib

import numpy as np
import pandas as pd

from util import Constants


class Database:
    """Generic class to read and write txt, numpy, JSON, or CSV files."""
    def __getpath(self):
        # Root directory for all reads and writes; defaults to the working directory.
        if self.path is not None:
            return self.path
        return os.getcwd()
    def __load_lbl_feat_vector_util(self, path):
        # Reads every JSON-encoded label feature vector in `path`, keyed by label name.
        feat_dict = {}
        for filename in os.listdir(path):
            if filename.startswith('.'):  # skip hidden files
                continue
            with open(os.path.join(path, filename), 'r') as file:
                read_list = json.load(file)
                read_list = np.array(read_list).flatten()
                read_list = np.nan_to_num(read_list, nan=0.0)
                feat_dict[filename[:-4]] = read_list  # strip the 4-character extension (e.g. '.txt')
        return feat_dict
    def __load_feat_vector_util(self, feat_path):
        # Reads every JSON-encoded image feature vector in `feat_path`, keyed by integer image id.
        feat_dict = {}
        for filename in os.listdir(feat_path):
            if filename.startswith('.'):  # skip hidden files
                continue
            with open(os.path.join(feat_path, filename), 'r') as file:
                read_list = json.load(file)
                read_list = np.array(read_list).flatten()
                read_list = np.nan_to_num(read_list, nan=0.0)
                feat_dict[int(filename[:-4].split('_', 1)[0])] = read_list  # filename format: {id}_{label}.ext
        return feat_dict
    def __load_id_label_dict_for_dataset(self, datasetType):
        # Derives the {image_id: label} mapping from the color-moments filenames ({id}_{label}.ext).
        data = {}
        path = os.path.join(self.__getpath(), "Outputs", datasetType, "features", Constants.COLOR_MOMENTS)
        for filename in os.listdir(path):
            if filename.startswith('.'):
                continue
            image_id, label = filename[:-4].split('_', 1)
            data[int(image_id)] = label
        return data
def __load_id_label_dict(self):
return {
Constants.DatasetTypeTrain: self.__load_id_label_dict_for_dataset(Constants.DatasetTypeTrain),
Constants.DatasetTypeTest: self.__load_id_label_dict_for_dataset(Constants.DatasetTypeTest)
}
    def __load_all_label_fds_for_dataset(self, datasetType):
        label_vector_location = os.path.join(self.__getpath(), "Outputs", datasetType, "labels")
        return self.__load_all_fds(self.__load_lbl_feat_vector_util, label_vector_location)
    def __load_all_label_fds(self):
print('Loading Label Feature Vectors...')
return {
Constants.DatasetTypeTrain: self.__load_all_label_fds_for_dataset(Constants.DatasetTypeTrain),
Constants.DatasetTypeTest: self.__load_all_label_fds_for_dataset(Constants.DatasetTypeTest)
}
def __load_all_image_fds_for_dataset(self, datasetType):
feat_vector_location = os.path.join(self.__getpath(), "Outputs", datasetType, "features")
return self.__load_all_fds(self.__load_feat_vector_util, feat_vector_location)
def __load_all_image_fds(self):
print('Loading Image Feature Vectors...')
return {
Constants.DatasetTypeTrain: self.__load_all_image_fds_for_dataset(Constants.DatasetTypeTrain),
Constants.DatasetTypeTest: self.__load_all_image_fds_for_dataset(Constants.DatasetTypeTest)
}
    def __load_all_fds(self, reader, path):
        # Applies `reader` to each feature-descriptor subdirectory under `path`.
        data = {}
        for fd in (Constants.COLOR_MOMENTS, Constants.HOG, Constants.ResNet_AvgPool_1024,
                   Constants.ResNet_Layer3_1024, Constants.ResNet_FC_1000, Constants.ResNet_SoftMax_1000):
            data[fd] = reader(os.path.join(path, fd))
        return data
    def __load_similarity_matrices(self):
        data = {}
        path = os.path.join(self.__getpath(), 'Outputs', 'similarity_matrices')
        if os.path.exists(path):
            for file in glob.glob(os.path.join(path, '*')):
                filename = os.path.basename(file)
                # filename format: {image_image|label_label}-{fd}-similarity_matrix.csv
                id, fd, _ = filename.split('-')
                data[(id, fd)] = pd.read_csv(file, index_col=0)
        return data
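    # Hypothetical example of the parse above: a file named
    # 'image_image-hog-similarity_matrix.csv' would be cached under the
    # key ('image_image', 'hog').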
    def __init__(self, load_all_data=True, path=None):
        self.feature_descriptors = {}
        self.label_feature_descriptors = {}
        self.similarity_matrices = {}
        self.latent_semantics = {}
        self.id_label_dict = {}  # format: {datasetType: {image_id: image_label}}
        self.path = path
        if load_all_data:
            self.feature_descriptors = self.__load_all_image_fds()
            self.label_feature_descriptors = self.__load_all_label_fds()
            self.similarity_matrices = self.__load_similarity_matrices()
            self.id_label_dict = self.__load_id_label_dict()
def get_feature_descriptors(self, fd, train_data=True):
datasetType = Constants.DatasetTypeTrain if train_data else Constants.DatasetTypeTest
if datasetType not in self.feature_descriptors or fd not in self.feature_descriptors[datasetType]:
return {}
return self.feature_descriptors[datasetType][fd]
def get_label_feature_descriptors(self, fd, train_data=True):
datasetType = Constants.DatasetTypeTrain if train_data else Constants.DatasetTypeTest
if datasetType not in self.label_feature_descriptors or fd not in self.label_feature_descriptors[datasetType]:
return {}
return self.label_feature_descriptors[datasetType][fd]
def get_id_label_dict(self, train_data=True):
datasetType = Constants.DatasetTypeTrain if train_data else Constants.DatasetTypeTest
if not self.id_label_dict or datasetType not in self.id_label_dict:
return {}
return self.id_label_dict[datasetType]
    def get_distances_matrix_df(self, fd):
        # Uses the configured root rather than os.getcwd() so a custom `path` is honored.
        path = os.path.join(self.__getpath(), 'Outputs', 'distance_matrices', '%s.csv' % fd)
        if not os.path.exists(path):
            return None
        return pd.read_csv(path, index_col=0)
    def write_latent_semantics_into_file(self, ls, fd, drt, k, ids, latent_semantics_mat, data):
        # also update the internal cache
        if ls not in self.latent_semantics:
            self.latent_semantics[ls] = {}
        self.latent_semantics[ls][(fd, drt, k)] = (ids, data)
        # create the folder if it doesn't exist already
        dir_path = os.path.join(self.__getpath(), 'Outputs', 'latent_semantics', ls)
        pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True)
        filename = '%s_%s_%d.csv' % (fd, drt, k)  # e.g. hog_svd_5.csv
        file_path = os.path.join(dir_path, filename)
        # save to file; note that `ids` are kept only in the in-memory cache, not written as the CSV index
        pd.DataFrame(latent_semantics_mat).to_csv(file_path)
    def get_similarity_matrix(self, id, fd):
        # id is image_image or label_label
        return self.similarity_matrices.get((id, fd))
    def write_similarity_matrix_to_file(self, id, fd, data, object_ids):
        # id is image_image or label_label
        df = pd.DataFrame(data, index=object_ids)
        self.similarity_matrices[(id, fd)] = df
        # create the folder if it doesn't exist already
        path = os.path.join(self.__getpath(), 'Outputs', 'similarity_matrices')
        pathlib.Path(path).mkdir(parents=True, exist_ok=True)
        # filename format: {image_image|label_label}-{fd}-similarity_matrix.csv
        df.to_csv(os.path.join(path, '%s-%s-similarity_matrix.csv' % (id, fd)))
    def get_latent_semantics_data(self, ls, fd, drt, k):
        if ls not in self.latent_semantics:
            return None
        return self.latent_semantics[ls].get((fd, drt, k))
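

# Minimal usage sketch (an illustration, not part of the original module):
# assumes the script runs from the project root containing the 'Outputs'
# directory and that util.Constants defines HOG and the dataset-type
# constants used above.
if __name__ == '__main__':
    db = Database(load_all_data=True)
    hog_vectors = db.get_feature_descriptors(Constants.HOG, train_data=True)
    print('loaded %d HOG image feature vectors' % len(hog_vectors))
    sim = db.get_similarity_matrix('image_image', Constants.HOG)
    if sim is not None:
        print('image_image HOG similarity matrix shape:', str(sim.shape))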