-
Notifications
You must be signed in to change notification settings - Fork 3
/
fetch_data.py
69 lines (49 loc) · 2.11 KB
/
fetch_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import numpy as np
"""
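One-off snippet for creating Label.csv from Validation.csv (kept for reference; it is inside a string literal and is not executed):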
data = pd.read_csv('Validation.csv')
df = pd.DataFrame(data[['label_id','label']])
label_id = df['label_id'].unique()
labels = df['label'].unique()
#Creating label.csv
label = pd.DataFrame(list(zip(labels, label_id)),
columns = ['label','label_id'])
label = label.sort_values('label_id')
label.to_csv('Label.csv')
"""
class DataLoader():
    """Load the label list and the per-split video/label CSV files.

    On construction the label file is read first, then every split whose
    path was supplied is loaded into a DataFrame.

    Attributes:
        path_vid: Stored as given; not used by this class itself.
        labels_df: DataFrame read from ``path_labels`` with the columns
            ``label`` and ``label_id``.
        labels: First column of ``labels_df``, each value converted to str.
        n_labels: Number of entries in ``labels``.
        train_df, val_df, test_df: The loaded split DataFrames, or ``None``
            when the corresponding path was not supplied.
    """

    # Width of the one-hot tuples stored in the ``encoded_label`` column.
    # NOTE(review): presumably the number of classes in the dataset; it is
    # kept at the original fixed value rather than derived from ``n_labels``
    # so the encoding width does not change -- confirm before tying the two.
    ENCODING_SIZE = 27

    # Column names assigned to a split CSV for each supported mode. The names
    # are passed to ``read_csv`` explicitly, so the file's first row is data.
    _COLUMNS_BY_MODE = {
        'input': ['video_id'],
        'label': ['video_id', 'label', 'label_id'],
    }

    def __init__(self, path_vid, path_labels, path_train=None, path_val=None,
                 path_test=None):
        """Read the label file and every split whose path is given.

        Args:
            path_vid: Stored on the instance unchanged.
            path_labels: Path of the label CSV (``label,label_id`` rows).
            path_train: Optional path of the training split CSV.
            path_val: Optional path of the validation split CSV.
            path_test: Optional path of the test split CSV; it is read in
                ``'input'`` mode, i.e. with a single ``video_id`` column.
        """
        self.path_vid = path_vid
        self.path_labels = path_labels
        self.path_train = path_train
        self.path_val = path_val
        self.path_test = path_test

        # Labels must be loaded first: label-mode splits are filtered by them.
        self.get_labels(self.path_labels)

        # Every split attribute is always defined, so reading one on a loader
        # built without that split gives None instead of AttributeError.
        self.train_df = None
        self.val_df = None
        self.test_df = None
        if self.path_train:
            self.train_df = self.load_video_label(self.path_train)
        if self.path_val:
            self.val_df = self.load_video_label(self.path_val)
        if self.path_test:
            self.test_df = self.load_video_label(self.path_test, mode='input')

    def get_labels(self, path_labels):
        """Read the label CSV and cache the label names on the instance.

        Sets ``labels_df``, ``labels`` and ``n_labels``.

        Args:
            path_labels: Path of a CSV whose columns are read as ``label``
                and ``label_id``.
        """
        self.labels_df = pd.read_csv(path_labels, names=['label', 'label_id'])
        self.labels = [str(row[0]) for row in self.labels_df.values]
        self.n_labels = len(self.labels)
        # TODO: one step was skipped here; implement it if necessary.

    def load_video_label(self, path_sub, mode='label'):
        """Read one split CSV into a DataFrame of string columns.

        Args:
            path_sub: Path of the split CSV.
            mode: ``'label'`` reads ``video_id, label, label_id``, keeps only
                rows whose label is in ``self.labels`` and adds an
                ``encoded_label`` column of one-hot tuples. ``'input'`` reads
                a single ``video_id`` column and adds nothing.

        Returns:
            The loaded DataFrame.

        Raises:
            ValueError: If ``mode`` is neither ``'input'`` nor ``'label'``.
        """
        if mode not in self._COLUMNS_BY_MODE:
            raise ValueError(
                "mode must be 'input' or 'label', got {!r}".format(mode))

        df = pd.read_csv(path_sub, dtype=str, sep=',',
                         names=self._COLUMNS_BY_MODE[mode])
        if mode != 'label':
            # Input-mode files carry no label columns, so nothing to encode.
            return df

        # Copy the filtered rows so adding a column below works on an
        # independent frame and does not raise SettingWithCopyWarning.
        df = df[df.label.isin(self.labels)].copy()
        df['encoded_label'] = [self._one_hot(label_id)
                               for label_id in df.label_id]
        return df

    def _one_hot(self, label_id):
        """Return a tuple of ``ENCODING_SIZE`` ints with a 1 at ``int(label_id)``."""
        encoded = np.zeros(self.ENCODING_SIZE, dtype=int)
        encoded[int(label_id)] = 1
        return tuple(encoded)