-
Notifications
You must be signed in to change notification settings - Fork 18
/
utils.py
118 lines (89 loc) · 3.27 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""Reimplement TimeGAN-pytorch Codebase.
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar,
"Time-series Generative Adversarial Networks,"
Neural Information Processing Systems (NeurIPS), 2019.
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks
Last updated Date: October 18th 2021
Code author: Zhiwei Zhang ([email protected])
-----------------------------
utils.py
(1) train_test_divide: Divide train and test data for both original and synthetic data.
(2) extract_time: Returns Maximum sequence length and each sequence length.
(3) random_generator: random vector generator
(4) NormMinMax: return data info
"""
## Necessary Packages
import numpy as np
def train_test_divide (data_x, data_x_hat, data_t, data_t_hat, train_rate = 0.8):
    """Divide train and test data for both original and synthetic data.

    Args:
        - data_x: original data
        - data_x_hat: generated data
        - data_t: original time information
        - data_t_hat: generated time information
        - train_rate: ratio of training data from the original data

    Returns:
        - train_x, train_x_hat, test_x, test_x_hat: data splits
        - train_t, train_t_hat, test_t, test_t_hat: matching time splits
    """
    def _split(xs, ts):
        # Randomly partition one dataset and its time info into train/test
        # using a single shared permutation so xs and ts stay aligned.
        n = len(xs)
        idx = np.random.permutation(n)
        cut = int(n * train_rate)
        train_idx, test_idx = idx[:cut], idx[cut:]
        return ([xs[i] for i in train_idx], [xs[i] for i in test_idx],
                [ts[i] for i in train_idx], [ts[i] for i in test_idx])

    # Original data, then synthetic data (same permutation-call order as before).
    train_x, test_x, train_t, test_t = _split(data_x, data_t)
    train_x_hat, test_x_hat, train_t_hat, test_t_hat = _split(data_x_hat, data_t_hat)

    return train_x, train_x_hat, test_x, test_x_hat, train_t, train_t_hat, test_t, test_t_hat
def extract_time (data):
    """Returns maximum sequence length and each sequence length.

    Args:
        - data: original data; an iterable of per-sample sequences
          (each sequence's length is its number of time steps)

    Returns:
        - time: list with the length of every sequence
        - max_seq_len: maximum sequence length (0 for empty data)
    """
    # len(seq) equals len(seq[:, 0]) for a 2D array, computed once per
    # sequence instead of twice, and also works for plain nested lists.
    time = [len(seq) for seq in data]
    max_seq_len = max(time, default=0)
    return time, max_seq_len
def random_generator (batch_size, z_dim, T_mb, max_seq_len):
    """Random vector generation.

    Args:
        - batch_size: number of random vectors to generate
        - z_dim: dimension of each random vector
        - T_mb: per-sample sequence lengths (valid time steps)
        - max_seq_len: maximum sequence length

    Returns:
        - Z_mb: list of [max_seq_len, z_dim] arrays; the first T_mb[i] rows
          are uniform noise in [0, 1), the remaining rows are zero padding
    """
    Z_mb = list()
    for i in range(batch_size):
        # Zero-pad each noise vector up to max_seq_len so every element of
        # the batch has the same shape.
        temp = np.zeros([max_seq_len, z_dim])
        temp_Z = np.random.uniform(0., 1, [T_mb[i], z_dim])
        temp[:T_mb[i], :] = temp_Z
        # BUG FIX: the original appended the unpadded temp_Z, discarding the
        # padded buffer and returning ragged-length vectors.
        Z_mb.append(temp)
    return Z_mb
def NormMinMax(data):
    """Min-Max Normalizer.

    Args:
        - data: raw data, e.g. shape [no, seq_len, dim]

    Returns:
        - norm_data: data scaled into [0, 1] per feature
        - min_val: per-feature minimums (for renormalization)
        - max_val: per-feature maximums of the shifted data (for renormalization)
    """
    # Per-feature minimum: reduce over the sample axis, then the time axis.
    min_val = np.amin(np.amin(data, axis=0), axis=0)
    shifted = data - min_val
    # Maximum of the shifted data, reduced the same way.
    max_val = np.amax(shifted, axis=0).max(axis=0)
    # Small epsilon guards against division by zero for constant features.
    norm_data = shifted / (max_val + 1e-7)
    return norm_data, min_val, max_val