-
Notifications
You must be signed in to change notification settings - Fork 9
/
tppg.py
222 lines (190 loc) · 7.47 KB
/
tppg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
TPPG: Temporal Point Process Generator
Dependencies:
- Python 3.6.7
"""
import sys
import utils
import arrow
import numpy as np
from tqdm import tqdm
from scipy.stats import norm
import matplotlib.pyplot as plt
class ExpKernel(object):
"""
Kernel function including the diffusion-type model proposed by Musmeci and
Vere-Jones (1992).
"""
def __init__(self, beta=1.):
self.beta = beta
def nu(self, t, his_t):
delta_t = t - his_t
return self.beta * np.exp(- self.beta * delta_t)
class HawkesLam(object):
"""Intensity of Spatio-temporal Hawkes point process"""
def __init__(self, mu, kernel, maximum=1e+4):
self.mu = mu
self.kernel = kernel
self.maximum = maximum
def value(self, t, his_t):
"""
return the intensity value at t.
The last element of seq_t is the location t that we are
going to inspect. Prior to that are the past locations which have
occurred.
"""
if len(his_t) > 0:
val = self.mu + np.sum(self.kernel.nu(t, his_t))
else:
val = self.mu
return val
def upper_bound(self):
"""return the upper bound of the intensity value"""
return self.maximum
def __str__(self):
return "Hawkes processes"
class TemporalPointProcess(object):
"""
Marked Temporal Hawkes Process
A stochastic temporal points generator based on Hawkes process.
"""
def __init__(self, lam):
"""
Params:
"""
# model parameters
self.lam = lam
def _homogeneous_poisson_sampling(self, T=[0, 1]):
"""
To generate a homogeneous Poisson point pattern in space T, it basically
takes two steps:
1. Simulate the number of events n = N(T) occurring in T according to a
Poisson distribution with mean lam * |T|.
2. Sample each of the n location according to a uniform distribution on S
respectively.
Args:
lam: intensity (or maximum intensity when used by thining algorithm)
T: (min_t, max_t) indicates the range of coordinates.
Returns:
samples: point process samples:
[t1, t2, ..., tn]
"""
# sample the number of events from S
n = T[1] - T[0]
N = np.random.poisson(size=1, lam=self.lam.upper_bound() * n)
# simulate spatial sequence and temporal sequence separately.
points = np.random.uniform(T[0], T[1], N)
# sort the sequence regarding the ascending order of the temporal sample.
points = points[points.argsort()]
return points
def _inhomogeneous_poisson_thinning(self, homo_points, verbose):
"""
To generate a realization of an inhomogeneous Poisson process in T, this
function uses a thining algorithm as follows. For a given intensity function
lam(t):
1. Define an upper bound max_lam for the intensity function lam(t)
2. Simulate a homogeneous Poisson process with intensity max_lam.
3. "Thin" the simulated process as follows,
a. Compute p = lam(t)/max_lam for each point (t) of the homogeneous
Poisson process
b. Generate a sample u from the uniform distribution on (0, 1)
c. Retain the locations for which u <= p.
"""
retained_points = np.empty((0))
if verbose:
print("[%s] generate %s samples from homogeneous poisson point process" % \
(arrow.now(), homo_points.shape), file=sys.stderr)
# thining samples by acceptance rate.
for i in range(homo_points.shape[0]):
# current time, location and generated historical times and locations.
t = homo_points[i]
his_t = retained_points
# thinning
lam_value = self.lam.value(t, his_t)
lam_bar = self.lam.upper_bound()
D = np.random.uniform()
# - if lam_value is greater than lam_bar, then skip the generation process
# and return None.
if lam_value > lam_bar:
if verbose:
print("intensity %f is greater than upper bound %f." % (lam_value, lam_bar), file=sys.stderr)
return None
# accept
if lam_value >= D * lam_bar:
# retained_points.append(homo_points[i])
retained_points = np.concatenate([retained_points, homo_points[[i]]], axis=0)
# monitor the process of the generation
if verbose and i != 0 and i % int(homo_points.shape[0] / 10) == 0:
print("[%s] %d raw samples have been checked. %d samples have been retained." % \
(arrow.now(), i, retained_points.shape[0]), file=sys.stderr)
# log the final results of the thinning algorithm
if verbose:
print("[%s] thining samples %s based on %s." % \
(arrow.now(), retained_points.shape, self.lam), file=sys.stderr)
return retained_points
def generate(self, T=[0, 1], batch_size=10, min_n_points=5, verbose=True):
"""
generate spatio-temporal points given lambda and kernel function
"""
points_list = []
sizes = []
max_len = 0
b = 0
# generate inhomogeneous poisson points iterately
pbar = tqdm(total = batch_size, desc="Generating ...")
while b < batch_size:
homo_points = self._homogeneous_poisson_sampling(T)
points = self._inhomogeneous_poisson_thinning(homo_points, verbose)
if points is None or len(points) < min_n_points:
continue
max_len = points.shape[0] if max_len < points.shape[0] else max_len
points_list.append(points)
sizes.append(len(points))
pbar.update(1)
b += 1
if verbose:
print("[%s] %d-th sequence is generated." % (arrow.now(), b+1), file=sys.stderr)
# fit the data into a tensor
data = np.zeros((batch_size, max_len))
for b in range(batch_size):
data[b, :len(points_list[b])] = points_list[b]
return data, sizes
def plot_1d_pointprocess(points, lam, T, ngrid=100):
"""
visualize 1 dimensional point process
"""
ts = np.linspace(T[0], T[1], ngrid)
lamvals = []
for t in ts:
his_t = points[(points <= t) * (points > 0)]
lamval = lam.value(t, his_t)
lamvals.append(lamval)
evals = []
print(points)
for t in points:
his_t = points[(points <= t) * (points > 0)]
lamval = lam.value(t, his_t)
evals.append(lamval)
fig, ax = plt.subplots()
ax.plot(ts, lamvals, linestyle="--", color="grey")
ax.scatter(points, evals, c="r")
plt.show()
if __name__ == "__main__":
# np.random.seed(0)
# np.set_printoptions(suppress=True)
# parameters initialization
mu = 10
T = [0., 1.]
beta = 100.
kernel = ExpKernel(beta=beta)
lam = HawkesLam(mu, kernel, maximum=1e+3)
pp = TemporalPointProcess(lam)
# generate points
points, sizes = pp.generate(
T=T, batch_size=1000, verbose=False)
np.save("data-1d-mu%.1f-beta%.1f-points.npy" % (mu, beta), points)
np.save("data-1d-mu%.1f-beta%.1f-sizes.npy" % (mu, beta), sizes)
print(sizes)
plot_1d_pointprocess(points[0], lam, T=T, ngrid=1000)