Skip to content

Commit 50d1431

Browse files
committed
Experiment to compare graph and timeseries embeddings.
1 parent b6c6c76 commit 50d1431

32 files changed

+2931
-235
lines changed

.gitignore

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,19 @@ RELEASE.md
170170
*.xml
171171
fundimentials
172172
fundamential
173+
174+
tutorials/abnormal_heartbeat/**
175+
!tutorials/abnormal_heartbeat/.gitkeep
176+
tutorials/ACSF1/**
177+
!tutorials/ACSF1/.gitkeep
178+
tutorials/adiac/**
179+
!tutorials/adiac/.gitkeep
180+
tutorials/dodger_loop_weekend/**
181+
!tutorials/dodger_loop_weekend/.gitkeep
182+
tutorials/embedding_models/**
183+
!tutorials/embedding_models/.gitkeep
184+
tutorials/kendall_tau_results/**
185+
!tutorials/kendall_tau_results/.gitkeep
186+
embeddings/ts2vec/**
187+
!embeddings/ts2vec/.gitkeep
188+

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
TS2G<sup>2</sup> stands for "timeseries to graphs and back". The library implements a variety of strategies to convert timeseries into graphs, and convert graphs into sequences. Below, we provide a code snippet to generate a graph from timeseries:
44

55
# load time series from a file
6-
timegraph = Timeseries(CsvFile(amazon_path, "Close").from_csv())\
6+
timegraph = Timeseries(CsvStock(some_file_path, "ColumnOfInterest").from_csv())\
77

88
# and preprocess the timeseries with multiple preprocessing strategies
99
.with_preprocessing(TimeseriesPreprocessingComposite()\
10-
.add(TimeseriesPreprocessingSegmentation(60, 120))\
11-
.add(TimeseriesPreprocessingSlidingWindow(5)))\
10+
.add_strategy(TimeseriesPreprocessingSegmentation(60, 120))\
11+
.add_strategy(TimeseriesPreprocessingSlidingWindow(5)))\
1212

1313
# then create a graph from the timeseries, following a particular strategy
1414
.to_graph(BuildTimeseriesToGraphNaturalVisibilityStrategy().get_strategy())\
@@ -17,10 +17,10 @@ TS2G<sup>2</sup> stands for "timeseries to graphs and back". The library impleme
1717
.link(LinkGraphs().sliding_window())\
1818

1919
# and combine identical graphs that result from the above-mentioned time windows into single nodes
20-
.combine_identical_subgraphs()\
20+
.combine_identical_nodes_slid_win()\
2121

2222
# finally, draw the graph
23-
.draw("blue")
23+
.draw("red")
2424

2525
For a more detailed example, look at the [Amazon stocks demo](https://github.com/graph-massivizer/ts2g2/blob/main/tutorials/demo-ts2g2.ipynb).
2626

core/model.py

Lines changed: 157 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,19 @@
66
import matplotlib.pyplot as plt
77
import networkx as nx
88
import hashlib
9+
import numpy as np
910

1011
import to_graph.strategy_linking_multi_graphs as mgl
1112
import to_graph.strategy_to_graph
1213
from to_graph.strategy_to_graph import BuildStrategyForTimeseriesToGraph
1314
import copy
15+
import math
16+
from scipy.fft import fft
1417

1518

16-
19+
class StrategyNotImplementedError(Exception):
    """Signals that an operation was requested for a strategy that has no implementation of it."""
1722

1823
class Timeseries:
1924
"""Saves extracted data as timeseries."""
@@ -132,6 +137,11 @@ def __init__(self, ts, attribute = 'value'):
132137
self.attribute = attribute
133138
self.graph = None
134139
self.graph_order = []
140+
self.histogram_frequencies = None
141+
self.histogram_bins = None
142+
self.w = 1
143+
self.tau = 1
144+
self.is_implemented = True
135145

136146
def get_ts(self):
137147
return self.ts
@@ -142,7 +152,9 @@ def add(self, ts):
142152
self.ts.append(time_ser)
143153
return self
144154

145-
def to_graph(self, strategy: to_graph.strategy_to_graph.BuildStrategyForTimeseriesToGraph):
155+
156+
def to_graph(self, strategy: to_graph.strategy_to_graph):
157+
self.w, self.tau = strategy._get_w_tau()
146158
for ts in self.ts:
147159
graph_dict = {}
148160
order=[]
@@ -151,17 +163,18 @@ def to_graph(self, strategy: to_graph.strategy_to_graph.BuildStrategyForTimeseri
151163
for timeseries in ts:
152164
g = strategy.to_graph(TimeseriesArrayStream(timeseries))
153165
g = g.graph
154-
155-
for i in range(len(g.nodes)):
156-
old_value = g.nodes[i][self.attribute]
157-
new_value = [old_value]
158-
g.nodes[i][self.attribute] = new_value
166+
167+
if strategy._has_value():
168+
for i in range(len(g.nodes)):
169+
old_value = g.nodes[i][self.attribute]
170+
new_value = [old_value]
171+
g.nodes[i][self.attribute] = new_value
159172

160173
hash = self._hash(g)
161174
mapping = {node: f"{hash}_{node}" for node in g.nodes}
162175
g = nx.relabel_nodes(g, mapping)
163176

164-
nx.set_edge_attributes(g, strategy.get_name(), "strategy")
177+
nx.set_edge_attributes(g, strategy._get_name(), "strategy")
165178
graph_dict[self._hash(g) + f"_{counter}"] = g
166179
order.append(self._hash(g) + f"_{counter}")
167180

@@ -172,15 +185,15 @@ def to_graph(self, strategy: to_graph.strategy_to_graph.BuildStrategyForTimeseri
172185

173186
self.graphs.append(graph_dict)
174187
self.graph_order.append(order)
175-
176-
177-
if len(self.ts) == 1 and len(self.ts[0]) == 1:
178-
return TimeGraph(self.graph, graphs = self.graphs[0])
188+
189+
if (len(self.ts) == 1 and len(self.ts[0]) == 1):
190+
return TimeGraph(self.graph, graphs = self.graphs[0], is_implemented=strategy._has_implemented_to_ts(), histogram_frequencies = self.histogram_frequencies, histogram_bins = self.histogram_bins, w = self.w, tau = self.tau)
179191
else:
192+
self.is_implemented = strategy._has_implemented_to_ts()
180193
return self
181194

182195
def link(self, link_strategy: mgl.LinkGraphs):
183-
return TimeGraph(link_strategy.link(self.graphs, self.graph_order), graphs = self.graphs)
196+
return TimeGraph(link_strategy.link(self.graphs, self.graph_order), graphs = self.graphs, is_implemented=self.is_implemented, histogram_frequencies = self.histogram_frequencies, histogram_bins = self.histogram_bins, w = self.w, tau = self.tau)
184197

185198
def _get_graphs(self):
186199
return self.graphs
@@ -195,10 +208,18 @@ def _hash(self, graph):
195208
"""Returns unique hash of this graph."""
196209
str_to_hash = str(graph.nodes()) + str(graph.edges())
197210
return hashlib.md5(str_to_hash.encode()).hexdigest()
211+
212+
def to_histogram(self, bins):
    """Compute a histogram for every entry of ``self.ts``.

    Each entry is handed to ``numpy.histogram`` together with ``bins``. The
    returned counts are collected in ``self.histogram_frequencies`` and the
    returned bin edges in ``self.histogram_bins``, in the order of ``self.ts``.
    Any previously stored histogram data is discarded.

    Returns ``self`` so that calls can be chained.
    """
    self.histogram_frequencies, self.histogram_bins = [], []

    for series in self.ts:
        counts, edges = np.histogram(series, bins=bins)
        self.histogram_frequencies.append(counts)
        self.histogram_bins.append(edges)

    return self
198221

199222

200-
# TODO: to be renamed into TimeGraph?
201-
# TODO: we need to delete the TimeGraph object (not this one - the redundant one)?
202223
class TimeGraph:
203224
"""
204225
Stores already made graph, allows us to add edges and links between nodes.
@@ -208,13 +229,22 @@ class TimeGraph:
208229
- `graph`: object networkx.Graph
209230
210231
"""
211-
def __init__(self, graph, graphs = None):
232+
def __init__(self, graph, graphs = None, is_implemented = True, histogram_frequencies = None, histogram_bins = None, w = 1, tau = 1):
212233
self.graph = graph
213234
self.orig_graph = None
214235
self.graphs = graphs
215236
self.attribute = 'value'
216237
self.sequences = None
217238
self.sequence_visitor = None
239+
self.is_implemented = is_implemented
240+
self.histogram_frequencies = histogram_frequencies
241+
self.histogram_bins = histogram_bins
242+
self.w = w
243+
self.tau = tau
244+
self.embeddings = None
245+
246+
def get_is_implemented(self):
    """Return the ``is_implemented`` flag stored on this object."""
    flag = self.is_implemented
    return flag
218248

219249
def _get_graph(self):
220250
return self.graph
@@ -249,14 +279,16 @@ def combine_identical_subgraphs(self):
249279
if node_1 not in self.graph:
250280
continue
251281

252-
for node_2 in list(self.graphs[j].values())[i+1:]:
282+
for key, node_2 in list(self.graphs[j].items())[i+1:]:
253283
if node_2 == None:
254284
break
255285
if node_2 not in self.graph:
256286
continue
257287

258288
if(set(list(node_1.edges)) == set(list(node_2.edges))):
259289
self.graph = self._combine_subgraphs(self.graph, node_1, node_2, self.attribute)
290+
del self.graphs[j][key]
291+
260292

261293
return self
262294

@@ -303,19 +335,25 @@ def _combine_nodes(self, graph, node_1, node_2, att):
303335

304336
def draw(self, color = "black"):
305337
"""Draws the created graph"""
338+
colors = []
339+
for j in range(len(self.graph.nodes)):
340+
colors.append(color)
306341
pos=nx.spring_layout(self.graph, seed=1)
307-
nx.draw(self.graph, pos, node_size=40, node_color=color)
342+
nx.draw(self.graph, pos, node_size=40, node_color=colors)
308343
plt.show()
309344
return self
310345

311346
def to_sequence(self, sequence_visitor):
    """Convert this graph into sequences with the given visitor.

    The visitor is remembered in ``self.sequence_visitor`` and the value of
    ``sequence_visitor.to_sequence(self)`` is stored in ``self.sequences``.

    Raises ``StrategyNotImplementedError`` when ``self.is_implemented`` is
    falsy. Returns ``self`` so that calls can be chained.
    """
    if self.is_implemented:
        self.sequence_visitor = sequence_visitor
        self.sequences = sequence_visitor.to_sequence(self)
        return self

    raise StrategyNotImplementedError("This function is not yet implemented for this type of graph.")
315352

316353
def draw_sequence(self):
317354
"""Draws timeseries."""
318-
355+
if not self.is_implemented:
356+
raise StrategyNotImplementedError(f"This function is not yet implemented for this type of graph.")
319357
colors = []
320358
for j in range(len(self.sequences)):
321359
colors.append("black")
@@ -334,7 +372,28 @@ def plot_timeseries(self, sequence, title, x_legend, y_legend, color):
334372
plt.xlabel(x_legend)
335373
plt.ylabel(y_legend)
336374
plt.grid(True)
375+
376+
def _get_histogram(self):
    """Return the stored histogram data as a ``(frequencies, bins)`` pair."""
    frequencies = self.histogram_frequencies
    bins = self.histogram_bins
    return frequencies, bins
378+
379+
def _get_w_tau(self):
    """Return the stored ``w`` and ``tau`` values as a ``(w, tau)`` pair."""
    pair = (self.w, self.tau)
    return pair
337381

382+
def to_embedding(self, embedding_visitor):
    """Compute an embedding of this graph and store it in ``self.embeddings``.

    ``embedding_visitor`` must provide ``get_graph_embedding(graph)``; it is
    called with ``self.graph`` and its return value is stored.

    Returns ``self`` so that calls can be chained.
    """
    # The visitor's get_graph_embedding takes the graph as a required argument;
    # calling it without one raised TypeError before any embedding was computed.
    self.embeddings = embedding_visitor.get_graph_embedding(self.graph)
    return self
385+
386+
def get_embedding(self):
    """Return whatever is currently stored in ``self.embeddings``."""
    stored = self.embeddings
    return stored
388+
389+
390+
class VisitorGraphEmbedding:
    """Builds a graph embedding from the eigenvector centrality of its nodes."""

    def __init__(self):
        # Last embedding computed by get_graph_embedding(); None until then.
        self.embedding = None

    def get_graph_embedding(self, graph):
        """Return the eigenvector-centrality scores of ``graph`` as a NumPy array.

        ``graph`` is passed to ``networkx.eigenvector_centrality_numpy``. The
        result is also kept in ``self.embedding``.
        """
        centrality = nx.eigenvector_centrality_numpy(graph)
        # The call returns a mapping node -> score. Iterating the mapping itself
        # yields the node labels, so take the values to get the scores.
        self.embedding = np.array(list(centrality.values()))
        return self.embedding
338397

339398
class ToSequenceVisitorMaster:
340399
"""
@@ -451,9 +510,10 @@ def to_sequence(self, graph):
451510
ts_len = 0
452511
while len(self.sequences[0]) < self.timeseries_len:
453512
for j in range(len(self.sequences)):
454-
513+
455514
index = 0
456-
for i in range(len(self.nodes[j])):
515+
for i in range(len(list(self.nodes[j]))):
516+
457517
if(self._is_equal(current_nodes[j], list(self.graph.nodes)[i])):
458518
index = i
459519
break
@@ -465,7 +525,7 @@ def to_sequence(self, graph):
465525
for j in range(self.skip_values + 1):
466526
for k in range(len(current_nodes)):
467527

468-
current_nodes[k] = self.node_strategy.next_node(ts_len, k, current_nodes, self.switch_graphs, current_nodes[0])
528+
current_nodes[k] = self.node_strategy.next_node(ts_len, k, current_nodes, self.switch_graphs, current_nodes[k])
469529

470530
if(current_nodes[k] == None):
471531
return self
@@ -523,19 +583,19 @@ def to_sequence(self, graph):
523583

524584
index = 0
525585

526-
for i in range(len(list(self.nodes[j]))):
527-
if(current_nodes_data[j] == self.data_nodes[j][i]):
528-
index = i
586+
for b in range(len(list(self.nodes[j]))):
587+
if(current_nodes_data[j] == self.data_nodes[j][b]):
588+
index = b
529589
break
530590

531591
self.sequences[j] = self.value_strategy.append(self.sequences[j], current_nodes_data[j], j, index)
532592
if self.sequences[j][-1] == None:
533593
return
534-
594+
535595
for j in range(self.skip_values+1):
536596
for k in range(len(current_nodes)):
537-
current_nodes[k] = self.node_strategy.next_node(i, k, current_nodes, self.switch_graphs, current_nodes[0])
538-
597+
current_nodes[k] = self.node_strategy.next_node(i, k, current_nodes, self.switch_graphs, current_nodes[k])
598+
539599
new_index = self.nodes[k].index(current_nodes[k])
540600
current_nodes_data[k] = self.data_nodes[k][new_index]
541601
if(current_nodes[k] == None):
@@ -546,3 +606,72 @@ def to_sequence(self, graph):
546606

547607

548608

609+
class ToSequenceVisitorOrdinalPartition(ToSequenceVisitorMaster):
    """Converts graphs made using ordinal partition mechanism back to timeseries.

    The histogram data and the ``w`` / ``tau`` parameters are read from the
    graph object handed to ``to_sequence``; one sequence is produced per graph.

    NOTE(review): the indentation of this class was reconstructed from a
    flattened diff view — confirm the loop nesting against the repository.
    """
    def __init__(self):
        super().__init__()
        # Placeholders; to_sequence() overwrites all four from the graph object.
        self.histogram_frequencies = None
        self.histogram_bins = None
        self.w = 1
        self.tau = 1

    def _set_nodes(self, dict: dict):
        """Fill ``self.nodes`` and ``self.data_nodes`` from the given graphs.

        ``dict`` is either a mapping whose values are graphs, or a list of such
        mappings; for a list, only the first item of every mapping is used.
        Returns ``self``.
        """

        if isinstance(dict, list):
            graphs = {}
            for i in range(len(dict)):
                # The key is the first (key, value) item of the mapping; only the
                # values of `graphs` are read below.
                graphs[list(dict[i].items())[0]] = list(dict[i].values())[0]
            dict = graphs

        self.nodes = []
        self.data_nodes = []
        for graph in dict.values():
            # Parallel lists: plain node ids and (node, attribute-dict) pairs.
            self.nodes.append(list(graph.nodes))
            self.data_nodes.append(list(graph.nodes(data=True)))
        return self

    def to_sequence(self, graph):
        """Rebuild one sequence per graph and return them as a list of lists.

        ``graph`` must provide ``_get_graph()``, ``_get_histogram()``,
        ``_get_graphs()`` and ``_get_w_tau()``. The result is also stored in
        ``self.sequences``.

        Relies on ``self.timeseries_len``, ``self.node_strategy``,
        ``self.value_strategy``, ``self.switch_graphs``, ``self.skip_values``
        and ``self.att`` — presumably set up by ToSequenceVisitorMaster; not
        visible here, verify against the base class.
        """

        self.graph = graph._get_graph()

        self.histogram_frequencies, self.histogram_bins = graph._get_histogram()
        self._set_nodes(graph._get_graphs())
        self.w, self.tau = graph._get_w_tau()
        # Target length of each of the tau sub-series (true division, may be a float).
        one_ts_length = self.timeseries_len/self.tau
        # short_series[k][i]: the i-th of tau sub-series for the k-th graph.
        short_series = [[[] for i in range(self.tau)] for i in range(len(graph._get_graphs()))]
        current_nodes = [None for _ in range(len(self.nodes))]
        current_nodes_data = [None for _ in range(len(self.data_nodes))]
        # Every walk starts at the first node of its graph.
        for i in range(len(self.nodes)):
            current_nodes[i] = self.nodes[i][0]
            current_nodes_data[i] = self.data_nodes[i][self.nodes[i].index(current_nodes[i])]

        self.node_strategy.set_arguments(self.graph, self.nodes, {}, self.att)

        i = 0
        # Progress is measured on the first sub-series of the first graph only.
        while(len(short_series[0][0]) < one_ts_length):
            for k in range(len(graph._get_graphs())):
                # NOTE(review): this loop rebinds the `i` initialised above, so the
                # `i` passed to next_node in the skip loop below is the value it
                # leaves behind — confirm this is intended.
                for i in range(self.tau):
                    # An empty sub-series is seeded with append_start, otherwise extended.
                    if(len(short_series[k][i]) == 0):
                        short_series[k][i] = self.value_strategy.append_start(short_series[k][i], k, current_nodes_data[k], self.histogram_frequencies, self.histogram_bins, self.w)
                    else:
                        short_series[k][i] = self.value_strategy.append(short_series[k][i], k, current_nodes_data[k], self.histogram_frequencies, self.histogram_bins, self.w)
                    # Move to the next node between sub-series, but not after the last one.
                    if(i < self.tau-1):
                        current_nodes[k] = self.node_strategy.next_node(i, k, current_nodes, self.switch_graphs, current_nodes[k])
                        current_nodes_data[k] = self.data_nodes[k][self.nodes[k].index(current_nodes[k])]

            # Advance every walk skip_values + 1 times before the next round.
            for j in range(self.skip_values+1):
                for k in range(len(graph._get_graphs())):
                    current_nodes[k] = self.node_strategy.next_node(i, k, current_nodes, self.switch_graphs, current_nodes[k])
                    current_nodes_data[k] = self.data_nodes[k][self.nodes[k].index(current_nodes[k])]

            i+=1


        # Concatenate the tau sub-series of each graph, in order, into one sequence.
        self.sequences = [[] for i in range(len(graph._get_graphs()))]
        for k in range(len(graph._get_graphs())):
            for j in range(self.tau):
                for i in range(len(short_series[k][j])):
                    self.sequences[k].append(short_series[k][j][i])

        return self.sequences

0 commit comments

Comments
 (0)