Skip to content

Commit 8572170

Browse files
authored
Merge pull request #12 from zangrcar/main
Refactoring
2 parents 4d44621 + 183892e commit 8572170

File tree

10 files changed

+276
-262
lines changed

10 files changed

+276
-262
lines changed

core/model.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ def with_preprocessing(self, strategy):
2929
class TimeseriesPreprocessing:
3030
"""Processes timeseries."""
3131
def __init__(self):
32-
pass
32+
self.ts = None
3333

3434
def process(self, ts):
35-
pass
35+
self.ts = ts
36+
return TimeseriesView([ts])
3637

3738

3839
class TimeseriesPreprocessingSegmentation(TimeseriesPreprocessing):
@@ -48,6 +49,7 @@ class TimeseriesPreprocessingSegmentation(TimeseriesPreprocessing):
4849
def __init__(self, segment_start, segment_end):
4950
self.seg_st = segment_start
5051
self.seg_end = segment_end
52+
self.ts = None
5153

5254
def process(self, ts):
5355
self.ts = ts[self.seg_st:self.seg_end]
@@ -90,7 +92,7 @@ def __init__(self):
9092
self.segments = None
9193
self.strategy = []
9294

93-
def add_strategy(self, strat):
95+
def add(self, strat):
9496
self.strategy.append(strat)
9597
return self
9698

@@ -238,7 +240,7 @@ def _hash(self):
238240
str_to_hash = str(self.graph.nodes()) + str(self.graph.edges())
239241
return hashlib.md5(str_to_hash.encode()).hexdigest()
240242

241-
def combine_identical_nodes_slid_win(self):
243+
def combine_identical_subgraphs(self):
242244
"""Combines nodes that have same value of attribute self.attribute if graph is classical graph and
243245
nodes that are identical graphs if graph is created using sliding window mechanism."""
244246
self.orig_graph = self.graph.copy()
@@ -254,11 +256,11 @@ def combine_identical_nodes_slid_win(self):
254256
continue
255257

256258
if(set(list(node_1.edges)) == set(list(node_2.edges))):
257-
self.graph = self._combine_nodes_win(self.graph, node_1, node_2, self.attribute)
259+
self.graph = self._combine_subgraphs(self.graph, node_1, node_2, self.attribute)
258260

259261
return self
260262

261-
def _combine_nodes_win(self, graph, node_1, node_2, att):
263+
def _combine_subgraphs(self, graph, node_1, node_2, att):
262264
"""Combines nodes node_1 and node_2, that are graphs."""
263265
for i in range(len(list(node_1.nodes(data=True)))):
264266
for j in range(len(list(node_2.nodes(data=True))[i][1][att])):
@@ -446,7 +448,7 @@ def to_sequence(self, graph):
446448
self.node_strategy.set_arguments(self.graph, self.nodes, dictionaries, self.att)
447449

448450

449-
i = 0
451+
ts_len = 0
450452
while len(self.sequences[0]) < self.timeseries_len:
451453
for j in range(len(self.sequences)):
452454

@@ -463,12 +465,12 @@ def to_sequence(self, graph):
463465
for j in range(self.skip_values + 1):
464466
for k in range(len(current_nodes)):
465467

466-
current_nodes[k] = self.node_strategy.next_node(i, k, current_nodes, self.switch_graphs, current_nodes[0])
468+
current_nodes[k] = self.node_strategy.next_node(ts_len, k, current_nodes, self.switch_graphs, current_nodes[0])
467469

468470
if(current_nodes[k] == None):
469471
return self
470472

471-
i += 1
473+
ts_len += 1
472474
return self.sequences
473475

474476
class ToSequenceVisitor(ToSequenceVisitorMaster):
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
## **Csv and xml files**:
22

3-
::: input_output.input
3+
::: tsg_io.input

docs/tutorial.ipynb

Lines changed: 68 additions & 88 deletions
Large diffs are not rendered by default.

from_graph/strategy_to_time_sequence.py

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import random
2+
import numpy as np
23

34
class StrategyNextValueInNode:
45
"""
@@ -103,6 +104,9 @@ def get_name(self):
103104
return "round robin"
104105

105106

107+
108+
109+
106110
class StrategySelectNextNode:
107111
"""
108112
Stores strategy to choose next node from the neighbors of the previous node.
@@ -141,7 +145,7 @@ def get_name(self):
141145
pass
142146

143147

144-
class StrategySelectNextNodeRandomlyAcrossGraphs(StrategySelectNextNode):
148+
class StrategySelectNextNodeRandomlyFromNeighboursAcrossGraphs(StrategySelectNextNode):
145149
"""Walks through all graphs in a multivariate graph and chooses next node randomly."""
146150
def __init__(self):
147151
super().__init__()
@@ -155,10 +159,10 @@ def next_node(self, i, graph_index, nodes, switch, node):
155159
return random.choice(neighbors)
156160

157161
def get_name(self):
158-
return "walkthrough all graphs randomly"
162+
return "walkthrough all graphs randomly from neighbours"
159163

160164

161-
class StrategySelectNextNodeRandomlyFromFirstGraph(StrategySelectNextNode):
165+
class StrategySelectNextNodeRandomlyFromNeighboursFromFirstGraph(StrategySelectNextNode):
162166
"""Walks through first graph and chooses next node randomly."""
163167
def __init__(self):
164168
super().__init__()
@@ -171,4 +175,64 @@ def next_node(self, i, graph_index, nodes, switch, node):
171175
return random.choice(neighbors)
172176

173177
def get_name(self):
174-
return "walkthrough one graph randomly"
178+
return "walkthrough one graph randomly from neighbours"
179+
180+
#TODO: fix this
181+
class StrategySelectNextNodeRandomly(StrategySelectNextNode):
182+
183+
def __init__(self):
184+
super().__init__()
185+
186+
def next_node(self, i, graph_index, nodes, switch, node):
187+
return random.choice(self.nodes[graph_index])
188+
189+
def get_name(self):
190+
return "Random walkthrough the nodes"
191+
192+
193+
#TODO: fix this
194+
class StrategySelectNextNodeRandomDegree(StrategySelectNextNode):
195+
196+
def __init__(self):
197+
super().__init__()
198+
199+
def next_node(self, i, graph_index, nodes, switch, node):
200+
nodes_weighted_tuples = [(n, float(len([x for x in list(set(self.nodes[graph_index]) & set(self.graph.neighbors(n)))]))/float(len(nodes[graph_index]))) for n in nodes[graph_index]]
201+
nodes = [n[0] for n in nodes_weighted_tuples]
202+
node_weights = [n[1] for n in nodes_weighted_tuples]
203+
if np.min(node_weights)>0:
204+
node_weights = np.round(np.divide(node_weights, np.min(node_weights)), decimals=4)
205+
node_weights = np.divide(node_weights, np.sum(node_weights))
206+
207+
return np.random.choice(nodes, p=node_weights)
208+
209+
210+
def get_name(self):
211+
return "Random degree walkthrough the nodes"
212+
213+
#TODO: fix this
214+
class StrategySelectNextNodeRandomWithRestart(StrategySelectNextNode):
215+
216+
def __init__(self):
217+
super().__init__()
218+
self.first_node = None
219+
220+
def next_node(self, i, graph_index, nodes, switch, node):
221+
222+
if self.first_node == None:
223+
self.first_node = []
224+
for i in range(len(nodes)):
225+
self.first_node.append(np.random.choice(nodes[i]))
226+
227+
if np.random.random() <0.15:
228+
return self.first_node[graph_index]
229+
230+
if len(nodes) == 0:
231+
node = self.first_node[graph_index]
232+
else:
233+
node = np.random.choice(nodes[graph_index])
234+
235+
return node
236+
237+
def get_name(self):
238+
return "Random walk with restart"

generation/strategies.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,6 @@ class GraphToTimeseriesStrategy:
77
def to_sequence(self, graph, sequence_length):
88
return None
99

10-
class RandomNodeSequenceGenerationStrategy(GraphToTimeseriesStrategy):
11-
def to_sequence(self, graph, sequence_length):
12-
sequence = []
13-
nodes = [n for n in graph.nodes()]
14-
15-
while len(sequence) < sequence_length:
16-
node = np.random.choice(nodes)
17-
sequence = sequence + [graph.nodes[node]['value']]
18-
19-
return sequence
2010

2111
class RandomNodeNeighbourSequenceGenerationStrategy(GraphToTimeseriesStrategy):
2212
def to_sequence(self, graph, sequence_length):

mkdocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ nav:
8181
- Home: index.md
8282

8383
- Documentation:
84-
- Input/output: documentation/input_output.md
84+
- Input/output: documentation/tsg_io.md
8585
- Model: documentation/model.md
8686
- To Time sequence strategy: documentation/strategy_to_time_sequence.md
8787
- Graph linking strategy: documentation/strategy_linking_graph.md

to_graph/strategy_linking_graph.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ def __init__(self, graph, strategy_precedence):
2121
def set_graph(self, graph):
2222
self.graph = graph
2323

24-
#TODO: implement/make it comparable :)
25-
#TODO: rename :)
2624
def get_strategy_precedence(self):
2725
return self.strategy_precedence
2826

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def __init__(self):
1010
def from_csv(self):
1111
pass
1212

13-
class CsvStock(CsvRead):
13+
class CsvFile(CsvRead):
1414
"""
1515
Returns processed data from csv file sorted by "Date".
1616
@@ -41,10 +41,11 @@ def __init__(self):
4141
def from_xml(self):
4242
pass
4343

44-
#TODO: rename
45-
class XmlSomething(XmlRead):
44+
class FundamentalsReportFinancialStatements(XmlRead):
4645
"""
47-
One of the ways of extracting the data from xml file.
46+
Extracting data from an xml file.
47+
Further explanation of the file format can be found at:
48+
https://docs-2-0--quantrocket.netlify.app/data/reuters/
4849
4950
**Attributes:**
5051

tutorials/demo-ts2g2.ipynb

Lines changed: 67 additions & 88 deletions
Large diffs are not rendered by default.

tutorials/test.py

Lines changed: 55 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4,90 +4,90 @@
44
if nb_dir not in sys.path:
55
sys.path.append(nb_dir)
66

7-
from core import model
7+
from core.model import Timeseries, TimeseriesPreprocessing, TimeseriesPreprocessingSegmentation, TimeseriesPreprocessingSlidingWindow, TimeseriesPreprocessingComposite, TimeseriesView, TimeGraph, ToSequenceVisitorSlidingWindow, ToSequenceVisitor
88

9-
from input_output import input as inp
10-
import from_graph.strategy_to_time_sequence as tts
11-
import to_graph.strategy_linking_graph as gls
12-
import to_graph.strategy_linking_multi_graphs as mgl
13-
import to_graph.strategy_to_graph as tgs
9+
from tsg_io.input import CsvFile
10+
from from_graph.strategy_to_time_sequence import StrategyNextValueInNodeRandom, StrategyNextValueInNodeRandomForSlidingWindow, StrategyNextValueInNodeRoundRobin, StrategyNextValueInNodeRoundRobinForSlidingWindow, StrategySelectNextNodeRandomlyFromNeighboursAcrossGraphs, StrategySelectNextNodeRandomlyFromNeighboursFromFirstGraph, StrategySelectNextNodeRandomly, StrategySelectNextNodeRandomDegree, StrategySelectNextNodeRandomWithRestart
11+
from to_graph.strategy_linking_graph import StrategyLinkingGraphByValueWithinRange, LinkNodesWithinGraph
12+
from to_graph.strategy_linking_multi_graphs import LinkGraphs
13+
from to_graph.strategy_to_graph import BuildTimeseriesToGraphNaturalVisibilityStrategy, BuildTimeseriesToGraphHorizontalVisibilityStrategy
1414

1515
amazon_path = os.path.join(os.getcwd(), "amazon", "AMZN.csv")
1616
apple_path = os.path.join(os.getcwd(), "apple", "APPLE.csv")
1717

1818

19-
test = model.Timeseries(inp.CsvStock(amazon_path, "Close").from_csv())\
20-
.with_preprocessing(model.TimeseriesPreprocessingSegmentation(60, 90))\
21-
.to_graph(tgs.BuildTimeseriesToGraphNaturalVisibilityStrategy().with_limit(1).get_strategy())\
19+
20+
timegraph_1 = Timeseries(CsvFile(amazon_path, "Close").from_csv())\
21+
.with_preprocessing(TimeseriesPreprocessingSegmentation(60, 90))\
22+
.to_graph(BuildTimeseriesToGraphNaturalVisibilityStrategy().with_limit(1).get_strategy())\
2223
.add_edge(0,2)\
2324
.add_edge(13, 21, weight = 17)\
24-
.link(gls.LinkNodesWithinGraph().by_value(gls.StrategyLinkingGraphByValueWithinRange(2)).seasonalities(15))\
25+
.link(LinkNodesWithinGraph().by_value(StrategyLinkingGraphByValueWithinRange(2)).seasonalities(15))\
2526
.draw("blue")
2627

27-
28-
x = model.Timeseries(inp.CsvStock(apple_path, "Close").from_csv())\
29-
.with_preprocessing(model.TimeseriesPreprocessingComposite()\
30-
.add_strategy(model.TimeseriesPreprocessingSegmentation(60, 120))\
31-
.add_strategy(model.TimeseriesPreprocessingSlidingWindow(5)))\
32-
.to_graph(tgs.BuildTimeseriesToGraphNaturalVisibilityStrategy().get_strategy())\
33-
.link(mgl.LinkGraphs().sliding_window())\
34-
.combine_identical_nodes_slid_win()\
28+
timegraph_2 = Timeseries(CsvFile(apple_path, "Close").from_csv())\
29+
.with_preprocessing(TimeseriesPreprocessingComposite()\
30+
.add(TimeseriesPreprocessingSegmentation(60, 120))\
31+
.add(TimeseriesPreprocessingSlidingWindow(5)))\
32+
.to_graph(BuildTimeseriesToGraphNaturalVisibilityStrategy().get_strategy())\
33+
.link(LinkGraphs().sliding_window())\
34+
.combine_identical_subgraphs()\
3535
.draw("red")
3636

37-
i = model.Timeseries(inp.CsvStock(apple_path, "Close").from_csv())\
38-
.with_preprocessing(model.TimeseriesPreprocessingSegmentation(60, 90))\
39-
.add(model.Timeseries(inp.CsvStock(apple_path, "Close").from_csv())\
40-
.with_preprocessing(model.TimeseriesPreprocessingSegmentation(90, 120)))\
41-
.add(model.Timeseries(inp.CsvStock(apple_path, "Close").from_csv())\
42-
.with_preprocessing(model.TimeseriesPreprocessingSegmentation(150, 180)))\
43-
.to_graph(tgs.BuildTimeseriesToGraphNaturalVisibilityStrategy().with_limit(1).get_strategy())\
44-
.link(mgl.LinkGraphs().time_cooccurrence())\
45-
.link(gls.LinkNodesWithinGraph().by_value(gls.StrategyLinkingGraphByValueWithinRange(0.5)))\
37+
timegraph_3 = Timeseries(CsvFile(apple_path, "Close").from_csv())\
38+
.with_preprocessing(TimeseriesPreprocessingSegmentation(60, 90))\
39+
.add(Timeseries(CsvFile(apple_path, "Close").from_csv())\
40+
.with_preprocessing(TimeseriesPreprocessingSegmentation(90, 120)))\
41+
.add(Timeseries(CsvFile(apple_path, "Close").from_csv())\
42+
.with_preprocessing(TimeseriesPreprocessingSegmentation(150, 180)))\
43+
.to_graph(BuildTimeseriesToGraphNaturalVisibilityStrategy().with_limit(1).get_strategy())\
44+
.link(LinkGraphs().time_cooccurrence())\
45+
.link(LinkNodesWithinGraph().by_value(StrategyLinkingGraphByValueWithinRange(0.5)))\
4646
.combine_identical_nodes()\
4747
.draw("brown")
4848

49-
j = model.Timeseries(inp.CsvStock(apple_path, "Close").from_csv())\
50-
.with_preprocessing(model.TimeseriesPreprocessingComposite()\
51-
.add_strategy(model.TimeseriesPreprocessingSegmentation(60, 110))\
52-
.add_strategy(model.TimeseriesPreprocessingSlidingWindow(5)))\
53-
.add(model.Timeseries(inp.CsvStock(apple_path, "Close").from_csv())\
54-
.with_preprocessing(model.TimeseriesPreprocessingComposite()\
55-
.add_strategy(model.TimeseriesPreprocessingSegmentation(120, 170))\
56-
.add_strategy(model.TimeseriesPreprocessingSlidingWindow(5)))\
57-
.add(model.Timeseries(inp.CsvStock(apple_path, "Close").from_csv())\
58-
.with_preprocessing(model.TimeseriesPreprocessingComposite()\
59-
.add_strategy(model.TimeseriesPreprocessingSegmentation(190, 240))\
60-
.add_strategy(model.TimeseriesPreprocessingSlidingWindow(5)))))\
61-
.to_graph(tgs.BuildTimeseriesToGraphNaturalVisibilityStrategy().get_strategy())\
62-
.link(mgl.LinkGraphs().sliding_window().time_cooccurrence())\
63-
.combine_identical_nodes_slid_win()\
64-
.link(gls.LinkNodesWithinGraph().seasonalities(15))\
49+
timegraph_4 = Timeseries(CsvFile(apple_path, "Close").from_csv())\
50+
.with_preprocessing(TimeseriesPreprocessingComposite()\
51+
.add(TimeseriesPreprocessingSegmentation(60, 110))\
52+
.add(TimeseriesPreprocessingSlidingWindow(5)))\
53+
.add(Timeseries(CsvFile(apple_path, "Close").from_csv())\
54+
.with_preprocessing(TimeseriesPreprocessingComposite()\
55+
.add(TimeseriesPreprocessingSegmentation(120, 170))\
56+
.add(TimeseriesPreprocessingSlidingWindow(5)))\
57+
.add(Timeseries(CsvFile(apple_path, "Close").from_csv())\
58+
.with_preprocessing(TimeseriesPreprocessingComposite()\
59+
.add(TimeseriesPreprocessingSegmentation(190, 240))\
60+
.add(TimeseriesPreprocessingSlidingWindow(5)))))\
61+
.to_graph(BuildTimeseriesToGraphNaturalVisibilityStrategy().get_strategy())\
62+
.link(LinkGraphs().sliding_window().time_cooccurrence())\
63+
.combine_identical_subgraphs()\
64+
.link(LinkNodesWithinGraph().seasonalities(15))\
6565
.draw("green")
6666

6767

68-
test.to_sequence(model.ToSequenceVisitor()\
69-
.next_node_strategy(tts.StrategySelectNextNodeRandomlyAcrossGraphs())\
70-
.next_value_strategy(tts.StrategyNextValueInNodeRoundRobin().skip_every_x_steps(1))\
68+
timegraph_1.to_sequence(ToSequenceVisitor()\
69+
.next_node_strategy(StrategySelectNextNodeRandomWithRestart())\
70+
.next_value_strategy(StrategyNextValueInNodeRoundRobin().skip_every_x_steps(1))\
7171
.ts_length(100))\
7272
.draw_sequence()
7373

7474

75-
x.to_sequence(model.ToSequenceVisitorSlidingWindow()\
76-
.next_node_strategy(tts.StrategySelectNextNodeRandomlyFromFirstGraph())\
77-
.next_value_strategy(tts.StrategyNextValueInNodeRandomForSlidingWindow().skip_every_x_steps(1))\
75+
timegraph_2.to_sequence(ToSequenceVisitorSlidingWindow()\
76+
.next_node_strategy(StrategySelectNextNodeRandomly())\
77+
.next_value_strategy(StrategyNextValueInNodeRandomForSlidingWindow().skip_every_x_steps(1))\
7878
.ts_length(50))\
7979
.draw_sequence()
8080

8181

82-
i.to_sequence(model.ToSequenceVisitor()\
83-
.next_node_strategy(tts.StrategySelectNextNodeRandomlyAcrossGraphs().change_graphs_every_x_steps(2))\
84-
.next_value_strategy(tts.StrategyNextValueInNodeRoundRobin().skip_every_x_steps(1))\
82+
timegraph_3.to_sequence(ToSequenceVisitor()\
83+
.next_node_strategy(StrategySelectNextNodeRandomlyFromNeighboursAcrossGraphs().change_graphs_every_x_steps(2))\
84+
.next_value_strategy(StrategyNextValueInNodeRoundRobin().skip_every_x_steps(1))\
8585
.ts_length(50))\
8686
.draw_sequence()
8787

8888

89-
j.to_sequence(model.ToSequenceVisitorSlidingWindow()\
90-
.next_node_strategy(tts.StrategySelectNextNodeRandomlyFromFirstGraph())\
91-
.next_value_strategy(tts.StrategyNextValueInNodeRoundRobinForSlidingWindow())\
89+
timegraph_4.to_sequence(ToSequenceVisitorSlidingWindow()\
90+
.next_node_strategy(StrategySelectNextNodeRandomlyFromNeighboursAcrossGraphs())\
91+
.next_value_strategy(StrategyNextValueInNodeRoundRobinForSlidingWindow())\
9292
.ts_length(100))\
9393
.draw_sequence()

0 commit comments

Comments
 (0)