Skip to content

Commit 3c16740

Browse files
Hecate2, shargon, Jim8y
authored
Basic block analyser (neo-project#981)
* Add directory `Analysers` * entry point analyser * basic block analyser (tested but unused) * fix tests * Apply suggestions from code review * Update tests/Neo.Compiler.CSharp.UnitTests/UnitTest_Optimizer/UnitTest_BasicBlock.cs * Update src/Neo.Compiler.CSharp/Optimizer/Analysers/InstructionCoverage.cs * fix ut test * Remove generatedRegex * string.IsNullOrEmpty --------- Co-authored-by: Shargon <[email protected]> Co-authored-by: Jimmy <[email protected]> Co-authored-by: Jim8y <[email protected]>
1 parent c68b73e commit 3c16740

File tree

11 files changed

+429
-296
lines changed

11 files changed

+429
-296
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
using Neo.Json;
2+
using Neo.SmartContract;
3+
using Neo.SmartContract.Manifest;
4+
using Neo.VM;
5+
using System.Collections.Generic;
6+
7+
namespace Neo.Optimizer
8+
{
9+
/// <summary>
/// Convenience entry point for obtaining the basic blocks of a compiled contract.
/// </summary>
static class BasicBlock
{
    /// <summary>
    /// Runs an <see cref="InstructionCoverage"/> analysis over the contract and returns its basic blocks.
    /// </summary>
    /// <param name="nef">Compiled contract whose script is analysed.</param>
    /// <param name="manifest">Contract manifest, forwarded to the coverage analysis.</param>
    /// <param name="debugInfo">Debug information, forwarded to the coverage analysis.</param>
    /// <returns>
    /// Basic blocks keyed by the address of their first instruction;
    /// each block maps instruction address to instruction.
    /// </returns>
    public static Dictionary<int, Dictionary<int, Instruction>> FindBasicBlocks(NefFile nef, ContractManifest manifest, JToken debugInfo)
    {
        InstructionCoverage coverage = new InstructionCoverage(nef, manifest, debugInfo);
        return coverage.basicBlocks;
    }
}
14+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
using Neo.Json;
2+
using Neo.SmartContract;
3+
using Neo.SmartContract.Manifest;
4+
using Neo.VM;
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Linq;
8+
using static Neo.Optimizer.JumpTarget;
9+
using static Neo.Optimizer.OpCodeTypes;
10+
11+
namespace Neo.Optimizer
12+
{
13+
/// <summary>
/// Kind of frame kept on the try stack while the coverage analysis walks a script.
/// </summary>
public enum TryStackType
{
    /// <summary>Bottom frame, pushed when coverage starts without an inherited stack.</summary>
    ENTRY,

    /// <summary>Frame pushed when a TRY or TRY_L instruction is entered.</summary>
    TRY,

    /// <summary>Frame pushed when a thrown exception is routed to a catch target.</summary>
    CATCH,

    /// <summary>Frame pushed when execution is routed to a finally target.</summary>
    FINALLY,
}
20+
21+
/// <summary>
/// Outcome recorded by the coverage analysis for execution starting at a given address.
/// </summary>
public enum BranchType
{
    /// <summary>At least one of the branches may return without an exception.</summary>
    OK,

    /// <summary>All branches surely raise an exception, but the exception can still be caught.</summary>
    THROW,

    /// <summary>All branches abort; an abort cannot be caught.</summary>
    ABORT,

    /// <summary>The address has not been visited by the analysis yet.</summary>
    UNCOVERED,
}
28+
29+
public class InstructionCoverage
30+
{
31+
Script script;

/// <summary>
/// For each instruction address: whether execution starting there surely throws,
/// surely aborts, may be OK, or has not been covered.
/// </summary>
public Dictionary<int, BranchType> coveredMap { get; protected set; }

/// <summary>
/// Basic blocks keyed by the address of their first instruction;
/// each block maps instruction address to instruction.
/// </summary>
public Dictionary<int, Dictionary<int, Instruction>> basicBlocks { get; protected set; }

/// <summary>Every (address, instruction) pair enumerated from the script.</summary>
public List<(int a, Instruction i)> addressAndInstructions { get; init; }

/// <summary>Maps a jump/try target address to the addresses of the instructions that target it.</summary>
public Dictionary<int, HashSet<int>> jumpTargetToSources { get; init; }

/// <summary>
/// Enumerates the script of <paramref name="nef"/>, marks every instruction as uncovered,
/// then covers the code reachable from each entry point.
/// </summary>
/// <param name="nef">Compiled contract whose script is analysed.</param>
/// <param name="manifest">Contract manifest used to find entry points.</param>
/// <param name="debugInfo">Debug information used to find entry points.</param>
public InstructionCoverage(NefFile nef, ContractManifest manifest, JToken debugInfo)
{
    script = nef.Script;
    addressAndInstructions = script.EnumerateInstructions().ToList();
    var jumpAndTryMaps = FindAllJumpAndTrySourceToTargets(addressAndInstructions);
    jumpTargetToSources = jumpAndTryMaps.Item3;
    basicBlocks = new Dictionary<int, Dictionary<int, Instruction>>();
    coveredMap = new Dictionary<int, BranchType>();

    // Every known instruction address starts out as not visited.
    foreach (var addressAndInstruction in addressAndInstructions)
    {
        coveredMap.Add(addressAndInstruction.Item1, BranchType.UNCOVERED);
    }

    // Entry points are covered one after another on purpose: it is unsafe to go parallel,
    // because the coveredMap value is not a simple true/false flag.
    foreach ((int entryAddr, _) in EntryPoint.EntryPointsByMethod(manifest, debugInfo))
    {
        CoverInstruction(entryAddr);
    }
}
54+
55+
/// <summary>
/// Creates an independent copy of a try stack with the same top-to-bottom order.
/// </summary>
/// <param name="stack">Stack to copy; it is only enumerated, not modified.</param>
/// <returns>A new stack holding the same frames in the same order.</returns>
public static Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> CopyStack
    (Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> stack)
{
    // A Stack enumerates from top to bottom, while the constructor pushes items in
    // enumeration order, so the sequence is reversed first to keep the original top on top.
    var bottomToTop = stack.Reverse();
    return new Stack<((int returnAddr, int finallyAddr), TryStackType stackType)>(bottomToTop);
}
57+
58+
/// <summary>
/// Follows an exception raised at <paramref name="addr"/>: unwinds a copy of the try stack to the
/// nearest TRY or CATCH frame and continues coverage at the matching catch or finally target.
/// </summary>
/// <param name="entranceAddr">Entrance address of the basic block being covered.</param>
/// <param name="addr">Address of the instruction that raises the exception.</param>
/// <param name="stack">Current try stack; a copy is unwound, the argument itself is not modified.</param>
/// <returns>The branch type that results from handling the exception.</returns>
public BranchType HandleThrow(int entranceAddr, int addr, Stack<((int catchAddr, int finallyAddr), TryStackType stackType)> stack)
{
    var frames = CopyStack(stack);
    int catchTarget;
    int finallyTarget;
    TryStackType frameKind;

    // Pop frames until a TRY or CATCH frame is found, or until the stack has been emptied.
    while (true)
    {
        ((catchTarget, finallyTarget), frameKind) = frames.Pop();
        bool isHandlerFrame = frameKind == TryStackType.TRY || frameKind == TryStackType.CATCH;
        if (isHandlerFrame || frames.Count == 0)
        {
            break;
        }
    }

    switch (frameKind)
    {
        // try with catch: cancel the throw and execute the catch block
        case TryStackType.TRY when catchTarget != -1:
        {
            frames.Push(((-1, finallyTarget), TryStackType.CATCH));
            BranchType catchResult = CoverInstruction(catchTarget, stack: frames, throwed: false);
            coveredMap[entranceAddr] = catchResult;
            return catchResult;
        }

        // try without catch: execute the finally block but keep throwing
        case TryStackType.TRY when finallyTarget != -1:
        {
            coveredMap[addr] = BranchType.THROW;
            frames.Push(((-1, -1), TryStackType.FINALLY));
            BranchType finallyResult = CoverInstruction(finallyTarget, stack: frames, throwed: true);
            coveredMap[entranceAddr] = finallyResult;
            return finallyResult;
        }

        // thrown inside a catch block: execute its finally block (if any) but keep throwing
        case TryStackType.CATCH:
        {
            if (finallyTarget != -1)
            {
                addr = finallyTarget;
                frames.Push(((-1, -1), TryStackType.FINALLY));
            }
            return CoverInstruction(addr, stack: frames, throwed: true);
        }

        // neither inside a try nor inside a catch: the exception is not handled here
        default:
            coveredMap[entranceAddr] = BranchType.THROW;
            return BranchType.THROW;
    }
}
100+
101+
/// <summary>
/// Records the result of reaching an abort at <paramref name="addr"/>. When the top try-stack frame is a
/// TRY with a catch target or a CATCH with a finally target, that handler is visited first.
/// </summary>
/// <param name="entranceAddr">Entrance address of the basic block being covered.</param>
/// <param name="addr">Address of the aborting instruction.</param>
/// <param name="stack">Current try stack; only its top frame is inspected here.</param>
/// <returns>
/// <see cref="BranchType.OK"/> when the visited handler may return without exception,
/// otherwise <see cref="BranchType.ABORT"/>.
/// </returns>
public BranchType HandleAbort(int entranceAddr, int addr, Stack<((int returnAddr, int finallyAddr), TryStackType stackType)> stack)
{
    // See if we are in a try or catch. There may still be runtime exceptions.
    var ((catchTarget, finallyTarget), frameKind) = stack.Peek();
    bool inTryWithCatch = frameKind == TryStackType.TRY && catchTarget != -1;
    bool inCatchWithFinally = frameKind == TryStackType.CATCH && finallyTarget != -1;

    // The handler is only visited when one exists, because exceptions may still occur at runtime.
    bool handlerMayReturn = (inTryWithCatch || inCatchWithFinally)
        && HandleThrow(entranceAddr, addr, stack) == BranchType.OK;

    BranchType outcome = handlerMayReturn ? BranchType.OK : BranchType.ABORT;
    coveredMap[entranceAddr] = outcome;
    return outcome;
}
118+
119+
/// <summary>
/// Covers the basic block that starts at <paramref name="addr"/> and recursively covers all of its branches.
/// Every visited instruction other than NOP is marked in <see cref="coveredMap"/> and added to the
/// basic block in <see cref="basicBlocks"/> keyed by the entrance address.
/// </summary>
/// <param name="addr">Address of the first instruction of the block to cover.</param>
/// <param name="stack">
/// Try stack inherited from the caller. It is copied before use, so the caller's stack is not modified.
/// When null, a new stack holding a single ENTRY frame is used.
/// </param>
/// <param name="throwed">
/// True when an exception is being propagated. Coverage then only continues if the top frame is FINALLY;
/// otherwise the entrance address is marked as THROW.
/// </param>
/// <returns>Whether it is possible to return without exception, or whether the code surely throws or aborts.</returns>
/// <exception cref="BadScriptException">
/// The address is not a known instruction address, or ENDTRY / ENDFINALLY is met without a matching try-stack frame.
/// </exception>
/// <exception cref="Exception">Both branches of a conditional jump return an unexpected combination of branch types.</exception>
public BranchType CoverInstruction(int addr,
    Stack<((int returnAddr, int finallyAddr), TryStackType stackType)>? stack = null,
    bool throwed = false)
{
    int entranceAddr = addr;
    if (stack == null)
    {
        stack = new();
        stack.Push(((-1, -1), TryStackType.ENTRY));
    }
    else
        stack = CopyStack(stack);
    if (throwed)
    {
        // A propagating exception can only keep executing code inside a finally block
        if (stack.Peek().stackType != TryStackType.FINALLY)
        {
            coveredMap[entranceAddr] = BranchType.THROW;
            return BranchType.THROW;
        }
    }
    while (true)
    {
        // For the analysis of basic blocks,
        // we launched new recursion when exception is caught.
        // Here we have the exception not caught
        if (!coveredMap.TryGetValue(addr, out BranchType visited))
            throw new BadScriptException($"wrong address {addr}");
        if (visited != BranchType.UNCOVERED)
            // We have visited the code. Skip it.
            return visited;
        if (jumpTargetToSources.ContainsKey(addr) && addr != entranceAddr)
            // on target of jump, start a new recursion to split basic blocks
            return CoverInstruction(addr, stack, throwed);
        Instruction instruction = script.GetInstruction(addr);
        if (instruction.OpCode != OpCode.NOP)
        {
            coveredMap[addr] = BranchType.OK;
            // Add a basic block starting from entranceAddr
            if (!basicBlocks.TryGetValue(entranceAddr, out Dictionary<int, Instruction>? instructions))
            {
                instructions = new Dictionary<int, Instruction>();
                basicBlocks.Add(entranceAddr, instructions);
            }
            // Add this instruction to the basic block starting from entranceAddr
            instructions.Add(addr, instruction);
        }

        // TODO: ABORTMSG may THROW instead of ABORT. Just throw new NotImplementedException for ABORTMSG?
        if (instruction.OpCode == OpCode.ABORT || instruction.OpCode == OpCode.ABORTMSG)
            return HandleAbort(entranceAddr, addr, stack);
        if (callWithJump.Contains(instruction.OpCode))
        {
            // The callee is covered with a fresh try stack
            int callTarget = ComputeJumpTarget(addr, instruction);
            BranchType returnedType = CoverInstruction(callTarget);
            if (returnedType == BranchType.OK)
                return CoverInstruction(addr + instruction.Size, stack);
            if (returnedType == BranchType.ABORT)
                return HandleAbort(entranceAddr, addr, stack);
            if (returnedType == BranchType.THROW)
                return HandleThrow(entranceAddr, addr, stack);
        }
        if (instruction.OpCode == OpCode.RET)
        {
            // See if we are in a try. There may still be runtime exceptions
            HandleThrow(entranceAddr, addr, stack);
            coveredMap[entranceAddr] = BranchType.OK;
            return coveredMap[entranceAddr];
        }
        if (tryThrowFinally.Contains(instruction.OpCode))
        {
            if (instruction.OpCode == OpCode.TRY || instruction.OpCode == OpCode.TRY_L)
            {
                stack.Push((ComputeTryTarget(addr, instruction), TryStackType.TRY));
                return CoverInstruction(addr + instruction.Size, stack);
            }
            if (instruction.OpCode == OpCode.THROW)
                return HandleThrow(entranceAddr, addr, stack);
            if (instruction.OpCode == OpCode.ENDTRY || instruction.OpCode == OpCode.ENDTRY_L)
            {
                ((_, int finallyAddr), TryStackType stackType) = stack.Peek();
                if (stackType != TryStackType.TRY && stackType != TryStackType.CATCH)
                    throw new BadScriptException("No try stack on ENDTRY");

                // Visit catchAddr and finallyAddr because there may still be exceptions at runtime
                HandleThrow(entranceAddr, addr, stack);
                coveredMap[entranceAddr] = BranchType.OK;

                stack.Pop();
                int endPointer = ComputeJumpTarget(addr, instruction);
                if (finallyAddr != -1)
                {
                    // Run the finally block first; endPointer is stored in the FINALLY frame
                    stack.Push(((-1, endPointer), TryStackType.FINALLY));
                    addr = finallyAddr;
                }
                else
                    addr = endPointer;
                return CoverInstruction(addr, stack, throwed);
            }
            if (instruction.OpCode == OpCode.ENDFINALLY)
            {
                (_, TryStackType stackType) = stack.Pop();
                if (stackType != TryStackType.FINALLY)
                    throw new BadScriptException("No finally stack on ENDFINALLY");
                if (throwed)
                {
                    // For this basic block in finally, the branch type is OK
                    coveredMap[entranceAddr] = BranchType.OK;
                    // The throw is caused by previous codes
                    return BranchType.THROW;
                }
                return CoverInstruction(addr + instruction.Size, stack, false);
            }
        }
        if (unconditionalJump.Contains(instruction.OpCode))
            // For the analysis of basic blocks, we launch a new recursion
            // instead of continuing the loop at the jump target
            return CoverInstruction(ComputeJumpTarget(addr, instruction), stack, throwed);
        if (conditionalJump.Contains(instruction.OpCode) || conditionalJump_L.Contains(instruction.OpCode))
        {
            BranchType noJump = CoverInstruction(addr + instruction.Size, stack);
            BranchType jump = CoverInstruction(ComputeJumpTarget(addr, instruction), stack);
            if (noJump == BranchType.OK || jump == BranchType.OK)
            {
                // See if we are in a try. There may still be runtime exceptions
                HandleThrow(entranceAddr, addr, stack);
                coveredMap[entranceAddr] = BranchType.OK;
                return coveredMap[entranceAddr];
            }
            if (noJump == BranchType.ABORT && jump == BranchType.ABORT)
                return HandleAbort(entranceAddr, addr, stack);
            if (noJump == BranchType.THROW || jump == BranchType.THROW) // THROW, ABORT => THROW
                return HandleThrow(entranceAddr, addr, stack);
            throw new Exception($"Unknown {nameof(BranchType)} {noJump} {jump}");
        }

        addr += instruction.Size;
    }
}
268+
}
269+
}

src/Neo.Compiler.CSharp/Optimizer/Analysers/JumpTarget.cs

+29-22
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,20 @@ public static (int catchTarget, int finallyTarget) ComputeTryTarget(int addr, In
5656

5757
public static (Dictionary<Instruction, Instruction>,
5858
Dictionary<Instruction, (Instruction, Instruction)>,
59-
Dictionary<Instruction, HashSet<Instruction>>)
59+
Dictionary<int, HashSet<int>>)
6060
FindAllJumpAndTrySourceToTargets(NefFile nef)
6161
{
6262
Script script = nef.Script;
6363
return FindAllJumpAndTrySourceToTargets(script);
6464
}
6565
public static (Dictionary<Instruction, Instruction>,
6666
Dictionary<Instruction, (Instruction, Instruction)>,
67-
Dictionary<Instruction, HashSet<Instruction>>)
67+
Dictionary<int, HashSet<int>>)
6868
FindAllJumpAndTrySourceToTargets(Script script) => FindAllJumpAndTrySourceToTargets(script.EnumerateInstructions().ToList());
6969
public static (
7070
Dictionary<Instruction, Instruction>, // jump source to target
7171
Dictionary<Instruction, (Instruction, Instruction)>, // try source to targets
72-
Dictionary<Instruction, HashSet<Instruction>> // target to source
72+
Dictionary<int, HashSet<int>> // target to source
7373
)
7474
FindAllJumpAndTrySourceToTargets(List<(int, Instruction)> addressAndInstructionsList)
7575
{
@@ -78,33 +78,40 @@ public static (
7878
addressToInstruction.Add(a, i);
7979
Dictionary<Instruction, Instruction> jumpSourceToTargets = new();
8080
Dictionary<Instruction, (Instruction, Instruction)> trySourceToTargets = new();
81-
Dictionary<Instruction, HashSet<Instruction>> targetToSources = new();
81+
Dictionary<int, HashSet<int>> targetToSources = new();
8282
foreach ((int a, Instruction i) in addressAndInstructionsList)
8383
{
8484
if (SingleJumpInOperand(i))
8585
{
86-
Instruction target = addressToInstruction[ComputeJumpTarget(a, i)];
86+
int targetAddr = ComputeJumpTarget(a, i);
87+
Instruction target = addressToInstruction[targetAddr];
8788
jumpSourceToTargets.TryAdd(i, target);
88-
if (!targetToSources.TryGetValue(target, out HashSet<Instruction>? sources)) sources = new();
89-
sources.Add(i);
89+
if (!targetToSources.TryGetValue(targetAddr, out HashSet<int>? sources))
90+
{
91+
sources = new();
92+
targetToSources.Add(targetAddr, sources);
93+
}
94+
sources.Add(a);
9095
}
91-
if (i.OpCode == TRY)
96+
if (i.OpCode == TRY || i.OpCode == TRY_L)
9297
{
93-
(Instruction t1, Instruction t2) = (addressToInstruction[a + i.TokenI8], addressToInstruction[a + i.TokenI8_1]);
98+
(int a1, int a2) = i.OpCode == TRY ?
99+
(a + i.TokenI8, a + i.TokenI8_1) :
100+
(a + i.TokenI32, a + i.TokenI32_1);
101+
(Instruction t1, Instruction t2) = (addressToInstruction[a1], addressToInstruction[a2]);
94102
trySourceToTargets.TryAdd(i, (t1, t2));
95-
if (!targetToSources.TryGetValue(t1, out HashSet<Instruction>? sources1)) sources1 = new();
96-
sources1.Add(i);
97-
if (!targetToSources.TryGetValue(t2, out HashSet<Instruction>? sources2)) sources2 = new();
98-
sources2.Add(i);
99-
}
100-
if (i.OpCode == TRY_L)
101-
{
102-
(Instruction t1, Instruction t2) = (addressToInstruction[a + i.TokenI32], addressToInstruction[a + i.TokenI32_1]);
103-
trySourceToTargets.TryAdd(i, (t1, t2));
104-
if (!targetToSources.TryGetValue(t1, out HashSet<Instruction>? sources1)) sources1 = new();
105-
sources1.Add(i);
106-
if (!targetToSources.TryGetValue(t2, out HashSet<Instruction>? sources2)) sources2 = new();
107-
sources2.Add(i);
103+
// Register this try instruction (address a) as a source of its first target a1.
if (!targetToSources.TryGetValue(a1, out HashSet<int>? sources1))
{
    sources1 = new();
    targetToSources.Add(a1, sources1);
}
sources1.Add(a);
// Register this try instruction as a source of its second target a2.
// The lookup has to use a2: looking up a1 again always succeeds because a1 was registered
// just above, so a2 would never be added to targetToSources.
if (!targetToSources.TryGetValue(a2, out HashSet<int>? sources2))
{
    sources2 = new();
    targetToSources.Add(a2, sources2);
}
sources2.Add(a);
108115
}
109116
}
110117
return (jumpSourceToTargets, trySourceToTargets, targetToSources);

0 commit comments

Comments
 (0)