Skip to content

Commit 7ad9f7a

Browse files
committed
instcountci: testing multiple 80bit stores using SVE
In preparation for #4166 which should improve on these results.
1 parent 22058c0 commit 7ad9f7a

File tree

1 file changed

+206
-0
lines changed

1 file changed

+206
-0
lines changed
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
{
2+
"Features": {
3+
"Bitness": 64,
4+
"EnabledHostFeatures": [
5+
"SVE128",
6+
"SVE256"
7+
],
8+
"DisabledHostFeatures": [
9+
"AFP",
10+
"FLAGM",
11+
"FLAGM2",
12+
"RPRES"
13+
]
14+
},
15+
"Instructions": {
16+
"fstp tword [rax]": {
17+
"ExpectedInstructionCount": 15,
18+
"Comment": "Single 80-bit store.",
19+
"ExpectedArm64ASM": [
20+
"ldrb w20, [x28, #1019]",
21+
"add x0, x28, x20, lsl #4",
22+
"ldr q2, [x0, #1040]",
23+
"str d2, [x4]",
24+
"mov x21, v2.d[1]",
25+
"add x22, x4, #0x8 (8)",
26+
"strh w21, [x22]",
27+
"ldrb w21, [x28, #1298]",
28+
"mov w22, #0x1",
29+
"lsl w22, w22, w20",
30+
"bic w21, w21, w22",
31+
"strb w21, [x28, #1298]",
32+
"add w20, w20, #0x1 (1)",
33+
"and w20, w20, #0x7",
34+
"strb w20, [x28, #1019]"
35+
]
36+
},
37+
"2-store 80bit": {
38+
"x86InstructionCount": 2,
39+
"ExpectedInstructionCount": 29,
40+
"x86Insts": [
41+
"fstp tword [rax]",
42+
"fstp tword [rax+10]"
43+
],
44+
"ExpectedArm64ASM": [
45+
"ldrb w20, [x28, #1019]",
46+
"add x0, x28, x20, lsl #4",
47+
"ldr q2, [x0, #1040]",
48+
"str d2, [x4]",
49+
"mov x21, v2.d[1]",
50+
"add x22, x4, #0x8 (8)",
51+
"strh w21, [x22]",
52+
"ldrb w21, [x28, #1298]",
53+
"mov w22, #0x1",
54+
"lsl w23, w22, w20",
55+
"bic w21, w21, w23",
56+
"strb w21, [x28, #1298]",
57+
"add w20, w20, #0x1 (1)",
58+
"and w20, w20, #0x7",
59+
"strb w20, [x28, #1019]",
60+
"add x21, x4, #0xa (10)",
61+
"add x0, x28, x20, lsl #4",
62+
"ldr q2, [x0, #1040]",
63+
"str d2, [x21]",
64+
"mov x23, v2.d[1]",
65+
"add x21, x21, #0x8 (8)",
66+
"strh w23, [x21]",
67+
"ldrb w21, [x28, #1298]",
68+
"lsl w22, w22, w20",
69+
"bic w21, w21, w22",
70+
"strb w21, [x28, #1298]",
71+
"add w20, w20, #0x1 (1)",
72+
"and w20, w20, #0x7",
73+
"strb w20, [x28, #1019]"
74+
]
75+
},
76+
"8-store 80bit": {
77+
"x86InstructionCount": 8,
78+
"ExpectedInstructionCount": 113,
79+
"x86Insts": [
80+
"fstp tword [rax]",
81+
"fstp tword [rax+10]",
82+
"fstp tword [rax+20]",
83+
"fstp tword [rax+30]",
84+
"fstp tword [rax+40]",
85+
"fstp tword [rax+50]",
86+
"fstp tword [rax+60]",
87+
"fstp tword [rax+70]"
88+
],
89+
"ExpectedArm64ASM": [
90+
"ldrb w20, [x28, #1019]",
91+
"add x0, x28, x20, lsl #4",
92+
"ldr q2, [x0, #1040]",
93+
"str d2, [x4]",
94+
"mov x21, v2.d[1]",
95+
"add x22, x4, #0x8 (8)",
96+
"strh w21, [x22]",
97+
"ldrb w21, [x28, #1298]",
98+
"mov w22, #0x1",
99+
"lsl w23, w22, w20",
100+
"bic w21, w21, w23",
101+
"strb w21, [x28, #1298]",
102+
"add w20, w20, #0x1 (1)",
103+
"and w20, w20, #0x7",
104+
"strb w20, [x28, #1019]",
105+
"add x21, x4, #0xa (10)",
106+
"add x0, x28, x20, lsl #4",
107+
"ldr q2, [x0, #1040]",
108+
"str d2, [x21]",
109+
"mov x23, v2.d[1]",
110+
"add x21, x21, #0x8 (8)",
111+
"strh w23, [x21]",
112+
"ldrb w21, [x28, #1298]",
113+
"lsl w23, w22, w20",
114+
"bic w21, w21, w23",
115+
"strb w21, [x28, #1298]",
116+
"add w20, w20, #0x1 (1)",
117+
"and w20, w20, #0x7",
118+
"strb w20, [x28, #1019]",
119+
"add x21, x4, #0x14 (20)",
120+
"add x0, x28, x20, lsl #4",
121+
"ldr q2, [x0, #1040]",
122+
"str d2, [x21]",
123+
"mov x23, v2.d[1]",
124+
"add x21, x21, #0x8 (8)",
125+
"strh w23, [x21]",
126+
"ldrb w21, [x28, #1298]",
127+
"lsl w23, w22, w20",
128+
"bic w21, w21, w23",
129+
"strb w21, [x28, #1298]",
130+
"add w20, w20, #0x1 (1)",
131+
"and w20, w20, #0x7",
132+
"strb w20, [x28, #1019]",
133+
"add x21, x4, #0x1e (30)",
134+
"add x0, x28, x20, lsl #4",
135+
"ldr q2, [x0, #1040]",
136+
"str d2, [x21]",
137+
"mov x23, v2.d[1]",
138+
"add x21, x21, #0x8 (8)",
139+
"strh w23, [x21]",
140+
"ldrb w21, [x28, #1298]",
141+
"lsl w23, w22, w20",
142+
"bic w21, w21, w23",
143+
"strb w21, [x28, #1298]",
144+
"add w20, w20, #0x1 (1)",
145+
"and w20, w20, #0x7",
146+
"strb w20, [x28, #1019]",
147+
"add x21, x4, #0x28 (40)",
148+
"add x0, x28, x20, lsl #4",
149+
"ldr q2, [x0, #1040]",
150+
"str d2, [x21]",
151+
"mov x23, v2.d[1]",
152+
"add x21, x21, #0x8 (8)",
153+
"strh w23, [x21]",
154+
"ldrb w21, [x28, #1298]",
155+
"lsl w23, w22, w20",
156+
"bic w21, w21, w23",
157+
"strb w21, [x28, #1298]",
158+
"add w20, w20, #0x1 (1)",
159+
"and w20, w20, #0x7",
160+
"strb w20, [x28, #1019]",
161+
"add x21, x4, #0x32 (50)",
162+
"add x0, x28, x20, lsl #4",
163+
"ldr q2, [x0, #1040]",
164+
"str d2, [x21]",
165+
"mov x23, v2.d[1]",
166+
"add x21, x21, #0x8 (8)",
167+
"strh w23, [x21]",
168+
"ldrb w21, [x28, #1298]",
169+
"lsl w23, w22, w20",
170+
"bic w21, w21, w23",
171+
"strb w21, [x28, #1298]",
172+
"add w20, w20, #0x1 (1)",
173+
"and w20, w20, #0x7",
174+
"strb w20, [x28, #1019]",
175+
"add x21, x4, #0x3c (60)",
176+
"add x0, x28, x20, lsl #4",
177+
"ldr q2, [x0, #1040]",
178+
"str d2, [x21]",
179+
"mov x23, v2.d[1]",
180+
"add x21, x21, #0x8 (8)",
181+
"strh w23, [x21]",
182+
"ldrb w21, [x28, #1298]",
183+
"lsl w23, w22, w20",
184+
"bic w21, w21, w23",
185+
"strb w21, [x28, #1298]",
186+
"add w20, w20, #0x1 (1)",
187+
"and w20, w20, #0x7",
188+
"strb w20, [x28, #1019]",
189+
"add x21, x4, #0x46 (70)",
190+
"add x0, x28, x20, lsl #4",
191+
"ldr q2, [x0, #1040]",
192+
"str d2, [x21]",
193+
"mov x23, v2.d[1]",
194+
"add x21, x21, #0x8 (8)",
195+
"strh w23, [x21]",
196+
"ldrb w21, [x28, #1298]",
197+
"lsl w22, w22, w20",
198+
"bic w21, w21, w22",
199+
"strb w21, [x28, #1298]",
200+
"add w20, w20, #0x1 (1)",
201+
"and w20, w20, #0x7",
202+
"strb w20, [x28, #1019]"
203+
]
204+
}
205+
}
206+
}

0 commit comments

Comments
 (0)