|
34 | 34 | }, |
35 | 35 | "2-store 80bit": { |
36 | 36 | "x86InstructionCount": 2, |
37 | | - "ExpectedInstructionCount": 25, |
| 37 | + "ExpectedInstructionCount": 24, |
38 | 38 | "x86Insts": [ |
39 | 39 | "fstp tword [rax]", |
40 | 40 | "fstp tword [rax+10]" |
|
56 | 56 | "add x21, x4, #0xa (10)", |
57 | 57 | "add x0, x28, x20, lsl #4", |
58 | 58 | "ldr q2, [x0, #1040]", |
59 | | - "ptrue p2.h, vl5", |
60 | 59 | "st1h {z2.h}, p2, [x21]", |
61 | 60 | "ldrb w21, [x28, #1298]", |
62 | 61 | "lsl w22, w22, w20", |
|
69 | 68 | }, |
70 | 69 | "8-store 80bit": { |
71 | 70 | "x86InstructionCount": 8, |
72 | | - "ExpectedInstructionCount": 97, |
| 71 | + "ExpectedInstructionCount": 90, |
73 | 72 | "x86Insts": [ |
74 | 73 | "fstp tword [rax]", |
75 | 74 | "fstp tword [rax+10]", |
|
97 | 96 | "add x21, x4, #0xa (10)", |
98 | 97 | "add x0, x28, x20, lsl #4", |
99 | 98 | "ldr q2, [x0, #1040]", |
100 | | - "ptrue p2.h, vl5", |
101 | 99 | "st1h {z2.h}, p2, [x21]", |
102 | 100 | "ldrb w21, [x28, #1298]", |
103 | 101 | "lsl w23, w22, w20", |
|
109 | 107 | "add x21, x4, #0x14 (20)", |
110 | 108 | "add x0, x28, x20, lsl #4", |
111 | 109 | "ldr q2, [x0, #1040]", |
112 | | - "ptrue p2.h, vl5", |
113 | 110 | "st1h {z2.h}, p2, [x21]", |
114 | 111 | "ldrb w21, [x28, #1298]", |
115 | 112 | "lsl w23, w22, w20", |
|
121 | 118 | "add x21, x4, #0x1e (30)", |
122 | 119 | "add x0, x28, x20, lsl #4", |
123 | 120 | "ldr q2, [x0, #1040]", |
124 | | - "ptrue p2.h, vl5", |
125 | 121 | "st1h {z2.h}, p2, [x21]", |
126 | 122 | "ldrb w21, [x28, #1298]", |
127 | 123 | "lsl w23, w22, w20", |
|
133 | 129 | "add x21, x4, #0x28 (40)", |
134 | 130 | "add x0, x28, x20, lsl #4", |
135 | 131 | "ldr q2, [x0, #1040]", |
136 | | - "ptrue p2.h, vl5", |
137 | 132 | "st1h {z2.h}, p2, [x21]", |
138 | 133 | "ldrb w21, [x28, #1298]", |
139 | 134 | "lsl w23, w22, w20", |
|
145 | 140 | "add x21, x4, #0x32 (50)", |
146 | 141 | "add x0, x28, x20, lsl #4", |
147 | 142 | "ldr q2, [x0, #1040]", |
148 | | - "ptrue p2.h, vl5", |
149 | 143 | "st1h {z2.h}, p2, [x21]", |
150 | 144 | "ldrb w21, [x28, #1298]", |
151 | 145 | "lsl w23, w22, w20", |
|
157 | 151 | "add x21, x4, #0x3c (60)", |
158 | 152 | "add x0, x28, x20, lsl #4", |
159 | 153 | "ldr q2, [x0, #1040]", |
160 | | - "ptrue p2.h, vl5", |
161 | 154 | "st1h {z2.h}, p2, [x21]", |
162 | 155 | "ldrb w21, [x28, #1298]", |
163 | 156 | "lsl w23, w22, w20", |
|
169 | 162 | "add x21, x4, #0x46 (70)", |
170 | 163 | "add x0, x28, x20, lsl #4", |
171 | 164 | "ldr q2, [x0, #1040]", |
172 | | - "ptrue p2.h, vl5", |
173 | 165 | "st1h {z2.h}, p2, [x21]", |
174 | 166 | "ldrb w21, [x28, #1298]", |
175 | 167 | "lsl w22, w22, w20", |
|
201 | 193 | }, |
202 | 194 | "2-load 80bit": { |
203 | 195 | "x86InstructionCount": 2, |
204 | | - "ExpectedInstructionCount": 22, |
| 196 | + "ExpectedInstructionCount": 21, |
205 | 197 | "x86Insts": [ |
206 | 198 | "fld tword [rax]", |
207 | 199 | "fld tword [rax+10]" |
|
210 | 202 | "ptrue p2.h, vl5", |
211 | 203 | "ld1h {z2.h}, p2/z, [x4]", |
212 | 204 | "add x20, x4, #0xa (10)", |
213 | | - "ptrue p2.h, vl5", |
214 | 205 | "ld1h {z3.h}, p2/z, [x20]", |
215 | 206 | "ldrb w20, [x28, #1019]", |
216 | 207 | "sub w20, w20, #0x2 (2)", |
|
233 | 224 | }, |
234 | 225 | "8-load 80bit": { |
235 | 226 | "x86InstructionCount": 8, |
236 | | - "ExpectedInstructionCount": 59, |
| 227 | + "ExpectedInstructionCount": 52, |
237 | 228 | "x86Insts": [ |
238 | 229 | "fld tword [rax]", |
239 | 230 | "fld tword [rax+10]", |
|
248 | 239 | "ptrue p2.h, vl5", |
249 | 240 | "ld1h {z2.h}, p2/z, [x4]", |
250 | 241 | "add x20, x4, #0xa (10)", |
251 | | - "ptrue p2.h, vl5", |
252 | 242 | "ld1h {z3.h}, p2/z, [x20]", |
253 | 243 | "add x20, x4, #0x14 (20)", |
254 | | - "ptrue p2.h, vl5", |
255 | 244 | "ld1h {z4.h}, p2/z, [x20]", |
256 | 245 | "add x20, x4, #0x1e (30)", |
257 | | - "ptrue p2.h, vl5", |
258 | 246 | "ld1h {z5.h}, p2/z, [x20]", |
259 | 247 | "add x20, x4, #0x28 (40)", |
260 | | - "ptrue p2.h, vl5", |
261 | 248 | "ld1h {z6.h}, p2/z, [x20]", |
262 | 249 | "add x20, x4, #0x32 (50)", |
263 | | - "ptrue p2.h, vl5", |
264 | 250 | "ld1h {z7.h}, p2/z, [x20]", |
265 | 251 | "add x20, x4, #0x3c (60)", |
266 | | - "ptrue p2.h, vl5", |
267 | 252 | "ld1h {z8.h}, p2/z, [x20]", |
268 | 253 | "add x20, x4, #0x46 (70)", |
269 | | - "ptrue p2.h, vl5", |
270 | 254 | "ld1h {z9.h}, p2/z, [x20]", |
271 | 255 | "ldrb w20, [x28, #1019]", |
272 | 256 | "sub w20, w20, #0x8 (8)", |
|
0 commit comments