Skip to content

Commit ead0b82

Browse files
authored
Merge pull request #250 from isildur-g/static-fat-dispatch
static dispatch for fat runtimes. eliminates the need for ifunc.
2 parents 5dff481 + 927b460 commit ead0b82

File tree

1 file changed

+198
-29
lines changed

1 file changed

+198
-29
lines changed

src/dispatcher.c

Lines changed: 198 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
* Copyright (c) 2016-2020, Intel Corporation
3+
* Copyright (c) 2024, VectorCamp PC
34
*
45
* Redistribution and use in source and binary forms, with or without
56
* modification, are permitted provided that the following conditions are met:
@@ -30,6 +31,39 @@
3031
#include "hs_common.h"
3132
#include "hs_runtime.h"
3233
#include "ue2common.h"
34+
35+
/* Streamlining the dispatch to eliminate runtime checking/branching:
36+
* The goal: the first call to the function runs the resolve
37+
* code, which sets the static dispatch pointer to point to the
38+
* correct function. Subsequent calls to the function go directly to
39+
* the resolved pointer. The simplest way to accomplish this is to
40+
* initially set the pointer to the resolve function.
41+
* To accomplish this in a manner invisible to the user,
42+
* we rely on some rather ugly/confusing macros here.
43+
* There are four macros that assemble the code for each function
44+
* we want to dispatch in this manner:
45+
* CREATE_DISPATCH
46+
* this generates the declarations for the candidate target functions,
47+
* for the fat_dispatch function pointer, for the resolve_ function,
48+
* points the function pointer to the resolve function, and contains
49+
* most of the definition of the resolve function. The very end of the
50+
* resolve function is completed by the next macro, because in the
51+
* CREATE_DISPATCH macro we have the argument list with the arg declarations,
52+
* which is needed to generate correct function signatures, but we
53+
* can't generate from this, in a macro, a _call_ to one of those functions.
54+
* CONNECT_ARGS_1
55+
* this macro fills in the actual call at the end of the resolve function,
56+
* with the correct arg list. hence the name connect args.
57+
* CONNECT_DISPATCH_2
58+
* this macro likewise gives us the beginning of the definition of the
59+
* actual entry point function (the 'real name' that's called by the user)
60+
* but again in the pass-through call, cannot invoke the target without
61+
* getting the arg list, which is supplied by the final macro,
62+
* CONNECT_ARGS_3
63+
*
64+
*/
65+
66+
3367
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
3468
#include "util/arch/x86/cpuid_inline.h"
3569
#include "util/join.h"
@@ -57,30 +91,38 @@
5791
return (RTYPE)HS_ARCH_ERROR; \
5892
} \
5993
\
94+
/* dispatch routing pointer for this function */ \
95+
/* initially point it at the resolve function */ \
96+
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
97+
static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
98+
&JOIN(resolve_, NAME); \
99+
\
60100
/* resolver */ \
61-
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
101+
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
62102
if (check_avx512vbmi()) { \
63-
return JOIN(avx512vbmi_, NAME); \
103+
fat_dispatch_ ## NAME = &JOIN(avx512vbmi_, NAME); \
64104
} \
65-
if (check_avx512()) { \
66-
return JOIN(avx512_, NAME); \
105+
else if (check_avx512()) { \
106+
fat_dispatch_ ## NAME = &JOIN(avx512_, NAME); \
67107
} \
68-
if (check_avx2()) { \
69-
return JOIN(avx2_, NAME); \
108+
else if (check_avx2()) { \
109+
fat_dispatch_ ## NAME = &JOIN(avx2_, NAME); \
70110
} \
71-
if (check_sse42() && check_popcnt()) { \
72-
return JOIN(corei7_, NAME); \
111+
else if (check_sse42() && check_popcnt()) { \
112+
fat_dispatch_ ## NAME = &JOIN(corei7_, NAME); \
73113
} \
74-
if (check_ssse3()) { \
75-
return JOIN(core2_, NAME); \
114+
else if (check_ssse3()) { \
115+
fat_dispatch_ ## NAME = &JOIN(core2_, NAME); \
116+
} else { \
117+
/* anything else is fail */ \
118+
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
76119
} \
77-
/* anything else is fail */ \
78-
return JOIN(error_, NAME); \
79-
} \
80-
\
81-
/* function */ \
82-
HS_PUBLIC_API \
83-
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
120+
121+
122+
123+
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
124+
125+
84126

85127
#elif defined(ARCH_AARCH64)
86128
#include "util/arch/arm/cpuid_inline.h"
@@ -97,99 +139,226 @@
97139
return (RTYPE)HS_ARCH_ERROR; \
98140
} \
99141
\
142+
/* dispatch routing pointer for this function */ \
143+
/* initially point it at the resolve function */ \
144+
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__); \
145+
static RTYPE (* JOIN(fat_dispatch_, NAME))(__VA_ARGS__) = \
146+
&JOIN(resolve_, NAME); \
147+
\
100148
/* resolver */ \
101-
static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \
149+
static RTYPE JOIN(resolve_, NAME)(__VA_ARGS__) { \
102150
if (check_sve2()) { \
103-
return JOIN(sve2_, NAME); \
151+
fat_dispatch_ ## NAME = &JOIN(sve2_, NAME); \
104152
} \
105-
if (check_sve()) { \
106-
return JOIN(sve_, NAME); \
153+
else if (check_sve()) { \
154+
fat_dispatch_ ## NAME = &JOIN(sve_, NAME); \
107155
} \
108-
if (check_neon()) { \
109-
return JOIN(neon_, NAME); \
156+
else if (check_neon()) { \
157+
fat_dispatch_ ## NAME = &JOIN(neon_, NAME); \
158+
} else { \
159+
/* anything else is fail */ \
160+
fat_dispatch_ ## NAME = &JOIN(error_, NAME); \
110161
} \
111-
/* anything else is fail */ \
112-
return JOIN(error_, NAME); \
162+
163+
164+
/* the rest of the function is completed in the CONNECT_ARGS_1 macro. */
165+
166+
167+
#endif
168+
169+
170+
/* CONNECT_ARGS_1: supplies the tail of the resolve_NAME function whose
 * opening is emitted by CREATE_DISPATCH. The variadic arguments are the
 * bare argument names (no types); they are forwarded through the
 * fat_dispatch_NAME pointer, which the preceding resolver code has just
 * assigned, and the function body is then closed. RTYPE is accepted for
 * symmetry with the other macros but is not used in the expansion. */
#define CONNECT_ARGS_1(RTYPE, NAME, ...) \
171+
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
113172
} \
114-
\
115-
/* function */ \
173+
174+
175+
/* CONNECT_DISPATCH_2: opens the definition of the public entry point NAME
 * (marked HS_PUBLIC_API) using the typed parameter list passed as the
 * variadic arguments. It deliberately leaves the function body open; the
 * body is supplied by a following CONNECT_ARGS_3 invocation. */
#define CONNECT_DISPATCH_2(RTYPE, NAME, ...) \
176+
/* new function */ \
116177
HS_PUBLIC_API \
117-
RTYPE NAME(__VA_ARGS__) __attribute__((ifunc("resolve_" #NAME)))
178+
RTYPE NAME(__VA_ARGS__) { \
179+
180+
181+
/* CONNECT_ARGS_3: completes the entry point opened by CONNECT_DISPATCH_2.
 * The variadic arguments are the bare argument names (no types); the call
 * is passed through the fat_dispatch_NAME pointer and the function body is
 * closed. RTYPE is accepted but not used in the expansion. */
#define CONNECT_ARGS_3(RTYPE, NAME, ...) \
182+
return (*fat_dispatch_ ## NAME)(__VA_ARGS__); \
183+
} \
118184

119-
#endif
120185

121186
#pragma GCC diagnostic push
122187
#pragma GCC diagnostic ignored "-Wunused-parameter"
123188
#pragma GCC diagnostic push
124189
#pragma GCC diagnostic ignored "-Wunused-function"
190+
191+
/* this gets a bit ugly to compose the static redirect functions,
192+
* as we necessarily need first the typed arg list and then just the arg
193+
* names, twice in a row, to define the redirect function and the
194+
* dispatch function call */
195+
125196
CREATE_DISPATCH(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
126197
unsigned length, unsigned flags, hs_scratch_t *scratch,
127198
match_event_handler onEvent, void *userCtx);
199+
CONNECT_ARGS_1(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
200+
CONNECT_DISPATCH_2(hs_error_t, hs_scan, const hs_database_t *db, const char *data,
201+
unsigned length, unsigned flags, hs_scratch_t *scratch,
202+
match_event_handler onEvent, void *userCtx);
203+
CONNECT_ARGS_3(hs_error_t, hs_scan, db, data, length, flags, scratch, onEvent, userCtx);
128204

129205
CREATE_DISPATCH(hs_error_t, hs_stream_size, const hs_database_t *database,
130206
size_t *stream_size);
207+
CONNECT_ARGS_1(hs_error_t, hs_stream_size, database, stream_size);
208+
CONNECT_DISPATCH_2(hs_error_t, hs_stream_size, const hs_database_t *database,
209+
size_t *stream_size);
210+
CONNECT_ARGS_3(hs_error_t, hs_stream_size, database, stream_size);
131211

132212
CREATE_DISPATCH(hs_error_t, hs_database_size, const hs_database_t *db,
133213
size_t *size);
214+
CONNECT_ARGS_1(hs_error_t, hs_database_size, db, size);
215+
CONNECT_DISPATCH_2(hs_error_t, hs_database_size, const hs_database_t *db,
216+
size_t *size);
217+
CONNECT_ARGS_3(hs_error_t, hs_database_size, db, size);
218+
134219
CREATE_DISPATCH(hs_error_t, dbIsValid, const hs_database_t *db);
220+
CONNECT_ARGS_1(hs_error_t, dbIsValid, db);
221+
CONNECT_DISPATCH_2(hs_error_t, dbIsValid, const hs_database_t *db);
222+
CONNECT_ARGS_3(hs_error_t, dbIsValid, db);
223+
135224
CREATE_DISPATCH(hs_error_t, hs_free_database, hs_database_t *db);
225+
CONNECT_ARGS_1(hs_error_t, hs_free_database, db);
226+
CONNECT_DISPATCH_2(hs_error_t, hs_free_database, hs_database_t *db);
227+
CONNECT_ARGS_3(hs_error_t, hs_free_database, db);
136228

137229
CREATE_DISPATCH(hs_error_t, hs_open_stream, const hs_database_t *db,
138230
unsigned int flags, hs_stream_t **stream);
231+
CONNECT_ARGS_1(hs_error_t, hs_open_stream, db, flags, stream);
232+
CONNECT_DISPATCH_2(hs_error_t, hs_open_stream, const hs_database_t *db,
233+
unsigned int flags, hs_stream_t **stream);
234+
CONNECT_ARGS_3(hs_error_t, hs_open_stream, db, flags, stream);
139235

140236
CREATE_DISPATCH(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
141237
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
142238
match_event_handler onEvent, void *ctxt);
239+
CONNECT_ARGS_1(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
240+
CONNECT_DISPATCH_2(hs_error_t, hs_scan_stream, hs_stream_t *id, const char *data,
241+
unsigned int length, unsigned int flags, hs_scratch_t *scratch,
242+
match_event_handler onEvent, void *ctxt);
243+
CONNECT_ARGS_3(hs_error_t, hs_scan_stream, id, data, length, flags, scratch, onEvent, ctxt);
143244

144245
CREATE_DISPATCH(hs_error_t, hs_close_stream, hs_stream_t *id,
145246
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
247+
CONNECT_ARGS_1(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
248+
CONNECT_DISPATCH_2(hs_error_t, hs_close_stream, hs_stream_t *id,
249+
hs_scratch_t *scratch, match_event_handler onEvent, void *ctxt);
250+
CONNECT_ARGS_3(hs_error_t, hs_close_stream, id, scratch, onEvent, ctxt);
146251

147252
CREATE_DISPATCH(hs_error_t, hs_scan_vector, const hs_database_t *db,
148253
const char *const *data, const unsigned int *length,
149254
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
150255
match_event_handler onevent, void *context);
256+
CONNECT_ARGS_1(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
257+
CONNECT_DISPATCH_2(hs_error_t, hs_scan_vector, const hs_database_t *db,
258+
const char *const *data, const unsigned int *length,
259+
unsigned int count, unsigned int flags, hs_scratch_t *scratch,
260+
match_event_handler onevent, void *context);
261+
CONNECT_ARGS_3(hs_error_t, hs_scan_vector, db, data, length, count, flags, scratch, onevent, context);
151262

152263
CREATE_DISPATCH(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
264+
CONNECT_ARGS_1(hs_error_t, hs_database_info, db, info);
265+
CONNECT_DISPATCH_2(hs_error_t, hs_database_info, const hs_database_t *db, char **info);
266+
CONNECT_ARGS_3(hs_error_t, hs_database_info, db, info);
153267

154268
CREATE_DISPATCH(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
155269
const hs_stream_t *from_id);
270+
CONNECT_ARGS_1(hs_error_t, hs_copy_stream, to_id, from_id);
271+
CONNECT_DISPATCH_2(hs_error_t, hs_copy_stream, hs_stream_t **to_id,
272+
const hs_stream_t *from_id);
273+
CONNECT_ARGS_3(hs_error_t, hs_copy_stream, to_id, from_id);
156274

157275
CREATE_DISPATCH(hs_error_t, hs_reset_stream, hs_stream_t *id,
158276
unsigned int flags, hs_scratch_t *scratch,
159277
match_event_handler onEvent, void *context);
278+
CONNECT_ARGS_1(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
279+
CONNECT_DISPATCH_2(hs_error_t, hs_reset_stream, hs_stream_t *id,
280+
unsigned int flags, hs_scratch_t *scratch,
281+
match_event_handler onEvent, void *context);
282+
CONNECT_ARGS_3(hs_error_t, hs_reset_stream, id, flags, scratch, onEvent, context);
160283

161284
CREATE_DISPATCH(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
162285
const hs_stream_t *from_id, hs_scratch_t *scratch,
163286
match_event_handler onEvent, void *context);
287+
CONNECT_ARGS_1(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
288+
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_copy_stream, hs_stream_t *to_id,
289+
const hs_stream_t *from_id, hs_scratch_t *scratch,
290+
match_event_handler onEvent, void *context);
291+
CONNECT_ARGS_3(hs_error_t, hs_reset_and_copy_stream, to_id, from_id, scratch, onEvent, context);
164292

165293
CREATE_DISPATCH(hs_error_t, hs_serialize_database, const hs_database_t *db,
166294
char **bytes, size_t *length);
295+
CONNECT_ARGS_1(hs_error_t, hs_serialize_database, db, bytes, length);
296+
CONNECT_DISPATCH_2(hs_error_t, hs_serialize_database, const hs_database_t *db,
297+
char **bytes, size_t *length);
298+
CONNECT_ARGS_3(hs_error_t, hs_serialize_database, db, bytes, length);
167299

168300
CREATE_DISPATCH(hs_error_t, hs_deserialize_database, const char *bytes,
169301
const size_t length, hs_database_t **db);
302+
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database, bytes, length, db);
303+
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database, const char *bytes,
304+
const size_t length, hs_database_t **db);
305+
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database, bytes, length, db);
170306

171307
CREATE_DISPATCH(hs_error_t, hs_deserialize_database_at, const char *bytes,
172308
const size_t length, hs_database_t *db);
309+
CONNECT_ARGS_1(hs_error_t, hs_deserialize_database_at, bytes, length, db);
310+
CONNECT_DISPATCH_2(hs_error_t, hs_deserialize_database_at, const char *bytes,
311+
const size_t length, hs_database_t *db);
312+
CONNECT_ARGS_3(hs_error_t, hs_deserialize_database_at, bytes, length, db);
173313

174314
CREATE_DISPATCH(hs_error_t, hs_serialized_database_info, const char *bytes,
175315
size_t length, char **info);
316+
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_info, bytes, length, info);
317+
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_info, const char *bytes,
318+
size_t length, char **info);
319+
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_info, bytes, length, info);
176320

177321
CREATE_DISPATCH(hs_error_t, hs_serialized_database_size, const char *bytes,
178322
const size_t length, size_t *deserialized_size);
323+
CONNECT_ARGS_1(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
324+
CONNECT_DISPATCH_2(hs_error_t, hs_serialized_database_size, const char *bytes,
325+
const size_t length, size_t *deserialized_size);
326+
CONNECT_ARGS_3(hs_error_t, hs_serialized_database_size, bytes, length, deserialized_size);
179327

180328
CREATE_DISPATCH(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
181329
char *buf, size_t buf_space, size_t *used_space);
330+
CONNECT_ARGS_1(hs_error_t, hs_compress_stream, stream,
331+
buf, buf_space, used_space);
332+
CONNECT_DISPATCH_2(hs_error_t, hs_compress_stream, const hs_stream_t *stream,
333+
char *buf, size_t buf_space, size_t *used_space);
334+
CONNECT_ARGS_3(hs_error_t, hs_compress_stream, stream,
335+
buf, buf_space, used_space);
182336

183337
CREATE_DISPATCH(hs_error_t, hs_expand_stream, const hs_database_t *db,
184338
hs_stream_t **stream, const char *buf,size_t buf_size);
339+
CONNECT_ARGS_1(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
340+
CONNECT_DISPATCH_2(hs_error_t, hs_expand_stream, const hs_database_t *db,
341+
hs_stream_t **stream, const char *buf,size_t buf_size);
342+
CONNECT_ARGS_3(hs_error_t, hs_expand_stream, db, stream, buf,buf_size);
185343

186344
CREATE_DISPATCH(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
187345
const char *buf, size_t buf_size, hs_scratch_t *scratch,
188346
match_event_handler onEvent, void *context);
347+
CONNECT_ARGS_1(hs_error_t, hs_reset_and_expand_stream, to_stream,
348+
buf, buf_size, scratch, onEvent, context);
349+
CONNECT_DISPATCH_2(hs_error_t, hs_reset_and_expand_stream, hs_stream_t *to_stream,
350+
const char *buf, size_t buf_size, hs_scratch_t *scratch,
351+
match_event_handler onEvent, void *context);
352+
CONNECT_ARGS_3(hs_error_t, hs_reset_and_expand_stream, to_stream,
353+
buf, buf_size, scratch, onEvent, context);
189354

190355
/** INTERNALS **/
191356

192357
CREATE_DISPATCH(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
358+
CONNECT_ARGS_1(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
359+
CONNECT_DISPATCH_2(u32, Crc32c_ComputeBuf, u32 inCrc32, const void *buf, size_t bufLen);
360+
CONNECT_ARGS_3(u32, Crc32c_ComputeBuf, inCrc32, buf, bufLen);
193361

194362
#pragma GCC diagnostic pop
195363
#pragma GCC diagnostic pop
364+

0 commit comments

Comments
 (0)