/* clhash.h -- taken from a fork of rurban/smhasher */
/*
* CLHash is a very fast hashing function that uses the
* carry-less multiplication and SSE instructions.
*
* Daniel Lemire, Owen Kaser, Faster 64-bit universal hashing
* using carry-less multiplications, Journal of Cryptographic Engineering (to appear)
*
* Best used on recent x64 processors (Haswell or better).
*
* Compile option: if BITMIX is defined during compilation, extra work is done to
* pass smhasher's avalanche test successfully. This header defines BITMIX itself
* (see below), so the extra mixing is enabled wherever it is included.
**/
#ifndef INCLUDE_CLHASH_H_
#define INCLUDE_CLHASH_H_
#include <stdlib.h>
#include <stdint.h> // life is short, please use a C99-compliant compiler
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// BITMIX necessary to pass the Avalanche Tests
#define BITMIX
// Minimum size of the random key handed to clhash(), expressed both in
// 64-bit words and in bytes (8 bytes per word).
enum {
  RANDOM_64BITWORDS_NEEDED_FOR_CLHASH = 133,
  RANDOM_BYTES_NEEDED_FOR_CLHASH = RANDOM_64BITWORDS_NEEDED_FOR_CLHASH * 8
};
/**
 * Hashes a byte string to a 64-bit value using the supplied random key.
 *
 * random     : the random data source (should contain at least
 *              RANDOM_BYTES_NEEDED_FOR_CLHASH random bytes). It should also be
 *              aligned on a 16-byte boundary, i.e.
 *              (((uintptr_t) random & 15) == 0), for performance reasons.
 *              This is usually generated once and reused with many inputs.
 *
 * stringbyte : the input data source; it can be anything you want to hash.
 *
 * lengthbyte : number of bytes in the input pointed to by stringbyte.
 *
 * Returns the 64-bit hash value.
 */
uint64_t clhash(const void* random, const char * stringbyte,
const size_t lengthbyte);
/**
 * Convenience function: generates a random key for clhash() from two 64-bit
 * seeds.
 *
 * seed1, seed2 : the seeds the key is generated from.
 *
 * Returns a pointer to the generated key. The caller owns the result and is
 * responsible for releasing it with "free".
 */
void * get_random_key_for_clhash(uint64_t seed1, uint64_t seed2);
#ifdef __cplusplus
} // extern "C"
#endif
#ifdef __cplusplus
#include <vector>
#include <string>
#include <cstring> // For std::strlen
struct clhasher {
const void *random_data_;
clhasher(uint64_t seed1=137, uint64_t seed2=777): random_data_(get_random_key_for_clhash(seed1, seed2)) {}
template<typename T>
uint64_t operator()(const T *data, const size_t len) const {
return clhash(random_data_, (const char *)data, len * sizeof(T));
}
uint64_t operator()(const char *str) const {return operator()(str, std::strlen(str));}
template<typename T>
uint64_t operator()(const T &input) const {
return operator()((const char *)&input, sizeof(T));
}
template<typename T>
uint64_t operator()(const std::vector<T> &input) const {
return operator()((const char *)input.data(), sizeof(T) * input.size());
}
uint64_t operator()(const std::string &str) const {
return operator()(str.data(), str.size());
}
~clhasher() {
free((void *)random_data_);
}
};
#endif // #ifdef __cplusplus
#endif /* INCLUDE_CLHASH_H_ */