Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support phmap for aggregation #9555

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 31 additions & 7 deletions dbms/src/Common/ColumnsHashingImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,11 @@ class HashMethodBase
public:
using EmplaceResult = EmplaceResultImpl<Mapped>;
using FindResult = FindResultImpl<Mapped>;
static constexpr bool has_mapped = !std::is_same<Mapped, VoidMapped>::value;
using Cache = LastElementCache<Value, consecutive_keys_optimization>;

static constexpr bool has_mapped = !std::is_same<Mapped, VoidMapped>::value;
static constexpr size_t prefetch_step = 16;

template <typename Data>
ALWAYS_INLINE inline EmplaceResult emplaceKey(
Data & data,
Expand All @@ -136,7 +138,24 @@ class HashMethodBase
std::vector<String> & sort_key_containers)
{
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, &pool, sort_key_containers);
return emplaceImpl(key_holder, data);
return emplaceImpl<false>(key_holder, data, 0);
}

/// Emplaces the key of `row` into `data`, reusing a hash value that the caller has already computed.
///
/// @param data                 Hash table receiving the key; must provide `prefetch_hash` and a
///                             four-argument `emplace` taking the hash value.
/// @param row                  Index of the row whose key is inserted.
/// @param hashvals             Precomputed hash values, read at `row` and at `row + prefetch_step`
///                             (presumably one entry per row of the block -- confirm at the call site).
/// @param pool                 Arena handed to `getKeyHolder`.
/// @param sort_key_containers  Scratch containers handed to `getKeyHolder`.
/// @return The result of `emplaceImpl<true>` for this key.
template <typename Data>
ALWAYS_INLINE inline EmplaceResult emplaceKey(
    Data & data,
    size_t row,
    const std::vector<size_t> & hashvals,
    Arena & pool,
    std::vector<String> & sort_key_containers)
{
    // The key holder is built before anything else, exactly as in the non-prefetching overload.
    auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, &pool, sort_key_containers);

    // Hand the hash of the row `prefetch_step` positions ahead to the table, unless that
    // position lies past the end of the precomputed hashes.
    if (const size_t ahead = row + prefetch_step; likely(ahead < hashvals.size()))
        data.prefetch_hash(hashvals[ahead]);

    // NOTE: hashvals[row] is read without a bounds check; the caller must guarantee row < hashvals.size().
    return emplaceImpl<true>(key_holder, data, hashvals[row]);
}

template <typename Data>
Expand All @@ -155,9 +174,10 @@ class HashMethodBase
const Data & data,
size_t row,
Arena & pool,
std::vector<String> & sort_key_containers)
std::vector<String> & sort_key_containers) const
{
auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, &pool, sort_key_containers);
auto key_holder = static_cast<const Derived &>(*this).getKeyHolder(row, &pool, sort_key_containers);
// TODO enable prefetch
return data.hash(keyHolderGetKey(key_holder));
}

Expand All @@ -179,8 +199,8 @@ class HashMethodBase
}
}

template <typename Data, typename KeyHolder>
ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
template <bool enable_prefetch, typename Data, typename KeyHolder>
ALWAYS_INLINE inline EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data, size_t hashval [[maybe_unused]])
{
if constexpr (Cache::consecutive_keys_optimization)
{
Expand All @@ -195,7 +215,11 @@ class HashMethodBase

typename Data::LookupResult it;
bool inserted = false;
data.emplace(key_holder, it, inserted);

if constexpr (enable_prefetch)
data.emplace(key_holder, it, inserted, hashval);
else
data.emplace(key_holder, it, inserted);

[[maybe_unused]] Mapped * cached = nullptr;
if constexpr (has_mapped)
Expand Down
2 changes: 2 additions & 0 deletions dbms/src/Common/HashTable/FixedHashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ class FixedHashTable
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;

static constexpr bool isPhMap = false;
static constexpr bool isNestedMap = false;

size_t hash(const Key & x) const { return x; }

Expand Down
85 changes: 69 additions & 16 deletions dbms/src/Common/HashTable/Hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#pragma once

#include <Common/Decimal.h>
#include <Common/phmap/phmap_bits.h>
#include <Core/Types.h>
#include <city.h>
#include <common/StringRef.h>
Expand Down Expand Up @@ -130,8 +131,8 @@ inline DB::UInt64 wideIntHashCRC32(const T & x, DB::UInt64 updated_value)
return updated_value;
}
static_assert(
DB::IsDecimal<
T> || is_boost_number_v<T> || std::is_same_v<T, DB::UInt128> || std::is_same_v<T, DB::Int128> || std::is_same_v<T, DB::UInt256>);
DB::IsDecimal<T> || is_boost_number_v<T> || std::is_same_v<T, DB::UInt128> || std::is_same_v<T, DB::Int128>
|| std::is_same_v<T, DB::UInt256>);
__builtin_unreachable();
}

Expand Down Expand Up @@ -244,8 +245,8 @@ inline size_t defaultHash64(const std::enable_if_t<!is_fit_register<T>, T> & key
return boost::multiprecision::hash_value(key);
}
static_assert(
is_boost_number_v<
T> || std::is_same_v<T, DB::UInt128> || std::is_same_v<T, DB::Int128> || std::is_same_v<T, DB::UInt256>);
is_boost_number_v<T> || std::is_same_v<T, DB::UInt128> || std::is_same_v<T, DB::Int128>
|| std::is_same_v<T, DB::UInt256>);
__builtin_unreachable();
}

Expand Down Expand Up @@ -297,20 +298,26 @@ inline size_t hashCRC32(const std::enable_if_t<!is_fit_register<T>, T> & key)
template <typename T>
struct HashCRC32;

#define DEFINE_HASH(T) \
template <> \
struct HashCRC32<T> \
{ \
static_assert(is_fit_register<T>); \
size_t operator()(T key) const { return hashCRC32<T>(key); } \
#define DEFINE_HASH(T) \
template <> \
struct HashCRC32<T> \
{ \
static_assert(is_fit_register<T>); \
size_t operator()(T key) const \
{ \
return hashCRC32<T>(key); \
} \
};

#define DEFINE_HASH_WIDE(T) \
template <> \
struct HashCRC32<T> \
{ \
static_assert(!is_fit_register<T>); \
size_t operator()(const T & key) const { return hashCRC32<T>(key); } \
#define DEFINE_HASH_WIDE(T) \
template <> \
struct HashCRC32<T> \
{ \
static_assert(!is_fit_register<T>); \
size_t operator()(const T & key) const \
{ \
return hashCRC32<T>(key); \
} \
};

DEFINE_HASH(DB::UInt8)
Expand Down Expand Up @@ -416,3 +423,49 @@ struct IntHash32<T, salt, std::enable_if_t<!is_fit_register<T>, void>>
}
}
};

/// Compile-time tag selecting which PhHashMixSeed mixing function is used.
/// Kept as an unscoped enum so the enumerators can be named without qualification
/// in template argument lists.
/// NOTE(review): only PhHashSeed1 has PhHashMixSeed specializations in the visible part of this
/// file; instantiating with PhHashSeed2 would reference an undefined operator() -- confirm.
enum PhHashSeed
{
    PhHashSeed1 = 0,
    PhHashSeed2 = 1,
};

/// Primary template of the hash mixer applied on top of std::hash by PhHash.
///
/// @tparam n     Width selector; PhHash instantiates it with sizeof(size_t).
/// @tparam seed  Mixing variant tag (see PhHashSeed).
///
/// The call operator is only declared here. The usable implementations are the explicit
/// specializations for (4, PhHashSeed1) and (8, PhHashSeed1); any other <n, seed> combination
/// has no definition of operator() in this part of the file.
template <int n, PhHashSeed seed>
class PhHashMixSeed
{
public:
// Maps an already computed hash value to a mixed hash value; defined per specialization.
inline size_t operator()(size_t) const;
};

/// Mixer used when sizeof(size_t) == 4 with seed PhHashSeed1.
/// Multiplies the input by a fixed 32-bit constant in 64-bit arithmetic and folds the
/// upper 32 bits of the product into the lower ones.
template <>
class PhHashMixSeed<4, PhHashSeed1>
{
public:
    /// @param a  Hash value to mix.
    /// @return   The folded product, truncated to size_t.
    inline size_t operator()(size_t a) const
    {
        constexpr uint64_t multiplier = 0xcc9e2d51UL;
        const uint64_t product = a * multiplier;
        // XOR the high half onto the low half so the bits lost by truncation still contribute.
        const uint64_t folded = product ^ (product >> 32u);
        return static_cast<size_t>(folded);
    }
};

/// Mixer used when sizeof(size_t) == 8 with seed PhHashSeed1.
/// Combines both outputs of umul128(a, constant) by adding them together.
template <>
class PhHashMixSeed<8, PhHashSeed1>
{
public:
    /// @param a  Hash value to mix.
    /// @return   Sum of the two values produced by umul128, converted to size_t.
    inline size_t operator()(size_t a) const
    {
        constexpr uint64_t multiplier = 0xde5fb9d2630458e9ULL;
        // Filled in by umul128 through the pointer argument (presumably the upper 64 bits of
        // the 128-bit product; umul128 is not defined in this file, likely from phmap_bits.h -- confirm).
        uint64_t high_part;
        const uint64_t low_part = umul128(a, multiplier, &high_part);
        return static_cast<size_t>(high_part + low_part);
    }
};
/// Hash functor that post-processes std::hash<T> with the PhHashMixSeed mixer
/// matching the platform's size_t width and the requested seed.
///
/// @tparam T     Key type; must be hashable by std::hash<T>.
/// @tparam seed  Mixing variant forwarded to PhHashMixSeed.
template <typename T, PhHashSeed seed>
struct PhHash
{
    /// @param value  Key to hash.
    /// @return       std::hash<T>()(value) passed through PhHashMixSeed<sizeof(size_t), seed>.
    std::size_t operator()(const T & value) const
    {
        using Mixer = PhHashMixSeed<sizeof(size_t), seed>;
        const std::size_t raw_hash = std::hash<T>{}(value);
        return Mixer{}(raw_hash);
    }
};
4 changes: 4 additions & 0 deletions dbms/src/Common/HashTable/HashMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <Common/HashTable/Hash.h>
#include <Common/HashTable/HashTable.h>
// #include <Common/HashTable/PhHashTable.h>
#include <Common/HashTable/HashTableAllocator.h>


Expand Down Expand Up @@ -345,6 +346,9 @@ template <
typename Allocator = HashTableAllocator>
using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;

// TODO
// template <typename Key, typename Mapped>
// using PhHashMap = PhHashTable<Key, Mapped, PhHash<Key, PhHashSeed1>>;

template <
typename Key,
Expand Down
3 changes: 3 additions & 0 deletions dbms/src/Common/HashTable/HashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,9 @@ class HashTable
using Grower = GrowerType;
using Allocator = AllocatorType;

static constexpr bool isPhMap = false;
static constexpr bool isNestedMap = false;

protected:
friend class const_iterator;
friend class iterator;
Expand Down
Loading