Skip to content

Commit 8562452

Browse files
committed
temp
1 parent d4b02cc commit 8562452

File tree

4 files changed

+175
-170
lines changed

4 files changed

+175
-170
lines changed

src/Common/HashMapsTemplate.h

Lines changed: 114 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,105 @@ class WriteBuffer;
1313
class ReadBuffer;
1414

1515

16+
template <typename Mapped>
17+
using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
18+
19+
/// Dummy key getter, always find nothing, used for JOIN ON NULL
20+
template <typename Mapped>
21+
class KeyGetterEmpty
22+
{
23+
public:
24+
struct MappedType
25+
{
26+
using mapped_type = Mapped;
27+
};
28+
29+
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped>;
30+
31+
KeyGetterEmpty() = default;
32+
33+
FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); }
34+
};
35+
36+
template <HashType type, typename Value, typename Mapped>
37+
struct KeyGetterForTypeImpl;
38+
39+
template <typename Value, typename Mapped>
40+
struct KeyGetterForTypeImpl<HashType::key8, Value, Mapped>
41+
{
42+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false, true>;
43+
};
44+
45+
template <typename Value, typename Mapped>
46+
struct KeyGetterForTypeImpl<HashType::key16, Value, Mapped>
47+
{
48+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false, true>;
49+
};
50+
51+
template <typename Value, typename Mapped>
52+
struct KeyGetterForTypeImpl<HashType::key32, Value, Mapped>
53+
{
54+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false, true>;
55+
};
56+
57+
template <typename Value, typename Mapped>
58+
struct KeyGetterForTypeImpl<HashType::key64, Value, Mapped>
59+
{
60+
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false, true>;
61+
};
62+
63+
template <typename Value, typename Mapped>
64+
struct KeyGetterForTypeImpl<HashType::key_string, Value, Mapped>
65+
{
66+
using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false, true>;
67+
};
68+
69+
template <typename Value, typename Mapped>
70+
struct KeyGetterForTypeImpl<HashType::key_fixed_string, Value, Mapped>
71+
{
72+
using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false, true>;
73+
};
74+
75+
template <typename Value, typename Mapped>
76+
struct KeyGetterForTypeImpl<HashType::keys128, Value, Mapped>
77+
{
78+
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false, true>;
79+
};
80+
81+
template <typename Value, typename Mapped>
82+
struct KeyGetterForTypeImpl<HashType::keys256, Value, Mapped>
83+
{
84+
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt256, Mapped, false, false, false, true>;
85+
};
86+
87+
template <typename Value, typename Mapped>
88+
struct KeyGetterForTypeImpl<HashType::hashed, Value, Mapped>
89+
{
90+
using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false, true>;
91+
};
92+
93+
template <HashType type, typename Data>
94+
struct KeyGetterForType
95+
{
96+
using Value = typename Data::value_type;
97+
using Mapped_t = typename Data::mapped_type;
98+
using Mapped = std::conditional_t<std::is_const_v<Data>, const Mapped_t, Mapped_t>;
99+
using Type = typename KeyGetterForTypeImpl<type, Value, Mapped>::Type;
100+
};
101+
102+
template <typename KeyGetter, typename Map, typename MappedHandler>
103+
requires (std::is_invocable_v<MappedHandler, typename Map::mapped_type /*mapped*/, bool /*inserted*/, size_t /*row*/>)
104+
void insertIntoHashMap(
105+
Map & map, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, size_t rows, Arena & pool, MappedHandler && mapped_handler)
106+
{
107+
KeyGetter key_getter(key_columns, key_sizes, nullptr);
108+
for (size_t i = 0; i < rows; ++i)
109+
{
110+
auto emplace_result = key_getter.emplaceKey(map, i, pool);
111+
mapped_handler(emplace_result.getMapped(), emplace_result.isInserted(), i);
112+
}
113+
}
114+
16115
template <typename Map, typename MappedSerializer>
17116
void serializeHashMap(const Map & map, MappedSerializer && mapped_serializer, WriteBuffer & wb)
18117
{
@@ -139,6 +238,21 @@ struct HashMapsTemplate
139238
type = which;
140239
}
141240

241+
template <typename MappedHandler>
242+
void insert(const ColumnRawPtrs & key_columns, const Sizes & key_sizes, size_t rows, Arena & pool, MappedHandler && mapped_handler)
243+
{
244+
switch (which)
245+
{
246+
#define M(NAME) \
247+
case HashType::NAME: \
248+
using KeyGetter = typename KeyGetterForType<HashType::NAME, std::remove_reference_t<decltype(*NAME)>>::Type; \
249+
insertIntoHashMap<KeyGetter>(*NAME, key_columns, key_sizes, rows, pool, std::mode(mapped_handler)); \
250+
break;
251+
APPLY_FOR_HASH_KEY_VARIANTS(M)
252+
#undef M
253+
}
254+
}
255+
142256
size_t getTotalRowCount() const
143257
{
144258
switch (type)
@@ -219,89 +333,4 @@ struct HashMapsTemplate
219333
HashType type;
220334
};
221335

222-
template <typename Mapped>
223-
using FindResultImpl = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped, true>;
224-
225-
/// Dummy key getter, always find nothing, used for JOIN ON NULL
226-
template <typename Mapped>
227-
class KeyGetterEmpty
228-
{
229-
public:
230-
struct MappedType
231-
{
232-
using mapped_type = Mapped;
233-
};
234-
235-
using FindResult = ColumnsHashing::columns_hashing_impl::FindResultImpl<Mapped>;
236-
237-
KeyGetterEmpty() = default;
238-
239-
FindResult findKey(MappedType, size_t, const Arena &) { return FindResult(); }
240-
};
241-
242-
template <HashType type, typename Value, typename Mapped>
243-
struct KeyGetterForTypeImpl;
244-
245-
template <typename Value, typename Mapped>
246-
struct KeyGetterForTypeImpl<HashType::key8, Value, Mapped>
247-
{
248-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt8, false, true>;
249-
};
250-
251-
template <typename Value, typename Mapped>
252-
struct KeyGetterForTypeImpl<HashType::key16, Value, Mapped>
253-
{
254-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt16, false, true>;
255-
};
256-
257-
template <typename Value, typename Mapped>
258-
struct KeyGetterForTypeImpl<HashType::key32, Value, Mapped>
259-
{
260-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt32, false, true>;
261-
};
262-
263-
template <typename Value, typename Mapped>
264-
struct KeyGetterForTypeImpl<HashType::key64, Value, Mapped>
265-
{
266-
using Type = ColumnsHashing::HashMethodOneNumber<Value, Mapped, UInt64, false, true>;
267-
};
268-
269-
template <typename Value, typename Mapped>
270-
struct KeyGetterForTypeImpl<HashType::key_string, Value, Mapped>
271-
{
272-
using Type = ColumnsHashing::HashMethodString<Value, Mapped, true, false, true>;
273-
};
274-
275-
template <typename Value, typename Mapped>
276-
struct KeyGetterForTypeImpl<HashType::key_fixed_string, Value, Mapped>
277-
{
278-
using Type = ColumnsHashing::HashMethodFixedString<Value, Mapped, true, false, true>;
279-
};
280-
281-
template <typename Value, typename Mapped>
282-
struct KeyGetterForTypeImpl<HashType::keys128, Value, Mapped>
283-
{
284-
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt128, Mapped, false, false, false, true>;
285-
};
286-
287-
template <typename Value, typename Mapped>
288-
struct KeyGetterForTypeImpl<HashType::keys256, Value, Mapped>
289-
{
290-
using Type = ColumnsHashing::HashMethodKeysFixed<Value, UInt256, Mapped, false, false, false, true>;
291-
};
292-
293-
template <typename Value, typename Mapped>
294-
struct KeyGetterForTypeImpl<HashType::hashed, Value, Mapped>
295-
{
296-
using Type = ColumnsHashing::HashMethodHashed<Value, Mapped, false, true>;
297-
};
298-
299-
template <HashType type, typename Data>
300-
struct KeyGetterForType
301-
{
302-
using Value = typename Data::value_type;
303-
using Mapped_t = typename Data::mapped_type;
304-
using Mapped = std::conditional_t<std::is_const_v<Data>, const Mapped_t, Mapped_t>;
305-
using Type = typename KeyGetterForTypeImpl<type, Value, Mapped>::Type;
306-
};
307336
}

src/Interpreters/Streaming/AsofHashJoin.cpp

Lines changed: 33 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ AsofHashJoin::AsofHashJoin(
1414
, asof_type(*table_join->getAsofType())
1515
, asof_inequality(table_join->getAsofInequality())
1616
{
17+
/// The last key is the ASOF column; it is not used as a hash key.
18+
hash_key_sizes = key_sizes;
19+
/// Drop the trailing entry: `back()` only reads the element, `pop_back()` actually removes it.
hash_key_sizes.pop_back();
1720
}
1821

1922
void AsofHashJoin::joinLeftBlock(Block & block)
@@ -39,61 +42,44 @@ void AsofHashJoin::insertRightBlock(Block block)
3942
for (const auto & name : key_names)
4043
key_columns.push_back(all_key_columns[name]);
4144

42-
/// We will insert to the map only keys, where all components are not NULL.
43-
ConstNullMapPtr null_map{};
44-
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
45-
46-
/// If LEFT, RIGHT or FULL save blocks with nulls for NotJoinedBlocks
47-
UInt8 save_nullmap = 0;
48-
if (isRightOrFull(table_join->kind()) && null_map)
49-
{
50-
/// Save rows with NULL keys
51-
for (size_t i = 0; !save_nullmap && i < null_map->size(); ++i)
52-
save_nullmap |= (*null_map)[i];
53-
}
45+
auto asof_column = key_columns.back();
46+
key_columns.pop_back();
5447

5548
/// Add `block_to_save` to target stream data
5649
/// Note: `block_to_save` may be empty when the query doesn't care about any non-key columns.
5750
/// For example, SELECT count() FROM stream_a JOIN stream_b ON i=ii;
58-
auto start_row = buffered_hash_data->addOrConcatDataBlock(std::move(block_to_save));
59-
auto rows = buffered_hash_data->lastDataBlock().rows();
51+
auto rows = block_to_save.rows();
52+
auto start_row = right_buffered_hash_data->blocks.pushBackOrConcat(std::move(block_to_save));
53+
auto row_ref_handler = [&](AsofRowRef & row_ref, bool inserted, size_t original_row, size_t row) {
54+
AsofRowRef * row_ref_ptr = &row_ref;
55+
if (inserted)
56+
row_ref_ptr = new (row_ref_ptr) AsofRowRef(asof_type);
6057

61-
switch (hash_method_type)
62-
{
63-
#define M(TYPE) \
64-
case HashType::TYPE: \
65-
return insertFromBlockImplType< \
66-
Strictness::Asof, \
67-
typename KeyGetterForType<HashType::TYPE, std::remove_reference_t<decltype(*(buffered_hash_data->maps->TYPE))>>::Type>( \
68-
join, \
69-
*(buffered_hash_data->maps->TYPE), \
70-
rows, \
71-
key_columns, \
72-
key_sizes[0], \
73-
&buffered_hash_data->blocks, \
74-
start_row, \
75-
null_map, \
76-
buffered_hash_data->pool); \
77-
break;
78-
APPLY_FOR_HASH_KEY_VARIANTS(M)
79-
#undef M
80-
}
81-
insertFromBlockImpl<strictness_>(
82-
hash_method_type,
83-
map,
84-
rows,
85-
key_columns,
86-
key_sizes[0],
87-
&target_hash_blocks->blocks,
88-
start_row,
89-
null_map,
90-
target_hash_blocks->pool);
58+
row_ref_ptr->insert(
59+
asof_type,
60+
asof_column,
61+
&(right_buffered_hash_data->blocks),
62+
original_row,
63+
row,
64+
asof_inequality,
65+
rightJoinStreamDescription()->keep_versions);
66+
};
9167

92-
if (save_nullmap)
93-
/// FIXME, we will need account the allocated bytes for null_map_holder / not_joined_map as well
94-
buffered_hash_data->blocks_nullmaps.emplace_back(&buffered_hash_data->lastDataBlock(), null_map_holder);
68+
right_buffered_hash_data->map.insert(std::move(block_to_save), key_columns, hash_key_sizes, rows, right_buffered_hash_data->pool, std::move(row_ref_handler));
9569

9670
checkLimits();
9771
}
72+
73+
void AsofHashJoin::checkLimits() const
74+
{
75+
auto current_total_bytes = right_buffered_hash_data->totalBufferedBytes();
76+
if (current_total_bytes >= join_max_cached_bytes)
77+
throw Exception(
78+
ErrorCodes::SET_SIZE_LIMIT_EXCEEDED,
79+
"Streaming asof join's memory reaches max size: {}, current total: {}, right: {}",
80+
join_max_cached_bytes,
81+
current_total_bytes,
82+
buffered_hash_data->getMetricsString());
83+
}
9884
}
9985
}

src/Interpreters/Streaming/AsofHashJoin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class AsofHashJoin final : public HashJoin
2525

2626
using DataBlock = LightChunkWithTimestamp;
2727
using BufferedAsofHashData = BufferedHashData<DataBlock, AsofRowRefs<DataBlock>>;
28-
SERDE std::unique_ptr<BufferedAsofHashData> buffered_hash_data;
28+
SERDE std::unique_ptr<BufferedAsofHashData> right_buffered_hash_data;
2929
};
3030

3131
}

0 commit comments

Comments
 (0)