Skip to content

Commit 3ba9952

Browse files
compress copy 5 (jfalcou#1671)
1 parent 0267b6c commit 3ba9952

File tree

14 files changed

+200
-248
lines changed

14 files changed

+200
-248
lines changed

examples/algorithms/writing_new/collect_indexes__complicated_real_example.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
*/
77
//==================================================================================================
88

9-
//
10-
// NOTE: another way of writing the same example can be found in: algorithms/writing_new/collect_indexes__writing_custom_loop.cpp
119
//
1210
// In this example we will have a look at a problem from this blog post:
1311
// https://maxdemarzi.com/2021/08/30/lets-build-something-outrageous-part-13-finding-things-faster/
@@ -55,7 +53,7 @@ void collect_indexes(R&& r, P p, std::vector<IdxType, Alloc>& res)
5553
// Prepare the output in case it was not empty.
5654
res.clear();
5755

58-
// Over allocating to always use `compress_store[unsafe]`.
56+
// Over allocating to always use `compress_copy[unsafe]`.
5957
// eve won't go beyond eve::expected_cardinal_v<IdxType> per wide here.
6058
res.resize((r.end() - r.begin()) + eve::expected_cardinal_v<IdxType>);
6159
IdxType* out = res.data();
@@ -81,26 +79,28 @@ void collect_indexes(R&& r, P p, std::vector<IdxType, Alloc>& res)
8179
r_with_idx,
8280
[&](eve::algo::iterator auto r_idx_it, eve::relative_conditional_expr auto ignore) mutable
8381
{
82+
auto [elems_it, idxs_it] = r_idx_it;
83+
8484
// load an element and an index for each element.
8585
// The values in the `ignored` part are garbage.
86-
auto [elems, idxs] = eve::load[ignore](r_idx_it);
86+
auto elems = eve::load[ignore](elems_it);
8787

8888
// Apply the predicate
8989
auto test = p(elems);
9090

91-
// We don't know what was the result of applying a predicate to garbage, we need to mask it.
92-
test = eve::replace_ignored(test, ignore, eve::false_);
93-
94-
// compress_store[unsafe] - write elements marked as true to the output.
91+
// compress_copy[unsafe] - copies elements marked as true to the output.
9592
// the elements are packed together to the left.
9693
// unsafe means we can write up to the register width of extra stuff.
97-
// returns pointer behind last written element
94+
// sparse means we expect few elements (within test) to be true
95+
// first ignore means that 'true' for garbage element should still not be written.
96+
// second 'ignore_none' means we have enough space in the output to write 4 elements.
97+
// returns pointer behind last written element.
9898
//
9999
// idxs : [ 1 2 3 4 ]
100100
// test : [ f t f t ]
101101
// written : [ 2 4 x x ]
102102
// returns : out + 2
103-
out = eve::compress_store[eve::unsafe](idxs, test, out);
103+
out = eve::compress_copy[eve::unsafe][eve::sparse][ignore][eve::ignore_none](idxs_it, test, out);
104104
});
105105

106106
res.resize(out - res.data());

examples/algorithms/writing_new/collect_indexes__writing_custom_loop.cpp

Lines changed: 0 additions & 187 deletions
This file was deleted.

include/eve/module/algo/algo/remove.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ namespace eve::algo
3131
EVE_FORCEINLINE bool step(auto it, eve::relative_conditional_expr auto ignore, auto /*idx*/)
3232
{
3333
auto loaded = eve::load[ignore](it);
34-
auto mask = !p(loaded); // we decided that ! can be optimized well enough
35-
out = compress_store[unsafe][ignore](loaded, mask, out);
34+
auto mask = !p(loaded); // we decided that ! can be optimized well enough
35+
auto density = density_for_compress_copy<typename TraitsSupport::traits_type>();
36+
out = compress_copy[unsafe][density][ignore](it, loaded, mask, out);
3637
return false;
3738
}
3839

include/eve/module/algo/algo/set_intersection.hpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,8 @@ namespace detail
257257
auto v2 = eve::load(f2);
258258

259259
auto matches = eve::has_equal_in(v1, v2, equal);
260-
o = eve::compress_store[eve::unsafe](v1, matches, o);
260+
auto density = density_for_compress_copy<decltype(modified_traits())>();
261+
o = eve::compress_copy[eve::unsafe][density](f1, v1, matches, o);
261262

262263
// maybe beneficial to move up but we will hope for a compiler
263264
//
@@ -361,12 +362,17 @@ template<typename TraitsSupport> struct set_intersection_ : TraitsSupport
361362
//!
362363
//! @brief SIMD variation on std::set_intersection that HAS A SLIGHTLY DIFFERENT SEMANTICS.
363364
//!
365+
//! **Defined in Header**
366+
//!
367+
//! @code
368+
//! #include <eve/module/algo.hpp>
369+
//! @endcode
370+
//!
364371
//! The main idea for the algorithm comes from
365372
//! "Faster-Than-Native Alternatives for x86 VP2INTERSECT Instructions"
366373
//! by Guillermo Diez-Canas.
367374
//! Link: https://arxiv.org/abs/2112.06342
368375
//!
369-
//!
370376
//! Differences:
371377
//! * duplicate handling, `eve::algo::set_intersection` does not
372378
//! guarantee how many copies of a duplicated element will be in the output.
@@ -394,14 +400,10 @@ template<typename TraitsSupport> struct set_intersection_ : TraitsSupport
394400
//! * The provided solution is minimal, eve won't search for beginning of intersection in
395401
//! any way. eve also won't do any dispatch based on size.
396402
//! If this is something that can be beneficial for your case - consider it.
403+
//! * sparse_output/dense_output - controls which eve::algo::compress_copy is used, default is
404+
//! dense.
397405
//! * Basic version does not support aligning/unrolling. `expect_smaller_range` does.
398406
//!
399-
//! **Header**
400-
//!
401-
//! @code
402-
//! #include <eve/module/algo.hpp>
403-
//! @endcode
404-
//!
405407
//! @groupheader{Callable Signatures}
406408
//!
407409
//! @code

include/eve/module/algo/algo/traits.hpp

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,8 +393,48 @@ namespace eve::algo
393393
//============================================================================
394394
template<std::unsigned_integral T> inline constexpr auto index_type = (index_type_key = std::type_identity<T>{});
395395

396-
// getters -------------------
396+
struct density_key_t : rbr::as_keyword<density_key_t>
397+
{
398+
template<typename Value> constexpr auto operator=(Value const&) const noexcept
399+
{
400+
return rbr::option<density_key_t,Value>{};
401+
}
402+
};
403+
inline constexpr density_key_t density_key;
397404

405+
//============================================================================
406+
//! @addtogroup algo_traits
407+
//! @{
408+
//! @var sparse_output
409+
//!
410+
//! @brief for algorithms that output data based on input (eve::algo::copy_if,
411+
//! eve::algo::remove_if, eve::algo::set_intersection etc),
412+
//! tells the algorithm to optimize for the case where there will be fairly few
413+
//! elements per iteration.
414+
//! eve::algo::dense_output is default since it's better in most cases measured.
415+
//!
416+
//! @see eve::algo::dense_output
417+
//! @}
418+
//============================================================================
419+
inline constexpr auto sparse_output = (density_key = eve::sparse);
420+
421+
//============================================================================
422+
//! @addtogroup algo_traits
423+
//! @{
424+
//! @var dense_output
425+
//!
426+
//! @brief for algorithms that output data based on input (eve::algo::copy_if,
427+
//! eve::algo::remove_if, eve::algo::set_intersection etc),
428+
//! tells the algorithm to optimize for the case where there will be many
429+
//! elements per iteration.
430+
//! eve::algo::dense_output is default since it's better in most cases measured.
431+
//!
432+
//! @see eve::algo::sparse_output
433+
//! @}
434+
//============================================================================
435+
inline constexpr auto dense_output = (density_key = eve::dense);
436+
437+
// getters -------------------
398438

399439
//================================================================================================
400440
//! @addtogroup algo_traits
@@ -549,6 +589,19 @@ namespace eve::algo
549589
template <typename Traits>
550590
constexpr bool has_type_overrides_v = Traits::contains(force_type_key) || Traits::contains(common_with_types_key);
551591

592+
//================================================================================================
593+
//! @addtogroup algo_traits
594+
//! @brief returns eve::sparse or eve::dense (default is eve::dense)
595+
//! @tparam Traits
596+
//================================================================================================
597+
template<typename Traits>
598+
constexpr auto
599+
density_for_compress_copy()
600+
{
601+
using res_t = rbr::result::fetch_t<(density_key | eve::dense), Traits>;
602+
return res_t {};
603+
}
604+
552605
//================================================================================================
553606
//! @addtogroup algo_traits
554607
//! @brief some traits should just be replaced with a combination of different traits.

include/eve/module/core/compress/detail/compress_callable.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ template<typename Callable, typename... Settings> struct compress_callable : Cal
7373
{
7474
return (*this)[ignore_none][ignore_none](args...);
7575
}
76-
else if constexpr( sizeof...(Settings) == 3 ) { return (*this)[get<3>(settings)](args...); }
76+
else if constexpr( sizeof...(Settings) == 3 ) { return (*this)[get<2>(settings)](args...); }
7777
else
7878
{
7979
auto s0 = get<0>(settings);

0 commit comments

Comments
 (0)