[SYCL] Adopt the experimental free function extension
Access the current nd_item via a free function instead of storing it
as a data member of the various SYCL accelerator base classes.
fwyzard committed Aug 10, 2023
1 parent: 23edf57 · commit: 497128d
Showing 8 changed files with 82 additions and 91 deletions.
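The change follows a single pattern across all eight files: instead of passing the kernel's sycl::nd_item into every helper object and keeping it as a data member, each helper re-queries the current work-item on demand through the oneAPI free-function-queries extension (sycl::ext::oneapi::experimental::this_nd_item). The sketch below shows the idea in isolation; the BlockBarrier helper and the kernel are illustrative placeholders rather than alpaka code, and it assumes a DPC++/oneAPI toolchain where this experimental extension is available.

// Illustrative sketch only (not from the alpaka sources): a helper that used to
// store an nd_item now queries it on demand from inside device code.
#include <sycl/sycl.hpp>

template<int TDim>
struct BlockBarrier
{
    // No nd_item constructor parameter or data member is needed any more.
    static void sync()
    {
        auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim>();
        item.barrier();
    }
};

int main()
{
    sycl::queue queue;
    // 64 work-items in groups of 32; the helper can be called from any device function.
    queue.parallel_for(
        sycl::nd_range<1>{sycl::range<1>{64}, sycl::range<1>{32}},
        [](sycl::nd_item<1>) { BlockBarrier<1>::sync(); });
    queue.wait();
}

The trait specializations in the diff below follow exactly this shape: the helper parameter becomes unused, and the work-item is looked up inside each static member function.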
13 changes: 6 additions & 7 deletions include/alpaka/acc/AccGenericSycl.hpp
Expand Up @@ -75,27 +75,26 @@ namespace alpaka

AccGenericSycl(
Vec<TDim, TIdx> const& threadElemExtent,
sycl::nd_item<TDim::value> work_item,
sycl::local_accessor<std::byte> dyn_shared_acc,
sycl::local_accessor<std::byte> st_shared_acc,
sycl::accessor<int, 1, sycl::access_mode::read_write, sycl::target::device> global_fence_dummy,
sycl::local_accessor<int> local_fence_dummy)
: WorkDivGenericSycl<TDim, TIdx>{threadElemExtent, work_item}
, gb::IdxGbGenericSycl<TDim, TIdx>{work_item}
, bt::IdxBtGenericSycl<TDim, TIdx>{work_item}
: WorkDivGenericSycl<TDim, TIdx>{threadElemExtent}
, gb::IdxGbGenericSycl<TDim, TIdx>{}
, bt::IdxBtGenericSycl<TDim, TIdx>{}
, AtomicHierarchy<AtomicGenericSycl, AtomicGenericSycl, AtomicGenericSycl>{}
, math::MathGenericSycl{}
, BlockSharedMemDynGenericSycl{dyn_shared_acc}
, BlockSharedMemStGenericSycl{st_shared_acc}
, BlockSyncGenericSycl<TDim>{work_item}
, BlockSyncGenericSycl<TDim>{}
, IntrinsicGenericSycl{}
, MemFenceGenericSycl{global_fence_dummy, local_fence_dummy}
# ifdef ALPAKA_DISABLE_VENDOR_RNG
, rand::RandDefault{}
# else
, rand::RandGenericSycl<TDim>{work_item}
, rand::RandGenericSycl<TDim>{}
# endif
, warp::WarpGenericSycl<TDim>{work_item}
, warp::WarpGenericSycl<TDim>{}
{
}
};
32 changes: 16 additions & 16 deletions include/alpaka/block/sync/BlockSyncGenericSycl.hpp
Expand Up @@ -19,11 +19,7 @@ namespace alpaka
public:
using BlockSyncBase = BlockSyncGenericSycl<TDim>;

BlockSyncGenericSycl(sycl::nd_item<TDim::value> work_item) : my_item{work_item}
{
}

sycl::nd_item<TDim::value> my_item;
BlockSyncGenericSycl() = default;
};
} // namespace alpaka

Expand All @@ -32,20 +28,22 @@ namespace alpaka::trait
template<typename TDim>
struct SyncBlockThreads<BlockSyncGenericSycl<TDim>>
{
static auto syncBlockThreads(BlockSyncGenericSycl<TDim> const& blockSync) -> void
static auto syncBlockThreads(BlockSyncGenericSycl<TDim> const&) -> void
{
blockSync.my_item.barrier();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
item.barrier();
}
};

template<typename TDim>
struct SyncBlockThreadsPredicate<BlockCount, BlockSyncGenericSycl<TDim>>
{
static auto syncBlockThreadsPredicate(BlockSyncGenericSycl<TDim> const& blockSync, int predicate) -> int
static auto syncBlockThreadsPredicate(BlockSyncGenericSycl<TDim> const&, int predicate) -> int
{
auto const group = blockSync.my_item.get_group();
blockSync.my_item.barrier();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
item.barrier();

auto const group = item.get_group();
auto const counter = (predicate != 0) ? 1 : 0;
return sycl::reduce_over_group(group, counter, sycl::plus<>{});
}
Expand All @@ -54,23 +52,25 @@ namespace alpaka::trait
template<typename TDim>
struct SyncBlockThreadsPredicate<BlockAnd, BlockSyncGenericSycl<TDim>>
{
static auto syncBlockThreadsPredicate(BlockSyncGenericSycl<TDim> const& blockSync, int predicate) -> int
static auto syncBlockThreadsPredicate(BlockSyncGenericSycl<TDim> const&, int predicate) -> int
{
auto const group = blockSync.my_item.get_group();
blockSync.my_item.barrier();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
item.barrier();

auto const group = item.get_group();
return static_cast<int>(sycl::all_of_group(group, static_cast<bool>(predicate)));
}
};

template<typename TDim>
struct SyncBlockThreadsPredicate<BlockOr, BlockSyncGenericSycl<TDim>>
{
static auto syncBlockThreadsPredicate(BlockSyncGenericSycl<TDim> const& blockSync, int predicate) -> int
static auto syncBlockThreadsPredicate(BlockSyncGenericSycl<TDim> const&, int predicate) -> int
{
auto const group = blockSync.my_item.get_group();
blockSync.my_item.barrier();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
item.barrier();

auto const group = item.get_group();
return static_cast<int>(sycl::any_of_group(group, static_cast<bool>(predicate)));
}
};
22 changes: 10 additions & 12 deletions include/alpaka/idx/bt/IdxBtGenericSycl.hpp
Expand Up @@ -23,11 +23,7 @@ namespace alpaka::bt
public:
using IdxBtBase = IdxBtGenericSycl;

explicit IdxBtGenericSycl(sycl::nd_item<TDim::value> work_item) : m_item_bt{work_item}
{
}

sycl::nd_item<TDim::value> m_item_bt;
IdxBtGenericSycl() = default;
};
} // namespace alpaka::bt

Expand All @@ -46,22 +42,24 @@ namespace alpaka::trait
{
//! \return The index of the current thread in the block.
template<typename TWorkDiv>
static auto getIdx(bt::IdxBtGenericSycl<TDim, TIdx> const& idx, TWorkDiv const&) -> Vec<TDim, TIdx>
static auto getIdx(bt::IdxBtGenericSycl<TDim, TIdx> const&, TWorkDiv const&) -> Vec<TDim, TIdx>
{
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();

if constexpr(TDim::value == 1)
return Vec<TDim, TIdx>{static_cast<TIdx>(idx.m_item_bt.get_local_id(0))};
return Vec<TDim, TIdx>{static_cast<TIdx>(item.get_local_id(0))};
else if constexpr(TDim::value == 2)
{
return Vec<TDim, TIdx>{
static_cast<TIdx>(idx.m_item_bt.get_local_id(1)),
static_cast<TIdx>(idx.m_item_bt.get_local_id(0))};
static_cast<TIdx>(item.get_local_id(1)),
static_cast<TIdx>(item.get_local_id(0))};
}
else
{
return Vec<TDim, TIdx>{
static_cast<TIdx>(idx.m_item_bt.get_local_id(2)),
static_cast<TIdx>(idx.m_item_bt.get_local_id(1)),
static_cast<TIdx>(idx.m_item_bt.get_local_id(0))};
static_cast<TIdx>(item.get_local_id(2)),
static_cast<TIdx>(item.get_local_id(1)),
static_cast<TIdx>(item.get_local_id(0))};
}
}
};
22 changes: 9 additions & 13 deletions include/alpaka/idx/gb/IdxGbGenericSycl.hpp
Expand Up @@ -23,11 +23,7 @@ namespace alpaka::gb
public:
using IdxGbBase = IdxGbGenericSycl;

explicit IdxGbGenericSycl(sycl::nd_item<TDim::value> work_item) : m_item_gb{work_item}
{
}

sycl::nd_item<TDim::value> m_item_gb;
IdxGbGenericSycl() = default;
};
} // namespace alpaka::gb

Expand All @@ -46,22 +42,22 @@ namespace alpaka::trait
{
//! \return The index of the current block in the grid.
template<typename TWorkDiv>
static auto getIdx(gb::IdxGbGenericSycl<TDim, TIdx> const& idx, TWorkDiv const&)
static auto getIdx(gb::IdxGbGenericSycl<TDim, TIdx> const&, TWorkDiv const&)
{
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();

if constexpr(TDim::value == 1)
return Vec<TDim, TIdx>(static_cast<TIdx>(idx.m_item_gb.get_group(0)));
return Vec<TDim, TIdx>(static_cast<TIdx>(item.get_group(0)));
else if constexpr(TDim::value == 2)
{
return Vec<TDim, TIdx>(
static_cast<TIdx>(idx.m_item_gb.get_group(1)),
static_cast<TIdx>(idx.m_item_gb.get_group(0)));
return Vec<TDim, TIdx>(static_cast<TIdx>(item.get_group(1)), static_cast<TIdx>(item.get_group(0)));
}
else
{
return Vec<TDim, TIdx>(
static_cast<TIdx>(idx.m_item_gb.get_group(2)),
static_cast<TIdx>(idx.m_item_gb.get_group(1)),
static_cast<TIdx>(idx.m_item_gb.get_group(0)));
static_cast<TIdx>(item.get_group(2)),
static_cast<TIdx>(item.get_group(1)),
static_cast<TIdx>(item.get_group(0)));
}
}
};
2 changes: 0 additions & 2 deletions include/alpaka/kernel/TaskKernelGenericSycl.hpp
Expand Up @@ -50,7 +50,6 @@
{ \
auto acc = TAcc{ \
item_elements, \
work_item, \
dyn_shared_accessor, \
st_shared_accessor, \
global_fence_dummy, \
Expand All @@ -73,7 +72,6 @@
{ \
auto acc = TAcc{ \
item_elements, \
work_item, \
dyn_shared_accessor, \
st_shared_accessor, \
global_fence_dummy, \
9 changes: 3 additions & 6 deletions include/alpaka/rand/RandGenericSycl.hpp
Expand Up @@ -40,11 +40,7 @@ namespace alpaka::rand
template<typename TDim>
struct RandGenericSycl : concepts::Implements<ConceptRand, RandGenericSycl<TDim>>
{
explicit RandGenericSycl(sycl::nd_item<TDim::value> my_item) : m_item_rand{my_item}
{
}

sycl::nd_item<TDim::value> m_item_rand;
RandGenericSycl() = default;
};

# if !defined(ALPAKA_HOST_ONLY)
Expand Down Expand Up @@ -72,7 +68,8 @@ namespace alpaka::rand

Minstd(RandGenericSycl<TDim> rand, std::uint32_t const& seed)
{
oneapi::dpl::minstd_rand engine(seed, rand.m_item_rand.get_global_linear_id());
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
oneapi::dpl::minstd_rand engine(seed, item.get_global_linear_id());
rng_engine = engine;
}

36 changes: 19 additions & 17 deletions include/alpaka/warp/WarpGenericSycl.hpp
Expand Up @@ -20,11 +20,7 @@ namespace alpaka::warp
class WarpGenericSycl : public concepts::Implements<alpaka::warp::ConceptWarp, WarpGenericSycl<TDim>>
{
public:
WarpGenericSycl(sycl::nd_item<TDim::value> my_item) : m_item_warp{my_item}
{
}

sycl::nd_item<TDim::value> m_item_warp;
WarpGenericSycl() = default;
};
} // namespace alpaka::warp

Expand All @@ -33,9 +29,10 @@ namespace alpaka::warp::trait
template<typename TDim>
struct GetSize<warp::WarpGenericSycl<TDim>>
{
static auto getSize(warp::WarpGenericSycl<TDim> const& warp) -> std::int32_t
static auto getSize(warp::WarpGenericSycl<TDim> const&) -> std::int32_t
{
auto const sub_group = warp.m_item_warp.get_sub_group();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
auto const sub_group = item.get_sub_group();
// SYCL sub-groups are always 1D
return static_cast<std::int32_t>(sub_group.get_max_local_range()[0]);
}
Expand All @@ -47,11 +44,12 @@ namespace alpaka::warp::trait
// FIXME This should be std::uint64_t on AMD GCN architectures and on CPU,
// but the former is not targeted in alpaka and CPU case is not supported in SYCL yet.
// Restrict to warpSize <= 32 for now.
static auto activemask(warp::WarpGenericSycl<TDim> const& warp) -> std::uint32_t
static auto activemask(warp::WarpGenericSycl<TDim> const&) -> std::uint32_t
{
// SYCL has no way of querying this. Since sub-group functions have to be executed in convergent code
// regions anyway we return the full mask.
auto const sub_group = warp.m_item_warp.get_sub_group();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
auto const sub_group = item.get_sub_group();
auto const mask = sycl::ext::oneapi::group_ballot(sub_group, true);
// FIXME This should be std::uint64_t on AMD GCN architectures and on CPU,
// but the former is not targeted in alpaka and CPU case is not supported in SYCL yet.
Expand All @@ -65,19 +63,21 @@ namespace alpaka::warp::trait
template<typename TDim>
struct All<warp::WarpGenericSycl<TDim>>
{
static auto all(warp::WarpGenericSycl<TDim> const& warp, std::int32_t predicate) -> std::int32_t
static auto all(warp::WarpGenericSycl<TDim> const&, std::int32_t predicate) -> std::int32_t
{
auto const sub_group = warp.m_item_warp.get_sub_group();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
auto const sub_group = item.get_sub_group();
return static_cast<std::int32_t>(sycl::all_of_group(sub_group, static_cast<bool>(predicate)));
}
};

template<typename TDim>
struct Any<warp::WarpGenericSycl<TDim>>
{
static auto any(warp::WarpGenericSycl<TDim> const& warp, std::int32_t predicate) -> std::int32_t
static auto any(warp::WarpGenericSycl<TDim> const&, std::int32_t predicate) -> std::int32_t
{
auto const sub_group = warp.m_item_warp.get_sub_group();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
auto const sub_group = item.get_sub_group();
return static_cast<std::int32_t>(sycl::any_of_group(sub_group, static_cast<bool>(predicate)));
}
};
Expand All @@ -88,9 +88,10 @@ namespace alpaka::warp::trait
// FIXME This should be std::uint64_t on AMD GCN architectures and on CPU,
// but the former is not targeted in alpaka and CPU case is not supported in SYCL yet.
// Restrict to warpSize <= 32 for now.
static auto ballot(warp::WarpGenericSycl<TDim> const& warp, std::int32_t predicate) -> std::uint32_t
static auto ballot(warp::WarpGenericSycl<TDim> const&, std::int32_t predicate) -> std::uint32_t
{
auto const sub_group = warp.m_item_warp.get_sub_group();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
auto const sub_group = item.get_sub_group();
auto const mask = sycl::ext::oneapi::group_ballot(sub_group, static_cast<bool>(predicate));
// FIXME This should be std::uint64_t on AMD GCN architectures and on CPU,
// but the former is not targeted in alpaka and CPU case is not supported in SYCL yet.
Expand All @@ -105,7 +106,7 @@ namespace alpaka::warp::trait
struct Shfl<warp::WarpGenericSycl<TDim>>
{
template<typename T>
static auto shfl(warp::WarpGenericSycl<TDim> const& warp, T value, std::int32_t srcLane, std::int32_t width)
static auto shfl(warp::WarpGenericSycl<TDim> const&, T value, std::int32_t srcLane, std::int32_t width)
{
ALPAKA_ASSERT_OFFLOAD(width > 0);
ALPAKA_ASSERT_OFFLOAD(srcLane < width);
Expand All @@ -117,7 +118,8 @@ namespace alpaka::warp::trait
Example: If we assume a sub-group size of 32 and a width of 16 we will receive two subdivisions:
The first starts at sub-group index 0 and the second at sub-group index 16. For srcLane = 4 the
first subdivision will access the value at sub-group index 4 and the second at sub-group index 20. */
auto const actual_group = warp.m_item_warp.get_sub_group();
auto const item = sycl::ext::oneapi::experimental::this_nd_item<TDim::value>();
auto const actual_group = item.get_sub_group();
auto const actual_item_id = static_cast<std::int32_t>(actual_group.get_local_linear_id());
auto const actual_group_id = actual_item_id / width;
auto const actual_src_id = static_cast<std::size_t>(srcLane + actual_group_id * width);