Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ struct acoustic_free_surface<specfem::element::dimension_tag::dim2> {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int lane = 0; lane < mask_type::size(); ++lane) {
if (index.mask(lane)) {
Expand Down Expand Up @@ -284,7 +284,7 @@ struct acoustic_free_surface<specfem::element::dimension_tag::dim2> {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int lane = 0; lane < mask_type::size(); ++lane) {
if (index.mask(lane)) {
Expand Down
8 changes: 4 additions & 4 deletions core/specfem/assembly/boundaries/dim2/impl/stacey.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ template <> struct stacey<specfem::element::dimension_tag::dim2> {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int lane = 0; lane < mask_type::size(); ++lane) {
if (index.mask(lane)) {
Expand Down Expand Up @@ -375,7 +375,7 @@ template <> struct stacey<specfem::element::dimension_tag::dim2> {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int lane = 0; lane < mask_type::size(); ++lane) {
if (index.mask(lane)) {
Expand Down Expand Up @@ -496,7 +496,7 @@ template <> struct stacey<specfem::element::dimension_tag::dim2> {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int lane = 0; lane < mask_type::size(); ++lane) {
if (index.mask(lane)) {
Expand Down Expand Up @@ -546,7 +546,7 @@ template <> struct stacey<specfem::element::dimension_tag::dim2> {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int lane = 0; lane < mask_type::size(); ++lane) {
if (index.mask(lane)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class boundary_medium_container<specfem::element::dimension_tag::dim2,
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int icomp = 0; icomp < components; ++icomp)
Kokkos::Experimental::where(mask, acceleration(icomp))
Expand Down Expand Up @@ -144,7 +144,7 @@ class boundary_medium_container<specfem::element::dimension_tag::dim2,
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int icomp = 0; icomp < components; ++icomp)
Kokkos::Experimental::where(mask, acceleration(icomp))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class boundary_medium_container<specfem::element::dimension_tag::dim3,
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int icomp = 0; icomp < components; ++icomp)
Kokkos::Experimental::where(mask, acceleration(icomp))
Expand Down Expand Up @@ -148,7 +148,7 @@ class boundary_medium_container<specfem::element::dimension_tag::dim3,
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

for (int icomp = 0; icomp < components; ++icomp)
Kokkos::Experimental::where(mask, acceleration(icomp))
Expand Down
7 changes: 4 additions & 3 deletions core/specfem/assembly/fields/impl/add_access_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,10 @@ KOKKOS_FORCEINLINE_FUNCTION void add_after_simd_dispatch(

check_accessor_compatibility<AccessorTypes...>();

using mask_type = typename std::tuple_element_t<
0, std::tuple<AccessorTypes...> >::simd::mask_type;
mask_type mask([&](std::size_t lane) { return index.mask(lane); });
using simd =
typename std::tuple_element_t<0, std::tuple<AccessorTypes...> >::simd;
using mask_type = typename simd::mask_type;
const auto mask = index.template get_mask<simd>();

const int iglob = index.iglob;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,10 @@ KOKKOS_FORCEINLINE_FUNCTION void atomic_add_after_simd_dispatch(

check_accessor_compatibility<AccessorTypes...>();

using mask_type = typename std::tuple_element_t<
0, std::tuple<AccessorTypes...> >::simd::mask_type;
mask_type mask([&](std::size_t lane) { return index.mask(lane); });
using simd =
typename std::tuple_element_t<0, std::tuple<AccessorTypes...> >::simd;
using mask_type = typename simd::mask_type;
const auto mask = index.template get_mask<simd>();

const int iglob = index.iglob;

Expand Down
7 changes: 4 additions & 3 deletions core/specfem/assembly/fields/impl/load_access_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,10 @@ KOKKOS_FORCEINLINE_FUNCTION void load_after_simd_dispatch(

check_accessor_compatibility<AccessorTypes...>();

using mask_type = typename std::tuple_element_t<
0, std::tuple<AccessorTypes...> >::simd::mask_type;
mask_type mask([&](std::size_t lane) { return index.mask(lane); });
using simd =
typename std::tuple_element_t<0, std::tuple<AccessorTypes...> >::simd;
using mask_type = typename simd::mask_type;
const auto mask = index.template get_mask<simd>();

const int iglob = index.iglob;

Expand Down
7 changes: 4 additions & 3 deletions core/specfem/assembly/fields/impl/store_access_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ KOKKOS_FORCEINLINE_FUNCTION void store_after_simd_dispatch(

check_accessor_compatibility<AccessorTypes...>();

using mask_type = typename std::tuple_element_t<
0, std::tuple<AccessorTypes...> >::simd::mask_type;
mask_type mask([&](std::size_t lane) { return index.mask(lane); });
using simd =
typename std::tuple_element_t<0, std::tuple<AccessorTypes...> >::simd;
using mask_type = typename simd::mask_type;
const auto mask = index.template get_mask<simd>();

const int iglob = index.iglob;

Expand Down
8 changes: 4 additions & 4 deletions core/specfem/assembly/impl/domain_accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class DomainAccessor {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();
static_cast<const DataContainer *>(this)->for_each_on_device(
index, [&](const type_real &value, const std::size_t i) mutable {
Kokkos::Experimental::where(mask, values[i])
Expand All @@ -70,7 +70,7 @@ class DomainAccessor {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();
static_cast<const DataContainer *>(this)->for_each_on_host(
index, [&](const type_real &value, const std::size_t i) mutable {
Kokkos::Experimental::where(mask, values[i])
Expand Down Expand Up @@ -104,7 +104,7 @@ class DomainAccessor {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();
static_cast<const DataContainer *>(this)->for_each_on_device(
index, [&](type_real &value, const std::size_t i) {
Kokkos::Experimental::where(mask, values[i])
Expand All @@ -120,7 +120,7 @@ class DomainAccessor {
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();
static_cast<const DataContainer *>(this)->for_each_on_host(
index, [&](type_real &value, const std::size_t i) {
Kokkos::Experimental::where(mask, values[i])
Expand Down
2 changes: 1 addition & 1 deletion core/specfem/assembly/jacobian_matrix/dim2/impl_load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ KOKKOS_FORCEINLINE_FUNCTION void impl_load(const IndexType &index,
const auto &mapping = container.xix.get_mapping();
const std::size_t _index = mapping(ispec, iz, ix);

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

if constexpr (on_device) {
Kokkos::Experimental::where(mask, point.xix)
Expand Down
2 changes: 1 addition & 1 deletion core/specfem/assembly/jacobian_matrix/dim2/impl_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ inline void impl_store(const IndexType &index, const ContainerType &derivatives,
using mask_type = typename simd::mask_type;
using tag_type = typename simd::tag_type;

mask_type mask([&](std::size_t lane) { return index.mask(lane); });
const auto mask = index.template get_mask<simd>();

const auto &mapping = derivatives.xix.get_mapping();
const std::size_t _index = mapping(ispec, iz, ix);
Expand Down
20 changes: 20 additions & 0 deletions core/specfem/point/assembly_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,26 @@ struct assembly_index<true>
KOKKOS_FUNCTION
bool mask(const std::size_t &lane) const {
  // A lane is active when its (signed) index is below number_points.
  const int lane_index = static_cast<int>(lane);
  return lane_index < number_points;
}

/**
 * @brief Returns a SIMD mask for valid lanes.
 *
 * For full chunks (all lanes valid) returns an all-true mask using the
 * single-value (broadcast) constructor, avoiding the per-lane generator. For
 * a partial chunk falls back to per-lane evaluation with the same predicate
 * as mask(): lane i is set iff int(i) < number_points.
 *
 * The SIMD width is converted to int before the comparison so that the fast
 * path and the per-lane path agree for every value of number_points,
 * regardless of whether simd_type::size() returns a signed or unsigned type.
 *
 * @tparam simd_type SIMD wrapper type (e.g. specfem::datatype::simd<float,
 * true>); must provide mask_type and a static size()
 * @return SIMD mask with true for valid lanes
 */
template <typename simd_type>
KOKKOS_INLINE_FUNCTION typename simd_type::mask_type get_mask() const {
  using mask_type = typename simd_type::mask_type;

  // Signed width: avoids an implicit int -> unsigned conversion that would
  // turn a negative number_points into an "all lanes valid" result.
  const int simd_width = static_cast<int>(simd_type::size());

  if (number_points >= simd_width) {
    return mask_type(true);
  }

  return mask_type([&](std::size_t lane) {
    return static_cast<int>(lane) < number_points;
  });
}

/**
* @name Constructors
* @brief Constructors for initializing the SIMD assembly index.
Expand Down
40 changes: 40 additions & 0 deletions core/specfem/point/index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,26 @@ struct index<specfem::element::dimension_tag::dim2, true>
bool mask(const std::size_t &lane) const {
return int(lane) < number_elements;
}

/**
 * @brief Returns a SIMD mask for valid lanes.
 *
 * For full chunks (all lanes valid) returns an all-true mask using the
 * single-value (broadcast) constructor, avoiding the per-lane generator. For
 * a partial chunk falls back to per-lane evaluation with the same predicate
 * as mask(): lane i is set iff int(i) < number_elements.
 *
 * The SIMD width is converted to int before the comparison so that the fast
 * path and the per-lane path agree for every value of number_elements,
 * regardless of whether simd_type::size() returns a signed or unsigned type.
 *
 * @tparam simd_type SIMD wrapper type (e.g. specfem::datatype::simd<float,
 * true>); must provide mask_type and a static size()
 * @return SIMD mask with true for valid lanes
 */
template <typename simd_type>
KOKKOS_INLINE_FUNCTION typename simd_type::mask_type get_mask() const {
  using mask_type = typename simd_type::mask_type;

  // Signed width: avoids an implicit int -> unsigned conversion that would
  // turn a negative number_elements into an "all lanes valid" result.
  const int simd_width = static_cast<int>(simd_type::size());

  if (number_elements >= simd_width) {
    return mask_type(true);
  }

  return mask_type([&](std::size_t lane) {
    return static_cast<int>(lane) < number_elements;
  });
}
};

//-------------------------- 3D Specializations ------------------------------//
Expand Down Expand Up @@ -334,6 +354,26 @@ struct index<specfem::element::dimension_tag::dim3, true>
bool mask(const std::size_t &lane) const {
return int(lane) < number_elements;
}

/**
 * @brief Returns a SIMD mask for valid lanes.
 *
 * For full chunks (all lanes valid) returns an all-true mask using the
 * single-value (broadcast) constructor, avoiding the per-lane generator. For
 * a partial chunk falls back to per-lane evaluation with the same predicate
 * as mask(): lane i is set iff int(i) < number_elements.
 *
 * The SIMD width is converted to int before the comparison so that the fast
 * path and the per-lane path agree for every value of number_elements,
 * regardless of whether simd_type::size() returns a signed or unsigned type.
 *
 * @tparam simd_type SIMD wrapper type (e.g. specfem::datatype::simd<float,
 * true>); must provide mask_type and a static size()
 * @return SIMD mask with true for valid lanes
 */
template <typename simd_type>
KOKKOS_INLINE_FUNCTION typename simd_type::mask_type get_mask() const {
  using mask_type = typename simd_type::mask_type;

  // Signed width: avoids an implicit int -> unsigned conversion that would
  // turn a negative number_elements into an "all lanes valid" result.
  const int simd_width = static_cast<int>(simd_type::size());

  if (number_elements >= simd_width) {
    return mask_type(true);
  }

  return mask_type([&](std::size_t lane) {
    return static_cast<int>(lane) < number_elements;
  });
}
};

} // namespace point
Expand Down