Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions src/impl/particles_impl.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ namespace libcloudphxx

// housekeeping data (per particle)
thrust_device::vector<thrust_size_t>
i, j, k, ijk, // Eulerian grid cell indices (always zero for 0D)
ijk, // Eulerian grid cell indices (always zero for 0D); i, j and k use temporary vectors from tmp_device_size_part via i_gp, j_gp and k_gp; TODO: make ijk, sorted_id and sorted_ijk also use such temporary vectors?
sorted_id, sorted_ijk;

// Arakawa-C grid helper vars
Expand Down Expand Up @@ -209,8 +209,8 @@ namespace libcloudphxx
tmp_vector_pool<thrust_device::vector<unsigned int>>
tmp_device_n_part;
tmp_vector_pool<thrust_device::vector<thrust_size_t>>
tmp_device_size_cell;
// tmp_device_size_part;
tmp_device_size_cell,
tmp_device_size_part;

// guards for temp vectors that are used in multiple functions and need to stay unchanged inbetween
// tmp_vector_pool<thrust_device::vector<real_t>>::guard asd;
Expand All @@ -223,8 +223,6 @@ namespace libcloudphxx
sstp_dlt_rhod_gp,
sstp_dlt_p_gp,
drv_gp,
lft_id_gp,
rgt_id_gp,
lambda_D_gp,
lambda_K_gp,
rw_mom3_gp,
Expand All @@ -238,6 +236,14 @@ namespace libcloudphxx
typename tmp_vector_pool<thrust_device::vector<unsigned int>>::guard
> chem_flag_gp;

std::unique_ptr<
typename tmp_vector_pool<thrust_device::vector<thrust_size_t>>::guard
> lft_id_gp,
rgt_id_gp,
i_gp,
j_gp,
k_gp;

// to simplify foreach calls
const thrust::counting_iterator<thrust_size_t> zero;

Expand All @@ -251,7 +257,7 @@ namespace libcloudphxx
std::pair<detail::bcond_t, detail::bcond_t> bcond;

// number of particles to be copied left/right in distmem setup
unsigned int lft_count, rgt_count;
thrust_size_t lft_count, rgt_count;

// nx in devices to the left of this one
unsigned int n_x_bfr,
Expand Down Expand Up @@ -458,12 +464,21 @@ namespace libcloudphxx
tmp_device_real_part.add_vector();
}

// init number of temporary size_t vctrs
// 1 needed if n_dims == 1 (i)
// 2 needed if distmem (lft_id, rgt_id)
// 2 needed if n_dims == 2 (i, k)
// 3 needed if n_dims == 3 (i, j, k)
// 1 already initialized by default ctor;
// NOTE: cell index (i,j,k) could reuse tmp_device_n_part, but then it would have to be recalculated after each random sort (maybe its not a problem?)
if(n_dims>=2 || distmem())
tmp_device_size_part.add_vector();
if(n_dims==3)
tmp_device_size_part.add_vector();

resize_size_vctrs.insert(&ijk);
resize_size_vctrs.insert(&sorted_ijk);
resize_size_vctrs.insert(&sorted_id);
if (opts_init.nx != 0) resize_size_vctrs.insert(&i);
if (opts_init.ny != 0) resize_size_vctrs.insert(&j);
if (opts_init.nz != 0) resize_size_vctrs.insert(&k);
}

void sanity_checks();
Expand Down
6 changes: 6 additions & 0 deletions src/impl/particles_impl_adve.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ namespace libcloudphxx
{
const pi_real_size C_lft(courant_x.begin(), pi_size_size(lft.begin(), thrust::make_transform_iterator(ijk.begin(), detail::add_val<thrust_size_t>(offset)))),
C_rgt(courant_x.begin(), pi_size_size(rgt.begin(), thrust::make_transform_iterator(ijk.begin(), detail::add_val<thrust_size_t>(offset))));
thrust_device::vector<thrust_size_t> &i(i_gp->get());

thrust::transform(
thrust::make_zip_iterator(make_tuple(x.begin(), i.begin(), C_lft, C_rgt )), // input - begin
thrust::make_zip_iterator(make_tuple(x.end(), i.end(), C_lft+n_part, C_rgt+n_part)), // input - end
Expand All @@ -131,6 +133,8 @@ namespace libcloudphxx
{
const pi_real_size C_fre(courant_y.begin(), pi_size_size(fre.begin(), thrust::make_transform_iterator(ijk.begin(), detail::add_val<thrust_size_t>(offset)))),
C_hnd(courant_y.begin(), pi_size_size(hnd.begin(), thrust::make_transform_iterator(ijk.begin(), detail::add_val<thrust_size_t>(offset))));
thrust_device::vector<thrust_size_t> &j(j_gp->get());

thrust::transform(
thrust::make_zip_iterator(make_tuple(y.begin(), j.begin(), C_fre, C_hnd )), // input - begin
thrust::make_zip_iterator(make_tuple(y.end(), j.end(), C_fre+n_part, C_hnd+n_part)), // input - end
Expand All @@ -143,6 +147,8 @@ namespace libcloudphxx
{
const pi_real_size C_abv(courant_z.begin(), pi_size_size(abv.begin(), thrust::make_transform_iterator(ijk.begin(), detail::add_val<thrust_size_t>(offset)))),
C_blw(courant_z.begin(), pi_size_size(blw.begin(), thrust::make_transform_iterator(ijk.begin(), detail::add_val<thrust_size_t>(offset))));
thrust_device::vector<thrust_size_t> &k(k_gp->get());

thrust::transform(
thrust::make_zip_iterator(make_tuple(z.begin(), k.begin(), C_blw, C_abv )), // input - begin
thrust::make_zip_iterator(make_tuple(z.end(), k.end(), C_blw+n_part, C_abv+n_part)), // input - end
Expand Down
13 changes: 9 additions & 4 deletions src/impl/particles_impl_bcnd.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,15 @@ namespace libcloudphxx
{
namespace arg = thrust::placeholders;

reset_guardp(lft_id_gp, tmp_device_real_part);
thrust_device::vector<real_t> &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
reset_guardp(rgt_id_gp, tmp_device_real_part);
thrust_device::vector<real_t> &rgt_id(rgt_id_gp->get());
// release tmp vectors for reuse as lft/rgt_id; cell indices (i,j,k,ijk) are anyway undefined by distmem copy and will be recalculated in post_copy
i_gp.reset();
k_gp.reset();

reset_guardp(lft_id_gp, tmp_device_size_part);
thrust_device::vector<thrust_size_t> &lft_id(lft_id_gp->get());

reset_guardp(rgt_id_gp, tmp_device_size_part);
thrust_device::vector<thrust_size_t> &rgt_id(rgt_id_gp->get());

// save ids of SDs to copy
lft_count = thrust::copy_if(
Expand Down
60 changes: 54 additions & 6 deletions src/impl/particles_impl_hskpng_ijk.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,34 @@ namespace libcloudphxx
template <typename real_t, backend_t device>
void particles_t<real_t, device>::impl::ravel_ijk(const thrust_size_t begin_shift) // default = 0
{
namespace arg = thrust::placeholders;
switch (n_dims)
{
case 0:
break;
case 1:
{
thrust_device::vector<thrust_size_t> &i(i_gp->get());
thrust::copy(i.begin()+begin_shift, i.end(), ijk.begin()+begin_shift);
break;
}
case 2:
namespace arg = thrust::placeholders;
{
thrust_device::vector<thrust_size_t> &i(i_gp->get());
thrust_device::vector<thrust_size_t> &k(k_gp->get());
thrust::transform(
i.begin()+begin_shift, i.end(), // input - first arg
k.begin()+begin_shift, // input - second arg
ijk.begin()+begin_shift, // output
arg::_1 * opts_init.nz + arg::_2 // assuming z varies first
);
break;
}
case 3:
namespace arg = thrust::placeholders;
{
thrust_device::vector<thrust_size_t> &i(i_gp->get());
thrust_device::vector<thrust_size_t> &j(j_gp->get());
thrust_device::vector<thrust_size_t> &k(k_gp->get());
thrust::transform(
i.begin()+begin_shift, i.end(), // input - first arg
j.begin()+begin_shift, // input - second arg
Expand All @@ -65,6 +75,7 @@ namespace libcloudphxx
);
// TODO: replace these two transforms with single one
break;
}
default:
assert(false);
}
Expand All @@ -75,10 +86,18 @@ namespace libcloudphxx
template <typename real_t, backend_t device>
void particles_t<real_t, device>::impl::unravel_ijk(const thrust_size_t begin_shift) // default = 0
{
namespace arg = thrust::placeholders;
switch(n_dims)
{
case 3:
namespace arg = thrust::placeholders;
{
reset_guardp(i_gp, tmp_device_size_part); // acquire tmp array to store i
reset_guardp(j_gp, tmp_device_size_part);
reset_guardp(k_gp, tmp_device_size_part);

thrust_device::vector<thrust_size_t> &i(i_gp->get());
thrust_device::vector<thrust_size_t> &j(j_gp->get());
thrust_device::vector<thrust_size_t> &k(k_gp->get());
// y
thrust::transform(
ijk.begin() + begin_shift, ijk.end(), // input - first arg
Expand All @@ -98,7 +117,14 @@ namespace libcloudphxx
arg::_1 / (opts_init.nz * opts_init.ny) // z and y vary first
);
break;
}
case 2:
{
reset_guardp(i_gp, tmp_device_size_part); // acquire tmp array to store i
reset_guardp(k_gp, tmp_device_size_part);

thrust_device::vector<thrust_size_t> &i(i_gp->get());
thrust_device::vector<thrust_size_t> &k(k_gp->get());
// z
thrust::transform(
ijk.begin() + begin_shift, ijk.end(), // input - first arg
Expand All @@ -112,8 +138,15 @@ namespace libcloudphxx
arg::_1 / (opts_init.nz)
);
break;
}
case 1:
{
reset_guardp(i_gp, tmp_device_size_part); // acquire tmp array to store i
thrust_device::vector<thrust_size_t> &i(i_gp->get());

thrust::copy(ijk.begin() + begin_shift, ijk.end(), i.begin() + begin_shift); // only x
Copy link

Copilot AI Jan 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing break statement after case 1. Without a break, execution will fall through to case 0, which may not be the intended behavior.

Suggested change
thrust::copy(ijk.begin() + begin_shift, ijk.end(), i.begin() + begin_shift); // only x
thrust::copy(ijk.begin() + begin_shift, ijk.end(), i.begin() + begin_shift); // only x
break;

Copilot uses AI. Check for mistakes.
break;
}
case 0:
break;
default:
Expand All @@ -140,9 +173,24 @@ namespace libcloudphxx
}
} helper;

if (opts_init.nx != 0) helper(x, i, opts_init.dx);
if (opts_init.ny != 0) helper(y, j, opts_init.dy);
if (opts_init.nz != 0) helper(z, k, opts_init.dz);
if (opts_init.nx != 0)
{
reset_guardp(i_gp, tmp_device_size_part); // acquire tmp array to store i
thrust_device::vector<thrust_size_t> &i(i_gp->get());
helper(x, i, opts_init.dx);
}
if (opts_init.ny != 0)
{
reset_guardp(j_gp, tmp_device_size_part); // acquire tmp array to store j
thrust_device::vector<thrust_size_t> &j(j_gp->get());
helper(y, j, opts_init.dy);
}
if (opts_init.nz != 0)
{
reset_guardp(k_gp, tmp_device_size_part); // acquire tmp array to store k
thrust_device::vector<thrust_size_t> &k(k_gp->get());
helper(z, k, opts_init.dz);
}

// raveling i, j & k into ijk
ravel_ijk();
Expand Down
22 changes: 3 additions & 19 deletions src/impl/particles_impl_hskpng_remove.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,6 @@ namespace libcloudphxx
template <typename real_t, backend_t device>
void particles_t<real_t, device>::impl::hskpng_remove_n0()
{
// TODO: init these vctrs once per run
std::set<thrust_device::vector<thrust_size_t>*> n_t_vctrs;
n_t_vctrs.insert(&ijk);

if (opts_init.nx != 0) n_t_vctrs.insert(&i);
if (opts_init.ny != 0) n_t_vctrs.insert(&j);
if (opts_init.nz != 0) n_t_vctrs.insert(&k);

namespace arg = thrust::placeholders;

// remove chemical stuff
Expand Down Expand Up @@ -64,17 +56,6 @@ namespace libcloudphxx
);
}

// remove from n_t vectors
for(auto vec: n_t_vctrs)
{
thrust::remove_if(
vec->begin(),
vec->begin() + n_part,
n.begin(),
arg::_1 == 0
);
}

// remove from n and set new n_part
auto new_end = thrust::remove_if(
n.begin(),
Expand All @@ -89,6 +70,9 @@ namespace libcloudphxx
// resize chem vectors and update chem iterators
if(opts_init.chem_switch)
init_chem();

// NOTE: hskpng_ijk needs to be called aftewards, as i,j,k,ijk were not removed, but were resized - they are undefined!
// TODO: call it here? bu remove_n0 is called multiple times in rcyc(), so more optimal to do it manually afterwards...
}
};
};
2 changes: 1 addition & 1 deletion src/impl/particles_impl_hskpng_resize.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace libcloudphxx
// vec->resize(n_part);
tmp_device_real_part.resize(n_part);
tmp_device_n_part.resize(n_part);
// tmp_device_size_part.resize(n_part);
tmp_device_size_part.resize(n_part);
tmp_host_size_part.resize(n_part);
tmp_host_real_part.resize(n_part);

Expand Down
7 changes: 4 additions & 3 deletions src/impl/particles_impl_init_xyz.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ namespace libcloudphxx
const real_t a[3] = { opts_init.x0, opts_init.y0, opts_init.z0 };
const real_t b[3] = { opts_init.x1, opts_init.y1, opts_init.z1 };
const real_t d[3] = { opts_init.dx, opts_init.dy, opts_init.dz };
thrust_device::vector<thrust_size_t>
*ii[3] = { &i, &j, &k };

for (int ix = 0; ix < 3; ++ix)
{
Expand All @@ -60,11 +58,14 @@ namespace libcloudphxx
// shifting from [0,1] to random position within respective cell
// TODO: now the rand range is [0,1), include this here
{
auto &cell_idx_gp = ix == 0 ? i_gp : (ix == 1 ? j_gp : k_gp);
thrust_device::vector<thrust_size_t> &cell_idx(cell_idx_gp->get());

namespace arg = thrust::placeholders;
thrust::transform(
u01.begin(),
u01.begin() + n_part_to_init,
ii[ix]->begin() + n_part_old,
cell_idx.begin() + n_part_old,
v[ix]->begin() + n_part_old,
detail::pos_lgrngn_domain<real_t>(a[ix], b[ix], d[ix])
);
Expand Down
13 changes: 7 additions & 6 deletions src/impl/particles_impl_pack.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace libcloudphxx
assert(out_n_bfr.size() >= lft_count * distmem_n_vctrs.size());
assert(in_n_bfr.size() >= lft_count * distmem_n_vctrs.size());

thrust_device::vector<real_t> &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
thrust_device::vector<thrust_size_t> &lft_id(lft_id_gp->get());

auto it = distmem_n_vctrs.begin();
while (it != distmem_n_vctrs.end())
Expand All @@ -50,8 +50,9 @@ namespace libcloudphxx
void particles_t<real_t, device>::impl::pack_n_rgt()
{
assert(out_n_bfr.size() >= rgt_count * distmem_n_vctrs.size());
assert(in_n_bfr.size() >= rgt_count * distmem_n_vctrs.size());

thrust_device::vector<real_t> &rgt_id(rgt_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many availableassert(in_n_bfr.size() >= rgt_count * distmem_n_vctrs.size());
thrust_device::vector<thrust_size_t> &rgt_id(rgt_id_gp->get());

auto it = distmem_n_vctrs.begin();
while (it != distmem_n_vctrs.end())
Expand All @@ -71,7 +72,7 @@ namespace libcloudphxx
assert(out_real_bfr.size() >= lft_count * distmem_real_vctrs.size());
assert(in_real_bfr.size() >= lft_count * distmem_real_vctrs.size());

thrust_device::vector<real_t> &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
thrust_device::vector<thrust_size_t> &lft_id(lft_id_gp->get());

auto it = distmem_real_vctrs.begin();
while (it != distmem_real_vctrs.end())
Expand All @@ -91,7 +92,7 @@ namespace libcloudphxx
assert(out_real_bfr.size() >= rgt_count * distmem_real_vctrs.size());
assert(in_real_bfr.size() >= rgt_count * distmem_real_vctrs.size());

thrust_device::vector<real_t> &rgt_id(rgt_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
thrust_device::vector<thrust_size_t> &rgt_id(rgt_id_gp->get());

auto it = distmem_real_vctrs.begin();
while (it != distmem_real_vctrs.end())
Expand All @@ -108,7 +109,7 @@ namespace libcloudphxx
template <typename real_t, backend_t device>
void particles_t<real_t, device>::impl::bcnd_remote_lft(const real_t &x0, const real_t &x1)
{
thrust_device::vector<real_t> &lft_id(lft_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
thrust_device::vector<thrust_size_t> &lft_id(lft_id_gp->get());

thrust::transform(
thrust::make_permutation_iterator(x.begin(), lft_id.begin()),
Expand All @@ -121,7 +122,7 @@ namespace libcloudphxx
template <typename real_t, backend_t device>
void particles_t<real_t, device>::impl::bcnd_remote_rgt(const real_t &x1, const real_t &x0)
{
thrust_device::vector<real_t> &rgt_id(rgt_id_gp->get()); // id type is thrust_size_t, but we use real_t tmp vector because there are many available
thrust_device::vector<thrust_size_t> &rgt_id(rgt_id_gp->get());

thrust::transform(
thrust::make_permutation_iterator(x.begin(), rgt_id.begin()),
Expand Down
2 changes: 2 additions & 0 deletions src/impl/particles_impl_rcyc.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ namespace libcloudphxx
// if not all were recycled, remove those with n==0
if(n_flagged < n_to_rcyc) hskpng_remove_n0();
return n_to_rcyc;

// NOTE: hskpng_ijk needs to be called aftewards, as i,j,k,ijk were not copied!
}
};
};
Loading
Loading