diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp
index 13e4c699..94b52a9f 100644
--- a/src/bvals/bvals.cpp
+++ b/src/bvals/bvals.cpp
@@ -222,6 +222,7 @@ void MeshBoundaryValues::InitializeBuffers(const int nvar) {
 particles::ParticlesBoundaryValues::ParticlesBoundaryValues(
     particles::Particles *pp, ParameterInput *pin) :
     sendlist("sendlist",1),
+    destroylist("destroylist",1),
 #if MPI_PARALLEL_ENABLED
     prtcl_rsendbuf("rsend",1),
     prtcl_rrecvbuf("rrecv",1),
diff --git a/src/bvals/bvals.hpp b/src/bvals/bvals.hpp
index 134638a8..d39c99e2 100644
--- a/src/bvals/bvals.hpp
+++ b/src/bvals/bvals.hpp
@@ -247,8 +247,9 @@ class ParticlesBoundaryValues {
   ParticlesBoundaryValues(particles::Particles *ppart, ParameterInput *pin);
   ~ParticlesBoundaryValues();
 
-  int nprtcl_send, nprtcl_recv;
+  int nprtcl_send, nprtcl_recv, nprtcl_destroy;
   DualArray1D<ParticleLocationData> sendlist;
+  DualArray1D<ParticleLocationData> destroylist;
 
   // Data needed to count number of messages and particles to send between ranks
   int nsends;    // number of MPI sends to neighboring ranks on this rank
diff --git a/src/bvals/bvals_part.cpp b/src/bvals/bvals_part.cpp
index 0aeafe12..ab7b5ba2 100644
--- a/src/bvals/bvals_part.cpp
+++ b/src/bvals/bvals_part.cpp
@@ -44,6 +44,23 @@ void UpdateGID(int &newgid, NeighborBlock nghbr, int myrank, int *pcounter,
   return;
 }
 
+//----------------------------------------------------------------------------------------
+//! \fn void ParticlesBoundaryValues::MarkForDestruction()
+//! \brief Adds particles that cross the user-defined mesh boundaries to a list that is
+//! then used to destroy them. The list uses the same structure as the sendlist for
+//! convenience in the following functions.
+//! This operation should also be performed for single-process runs.
+
+KOKKOS_INLINE_FUNCTION
+void MarkForDestruction(int *pcounter, DualArray1D<ParticleLocationData> dlist, int p) {
+  int index = Kokkos::atomic_fetch_add(pcounter,1);
+  dlist.d_view(index).prtcl_indx = p;
+  // These particles don't actually get sent, thus the following information is not needed
+  dlist.d_view(index).dest_gid = 0;
+  dlist.d_view(index).dest_rank = 0;
+  return;
+}
+
 //----------------------------------------------------------------------------------------
 //! \fn void ParticlesBoundaryValues::SetNewGID()
 //! \brief
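
Note on the pattern above: MarkForDestruction() appends to a preallocated list by reserving a unique slot with Kokkos::atomic_fetch_add and then filling that slot, so many threads can flag particles concurrently without locks. Below is a minimal, self-contained sketch of that idiom; the view names and the p % 7 flagging test are illustrative stand-ins, not code from this patch.

    #include <Kokkos_Core.hpp>

    int main(int argc, char *argv[]) {
      Kokkos::initialize(argc, argv);
      {
        const int npart = 1000;
        Kokkos::View<int*> list("list", npart);  // worst case: every particle flagged
        Kokkos::View<int> count("count");        // device-resident counter

        Kokkos::parallel_for("flag", npart, KOKKOS_LAMBDA(const int p) {
          if (p % 7 == 0) {  // stand-in for the boundary-crossing test
            int slot = Kokkos::atomic_fetch_add(&count(), 1);  // reserve a slot
            list(slot) = p;                                    // fill it
          }
        });

        int n = 0;
        Kokkos::deep_copy(n, count);  // bring the final count back to the host
        Kokkos::resize(list, n);      // shrink to the slots actually used
      }
      Kokkos::finalize();
      return 0;
    }

Because the atomic reservations complete in nondeterministic order, the list ends up unsorted; this is why sendlist (and, later in this patch, destroylist) is sorted by particle index on the host before holes are filled.
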
@@ -61,11 +78,19 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
   auto &nghbr = pmy_part->pmy_pack->pmb->nghbr;
   auto &psendl = sendlist;
   int counter=0;
-  int *pcounter = &counter;
+  Kokkos::View<int> atom_count("atom_count");
+  Kokkos::deep_copy(atom_count, counter);
   bool &multi_d = pmy_part->pmy_pack->pmesh->multi_d;
   bool &three_d = pmy_part->pmy_pack->pmesh->three_d;
-
-  Kokkos::realloc(sendlist, static_cast<int>(0.1*npart));
+  // Compatibility with user-defined boundary conditions
+  auto &mb_bcs = pmy_part->pmy_pack->pmb->mb_bcs;
+  int destroy_count = 0;
+  Kokkos::View<int> atom_d_count("atom_d_count");
+  Kokkos::deep_copy(atom_d_count, destroy_count);
+  auto &pdestroyl = destroylist;
+
+  Kokkos::realloc(sendlist, static_cast<int>(npart));
+  Kokkos::realloc(destroylist, static_cast<int>(npart));
   par_for("part_update",DevExeSpace(),0,(npart-1), KOKKOS_LAMBDA(const int p) {
     int m = pi(PGID,p) - gids;
     int mylevel = mblev.d_view(m);
@@ -92,89 +117,212 @@ TaskStatus ParticlesBoundaryValues::SetNewPrtclGID() {
     // only update particle GID if it has crossed MeshBlock boundary
     if ((abs(ix) + abs(iy) + abs(iz)) != 0) {
-      if (iz == 0) {
-        if (iy == 0) {
-          // x1 face
-          int indx = NeighborIndex(ix,0,0,0,0);      // neighbor at same level
-          if (nghbr.d_view(m,indx).lev > mylevel) {  // neighbor at finer level
-            indx = NeighborIndex(ix,0,0,fy,fz);
+      // The way GIDs are determined currently is not reliable in SMR cases
+      // due to neighbours across edges and corners being uninitialized.
+      // Need an extra check.
+      bool send_to_coarser = false;
+      if (ix < 0) {
+        // level might be initialized to -1 on some neighbours
+        for (int iop = 0; iop <= 3; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}  // neighbor at coarser level
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
-        } else if (ix == 0) {
-          // x2 face
-          int indx = NeighborIndex(0,iy,0,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(0,iy,0,fx,fz);
+        }
+      } else if (ix > 0) {
+        for (int iop = 4; iop <= 7; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
-        } else {
-          // x1x2 edge
-          int indx = NeighborIndex(ix,iy,0,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(ix,iy,0,fz,0);
+        }
+      }
+      if (iy < 0) {
+        for (int iop = 8; iop <= 11; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
         }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
       }
-      } else if (iy == 0) {
-        if (ix == 0) {
-          // x3 face
-          int indx = NeighborIndex(0,0,iz,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(0,0,iz,fx,fy);
+      } else if (iy > 0) {
+        for (int iop = 12; iop <= 15; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
-        } else {
-          // x3x1 edge
-          int indx = NeighborIndex(ix,0,iz,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(ix,0,iz,fy,0);
+        }
+      }
+      if (iz < 0) {
+        for (int iop = 24; iop <= 27; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
         }
+      } else if (iz > 0) {
+        for (int iop = 28; iop <= 31; ++iop) {
+          if (nghbr.d_view(m,iop).lev < mylevel && nghbr.d_view(m,iop).lev > 0) {
+            send_to_coarser = true;
+          }
+        }
+      }
+      int indx = 0;
+      bool check_boundary =
+           (mb_bcs.d_view(m,BoundaryFace::inner_x3) == BoundaryFlag::user && iz < 0)
+        || (mb_bcs.d_view(m,BoundaryFace::outer_x3) == BoundaryFlag::user && iz > 0)
+        || (mb_bcs.d_view(m,BoundaryFace::inner_x2) == BoundaryFlag::user && iy < 0)
+        || (mb_bcs.d_view(m,BoundaryFace::outer_x2) == BoundaryFlag::user && iy > 0)
+        || (mb_bcs.d_view(m,BoundaryFace::inner_x1) == BoundaryFlag::user && ix < 0)
+        || (mb_bcs.d_view(m,BoundaryFace::outer_x1) == BoundaryFlag::user && ix > 0);
+      // Add particle to destruction list.
+      // At the time of sending, the particles that need to be destroyed
+      // are treated like those that have been sent,
+      // without actually being sent (i.e. they're removed from the arrays).
+      if (check_boundary) {
+        MarkForDestruction(&atom_d_count(), pdestroyl, p);
       } else {
-        if (ix == 0) {
-          // x2x3 edge
-          int indx = NeighborIndex(0,iy,iz,0,0);
-          if (nghbr.d_view(m,indx).lev > mylevel) {
-            indx = NeighborIndex(0,iy,iz,fx,0);
+        if (iz == 0) {
+          if (iy == 0) {
+            // x1 face
+            indx = NeighborIndex(ix,0,0,0,0);          // neighbor at same level
+            if (nghbr.d_view(m,indx).lev > mylevel) {  // neighbor at finer level
+              indx = NeighborIndex(ix,0,0,fy,fz);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}  // neighbor at coarser level
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else if (ix == 0) {
+            // x2 face
+            indx = NeighborIndex(0,iy,0,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(0,iy,0,fx,fz);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else {
+            // x1x2 edge
+            indx = NeighborIndex(ix,iy,0,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(ix,iy,0,fz,0);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            // Using SMR some edge and corner neighbours are uninitialized,
+            // thus check if the index has increased over the appropriate range
+            // and try to communicate through faces to a coarser meshblock.
+            // Communication to coarser meshblocks should always go through faces.
+            if (indx > 23 || send_to_coarser) {
+              bool found_coarser = false;
+              // First try through x face
+              indx = NeighborIndex(ix,0,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              // If all faces in the x direction are on a finer level this should
+              // have already been covered by previous logic, thus check y
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,iy,0,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          }
+        } else if (iy == 0) {
+          if (ix == 0) {
+            // x3 face
+            indx = NeighborIndex(0,0,iz,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(0,0,iz,fx,fy);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else {
+            // x3x1 edge
+            indx = NeighborIndex(ix,0,iz,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(ix,0,iz,fy,0);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            if (indx > 39 || send_to_coarser) {
+              bool found_coarser = false;
+              indx = NeighborIndex(ix,0,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,0,iz,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
           }
-          while (nghbr.d_view(m,indx).gid < 0) {indx++;}
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
         } else {
-          // corners
-          int indx = NeighborIndex(ix,iy,iz,0,0);
-          UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, pcounter, psendl, p);
+          if (ix == 0) {
+            // x2x3 edge
+            indx = NeighborIndex(0,iy,iz,0,0);
+            if (nghbr.d_view(m,indx).lev > mylevel) {
+              indx = NeighborIndex(0,iy,iz,fx,0);
+            }
+            while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+            if (indx > 47 || send_to_coarser) {
+              bool found_coarser = false;
+              indx = NeighborIndex(0,iy,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,0,iz,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          } else {
+            // corners
+            indx = NeighborIndex(ix,iy,iz,0,0);
+            if (nghbr.d_view(m,indx).gid < 0 || send_to_coarser) {
+              bool found_coarser = false;
+              indx = NeighborIndex(ix,0,0,0,0);
+              while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+              if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,iy,0,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              }
+              if ( !found_coarser ) {
+                indx = NeighborIndex(0,0,iz,0,0);
+                while (nghbr.d_view(m,indx).gid < 0) {indx++;}
+                if (nghbr.d_view(m,indx).lev < mylevel) { found_coarser = true; }
+              }
+            }
+            UpdateGID(pi(PGID,p), nghbr.d_view(m,indx), myrank, &atom_count(), psendl, p);
+          }
         }
-      }
-      // reset x,y,z positions if particle crosses Mesh boundary using periodic BCs
-      if (x1 < meshsize.x1min) {
-        pr(IPX,p) += (meshsize.x1max - meshsize.x1min);
-      } else if (x1 > meshsize.x1max) {
-        pr(IPX,p) -= (meshsize.x1max - meshsize.x1min);
-      }
-      if (x2 < meshsize.x2min) {
-        pr(IPY,p) += (meshsize.x2max - meshsize.x2min);
-      } else if (x2 > meshsize.x2max) {
-        pr(IPY,p) -= (meshsize.x2max - meshsize.x2min);
-      }
-      if (x3 < meshsize.x3min) {
-        pr(IPZ,p) += (meshsize.x3max - meshsize.x3min);
-      } else if (x3 > meshsize.x3max) {
-        pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min);
+        // reset x,y,z positions if particle crosses Mesh boundary using periodic BCs
+        if (x1 < meshsize.x1min) {
+          pr(IPX,p) += (meshsize.x1max - meshsize.x1min);
+        } else if (x1 > meshsize.x1max) {
+          pr(IPX,p) -= (meshsize.x1max - meshsize.x1min);
+        }
+        if (x2 < meshsize.x2min) {
+          pr(IPY,p) += (meshsize.x2max - meshsize.x2min);
+        } else if (x2 > meshsize.x2max) {
+          pr(IPY,p) -= (meshsize.x2max - meshsize.x2min);
+        }
+        if (x3 < meshsize.x3min) {
+          pr(IPZ,p) += (meshsize.x3max - meshsize.x3min);
+        } else if (x3 > meshsize.x3max) {
+          pr(IPZ,p) -= (meshsize.x3max - meshsize.x3min);
+        }
       }
     }
   });
+  Kokkos::deep_copy(counter, atom_count);
+  Kokkos::deep_copy(destroy_count, atom_d_count);
   nprtcl_send = counter;
+  nprtcl_destroy = destroy_count;
   Kokkos::resize(sendlist, nprtcl_send);
+  Kokkos::resize(destroylist, nprtcl_destroy);
 
   // sync sendlist device array with host
   sendlist.template modify<DevExeSpace>();
   sendlist.template sync<HostMemSpace>();
+  // sync destroylist device array with host
+  destroylist.template modify<DevExeSpace>();
+  destroylist.template sync<HostMemSpace>();
 
   return TaskStatus::complete;
 }
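
Aside on the hunk above: check_boundary is true exactly when the particle exited through a MeshBlock face whose boundary condition is BoundaryFlag::user, tested sign-by-sign against the crossing direction (ix, iy, iz). The sketch below factors that test into a device-callable helper; the helper itself is hypothetical, and the DualArray2D<BoundaryFlag> type for mb_bcs is an assumption inferred from how mb_bcs is indexed in the hunk, not code from this patch.

    // Hypothetical helper equivalent to the inline check_boundary expression.
    // Assumes mb_bcs.d_view(m, face) holds the BoundaryFlag of MeshBlock m at 'face'.
    KOKKOS_INLINE_FUNCTION
    bool CrossesUserBoundary(const DualArray2D<BoundaryFlag> &mb_bcs, int m,
                             int ix, int iy, int iz) {
      return (mb_bcs.d_view(m,BoundaryFace::inner_x1) == BoundaryFlag::user && ix < 0)
          || (mb_bcs.d_view(m,BoundaryFace::outer_x1) == BoundaryFlag::user && ix > 0)
          || (mb_bcs.d_view(m,BoundaryFace::inner_x2) == BoundaryFlag::user && iy < 0)
          || (mb_bcs.d_view(m,BoundaryFace::outer_x2) == BoundaryFlag::user && iy > 0)
          || (mb_bcs.d_view(m,BoundaryFace::inner_x3) == BoundaryFlag::user && iz < 0)
          || (mb_bcs.d_view(m,BoundaryFace::outer_x3) == BoundaryFlag::user && iz > 0);
    }

Factoring the test out would also make it reusable by other particle tasks; the inline form in the patch avoids touching any headers.
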
@@ -248,6 +396,8 @@ TaskStatus ParticlesBoundaryValues::CountSendsAndRecvs() {
 //! \brief
 
 TaskStatus ParticlesBoundaryValues::InitPrtclRecv() {
+  // Moved assignment here for better compatibility with destruction logic
+  nprtcl_recv=0;
 #if MPI_PARALLEL_ENABLED
   // load STL::vector of ParticleMessageData with <...> for receives
   // on this rank. Length will be nrecvs, initially this length is unknown
@@ -262,7 +412,6 @@ TaskStatus ParticlesBoundaryValues::InitPrtclRecv() {
   nrecvs = recvs_thisrank.size();
 
   // Figure out how many particles will be received from all ranks
-  nprtcl_recv=0;
   for (int n=0; n<nrecvs; ++n) {
     nprtcl_recv += recvs_thisrank[n].nprtcls;
   }
@@ ... @@
 TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
+  // Sort destroylist on host by index in particle array
+  namespace KE = Kokkos::Experimental;
+  std::sort(KE::begin(destroylist.h_view), KE::end(destroylist.h_view), SortByIndex);
+  // sync destroylist host array with device
+  destroylist.template modify<HostMemSpace>();
+  destroylist.template sync<DevExeSpace>();
+
+  // In single-process runs the size of particle arrays can only be reduced
+  int &npart = pmy_part->nprtcl_thispack;
+  int new_npart = npart + (nprtcl_recv - nprtcl_send - nprtcl_destroy);
+
+  // nremain defined outside the preprocessor block for the end logic
+  int nremain = 0;
+  int nremain_d = nprtcl_send + nprtcl_destroy - nprtcl_recv;
+
 #if MPI_PARALLEL_ENABLED
   // Sort sendlist on host by index in particle array
-  namespace KE = Kokkos::Experimental;
   std::sort(KE::begin(sendlist.h_view), KE::end(sendlist.h_view), SortByIndex);
   // sync sendlist host array with device. This results in sorted array on device
   sendlist.template modify<HostMemSpace>();
   sendlist.template sync<DevExeSpace>();
 
   // increase size of particle arrays if needed
-  int new_npart = pmy_part->nprtcl_thispack + (nprtcl_recv - nprtcl_send);
-  if (nprtcl_recv > nprtcl_send) {
+  if (nprtcl_recv > (nprtcl_send + nprtcl_destroy) ) {
     Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart);
     Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart);
   }
@@ -458,7 +620,7 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
   // exit if particle communications have not completed
   if (bflag) {return TaskStatus::incomplete;}
 
-  // unpack particles into positions of sent particles
+  // unpack particles into positions of sent or destroyed particles
   if (nprtcl_recv > 0) {
     int nrdata = pmy_part->nrdata;
     int nidata = pmy_part->nidata;
@@ -466,13 +628,16 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
     auto &pi = pmy_part->prtcl_idata;
     auto &rrecvbuf = prtcl_rrecvbuf;
     auto &irecvbuf = prtcl_irecvbuf;
-    int &npart = pmy_part->nprtcl_thispack;
     par_for("punpack",DevExeSpace(),0,(nprtcl_recv-1), KOKKOS_LAMBDA(const int n) {
       int p;
-      if (n < nprtcl_send) {
-        p = sendlist.d_view(n).prtcl_indx;  // place particles in holes created by sends
+      if (n < nprtcl_send ) {
+        p = sendlist.d_view(n).prtcl_indx;  // place particles in holes created by send
+      } else if (n < nprtcl_send + nprtcl_destroy ) {
+        // place particles in holes created by destroy
+        p = destroylist.d_view(n-nprtcl_send).prtcl_indx;
       } else {
-        p = npart + (n - nprtcl_send);  // place particle at end of arrays
+        // place particle at end of arrays
+        p = npart + (n - nprtcl_send - nprtcl_destroy );
       }
       for (int i=0; i<nidata; ++i) {
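
Aside: the unpack kernel in the hunk above assigns each received particle a destination slot in three tiers: holes left by sent particles first, then holes left by destroyed particles, then slots appended past the current end of the arrays. A host-only sketch of that mapping with a worked example follows; all names here are hypothetical, not from the patch.

    #include <cassert>

    // Mirror of the slot-assignment rule in the unpack kernel.
    int UnpackSlot(int n, int nprtcl_send, int nprtcl_destroy, int npart,
                   const int *send_indx, const int *destroy_indx) {
      if (n < nprtcl_send) {
        return send_indx[n];                              // hole left by a send
      } else if (n < nprtcl_send + nprtcl_destroy) {
        return destroy_indx[n - nprtcl_send];             // hole left by a destroy
      }
      return npart + (n - nprtcl_send - nprtcl_destroy);  // append at the end
    }

    int main() {
      // 2 sends (holes at 3 and 8), 1 destroy (hole at 5), npart = 10
      int send_indx[] = {3, 8}, destroy_indx[] = {5};
      assert(UnpackSlot(0, 2, 1, 10, send_indx, destroy_indx) == 3);
      assert(UnpackSlot(1, 2, 1, 10, send_indx, destroy_indx) == 8);
      assert(UnpackSlot(2, 2, 1, 10, send_indx, destroy_indx) == 5);
      assert(UnpackSlot(3, 2, 1, 10, send_indx, destroy_indx) == 10);  // appended
      return 0;
    }
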
@@ ... @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
   // If (nprtcl_recv < nprtcl_send), have to move particles from end of arrays to fill
   // remaining holes
-  int nremain = nprtcl_send - nprtcl_recv;
+  nremain = nprtcl_send - nprtcl_recv;
   if (nremain > 0) {
-    int &npart = pmy_part->nprtcl_thispack;
     int i_last_hole = nprtcl_send-1;
     int i_next_hole = nprtcl_recv;
     for (int n=1; n<=nremain; ++n) {
       int nend = npart-n;
+      --nremain_d;
       if (nend > sendlist.h_view(i_last_hole).prtcl_indx) {
         // copy particle from end into hole
         int next_hole = sendlist.h_view(i_next_hole).prtcl_indx;
@@ -508,18 +673,46 @@ TaskStatus ParticlesBoundaryValues::RecvAndUnpackPrtcls() {
         i_last_hole -= 1;
       }
     }
+  }
+  // Update nparticles_thisrank. Update cost array (use npart_thismb[nmb]?)
+  MPI_Allgather(&new_npart,1,MPI_INT,(pmy_part->pmy_pack->pmesh->nprtcl_eachrank),1,
+                MPI_INT,MPI_COMM_WORLD);
+#endif
+  // If there are still holes due to particle destruction,
+  // destroy particles by moving the remaining particles into their places
+  if (nremain_d > 0) {
+    int i_last_hole = nprtcl_destroy-1;
+    // If the holes left by sending have been entirely covered by receives
+    // and there are more receives left, start from where you left off in the
+    // (nprtcl_recv>0) branch, otherwise simply destroy all particles in the list
+    int i_next_hole = nprtcl_recv-nprtcl_send < 0 ? 0 : nprtcl_recv-nprtcl_send;
+    for (int n=1; n<=nremain_d; ++n) {
+      // At this point nremain particles might already have been removed; check for that
+      int nend = nremain > 0 ? npart-nremain-n : npart - n;
+      if (nend > destroylist.h_view(i_last_hole).prtcl_indx) {
+        // copy particle from end into hole
+        int next_hole = destroylist.h_view(i_next_hole).prtcl_indx;
+        auto rdest = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, next_hole);
+        auto rsrc = Kokkos::subview(pmy_part->prtcl_rdata, Kokkos::ALL, nend);
+        Kokkos::deep_copy(rdest, rsrc);
+        auto idest = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, next_hole);
+        auto isrc = Kokkos::subview(pmy_part->prtcl_idata, Kokkos::ALL, nend);
+        Kokkos::deep_copy(idest, isrc);
+        i_next_hole += 1;
+      } else {
+        // this index contains a hole, so do nothing except find new index of last hole
+        i_last_hole -= 1;
+      }
+    }
+  }
+  if (nprtcl_send + nprtcl_destroy - nprtcl_recv > 0) {
     // shrink size of particle data arrays
     Kokkos::resize(pmy_part->prtcl_idata, pmy_part->nidata, new_npart);
     Kokkos::resize(pmy_part->prtcl_rdata, pmy_part->nrdata, new_npart);
   }
-
-  // Update nparticles_thisrank. Update cost array (use npart_thismb[nmb]?)
   pmy_part->nprtcl_thispack = new_npart;
   pmy_part->pmy_pack->pmesh->nprtcl_thisrank = new_npart;
-  MPI_Allgather(&new_npart,1,MPI_INT,(pmy_part->pmy_pack->pmesh->nprtcl_eachrank),1,
-                MPI_INT,MPI_COMM_WORLD);
-#endif
   return TaskStatus::complete;
 }
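
Aside: the destroy-compaction loop added in the last hunk reuses the two-pointer scheme of the existing send-compaction: i_next_hole walks the (sorted) holes from the front while i_last_hole retires end-of-array slots that are themselves holes, so no particle is ever copied into a slot that is about to be truncated. A standalone host sketch of that scheme on a toy array follows; names and data are illustrative, not from the patch.

    #include <cstdio>
    #include <vector>

    // Fill 'holes' (sorted ascending) by moving elements from the end of 'data',
    // skipping end slots that are themselves holes, then shrink the array.
    void FillHoles(std::vector<int> &data, const std::vector<int> &holes) {
      int nremain = static_cast<int>(holes.size());
      int npart = static_cast<int>(data.size());
      int i_last_hole = nremain - 1;
      int i_next_hole = 0;
      for (int n = 1; n <= nremain; ++n) {
        int nend = npart - n;                     // candidate element at the end
        if (nend > holes[i_last_hole]) {
          data[holes[i_next_hole]] = data[nend];  // move end element into hole
          i_next_hole += 1;
        } else {
          i_last_hole -= 1;                       // the end slot is itself a hole
        }
      }
      data.resize(npart - nremain);               // shrink, like Kokkos::resize above
    }

    int main() {
      std::vector<int> d = {0, 1, 2, 3, 4, 5, 6, 7};
      FillHoles(d, {1, 6});                       // "destroy" the entries at 1 and 6
      for (int v : d) { printf("%d ", v); }       // prints: 0 7 2 3 4 5
      printf("\n");
      return 0;
    }

With holes at indices 1 and 6 in an array of 8, the end element fills the hole at index 1 and slot 6 is dropped by the final shrink, matching the nremain / nremain_d bookkeeping in the patch.
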