From 07dde55244a740c96e7174046b2fdea34374649a Mon Sep 17 00:00:00 2001 From: Andrew Reisner Date: Tue, 30 Jan 2024 12:53:28 -0700 Subject: [PATCH 1/2] Use span for vectors. This uses a span instead of std::vector as the interface to vector values. --- raptor/core/comm_pkg.cpp | 20 +- raptor/core/comm_pkg.hpp | 298 ++++++++++---------- raptor/core/par_matrix.cpp | 93 +++--- raptor/core/par_matrix.hpp | 214 +++++++------- raptor/core/tests/test_par_block_matrix.cpp | 23 +- raptor/core/utilities.hpp | 123 +++++++- raptor/core/vector.cpp | 20 +- raptor/core/vector.hpp | 57 ++-- raptor/krylov/bicgstab.cpp | 2 +- 9 files changed, 490 insertions(+), 360 deletions(-) diff --git a/raptor/core/comm_pkg.cpp b/raptor/core/comm_pkg.cpp index 278acce7..5f27de8f 100644 --- a/raptor/core/comm_pkg.cpp +++ b/raptor/core/comm_pkg.cpp @@ -3,6 +3,7 @@ #include "comm_pkg.hpp" #include "par_matrix.hpp" +#include "utilities.hpp" namespace raptor { @@ -57,7 +58,7 @@ namespace raptor template<> void CommPkg::communicate_T(const double* values, - std::vector& result, + std::vector& result, const int block_size, std::function result_func, std::function init_result_func, @@ -68,7 +69,7 @@ namespace raptor } template<> void CommPkg::communicate_T(const double* values, - std::vector& result, + std::vector& result, const int block_size, std::function result_func, std::function init_result_func, @@ -79,7 +80,7 @@ namespace raptor } template<> void CommPkg::communicate_T(const int* values, - std::vector& result, + std::vector& result, const int block_size, std::function result_func, std::function init_result_func, @@ -90,10 +91,10 @@ namespace raptor } template<> void CommPkg::communicate_T(const int* values, - std::vector& result, + std::vector& result, const int block_size, std::function result_func, - std::function init_result_func, + std::function init_result_func, int init_result_func_val) { init_int_comm_T(values, block_size, init_result_func, init_result_func_val); @@ -136,7 +137,7 @@ namespace raptor } template<> - void CommPkg::complete_comm_T(std::vector& result, + void CommPkg::complete_comm_T(span result, const int block_size, std::function result_func, std::function init_result_func, @@ -145,7 +146,7 @@ namespace raptor complete_double_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); } template<> - void CommPkg::complete_comm_T(std::vector& result, + void CommPkg::complete_comm_T(span result, const int block_size, std::function result_func, std::function init_result_func, @@ -154,7 +155,7 @@ namespace raptor complete_double_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); } template<> - void CommPkg::complete_comm_T(std::vector& result, + void CommPkg::complete_comm_T(span result, const int block_size, std::function result_func, std::function init_result_func, @@ -163,7 +164,7 @@ namespace raptor complete_int_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); } template<> - void CommPkg::complete_comm_T(std::vector& result, + void CommPkg::complete_comm_T(span result, const int block_size, std::function result_func, std::function init_result_func, @@ -200,4 +201,3 @@ void CommPkg::init_comm(ParVector& v, const int block_size) { init_double_comm(v.local.data(), block_size); } - diff --git a/raptor/core/comm_pkg.hpp b/raptor/core/comm_pkg.hpp index a0dc1340..e7d75458 100644 --- a/raptor/core/comm_pkg.hpp +++ b/raptor/core/comm_pkg.hpp @@ -24,7 +24,7 @@ ***** Communicates values to processes, based on underlying ***** communication package ***** form_col_to_proc(...) - ***** Maps each column in off_proc_column_map to process + ***** Maps each column in off_proc_column_map to process ***** on which corresponding values are stored **************************************************************/ namespace raptor @@ -41,7 +41,7 @@ namespace raptor topology->num_shared++; num_shared = 0; } - + CommPkg(Topology* _topology) { topology = _topology; @@ -74,38 +74,38 @@ namespace raptor // Matrix Communication // TODO -- Block transpose communication // -- Should b_rows / b_cols be switched? - virtual CSRMatrix* communicate(const std::vector& rowptr, + virtual CSRMatrix* communicate(const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual CSRMatrix* communicate(const std::vector& rowptr, + virtual CSRMatrix* communicate(const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, + virtual void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, + virtual void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, + virtual CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; virtual CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, + const std::vector& col_indices, const std::vector& values, const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; virtual CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, + const std::vector& col_indices, const std::vector& values, const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, + virtual void init_mat_comm_T(std::vector& send_buffer, + const std::vector& rowptr, const std::vector& col_indices, + const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; virtual void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, + const std::vector& rowptr, const std::vector& col_indices, + const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual CSRMatrix* complete_mat_comm_T(const int n_result_rows, + virtual CSRMatrix* complete_mat_comm_T(const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; @@ -136,7 +136,7 @@ namespace raptor } CSRMatrix* communicate_T(CSRMatrix* A, const int has_vals = true) { - return communicate_T(A->idx1, A->idx2, get_vals(A), A->n_rows, A->b_rows, + return communicate_T(A->idx1, A->idx2, get_vals(A), A->n_rows, A->b_rows, A->b_cols, has_vals); } @@ -147,7 +147,7 @@ namespace raptor // Standard Communication template std::vector& communicate(const std::vector& values, const int block_size = 1) - { + { return communicate(values.data(), block_size); } template @@ -170,44 +170,44 @@ namespace raptor std::function result_func = &sum_func, std::function init_result_func = &sum_func, T init_result_func_val = 0) - { - communicate_T(values.data(), result, block_size, result_func, + { + communicate_T(values.data(), result, block_size, result_func, init_result_func, init_result_func_val); } template void communicate_T(const std::vector& values, - const int block_size = 1, + const int block_size = 1, std::function init_result_func = &sum_func, T init_result_func_val = 0) - { + { communicate_T(values.data(), block_size, init_result_func, init_result_func_val); } template void init_comm_T(const std::vector& values, - const int block_size = 1, + const int block_size = 1, std::function init_result_func = &sum_func, T init_result_func_val = 0) { init_comm_T(values.data(), block_size, init_result_func, init_result_func_val); } template void init_comm_T(const T* values, - const int block_size = 1, - std::function init_result_func = &sum_func, + const int block_size = 1, + std::function init_result_func = &sum_func, T init_result_func_val = 0); - template void complete_comm_T(std::vector& result, + template void complete_comm_T(span result, const int block_size = 1, std::function result_func = &sum_func, - std::function init_result_func = &sum_func, + std::function init_result_func = &sum_func, T init_result_func_val = 0); template void complete_comm_T( const int block_size = 1, std::function init_result_func = &sum_func, T init_result_func_val = 0); - template void communicate_T(const T* values, - std::vector& result, const int block_size = 1, + template void communicate_T(const T* values, + std::vector& result, const int block_size = 1, std::function result_func = &sum_func, - std::function init_result_func = &sum_func, + std::function init_result_func = &sum_func, T init_result_func_val = 0); template void communicate_T(const T* values, const int block_size = 1, @@ -215,35 +215,35 @@ namespace raptor T init_result_func_val = 0); virtual void init_double_comm_T(const double* values, const int block_size, - std::function init_result_func = - &sum_func, + std::function init_result_func = + &sum_func, double init_result_func_val = 0) = 0; virtual void init_int_comm_T(const int* values, const int block_size, std::function init_result_func = &sum_func, int init_result_func_val = 0) = 0; - virtual void complete_double_comm_T(std::vector& result, + virtual void complete_double_comm_T(span result, const int block_size, std::function result_func = &sum_func, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) = 0; - virtual void complete_double_comm_T(std::vector& result, + virtual void complete_double_comm_T(span result, const int block_size, std::function result_func = &sum_func, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) = 0; - virtual void complete_int_comm_T(std::vector& result, + virtual void complete_int_comm_T(span result, const int block_size, std::function result_func = &sum_func, std::function init_result_func = &sum_func, int init_result_func_val = 0) = 0; - virtual void complete_int_comm_T(std::vector& result, + virtual void complete_int_comm_T(span result, const int block_size, std::function result_func = &sum_func, std::function init_result_func = &sum_func, int init_result_func_val = 0) = 0; virtual void complete_double_comm_T(const int block_size, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) = 0; virtual void complete_int_comm_T(const int block_size, std::function init_result_func = &sum_func, @@ -265,21 +265,21 @@ namespace raptor /************************************************************** ***** ParComm Class ************************************************************** - ***** This class constructs a standard parallel communicator: + ***** This class constructs a standard parallel communicator: ***** which messages must be sent/recieved for matrix operations ***** ***** Attributes ***** ------------- ***** num_sends : index_t - ***** Number of messages this process must send during + ***** Number of messages this process must send during ***** matrix operations ***** num_recvs : index_t ***** Number of messages this process will recv during ***** matrix operations - ***** size_sends : index_t + ***** size_sends : index_t ***** Total number of elements this process sends in all ***** messages - ***** size_recvs : index_t + ***** size_recvs : index_t ***** Total number of elements this process recvs from ***** all messages ***** send_procs : std::vector @@ -287,7 +287,7 @@ namespace raptor ***** send_row_starts : std::vector ***** Pointer to first position in send_row_indices ***** that a given process will send. - ***** send_row_indices : std::vector + ***** send_row_indices : std::vector ***** The indices of values that must be sent to each ***** process in send_procs ***** recv_procs : std::vector @@ -313,7 +313,7 @@ namespace raptor ***** _key : int (optional) ***** Tag to be used in RAPtor_MPI Communication (default 0) **************************************************************/ - ParComm(Partition* partition, int _key = 0, + ParComm(Partition* partition, int _key = 0, RAPtor_MPI_Comm _comm = RAPtor_MPI_COMM_WORLD, CommData* r_data = NULL) : CommPkg(partition) { @@ -326,7 +326,7 @@ namespace raptor recv_data = new ContigData(); } - ParComm(Topology* topo, int _key = 0, + ParComm(Topology* topo, int _key = 0, RAPtor_MPI_Comm _comm = RAPtor_MPI_COMM_WORLD, CommData* r_data = NULL) : CommPkg(topo) { @@ -370,7 +370,7 @@ namespace raptor ParComm(Partition* partition, const std::vector& off_proc_column_map, const std::vector& on_proc_column_map, - int _key = 9999, + int _key = 9999, RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD, CommData* r_data = NULL) : CommPkg(partition) { @@ -386,7 +386,7 @@ namespace raptor { send_data->indices[i] -= partition->first_local_col; } - + if (partition->local_num_cols) { part_col_to_new.resize(partition->local_num_cols, -1); @@ -403,7 +403,7 @@ namespace raptor send_data->indices[i] = part_col_to_new[idx]; assert(part_col_to_new[idx] >= 0); } - + } ParComm(Topology* _topology, @@ -480,10 +480,10 @@ namespace raptor } // For each process I recv from, send the global column indices - // for which I must recv corresponding rows + // for which I must recv corresponding rows std::vector recv_sizes(num_procs, 0); for (int i = 0; i < recv_data->num_msgs; i++) - recv_sizes[recv_data->procs[i]] = + recv_sizes[recv_data->procs[i]] = recv_data->indptr[i+1] - recv_data->indptr[i]; RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, recv_sizes.data(), num_procs, RAPtor_MPI_INT, RAPtor_MPI_SUM, RAPtor_MPI_COMM_WORLD); @@ -516,9 +516,9 @@ namespace raptor init_off_proc_new(comm, off_proc_col_to_new); } - + ParComm(ParComm* comm, const std::vector& on_proc_col_to_new, - const std::vector& off_proc_col_to_new) + const std::vector& off_proc_col_to_new) : CommPkg(comm->topology) { mpi_comm = comm->mpi_comm; @@ -584,13 +584,13 @@ namespace raptor send_data->size_msgs = send_data->indices.size(); send_data->finalize(); - + } /************************************************************** ***** ParComm Class Destructor ************************************************************** - ***** + ***** **************************************************************/ ~ParComm() { @@ -654,39 +654,39 @@ namespace raptor // Transpose Communication void init_double_comm_T(const double* values, const int block_size = 1, - std::function init_result_func = - &sum_func, + std::function init_result_func = + &sum_func, double init_result_func_val = 0) { initialize_T(values, block_size, init_result_func, init_result_func_val); } void init_int_comm_T(const int* values, const int block_size = 1, - std::function init_result_func = - &sum_func, + std::function init_result_func = + &sum_func, int init_result_func_val = 0) { initialize_T(values, block_size, init_result_func, init_result_func_val); } - void complete_double_comm_T(std::vector& result, + void complete_double_comm_T(span result, const int block_size = 1, std::function result_func = &sum_func, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) { complete_T(result, block_size, result_func, init_result_func, init_result_func_val); } - void complete_double_comm_T(std::vector& result, + void complete_double_comm_T(span result, const int block_size = 1, std::function result_func = &sum_func, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) { complete_T(result, block_size, result_func, init_result_func, init_result_func_val); } - void complete_int_comm_T(std::vector& result, + void complete_int_comm_T(span result, const int block_size = 1, std::function result_func = &sum_func, std::function init_result_func = &sum_func, @@ -694,7 +694,7 @@ namespace raptor { complete_T(result, block_size, result_func, init_result_func, init_result_func_val); } - void complete_int_comm_T(std::vector& result, + void complete_int_comm_T(span result, const int block_size = 1, std::function result_func = &sum_func, std::function init_result_func = &sum_func, @@ -704,7 +704,7 @@ namespace raptor } void complete_double_comm_T(const int block_size = 1, std::function init_result_func = - &sum_func, + &sum_func, double init_result_func_val = 0) { complete_T(block_size, init_result_func, init_result_func_val); @@ -719,7 +719,7 @@ namespace raptor void communicate_T(const std::vector& values, std::vector& result, const int block_size = 1, std::function result_func = &sum_func, - std::function init_result_func = &sum_func, + std::function init_result_func = &sum_func, T init_result_func_val = 0) { CommPkg::communicate_T(values.data(), result, block_size, @@ -754,7 +754,7 @@ namespace raptor template void initialize_T(const T* values, const int block_size = 1, - std::function init_result_func = &sum_func, + std::function init_result_func = &sum_func, T init_result_func_val = 0) { if (profile) vec_t -= RAPtor_MPI_Wtime(); @@ -764,7 +764,7 @@ namespace raptor } template - void complete_T(std::vector& result, + void complete_T(span result, const int block_size = 1, std::function result_func = &sum_func, std::function init_result_func = &sum_func, @@ -802,8 +802,8 @@ namespace raptor // Conditional communication template std::vector& conditional_comm( - const std::vector& vals, - const std::vector& states, + const std::vector& vals, + const std::vector& states, const std::vector& off_proc_states, std::function compare_func, const int block_size = 1) @@ -811,10 +811,10 @@ namespace raptor int ctr, n_sends, n_recvs; int tag = 325493; bool comparison; - + if (profile) vec_t -= RAPtor_MPI_Wtime(); send_data->send(vals.data(), tag, mpi_comm, states, compare_func, &n_sends, block_size); - recv_data->recv(tag, mpi_comm, off_proc_states, + recv_data->recv(tag, mpi_comm, off_proc_states, compare_func, &ctr, &n_recvs, block_size); send_data->waitall(n_sends); @@ -856,11 +856,11 @@ namespace raptor } template - void conditional_comm_T(const std::vector& vals, - const std::vector& states, + void conditional_comm_T(const std::vector& vals, + const std::vector& states, const std::vector& off_proc_states, std::function compare_func, - std::vector& result, + std::vector& result, std::function result_func, const int block_size = 1) { @@ -873,7 +873,7 @@ namespace raptor recv_data->send(vals.data(), tag, mpi_comm, off_proc_states, compare_func, &n_sends, block_size); send_data->recv(tag, mpi_comm, states, compare_func, &ctr, &n_recvs, block_size); - + recv_data->waitall(n_sends); send_data->waitall(n_recvs); if (profile) vec_t += RAPtor_MPI_Wtime(); @@ -905,38 +905,38 @@ namespace raptor // Matrix Communication - CSRMatrix* communicate(const std::vector& rowptr, + CSRMatrix* communicate(const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate(const std::vector& rowptr, + CSRMatrix* communicate(const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, + void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, + void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, + CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, + CSRMatrix* communicate_T(const std::vector& rowptr, + const std::vector& col_indices, const std::vector& values, + const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, + CSRMatrix* communicate_T(const std::vector& rowptr, + const std::vector& col_indices, const std::vector& values, + const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, + void init_mat_comm_T(std::vector& send_buffer, + const std::vector& rowptr, const std::vector& col_indices, + const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) ; void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, + const std::vector& rowptr, const std::vector& col_indices, + const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) ; - CSRMatrix* complete_mat_comm_T(const int n_result_rows, + CSRMatrix* complete_mat_comm_T(const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) ; @@ -990,7 +990,7 @@ namespace raptor /************************************************************** ***** TAPComm Class ************************************************************** - ***** This class constructs a topology-aware parallel communicator: + ***** This class constructs a topology-aware parallel communicator: ***** which messages must be sent/recieved for matrix operations, ***** using topology-aware methods to limit the number and size ***** of inter-node messages @@ -1003,9 +1003,9 @@ namespace raptor ***** communication occurs. ***** local_R_par_comm : ParComm* ***** Parallel communication package for redistributing previously - ***** received values (from inter-node communication step) to + ***** received values (from inter-node communication step) to ***** processes local to rank which need said values - ***** local_L_par_comm : ParComm* + ***** local_L_par_comm : ParComm* ***** Parallel communication package for communicating values ***** that both originate and have a final destination on node ***** (fully intra-node communication) @@ -1053,7 +1053,7 @@ namespace raptor ************************************************************** ***** Initializes a TAPComm for a matrix without contiguous ***** row-wise partitions across processes. Instead, each - ***** process holds a random assortment of rows. + ***** process holds a random assortment of rows. ***** ***** Parameters ***** ------------- @@ -1064,7 +1064,7 @@ namespace raptor ***** local_num_cols : int ***** Number of columns local to rank **************************************************************/ - TAPComm(Partition* partition, + TAPComm(Partition* partition, const std::vector& off_proc_column_map, bool form_S = true, RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD) @@ -1129,7 +1129,7 @@ namespace raptor /************************************************************** ***** TAPComm Class Constructor ************************************************************** - ***** Create topology-aware communication class from + ***** Create topology-aware communication class from ***** original communication package (which processes rank ***** communication which, and what is sent to / recv from ***** each process. @@ -1160,14 +1160,14 @@ namespace raptor } } - TAPComm(TAPComm* tap_comm, const std::vector& off_proc_col_to_new, + TAPComm(TAPComm* tap_comm, const std::vector& off_proc_col_to_new, ParComm* local_L = NULL) : CommPkg(tap_comm->topology) { init_off_proc_new(tap_comm, off_proc_col_to_new, local_L); } TAPComm(TAPComm* tap_comm, const std::vector& on_proc_col_to_new, - const std::vector& off_proc_col_to_new, + const std::vector& off_proc_col_to_new, ParComm* local_L = NULL) : CommPkg(tap_comm->topology) { int idx; @@ -1222,9 +1222,9 @@ namespace raptor local_R_par_comm = new ParComm(tap_comm->local_R_par_comm, off_proc_col_to_new); // Create global par comm / update R send indices - std::vector& local_R_int_buffer = + std::vector& local_R_int_buffer = tap_comm->local_R_par_comm->send_data->get_buffer(); - std::vector& global_int_buffer = + std::vector& global_int_buffer = tap_comm->global_par_comm->send_data->get_buffer(); std::vector G_to_new(tap_comm->global_par_comm->recv_data->size_msgs, -1); @@ -1255,7 +1255,7 @@ namespace raptor if (*it != -1) *it = idx++; } - global_par_comm = new ParComm(tap_comm->global_par_comm, + global_par_comm = new ParComm(tap_comm->global_par_comm, local_R_int_buffer); @@ -1285,7 +1285,7 @@ namespace raptor *it = S_to_new[*it]; } idx = 0; - for (std::vector::iterator it = global_int_buffer.begin(); + for (std::vector::iterator it = global_int_buffer.begin(); it != global_int_buffer.end(); ++it) { if (*it != -1) *it = idx++; @@ -1296,7 +1296,7 @@ namespace raptor } else local_S_par_comm = NULL; - // Determine size of final recvs (should be equal to + // Determine size of final recvs (should be equal to // number of off_proc cols) recv_size = local_R_par_comm->recv_data->size_msgs + local_L_par_comm->recv_data->size_msgs; @@ -1305,13 +1305,13 @@ namespace raptor // Want a single recv buffer local_R and local_L par_comms buffer.resize(recv_size); int_buffer.resize(recv_size); - } + } } /************************************************************** ***** ParComm Class Destructor ************************************************************** - ***** + ***** **************************************************************/ ~TAPComm() { @@ -1335,7 +1335,7 @@ namespace raptor RAPtor_MPI_Comm_size(comm, &num_procs); // Initialize class variables - local_S_par_comm = new ParComm(partition, 2345, partition->topology->local_comm, + local_S_par_comm = new ParComm(partition, 2345, partition->topology->local_comm, new DuplicateData()); local_R_par_comm = new ParComm(partition, 3456, partition->topology->local_comm, new NonContigData()); @@ -1365,7 +1365,7 @@ namespace raptor off_node_column_map, off_node_col_to_node, off_node_to_off_proc); // Gather all nodes with which any local process must communication - form_local_R_par_comm(off_node_column_map, off_node_col_to_node, + form_local_R_par_comm(off_node_column_map, off_node_col_to_node, orig_procs); // Find global processes with which rank communications @@ -1375,7 +1375,7 @@ namespace raptor // processes, before inter-node communication form_local_S_par_comm(orig_procs); - // Adjust send indices (currently global vector indices) to be index + // Adjust send indices (currently global vector indices) to be index // of global vector value from previous recv adjust_send_indices(partition->first_local_col); @@ -1384,7 +1384,7 @@ namespace raptor form_local_L_par_comm(on_node_column_map, on_node_col_to_proc, partition->first_local_col); - // Determine size of final recvs (should be equal to + // Determine size of final recvs (should be equal to // number of off_proc cols) update_recv(on_node_to_off_proc, off_node_to_off_proc); } @@ -1400,7 +1400,7 @@ namespace raptor // Initialize class variables local_S_par_comm = NULL; - local_R_par_comm = new ParComm(partition, 3456, partition->topology->local_comm, + local_R_par_comm = new ParComm(partition, 3456, partition->topology->local_comm, new NonContigData()); local_L_par_comm = new ParComm(partition, 4567, partition->topology->local_comm, new NonContigData()); @@ -1444,7 +1444,7 @@ namespace raptor form_local_L_par_comm(on_node_column_map, on_node_col_to_proc, partition->first_local_col); - // Determine size of final recvs (should be equal to + // Determine size of final recvs (should be equal to // number of off_proc cols) update_recv(on_node_to_off_proc, off_node_to_off_proc); @@ -1491,9 +1491,9 @@ namespace raptor { return complete(block_size); } - + template - std::vector& communicate(const std::vector& values, + std::vector& communicate(const std::vector& values, const int block_size = 1) { return CommPkg::communicate(values.data(), block_size); @@ -1516,7 +1516,7 @@ namespace raptor // Initial redistribution among node std::vector& S_vals = local_S_par_comm->communicate(values, block_size); - // Begin inter-node communication + // Begin inter-node communication global_par_comm->initialize(S_vals.data(), block_size); } else @@ -1542,7 +1542,7 @@ namespace raptor if ((int)recvbuf.size() < recv_size * block_size) recvbuf.resize(recv_size * block_size); - // Add values from L_recv and R_recv to appropriate positions in + // Add values from L_recv and R_recv to appropriate positions in // Vector recv int idx, pos; int R_recv_size = local_R_par_comm->recv_data->size_msgs; @@ -1576,7 +1576,7 @@ namespace raptor // Transpose Communication void init_double_comm_T(const double* values, const int block_size, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) { @@ -1589,25 +1589,25 @@ namespace raptor { initialize_T(values, block_size, init_result_func, init_result_func_val); } - void complete_double_comm_T(std::vector& result, + void complete_double_comm_T(span result, const int block_size, std::function result_func = &sum_func, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) { complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_double_comm_T(std::vector& result, + } + void complete_double_comm_T(span result, const int block_size, std::function result_func = &sum_func, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) { complete_T(result, block_size, result_func, init_result_func, init_result_func_val); } - void complete_int_comm_T(std::vector& result, + void complete_int_comm_T(span result, const int block_size, std::function result_func = &sum_func, std::function init_result_func = &sum_func, @@ -1615,7 +1615,7 @@ namespace raptor { complete_T(result, block_size, result_func, init_result_func, init_result_func_val); } - void complete_int_comm_T(std::vector& result, + void complete_int_comm_T(span result, const int block_size, std::function result_func = &sum_func, std::function init_result_func = &sum_func, @@ -1625,14 +1625,14 @@ namespace raptor } void complete_double_comm_T(const int block_size, - std::function init_result_func = + std::function init_result_func = &sum_func, double init_result_func_val = 0) { complete_T(block_size, init_result_func, init_result_func_val); } void complete_int_comm_T(const int block_size, - std::function init_result_func = + std::function init_result_func = &sum_func, int init_result_func_val = 0) { @@ -1686,13 +1686,13 @@ namespace raptor // Initial redistribution among node local_R_par_comm->communicate_T(values, block_size, init_result_func, init_result_func_val); - // Begin inter-node communication + // Begin inter-node communication std::vector& R_sendbuf = local_R_par_comm->send_data->get_buffer(); global_par_comm->init_comm_T(R_sendbuf, block_size, init_result_func, init_result_func_val); } template - void complete_T(std::vector& result, const int block_size = 1, + void complete_T(span result, const int block_size = 1, std::function result_func = &sum_func, std::function init_result_func = &sum_func, T init_result_func_val = 0) @@ -1745,7 +1745,7 @@ namespace raptor { // Complete inter-node communication global_par_comm->complete_comm_T(block_size, init_result_func, init_result_func_val); - + if (local_S_par_comm) { std::vector& G_sendbuf = global_par_comm->send_data->get_buffer(); @@ -1756,38 +1756,38 @@ namespace raptor // Matrix Communication - CSRMatrix* communicate(const std::vector& rowptr, + CSRMatrix* communicate(const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate(const std::vector& rowptr, + CSRMatrix* communicate(const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, + void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, + void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, const std::vector& col_indices, const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, + CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, + CSRMatrix* communicate_T(const std::vector& rowptr, + const std::vector& col_indices, const std::vector& values, + const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, + CSRMatrix* communicate_T(const std::vector& rowptr, + const std::vector& col_indices, const std::vector& values, + const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, + void init_mat_comm_T(std::vector& send_buffer, + const std::vector& rowptr, const std::vector& col_indices, + const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) ; void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, + const std::vector& rowptr, const std::vector& col_indices, + const std::vector& values, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) ; - CSRMatrix* complete_mat_comm_T(const int n_result_rows, + CSRMatrix* complete_mat_comm_T(const int n_result_rows, const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); @@ -1808,7 +1808,7 @@ namespace raptor return CommPkg::communicate_T(A, has_vals); } - // Vector Communication + // Vector Communication std::vector& communicate(ParVector& v, const int block_size = 1) { diff --git a/raptor/core/par_matrix.cpp b/raptor/core/par_matrix.cpp index a7203243..edb1e611 100644 --- a/raptor/core/par_matrix.cpp +++ b/raptor/core/par_matrix.cpp @@ -11,29 +11,29 @@ void bsr_to_csr_copy_helper(ParBSRMatrix* A, ParCSRMatrix* B); ***** ParMatrix Add Value ************************************************************** ***** Adds a value to the local portion of the parallel matrix, -***** determining whether it should be added to diagonal or -***** off-diagonal block. +***** determining whether it should be added to diagonal or +***** off-diagonal block. ***** ***** Parameters ***** ------------- ***** row : index_t ***** Local row of value -***** global_col : index_t +***** global_col : index_t ***** Global column of value ***** value : data_t ***** Value to be added to parallel matrix -**************************************************************/ +**************************************************************/ void ParMatrix::add_value( - int row, - index_t global_col, + int row, + index_t global_col, data_t value) { - if (global_col >= partition->first_local_col + if (global_col >= partition->first_local_col && global_col <= partition->last_local_col) { on_proc->add_value(row, global_col - partition->first_local_col, value); } - else + else { off_proc->add_value(row, global_col, value); } @@ -43,21 +43,21 @@ void ParMatrix::add_value( ***** ParMatrix Add Global Value ************************************************************** ***** Adds a value to the local portion of the parallel matrix, -***** determining whether it should be added to diagonal or -***** off-diagonal block. +***** determining whether it should be added to diagonal or +***** off-diagonal block. ***** ***** Parameters ***** ------------- ***** global_row : index_t ***** Global row of value -***** global_col : index_t +***** global_col : index_t ***** Global column of value ***** value : data_t ***** Value to be added to parallel matrix -**************************************************************/ +**************************************************************/ void ParMatrix::add_global_value( - index_t global_row, - index_t global_col, + index_t global_row, + index_t global_col, data_t value) { add_value(global_row - partition->first_local_row, global_col, value); @@ -73,7 +73,7 @@ void ParMatrix::add_global_value( ***** Parameters ***** ------------- ***** create_comm : bool (optional) -***** Boolean for whether parallel communicator should be +***** Boolean for whether parallel communicator should be ***** created (default is true) **************************************************************/ void ParMatrix::condense_off_proc() @@ -92,7 +92,7 @@ void ParMatrix::condense_off_proc() std::sort(off_proc_column_map.begin(), off_proc_column_map.end()); off_proc_num_cols = 0; - for (std::vector::iterator it = off_proc_column_map.begin(); + for (std::vector::iterator it = off_proc_column_map.begin(); it != off_proc_column_map.end(); ++it) { if (*it != prev_col) @@ -180,7 +180,7 @@ int* ParMatrix::map_partition_to_local() void bsr_to_csr_copy_helper(ParBSRMatrix* A, ParCSRMatrix* B) { if (B->on_proc) - { + { delete B->on_proc; } if (B->off_proc) @@ -198,7 +198,7 @@ void bsr_to_csr_copy_helper(ParBSRMatrix* A, ParCSRMatrix* B) B->on_proc_num_cols = B->on_proc->n_cols; B->off_proc_num_cols = B->off_proc->n_cols; - + // Updated partition B->partition = new Partition(B->global_num_rows, B->global_num_cols, B->on_proc->n_rows, B->on_proc->n_cols, @@ -206,9 +206,9 @@ void bsr_to_csr_copy_helper(ParBSRMatrix* A, ParCSRMatrix* B) A->partition->first_local_col * A->on_proc->b_cols); B->local_num_rows = B->partition->local_num_rows; - // Updated column and row maps - + // Updated column and row maps - B->finalize(false); - + // Determine which cols of blocks are non-zero bool* off_proc_nz_cols = new bool[A->off_proc_num_cols * A->off_proc->b_cols]; A->off_proc->block_removal_col_check(off_proc_nz_cols); @@ -223,10 +223,10 @@ void bsr_to_csr_copy_helper(ParBSRMatrix* A, ParCSRMatrix* B) { if (off_proc_nz_cols[i*A->off_proc->b_cols + j]) { - B->off_proc_column_map[off_proc_map_indx] = first_col + j; + B->off_proc_column_map[off_proc_map_indx] = first_col + j; off_proc_map_indx++; } - } + } } // Updated how communicators are created @@ -348,11 +348,11 @@ ParCOOMatrix* ParBSRMatrix::to_ParBCOO() } ParCSRMatrix* ParCSRMatrix::to_ParCSR() { - return this; + return this; } ParCSRMatrix* ParCSRMatrix::to_ParBSR() { - return this->to_ParCSR(); + return this->to_ParCSR(); } ParCSRMatrix* ParBSRMatrix::to_ParCSR() { @@ -454,7 +454,7 @@ void ParCSRMatrix::copy_structure(ParBSRMatrix* A) std::back_inserter(on_proc->idx1)); std::copy(A->on_proc->idx2.begin(), A->on_proc->idx2.end(), std::back_inserter(on_proc->idx2)); - + std::copy(A->off_proc->idx1.begin(), A->off_proc->idx1.end(), std::back_inserter(off_proc->idx1)); std::copy(A->off_proc->idx2.begin(), A->off_proc->idx2.end(), @@ -540,7 +540,7 @@ void ParMatrix::copy_helper(ParCSCMatrix* A) void ParCOOMatrix::copy_helper(ParCOOMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -557,7 +557,7 @@ void ParCOOMatrix::copy_helper(ParCOOMatrix* A) void ParCOOMatrix::copy_helper(ParCSRMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -591,7 +591,7 @@ void ParCOOMatrix::copy_helper(ParCSCMatrix* A) void ParCSRMatrix::copy_helper(ParCSRMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -608,7 +608,7 @@ void ParCSRMatrix::copy_helper(ParCSRMatrix* A) void ParCSRMatrix::copy_helper(ParCSCMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -625,7 +625,7 @@ void ParCSRMatrix::copy_helper(ParCSCMatrix* A) void ParCSRMatrix::copy_helper(ParCOOMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -642,7 +642,7 @@ void ParCSRMatrix::copy_helper(ParCOOMatrix* A) void ParCSCMatrix::copy_helper(ParCSRMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -659,7 +659,7 @@ void ParCSCMatrix::copy_helper(ParCSRMatrix* A) void ParCSCMatrix::copy_helper(ParCSCMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -676,7 +676,7 @@ void ParCSCMatrix::copy_helper(ParCSCMatrix* A) void ParCSCMatrix::copy_helper(ParCOOMatrix* A) { if (on_proc) - { + { delete on_proc; } if (off_proc) @@ -793,7 +793,7 @@ ParCSRMatrix* ParCSRMatrix::transpose() RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &col_size, 1, RAPtor_MPI_INT, comm->mpi_comm); recv_mat->idx2.resize(recv_mat->nnz + col_size); recv_mat->vals.resize(recv_mat->nnz + col_size); - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &(recv_mat->idx2[recv_mat->nnz]), col_size, + RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &(recv_mat->idx2[recv_mat->nnz]), col_size, RAPtor_MPI_INT, comm->mpi_comm); RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &(recv_mat->vals[recv_mat->nnz]), col_size, RAPtor_MPI_DOUBLE, comm->mpi_comm); @@ -901,7 +901,7 @@ ParBSRMatrix* ParCSRMatrix::to_ParBSR(const int block_row_size, const int block_ } else { - prev_col = -1; + prev_col = -1; for (std::vector::iterator it = on_proc_column_map.begin(); it != on_proc_column_map.end(); ++it) { @@ -998,7 +998,7 @@ ParBSRMatrix* ParCSRMatrix::to_ParBSR(const int block_row_size, const int block_ void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) { /********************************* - * Initialize + * Initialize * *******************************/ // Get RAPtor_MPI Information int rank, num_procs; @@ -1006,7 +1006,7 @@ void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) RAPtor_MPI_Comm_size(mpi_comm, &num_procs); // Initialize standard tap_comm - tap_comm = new TAPComm(partition, true); + tap_comm = new TAPComm(partition, true); // Initialize Variables std::vector off_proc_col_to_proc; @@ -1031,8 +1031,8 @@ void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) } /********************************* - * Split columns by processes, - * on-node, and off-node + * Split columns by processes, + * on-node, and off-node * *******************************/ // Find process on which vector value associated with each column is // stored @@ -1055,11 +1055,11 @@ void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) /********************************* - * Form standard 3-step - * node-aware communicator + * Form standard 3-step + * node-aware communicator * *******************************/ // Gather all nodes with which any local process must communication - tap_comm->form_local_R_par_comm(off_node_column_map, off_node_col_to_proc, + tap_comm->form_local_R_par_comm(off_node_column_map, off_node_col_to_proc, orig_procs); // Find global processes with which rank communications @@ -1069,7 +1069,7 @@ void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) // processes, before inter-node communication tap_comm->form_local_S_par_comm(orig_procs); - // Adjust send indices (currently global vector indices) to be index + // Adjust send indices (currently global vector indices) to be index // of global vector value from previous recv tap_comm->adjust_send_indices(partition->first_local_col); @@ -1083,8 +1083,8 @@ void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) /********************************* - * Form simple 2-step - * node-aware communicator + * Form simple 2-step + * node-aware communicator * *******************************/ // Create simple (2-step) TAPComm for matrix communication // Copy local_L_par_comm from 3-step tap_comm @@ -1107,11 +1107,10 @@ void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) tap_mat_comm->update_recv(on_node_to_off_proc, off_node_to_off_proc, false); - for (std::vector::iterator it = + for (std::vector::iterator it = tap_mat_comm->global_par_comm->send_data->indices.begin(); it != tap_mat_comm->global_par_comm->send_data->indices.end(); ++it) { *it = on_proc_to_new[*it]; } } - diff --git a/raptor/core/par_matrix.hpp b/raptor/core/par_matrix.hpp index 3e7c21c4..20326145 100644 --- a/raptor/core/par_matrix.hpp +++ b/raptor/core/par_matrix.hpp @@ -45,23 +45,23 @@ ***** Maps local columns of offd Matrix to global ***** comm : ParComm* ***** Parallel communicator for matrix - ***** + ***** ***** Methods ***** ------- ***** initialize_partition() - ***** Determines which rows are local to process and which - ***** columns fall in local block + ***** Determines which rows are local to process and which + ***** columns fall in local block ***** add_value() - ***** Adds a value to a given local row and global column. - ***** Determines if this value is in the diagonal or + ***** Adds a value to a given local row and global column. + ***** Determines if this value is in the diagonal or ***** off-diagonal block. ***** add_global_value() - ***** Adds a value to a given global row and global column. - ***** Determines if this value is in the diagonal or + ***** Adds a value to a given global row and global column. + ***** Determines if this value is in the diagonal or ***** off-diagonal block. ***** finalize() - ***** Finalizes a matrix after values have been added. - ***** Converts the matrices to the appropriate formats and + ***** Finalizes a matrix after values have been added. + ***** Converts the matrices to the appropriate formats and ***** creates the parallel communicator. **************************************************************/ namespace raptor @@ -95,7 +95,7 @@ namespace raptor off_proc = NULL; } - ParMatrix(Partition* part, index_t glob_rows, index_t glob_cols, int local_rows, + ParMatrix(Partition* part, index_t glob_rows, index_t glob_cols, int local_rows, int on_proc_cols) { partition = part; @@ -129,12 +129,12 @@ namespace raptor off_proc = NULL; } - ParMatrix(index_t glob_rows, - index_t glob_cols, - int local_rows, - int local_cols, - index_t first_row, - index_t first_col, + ParMatrix(index_t glob_rows, + index_t glob_cols, + int local_rows, + int local_cols, + index_t first_row, + index_t first_col, Topology* topology = NULL) { partition = new Partition(glob_rows, glob_cols, @@ -151,7 +151,7 @@ namespace raptor on_proc = NULL; off_proc = NULL; } - + ParMatrix() { local_num_rows = 0; @@ -196,14 +196,14 @@ namespace raptor ***** ParMatrix Add Value ************************************************************** ***** Adds a value to the local portion of the parallel matrix, - ***** determining whether it should be added to diagonal or - ***** off-diagonal block. + ***** determining whether it should be added to diagonal or + ***** off-diagonal block. ***** ***** Parameters ***** ------------- ***** local_row : index_t - ***** Local row of value - ***** global_col : index_t + ***** Local row of value + ***** global_col : index_t ***** Global column of value ***** value : data_t ***** Value to be added to parallel matrix @@ -214,14 +214,14 @@ namespace raptor ***** ParMatrix Add Global Value ************************************************************** ***** Adds a value to the local portion of the parallel matrix, - ***** determining whether it should be added to diagonal or - ***** off-diagonal block. + ***** determining whether it should be added to diagonal or + ***** off-diagonal block. ***** ***** Parameters ***** ------------- ***** global_row : index_t - ***** Global row of value - ***** global_col : index_t + ***** Global row of value + ***** global_col : index_t ***** Global column of value ***** value : data_t ***** Value to be added to parallel matrix @@ -261,15 +261,15 @@ namespace raptor void update_tap_comm(ParMatrix* old, const std::vector& old_to_new) { tap_comm = new TAPComm((TAPComm*) old->tap_comm, old_to_new, NULL); - tap_mat_comm = new TAPComm((TAPComm*) old->tap_mat_comm, old_to_new, + tap_mat_comm = new TAPComm((TAPComm*) old->tap_mat_comm, old_to_new, tap_comm->local_L_par_comm); } void update_tap_comm(ParMatrix* old, const std::vector& on_old_to_new, const std::vector& off_old_to_new) { - tap_comm = new TAPComm((TAPComm*) old->tap_comm, on_old_to_new, off_old_to_new, + tap_comm = new TAPComm((TAPComm*) old->tap_comm, on_old_to_new, off_old_to_new, NULL); - tap_mat_comm = new TAPComm((TAPComm*) old->tap_mat_comm, on_old_to_new, + tap_mat_comm = new TAPComm((TAPComm*) old->tap_mat_comm, on_old_to_new, off_old_to_new, tap_comm->local_L_par_comm); } @@ -318,22 +318,22 @@ namespace raptor int off_proc_num_cols; int on_proc_num_cols; - // Store two matrices: on_proc containing columns + // Store two matrices: on_proc containing columns // corresponding to vector values stored on_process // and off_proc columns correspond to vector values // stored off process (on other processes) - Matrix* on_proc; + Matrix* on_proc; Matrix* off_proc; // Store information about columns of off_proc - // It will be condensed to only store columns with - // nonzeros, and these must be mapped to + // It will be condensed to only store columns with + // nonzeros, and these must be mapped to // global column indices std::vector off_proc_column_map; // Maps off_proc local to global std::vector on_proc_column_map; // Maps on_proc local to global std::vector local_row_map; // Maps local rows to global - // Parallel communication package indicating which + // Parallel communication package indicating which // processes hold vector values associated with off_proc, // and which processes need vector values from this proc Partition* partition; @@ -354,43 +354,43 @@ namespace raptor } } - ParCOOMatrix(index_t glob_rows, + ParCOOMatrix(index_t glob_rows, index_t glob_cols, - int nnz_per_row = 5, bool form_mat = true) + int nnz_per_row = 5, bool form_mat = true) : ParMatrix(glob_rows, glob_cols) { if (form_mat) { - on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, nnz_per_row); - off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, nnz_per_row); } } - ParCOOMatrix(index_t glob_rows, index_t glob_cols, int local_rows, + ParCOOMatrix(index_t glob_rows, index_t glob_cols, int local_rows, int local_cols, index_t first_row, index_t first_col, - int nnz_per_row = 5, bool form_mat = true) + int nnz_per_row = 5, bool form_mat = true) : ParMatrix(glob_rows, glob_cols, local_rows, local_cols, first_row, first_col) { if (form_mat) { - on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, nnz_per_row); - off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, nnz_per_row); } } - - ParCOOMatrix(Partition* part, + + ParCOOMatrix(Partition* part, int nnz_per_row = 5, bool form_mat = true) : ParMatrix(part) { if (form_mat) { - on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, nnz_per_row); - off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, nnz_per_row); } } @@ -434,32 +434,32 @@ namespace raptor int block_row_size, int block_col_size, int nnz_per_row) : ParCOOMatrix(global_block_rows, global_block_cols, nnz_per_row, false) { - on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz_per_row); - off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz_per_row); } ParBCOOMatrix(int global_block_rows, int global_block_cols, - int local_block_rows, int local_block_cols, + int local_block_rows, int local_block_cols, int first_block_row, int first_block_col, - int block_row_size, int block_col_size, int nnz_per_row = 5) + int block_row_size, int block_col_size, int nnz_per_row = 5) : ParCOOMatrix(global_block_rows, global_block_cols, - local_block_rows, local_block_cols, first_block_row, + local_block_rows, local_block_cols, first_block_row, first_block_col, nnz_per_row, false) { - on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz_per_row); - off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz_per_row); } ParBCOOMatrix(Partition* part, int block_row_size, int block_col_size, int nnz_per_row = 5) : ParCOOMatrix(part, nnz_per_row, false) { - on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz_per_row); - off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz_per_row); } @@ -490,40 +490,40 @@ namespace raptor } } - ParCSRMatrix(index_t glob_rows, index_t glob_cols, int nnz = 0, + ParCSRMatrix(index_t glob_rows, index_t glob_cols, int nnz = 0, bool form_mat = true) : ParMatrix(glob_rows, glob_cols) { if (form_mat) { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, nnz); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, nnz); } } - ParCSRMatrix(index_t glob_rows, index_t glob_cols, int local_rows, - int local_cols, index_t first_row, index_t first_col, Topology* topology = NULL, + ParCSRMatrix(index_t glob_rows, index_t glob_cols, int local_rows, + int local_cols, index_t first_row, index_t first_col, Topology* topology = NULL, int nnz = 0, bool form_mat = true) : ParMatrix(glob_rows, glob_cols, local_rows, local_cols, first_row, first_col, topology) { if (form_mat) { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, nnz); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, nnz); } } - ParCSRMatrix(Partition* part, + ParCSRMatrix(Partition* part, int nnz = 0, bool form_mat = true) : ParMatrix(part) { if (form_mat) { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, nnz); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, nnz); } } @@ -539,9 +539,9 @@ namespace raptor } - ParCSRMatrix(Partition* part, index_t glob_rows, index_t glob_cols, + ParCSRMatrix(Partition* part, index_t glob_rows, index_t glob_cols, int local_rows, int on_proc_cols, int off_proc_cols, int nnz = 0, - bool form_mat = true) : ParMatrix(part, glob_rows, glob_cols, + bool form_mat = true) : ParMatrix(part, glob_rows, glob_cols, local_rows, on_proc_cols) { off_proc_num_cols = off_proc_cols; @@ -574,10 +574,10 @@ namespace raptor void copy_helper(ParCSCMatrix* A); void copy_helper(ParCOOMatrix* A); - ParCSRMatrix* strength(strength_t strength_type, double theta = 0.0, + ParCSRMatrix* strength(strength_t strength_type, double theta = 0.0, bool tap_amg = false, int num_variables = 1, int* variables = NULL); ParCSRMatrix* aggregate(); - ParCSRMatrix* fit_candidates(double* B, double* R, int num_candidates, + ParCSRMatrix* fit_candidates(double* B, double* R, int num_candidates, double tol = 1e-10); int maximal_independent_set(std::vector& local_states, std::vector& off_proc_states, int max_iters = -1); @@ -599,14 +599,14 @@ namespace raptor void print_mult_T(ParCSCMatrix* A); void print_mult(); void print_mult_T(); - + void mult_helper(ParCSRMatrix* B, ParCSRMatrix* C, CSRMatrix* recv, CSRMatrix* C_on_on, CSRMatrix* C_on_off); CSRMatrix* mult_T_partial(ParCSCMatrix* A); CSRMatrix* mult_T_partial(CSCMatrix* A_off); void mult_T_combine(ParCSCMatrix* A, ParCSRMatrix* C, CSRMatrix* recv_mat, CSRMatrix* C_on_on, CSRMatrix* C_off_on); - + ParCSRMatrix* transpose(); }; @@ -621,41 +621,41 @@ namespace raptor ParBSRMatrix(int global_block_rows, int global_block_cols, int block_row_size, int block_col_size, - int nnz = 0) + int nnz = 0) : ParCSRMatrix(global_block_rows, global_block_cols, nnz, false) { - on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz); } - ParBSRMatrix(int global_block_rows, int global_block_cols, - int local_block_rows, int local_block_cols, + ParBSRMatrix(int global_block_rows, int global_block_cols, + int local_block_rows, int local_block_cols, int first_block_row, int first_block_col, int block_row_size, int block_col_size, Topology* topology = NULL, int nnz = 0) : ParCSRMatrix(global_block_rows, global_block_cols, - local_block_rows, local_block_cols, + local_block_rows, local_block_cols, first_block_row, first_block_col, topology, nnz, false) { - on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz); } ParBSRMatrix(Partition* part, int block_row_size, int block_col_size, int nnz = 0) : ParCSRMatrix(part, nnz, false) { - on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz); } - ParBSRMatrix(Partition* part, BSRMatrix* _on_proc, BSRMatrix* _off_proc) + ParBSRMatrix(Partition* part, BSRMatrix* _on_proc, BSRMatrix* _off_proc) : ParCSRMatrix(part) { on_proc = _on_proc; @@ -667,16 +667,16 @@ namespace raptor } ParBSRMatrix(Partition* part, int global_block_rows, int global_block_cols, - int local_block_rows, int on_proc_block_cols, int off_proc_block_cols, + int local_block_rows, int on_proc_block_cols, int off_proc_block_cols, int block_row_size, int block_col_size, int nnz = 0) : ParCSRMatrix(part, global_block_rows, global_block_cols, - local_block_rows, on_proc_block_cols, off_proc_block_cols, + local_block_rows, on_proc_block_cols, off_proc_block_cols, nnz, false) { off_proc_num_cols = off_proc_block_cols; - on_proc = new BSRMatrix(local_block_rows, on_proc_block_cols, + on_proc = new BSRMatrix(local_block_rows, on_proc_block_cols, block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(local_block_rows, off_proc_num_cols, + off_proc = new BSRMatrix(local_block_rows, off_proc_num_cols, block_row_size, block_col_size, nnz); } @@ -710,35 +710,35 @@ namespace raptor } } - ParCSCMatrix(index_t glob_rows, index_t glob_cols, int nnz_per_row = 5, + ParCSCMatrix(index_t glob_rows, index_t glob_cols, int nnz_per_row = 5, bool form_mat = true) : ParMatrix(glob_rows, glob_cols) { if (form_mat) { - on_proc = new CSCMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new CSCMatrix(partition->local_num_rows, partition->local_num_cols, nnz_per_row); - off_proc = new CSCMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new CSCMatrix(partition->local_num_rows, partition->global_num_cols, nnz_per_row); } } - ParCSCMatrix(index_t glob_rows, index_t glob_cols, int local_n_rows, - int local_n_cols, index_t first_row, index_t first_col, - int nnz_per_row = 5, bool form_mat = true) - : ParMatrix(glob_rows, glob_cols, local_n_rows, local_n_cols, + ParCSCMatrix(index_t glob_rows, index_t glob_cols, int local_n_rows, + int local_n_cols, index_t first_row, index_t first_col, + int nnz_per_row = 5, bool form_mat = true) + : ParMatrix(glob_rows, glob_cols, local_n_rows, local_n_cols, first_row, first_col) { if (form_mat) { - on_proc = new CSCMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new CSCMatrix(partition->local_num_rows, partition->local_num_cols, nnz_per_row); - off_proc = new CSCMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new CSCMatrix(partition->local_num_rows, partition->global_num_cols, nnz_per_row); } } ParCSCMatrix(Partition* part, index_t glob_rows, index_t glob_cols, int local_rows, - int on_proc_cols, int off_proc_cols, int nnz_per_row = 5, bool form_mat = true) + int on_proc_cols, int off_proc_cols, int nnz_per_row = 5, bool form_mat = true) : ParMatrix(part, glob_rows, glob_cols, local_rows, on_proc_cols) { off_proc_num_cols = off_proc_cols; @@ -750,14 +750,14 @@ namespace raptor } - ParCSCMatrix(Partition* part, int nnz_per_row = 5, bool form_mat = true) + ParCSCMatrix(Partition* part, int nnz_per_row = 5, bool form_mat = true) : ParMatrix(part) { if (form_mat) { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, nnz_per_row); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, nnz_per_row); } } @@ -800,34 +800,34 @@ class ParBSCMatrix : public ParCSCMatrix ParBSCMatrix(int global_block_rows, int global_block_cols, int block_row_size, int block_col_size, - int nnz = 0) + int nnz = 0) : ParCSCMatrix(global_block_rows, global_block_cols, nnz, false) { - on_proc = new BSCMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BSCMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz); - off_proc = new BSCMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BSCMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz); } ParBSCMatrix(Partition* part, int block_row_size, int block_col_size, int nnz = 0) : ParCSCMatrix(part, nnz, false) { - on_proc = new BSCMatrix(partition->local_num_rows, partition->local_num_cols, + on_proc = new BSCMatrix(partition->local_num_rows, partition->local_num_cols, block_row_size, block_col_size, nnz); - off_proc = new BSCMatrix(partition->local_num_rows, partition->global_num_cols, + off_proc = new BSCMatrix(partition->local_num_rows, partition->global_num_cols, block_row_size, block_col_size, nnz); } ParBSCMatrix(Partition* part, int global_block_rows, int global_block_cols, - int local_block_rows, int on_proc_block_cols, int off_proc_block_cols, + int local_block_rows, int on_proc_block_cols, int off_proc_block_cols, int block_row_size, int block_col_size, int nnz = 0) - : ParCSCMatrix(part, global_block_rows, global_block_cols, local_block_rows, + : ParCSCMatrix(part, global_block_rows, global_block_cols, local_block_rows, on_proc_block_cols, off_proc_block_cols, nnz, false) { off_proc_num_cols = off_proc_block_cols; - on_proc = new BSCMatrix(local_num_rows, on_proc_block_cols, + on_proc = new BSCMatrix(local_num_rows, on_proc_block_cols, block_row_size, block_col_size, nnz); - off_proc = new BSCMatrix(local_num_rows, off_proc_num_cols, + off_proc = new BSCMatrix(local_num_rows, off_proc_num_cols, block_row_size, block_col_size, nnz); } diff --git a/raptor/core/tests/test_par_block_matrix.cpp b/raptor/core/tests/test_par_block_matrix.cpp index 31536fea..6668ae79 100644 --- a/raptor/core/tests/test_par_block_matrix.cpp +++ b/raptor/core/tests/test_par_block_matrix.cpp @@ -57,7 +57,7 @@ TEST(ParBlockMatrixTest, TestsInCore) double* stencil = diffusion_stencil_2d(eps, theta); ParCSRMatrix* A = par_stencil_grid(stencil, grid.data(), 2); ParBSRMatrix* A_bsr = A->to_ParBSR(block_n, block_n); - + ParVector x(A->global_num_rows, A->local_num_rows); ParVector b(A->global_num_rows, A->local_num_rows); ParVector tmp(A->global_num_rows, A->local_num_rows); @@ -78,10 +78,10 @@ TEST(ParBlockMatrixTest, TestsInCore) A_bsr->mult(x, tmp); for (int i = 0; i < A->local_num_rows; i++) ASSERT_NEAR(tmp[i], b[i], 1e-10); - + // Test Blocked Transpose Communication - A->comm->communicate_T(x.local.values, b.local.values); - A_bsr->comm->communicate_T(x.local.values, tmp.local.values, A_bsr->off_proc->b_cols); + A->comm->communicate_T(*x.local.storage, *b.local.storage); + A_bsr->comm->communicate_T(*x.local.storage, *tmp.local.storage, A_bsr->off_proc->b_cols); ASSERT_EQ(std.size(), blocked.size()); for (int i = 0; i < n; i++) ASSERT_NEAR(b[i], tmp[i], 1e-10); @@ -91,22 +91,22 @@ TEST(ParBlockMatrixTest, TestsInCore) A_bsr->mult_T(x, tmp); for (int i = 0; i < A->local_num_rows; i++) ASSERT_NEAR(tmp[i], b[i], 1e-10); - + // Test Blocked TAPSpMVs A->tap_comm = new TAPComm(A->partition, A->off_proc_column_map); A_bsr->tap_comm = new TAPComm(A_bsr->partition, A_bsr->off_proc_column_map); std = A->tap_comm->communicate(x); blocked = A_bsr->tap_comm->communicate(x, A_bsr->off_proc->b_cols); - ASSERT_EQ(std.size(), blocked.size()); - + ASSERT_EQ(std.size(), blocked.size()); + A->tap_mult(x, b); A_bsr->tap_mult(x, tmp); for (int i = 0; i < A->local_num_rows; i++) ASSERT_NEAR(b[i], tmp[i], 1e-10); // Test Blocked Transpose TAPSpMVs - A->tap_comm->communicate_T(x.local.values, b.local.values); - A_bsr->comm->communicate_T(x.local.values, tmp.local.values, A_bsr->off_proc->b_cols); + A->tap_comm->communicate_T(*x.local.storage, *b.local.storage); + A_bsr->comm->communicate_T(*x.local.storage, *tmp.local.storage, A_bsr->off_proc->b_cols); ASSERT_EQ(std.size(), blocked.size()); for (int i = 0; i < n; i++) ASSERT_NEAR(b[i], tmp[i], 1e-10); @@ -131,9 +131,6 @@ TEST(ParBlockMatrixTest, TestsInCore) delete A_bsr; setenv("PPN", "16", 1); - - -} // end of TEST(MatrixTest, TestsInCore) // - +} // end of TEST(MatrixTest, TestsInCore) // diff --git a/raptor/core/utilities.hpp b/raptor/core/utilities.hpp index 68498592..e586826b 100644 --- a/raptor/core/utilities.hpp +++ b/raptor/core/utilities.hpp @@ -1,15 +1,19 @@ #ifndef RAPTOR_CORE_UTILITIES_HPP #define RAPTOR_CORE_UTILITIES_HPP +#include +#include +#include + #include "types.hpp" // BLAS LU routine that is used for coarse solve -extern "C" void dgetrf_(int* dim1, int* dim2, double* a, int* lda, +extern "C" void dgetrf_(int* dim1, int* dim2, double* a, int* lda, int* ipiv, int* info); -extern "C" void dgetrs_(char *TRANS, int *N, int *NRHS, double *A, +extern "C" void dgetrs_(char *TRANS, int *N, int *NRHS, double *A, int *LDA, int *IPIV, double *B, int *LDB, int *INFO ); - +namespace raptor { template void vec_sort(std::vector& vec1, std::vector& vec2, int start = 0, int end = -1) { @@ -48,7 +52,7 @@ void vec_sort(std::vector& vec1, std::vector& vec2, int start = 0, int end } template -void vec_sort(std::vector& vec1, std::vector& vec2, +void vec_sort(std::vector& vec1, std::vector& vec2, std::vector& vec3, int start = 0, int end = -1) { @@ -70,7 +74,7 @@ void vec_sort(std::vector& vec1, std::vector& vec2, { int idx1 = i + start; int idx2 = j + start; - if (vec1[idx1] == vec1[idx2]) + if (vec1[idx1] == vec1[idx2]) return vec2[idx1] < vec2[idx2]; else return vec1[idx1] < vec1[idx2]; @@ -94,5 +98,114 @@ void vec_sort(std::vector& vec1, std::vector& vec2, } +enum extents : std::size_t { + dynamic_extent = std::numeric_limits::max() +}; +template +struct extent_storage +{ + extent_storage(std::size_t) {} + constexpr std::size_t value() const { return E; } +}; +template <> +struct extent_storage +{ + constexpr std::size_t value() const { return e; } + std::size_t e; +}; + + +template +struct span { + using element_type = T; + using value_type = typename std::remove_cv::type; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using iterator = T*; + using reverse_iterator = std::reverse_iterator; + + static constexpr std::size_t extent = Extent; + + + template::type> + span() : b(nullptr), ext{0} {} + + constexpr span(pointer p, size_type s) : b(p), ext{s} {} + + constexpr span(std::vector & v) : + span(v.data(), v.size()) {} + + constexpr iterator begin() const noexcept { + return b; + } + + constexpr iterator end() const noexcept { + return b + size(); + } + + constexpr reverse_iterator rbegin() const noexcept { + return reverse_iterator(end()); + } + constexpr reverse_iterator rend() const noexcept { + return reverse_iterator(begin()); + } + + constexpr reference front() const { + return *b; + } + + constexpr reference back() const { + return *(b + (size() - 1)); + } + + constexpr reference operator[](size_type idx) const { + return begin()[idx]; + } + + constexpr pointer data() const noexcept { + return b; + } + + constexpr size_type size() const noexcept { + return ext.value(); + } + + constexpr size_type size_bytes() const noexcept { + return sizeof(T)*size(); + } + + [[nodiscard]] constexpr bool empty() const noexcept { + return size() == 0; + } + + template + constexpr span first() const noexcept { + return {b, Count}; + } + + constexpr span first(size_type count) const noexcept { + return {data(), count}; + } + + template + constexpr span last() const noexcept { + return {data() + (size() - Count), Count}; + } + + constexpr span last(size_type count) const noexcept { + return {data() + (size() - count), count}; + } + +protected: + pointer b; + extent_storage ext; +}; + +} #endif diff --git a/raptor/core/vector.cpp b/raptor/core/vector.cpp index 5fc959d4..5154a139 100644 --- a/raptor/core/vector.cpp +++ b/raptor/core/vector.cpp @@ -16,7 +16,7 @@ using namespace raptor; **************************************************************/ void Vector::set_const_value(data_t alpha) { - for (index_t i = 0; i < num_values; i++) + for (index_t i = 0; i < size(); i++) { values[i] = alpha; } @@ -31,7 +31,7 @@ void Vector::set_const_value(data_t alpha) void Vector::set_rand_values() { srand(time(NULL)); - for (index_t i = 0; i < num_values; i++) + for (index_t i = 0; i < size(); i++) { values[i] = ((double)rand()) / RAND_MAX; } @@ -52,7 +52,7 @@ void Vector::set_rand_values() **************************************************************/ void Vector::axpy(Vector& x, data_t alpha) { - for (index_t i = 0; i < num_values; i++) + for (index_t i = 0; i < size(); i++) { values[i] += x.values[i]*alpha; } @@ -71,8 +71,8 @@ void Vector::axpy(Vector& x, data_t alpha) **************************************************************/ void Vector::copy(const Vector& y) { - num_values = y.num_values; - values.resize(num_values); + if (!storage) storage = std::make_shared(); + resize(y.size()); std::copy(y.values.begin(), y.values.end(), values.begin()); } @@ -88,7 +88,7 @@ void Vector::copy(const Vector& y) **************************************************************/ void Vector::scale(data_t alpha) { - for (index_t i = 0; i < num_values; i++) + for (index_t i = 0; i < size(); i++) { values[i] *= alpha; } @@ -108,7 +108,7 @@ data_t Vector::norm(index_t p) { data_t result = 0.0; double val; - for (index_t i = 0; i < num_values; i++) + for (index_t i = 0; i < size(); i++) { val = values[i]; if (fabs(val) > zero_tol) @@ -129,8 +129,8 @@ data_t Vector::norm(index_t p) **************************************************************/ void Vector::print(const char* vec_name) { - printf("Size = %d\n", num_values); - for (int i = 0; i < num_values; i++) + printf("Size = %d\n", size()); + for (int i = 0; i < size(); i++) { if (fabs(values[i]) > zero_tol) printf("%s[%d] = %e\n", vec_name, i, values[i]); @@ -156,7 +156,7 @@ data_t Vector::inner_product(Vector& x) { data_t result = 0.0; - for (int i = 0; i < num_values; i++) + for (int i = 0; i < size(); i++) { result += values[i] * x[i]; } diff --git a/raptor/core/vector.hpp b/raptor/core/vector.hpp index bce67f0a..12f235fc 100644 --- a/raptor/core/vector.hpp +++ b/raptor/core/vector.hpp @@ -3,7 +3,10 @@ #ifndef RAPTOR_CORE_VECTOR_HPP_ #define RAPTOR_CORE_VECTOR_HPP_ +#include + #include "types.hpp" +#include "utilities.hpp" // Vector Class // @@ -35,11 +38,10 @@ // data() // Returns the data values as a data_t* // -namespace raptor -{ +namespace raptor { + class Vector { - public: /************************************************************** ***** Vector Class Constructor @@ -51,32 +53,44 @@ class Vector ***** len : index_t ***** Size of the vector **************************************************************/ - Vector(int len) - { - resize(len); - } + Vector(int len) : + storage(std::make_shared(len)), + values(*storage) + {} /************************************************************** ***** Vector Class Constructor ************************************************************** ***** Initializes an empty vector without setting the size **************************************************************/ - Vector() - { - num_values = 0; - } + Vector() : + storage(std::make_shared()), + values(*storage) + {} + + Vector(double * base, std::size_t len) : + values(base, len) {} Vector(const Vector& v) { copy(v); } - void resize(int len) + void resize(std::size_t len) { - values.resize(len); - num_values = len; + if (owns_data()) { + storage->resize(len); + values = span(*storage); + } else { + assert(len <= values.size()); + values = values.first(len); + } } + bool owns_data() { + return static_cast(storage); + } + /************************************************************** ***** Vector Set Constant Value ************************************************************** @@ -186,15 +200,22 @@ class Vector return values.data(); } - index_t size() + index_t size() const { - return num_values; + return values.size(); } data_t inner_product(Vector& x); - std::vector values; - index_t num_values; + void set_base(double *base) { + auto sz = size(); + if (storage) storage.reset(); + values = span(base, sz); + } + + using storage_type = std::vector; + std::shared_ptr storage; + span values; }; } diff --git a/raptor/krylov/bicgstab.cpp b/raptor/krylov/bicgstab.cpp index 553bb3e6..c95da97a 100644 --- a/raptor/krylov/bicgstab.cpp +++ b/raptor/krylov/bicgstab.cpp @@ -20,7 +20,7 @@ void BiCGStab(CSRMatrix* A, Vector& x, Vector& b, std::vector& res, doub if (max_iter <= 0) { - max_iter = x.num_values + 5; + max_iter = x.size() + 5; } // Fixed Constructors From cd6b1e20910daa1fa60d8c0c6207d4d2f4cf8c32 Mon Sep 17 00:00:00 2001 From: Luke Olson Date: Mon, 8 Apr 2024 14:30:04 -0500 Subject: [PATCH 2/2] add codecov.yml --- codecov.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..35cde5cd --- /dev/null +++ b/codecov.yml @@ -0,0 +1,4 @@ +coverage: + status: + project: off + patch: off