From 0282396b281da9aff4943962810eb855a3b17d40 Mon Sep 17 00:00:00 2001 From: DavidSCN Date: Mon, 1 Feb 2021 15:13:05 +0100 Subject: [PATCH 1/5] Vectorize residual assembly --- include/material.h | 2 +- include/mf_elasticity.h | 240 +++++++++++++++++++++++++++------------- 2 files changed, 164 insertions(+), 78 deletions(-) diff --git a/include/material.h b/include/material.h index 4866c62..0c6eaad 100644 --- a/include/material.h +++ b/include/material.h @@ -28,7 +28,7 @@ template Number divide_by_dim(const Number &x, const int dim) { - return x / dim; + return Number(x / dim); } template void - Solid::assemble_system() + Solid::assemble_system(const int it_nr) { TimerOutput::Scope t(timer, "Assemble linear system"); pcout << " ASM " << std::flush; system_rhs = 0.0; - Vector cell_rhs(dofs_per_cell); - std::vector local_dof_indices(dofs_per_cell); + const bool assemble_fast = it_nr < 4; - std::vector> solution_grads_u_total(qf_cell.size()); - std::vector> local_acceleration(qf_cell.size()); + if (!assemble_fast) + { + Vector cell_rhs(dofs_per_cell); + std::vector local_dof_indices(dofs_per_cell); - // values at quadrature points: - std::vector> grad_Nx(dofs_per_cell); - std::vector> symm_grad_Nx(dofs_per_cell); + std::vector> solution_grads_u_total( + qf_cell.size()); + std::vector> local_acceleration(qf_cell.size()); - FEValues fe_values( - fe, qf_cell, update_values | update_gradients | update_JxW_values); + // values at quadrature points: + std::vector> grad_Nx(dofs_per_cell); + std::vector> symm_grad_Nx( + dofs_per_cell); - for (const auto &cell : dof_handler.active_cell_iterators()) - if (cell->is_locally_owned()) - { - const auto &cell_mat = - (cell->material_id() == 2 ? 
material_inclusion : material); + FEValues fe_values( + fe, qf_cell, update_values | update_gradients | update_JxW_values); - cell_rhs = 0.; - fe_values.reinit(cell); - cell->get_dof_indices(local_dof_indices); + for (const auto &cell : dof_handler.active_cell_iterators()) + if (cell->is_locally_owned()) + { + const auto &cell_mat = + (cell->material_id() == 2 ? material_inclusion : material); + + cell_rhs = 0.; + fe_values.reinit(cell); + cell->get_dof_indices(local_dof_indices); + + // We first need to find the solution gradients at quadrature + // points inside the current cell and then we update each local QP + // using the displacement gradient: + fe_values[u_fe].get_function_gradients(total_displacement, + solution_grads_u_total); + + fe_values[u_fe].get_function_values(acceleration, + local_acceleration); + + // Now we build the residual. In doing so, we first extract some + // configuration dependent variables from our QPH history objects + // for the current quadrature point. + for (unsigned int q_point = 0; q_point < n_q_points; ++q_point) + { + const Tensor<2, dim, Number> &grad_u = + solution_grads_u_total[q_point]; + const Tensor<2, dim, Number> F = + Physics::Elasticity::Kinematics::F(grad_u); + const SymmetricTensor<2, dim, Number> b = + Physics::Elasticity::Kinematics::b(F); + + const Number det_F = determinant(F); + Assert(det_F > Number(0.0), ExcInternalError()); + const Tensor<2, dim, Number> F_inv = invert(F); + + // don't calculate b_bar if we don't need to: + const SymmetricTensor<2, dim, Number> b_bar = + cell_mat->formulation == 0 ? 
+ Physics::Elasticity::Kinematics::b( + Physics::Elasticity::Kinematics::F_iso(F)) : + SymmetricTensor<2, dim, Number>(); + + for (unsigned int k = 0; k < dofs_per_cell; ++k) + { + grad_Nx[k] = fe_values[u_fe].gradient(k, q_point) * F_inv; + symm_grad_Nx[k] = symmetrize(grad_Nx[k]); + } + + SymmetricTensor<2, dim, Number> tau; + cell_mat->get_tau(tau, det_F, b_bar, b); + const double JxW = fe_values.JxW(q_point); + + // loop over j first to make caching a bit more + // straight-forward without recourse to symmetry + for (unsigned int j = 0; j < dofs_per_cell; ++j) + { + cell_rhs(j) -= (symm_grad_Nx[j] * tau) * JxW; + const unsigned int component_j = + fe.system_to_component_index(j).first; + + for (unsigned int i = 0; i < dofs_per_cell; ++i) + cell_rhs(j) -= + fe_values[u_fe].value(j, q_point) * cell_mat->rho * + fe_values[u_fe].value(i, q_point) * + local_acceleration[q_point][component_j] * JxW; + } + + } // end loop over quadrature points + constraints.distribute_local_to_global(cell_rhs, + local_dof_indices, + system_rhs); + } + } + else + { + FEEvaluation phi_reference( + *mf_data_reference); + // Copy constructor + FEEvaluation phi_acc( + phi_reference); + const unsigned int n_cells = mf_data_reference->n_cell_batches(); + + for (unsigned int cell = 0; cell < n_cells; ++cell) + { + const unsigned int material_id = + mf_data_reference->get_cell_iterator(cell, 0)->material_id(); + const auto &cell_mat = + (material_id == 0 ? 
material_vec : material_inclusion_vec); - // We first need to find the solution gradients at quadrature points - // inside the current cell and then we update each local QP using - // the displacement gradient: - fe_values[u_fe].get_function_gradients(total_displacement, - solution_grads_u_total); + phi_reference.reinit(cell); + phi_reference.read_dof_values_plain(total_displacement); + phi_reference.evaluate(false, true, false); - fe_values[u_fe].get_function_values(acceleration, local_acceleration); + phi_acc.reinit(cell); + phi_acc.read_dof_values_plain(acceleration); + phi_acc.evaluate(true, false); - // Now we build the residual. In doing so, we first extract some - // configuration dependent variables from our QPH history objects - // for the current quadrature point. - for (unsigned int q_point = 0; q_point < n_q_points; ++q_point) - { - const Tensor<2, dim, Number> &grad_u = - solution_grads_u_total[q_point]; - const Tensor<2, dim, Number> F = - Physics::Elasticity::Kinematics::F(grad_u); - const SymmetricTensor<2, dim, Number> b = - Physics::Elasticity::Kinematics::b(F); - - const Number det_F = determinant(F); - Assert(det_F > Number(0.0), ExcInternalError()); - const Tensor<2, dim, Number> F_inv = invert(F); - - // don't calculate b_bar if we don't need to: - const SymmetricTensor<2, dim, Number> b_bar = - cell_mat->formulation == 0 ? - Physics::Elasticity::Kinematics::b( - Physics::Elasticity::Kinematics::F_iso(F)) : - SymmetricTensor<2, dim, Number>(); - - for (unsigned int k = 0; k < dofs_per_cell; ++k) - { - grad_Nx[k] = fe_values[u_fe].gradient(k, q_point) * F_inv; - symm_grad_Nx[k] = symmetrize(grad_Nx[k]); - } - SymmetricTensor<2, dim, Number> tau; - cell_mat->get_tau(tau, det_F, b_bar, b); - const double JxW = fe_values.JxW(q_point); + // Now we build the residual. In doing so, we first extract some + // configuration dependent variables from our QPH history objects + // for the current quadrature point. 
+ for (unsigned int q_point = 0; q_point < phi_reference.n_q_points; + ++q_point) + { + const Tensor<2, dim, VectorizedArray> grad_u = + phi_reference.get_gradient(q_point); + + const Tensor<2, dim, VectorizedArray> F = + Physics::Elasticity::Kinematics::F(grad_u); + + const SymmetricTensor<2, dim, VectorizedArray> b = + Physics::Elasticity::Kinematics::b(F); + + const VectorizedArray det_F = determinant(F); + + Assert(*std::min_element( + det_F.begin(), + det_F.begin() + + mf_data_reference->n_active_entries_per_cell_batch( + cell)) > Number(0.0), + ExcInternalError()); + + const Tensor<2, dim, VectorizedArray> F_inv = invert(F); + + // don't calculate b_bar if we don't need to: + const SymmetricTensor<2, dim, VectorizedArray> b_bar = + cell_mat->formulation == 0 ? + Physics::Elasticity::Kinematics::b( + Physics::Elasticity::Kinematics::F_iso(F)) : + SymmetricTensor<2, dim, VectorizedArray>(); + + SymmetricTensor<2, dim, VectorizedArray> tau; + cell_mat->get_tau(tau, det_F, b_bar, b); + + const Tensor<2, dim, VectorizedArray> res = + Tensor<2, dim, VectorizedArray>(tau); + + phi_reference.submit_gradient(-res * transpose(F_inv), q_point); + phi_acc.submit_value(-phi_acc.get_value(q_point) * + cell_mat->rho, + q_point); + } // end loop over quadrature points + + phi_reference.integrate(false, true); + phi_reference.distribute_local_to_global(system_rhs); + phi_acc.integrate(true, false); + phi_acc.distribute_local_to_global(system_rhs); + } + } + - // loop over j first to make caching a bit more - // straight-forward without recourse to symmetry - for (unsigned int j = 0; j < dofs_per_cell; ++j) - { - cell_rhs(j) -= (symm_grad_Nx[j] * tau) * JxW; - const unsigned int component_j = - fe.system_to_component_index(j).first; - - for (unsigned int i = 0; i < dofs_per_cell; ++i) - cell_rhs(j) -= - fe_values[u_fe].value(j, q_point) * cell_mat->rho * - fe_values[u_fe].value(i, q_point) * - local_acceleration[q_point][component_j] * JxW; - } - - } // end loop over 
quadrature points - constraints.distribute_local_to_global(cell_rhs, - local_dof_indices, - system_rhs); - } FEFaceEvaluation phi( *mf_data_reference); From 316c2017534617ad5b4cadccb624414fe1c68abd Mon Sep 17 00:00:00 2001 From: DavidSCN Date: Mon, 1 Feb 2021 15:31:29 +0100 Subject: [PATCH 2/5] Fix brackets --- include/material.h | 2 +- include/mf_elasticity.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/material.h b/include/material.h index 0c6eaad..5d5cd93 100644 --- a/include/material.h +++ b/include/material.h @@ -28,7 +28,7 @@ template Number divide_by_dim(const Number &x, const int dim) { - return Number(x / dim); + return x / Number(dim); } template Date: Mon, 1 Feb 2021 17:38:36 +0100 Subject: [PATCH 3/5] Fix parallel vector layout --- include/mf_elasticity.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/mf_elasticity.h b/include/mf_elasticity.h index 5018d29..2407dd9 100644 --- a/include/mf_elasticity.h +++ b/include/mf_elasticity.h @@ -1141,11 +1141,13 @@ namespace FSI *mf_data_reference->get_vector_partitioner().get()), ExcInternalError()); + // TODO: Use initialize_dof_vector adjust_ghost_range_if_necessary(partitioner, newton_update); adjust_ghost_range_if_necessary(partitioner, system_rhs); - adjust_ghost_range_if_necessary(partitioner, total_displacement); + adjust_ghost_range_if_necessary(partitioner, acceleration); total_displacement.update_ghost_values(); + acceleration.update_ghost_values(); } else // FIXME: interpolate_to_mg will resize MG vector, make sure it has the From 75827bb8076ed042964a52225f7dd27a52d0afb4 Mon Sep 17 00:00:00 2001 From: DavidSCN Date: Mon, 8 Feb 2021 18:13:08 +0100 Subject: [PATCH 4/5] Add estimate for initial interface size --- include/mf_elasticity.h | 10 +++++----- include/precice_adapter.h | 12 ++++++++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/mf_elasticity.h b/include/mf_elasticity.h index 2407dd9..ee11452 100644 --- 
a/include/mf_elasticity.h +++ b/include/mf_elasticity.h @@ -7,7 +7,7 @@ * 2 - CG iterations * 3 - GMG iterations */ -static const unsigned int debug_level = 1; +static const unsigned int debug_level = 0; // We start by including all the necessary deal.II header files and some C++ // related ones. They have been discussed in detail in previous tutorial @@ -1707,7 +1707,7 @@ namespace FSI system_rhs = 0.0; - const bool assemble_fast = it_nr < 4; + const bool assemble_fast = it_nr < 5; if (!assemble_fast) { @@ -2009,7 +2009,6 @@ namespace FSI double cond_number = 1.0; // reset solution vector each iteration - // TODO: We use zero dst anyway, so this can be removed newton_update = 0.; // We solve for the incremental displacement $d\mathbf{u}$. @@ -2143,8 +2142,9 @@ namespace FSI degree, DataOut::curved_inner_cells); - const std::string filename = parameters.output_folder + "solution-" + - std::to_string(result_number) + ".vtu"; + const std::string filename = parameters.output_folder + "solution_" + + Utilities::int_to_string(result_number, 3) + + ".vtu"; data_out.write_vtu_in_parallel(filename, mpi_communicator); diff --git a/include/precice_adapter.h b/include/precice_adapter.h index 38d5c57..30c45eb 100644 --- a/include/precice_adapter.h +++ b/include/precice_adapter.h @@ -504,8 +504,9 @@ namespace Adapter const unsigned int mesh_id = is_read_mesh ? read_mesh_id : write_mesh_id; auto &interface_nodes_ids = is_read_mesh ? 
read_nodes_ids : write_nodes_ids; - // TODO: Find a suitable guess for the number of interface points (optional) - interface_nodes_ids.reserve(20); + // Initial guess: half of the boundary is part of the coupling interface + interface_nodes_ids.reserve(mf_data_reference->n_boundary_face_batches() * + 0.5); // TODO: n_qpoints_1D is hard coded FEFaceEvaluation unrolled_vertices; std::array node_ids; + unsigned int size = 0; for (unsigned int face = mf_data_reference->n_inner_face_batches(); face < mf_data_reference->n_boundary_face_batches() + @@ -538,6 +540,9 @@ namespace Adapter const auto local_vertex = phi.quadrature_point(q); // Transform Point into preCICE conform format + // We store here also the potential 'dummy'/empty lanes (not only + // active_faces), but it allows us to use a static loop as well as a + // static array for the indices for (int d = 0; d < dim; ++d) for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v) unrolled_vertices[d + dim * v] = local_vertex[d][v]; @@ -547,7 +552,10 @@ namespace Adapter unrolled_vertices.data(), node_ids.data()); interface_nodes_ids.emplace_back(node_ids); + ++size; } + // resize the IDs in case the initial guess was too large + interface_nodes_ids.resize(size); } } From 35eaea6dcff7e178a8435eb5ce9a460490d4bc18 Mon Sep 17 00:00:00 2001 From: DavidSCN Date: Tue, 9 Feb 2021 09:11:44 +0100 Subject: [PATCH 5/5] Add a comment for fast assembly and rename system to residual --- include/mf_elasticity.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/include/mf_elasticity.h b/include/mf_elasticity.h index ee11452..5226dc4 100644 --- a/include/mf_elasticity.h +++ b/include/mf_elasticity.h @@ -178,7 +178,7 @@ namespace FSI // Function to assemble the right hand side vector. 
void - assemble_system(const int it_nr); + assemble_residual(const int it_nr); // Apply Dirichlet boundary conditions on the displacement field void @@ -1509,7 +1509,7 @@ namespace FSI // TODO: merge this function call with zeroing in main loop update_acceleration(delta_displacement); - assemble_system(newton_iteration); + assemble_residual(newton_iteration); if (check_convergence(newton_iteration)) break; @@ -1700,15 +1700,22 @@ namespace FSI // the matrix is reset before any assembly operations can occur. template void - Solid::assemble_system(const int it_nr) + Solid::assemble_residual(const int it_nr) { - TimerOutput::Scope t(timer, "Assemble linear system"); - pcout << " ASM " << std::flush; + TimerOutput::Scope t(timer, "Assemble residual"); + pcout << " ASR " << std::flush; system_rhs = 0.0; + // FIXME: The fast assembly (FEEvaluation) sometimes fails to converge + // and stagnates shortly before the convergence limit as compared to the + // FEValues assembly, e.g. try one of the tests. Hence, we use it only for + // the first five iterations and return to the more accurate assembly + // afterwards. However, most of the cases will already be converged at this + // stage. const bool assemble_fast = it_nr < 5; + // The usual assembly strategy if (!assemble_fast) { Vector cell_rhs(dofs_per_cell);