diff --git a/examples/partitioned-heat-conduction/dirichlet.prm b/examples/partitioned-heat-conduction/dirichlet.prm
index ae31e33..298550f 100644
--- a/examples/partitioned-heat-conduction/dirichlet.prm
+++ b/examples/partitioned-heat-conduction/dirichlet.prm
@@ -2,10 +2,10 @@
# ---------------------
subsection Finite element system
# Displacement system polynomial order
- set Polynomial degree = 2
+ set Polynomial degree = 1
# Gauss quadrature order
- set Quadrature order = 3
+ set Quadrature order = 2
end
@@ -14,7 +14,7 @@ subsection Geometry
set Dimension = 2
# Number of global refinements
- set Global refinement = 3
+ set Global refinement = 4
# Testcase to compute
set Testcase = partitioned_heat_dirichlet
diff --git a/examples/partitioned-heat-conduction/neumann.prm b/examples/partitioned-heat-conduction/neumann.prm
index eb2ffae..ec7a0b4 100644
--- a/examples/partitioned-heat-conduction/neumann.prm
+++ b/examples/partitioned-heat-conduction/neumann.prm
@@ -2,10 +2,10 @@
# ---------------------
subsection Finite element system
# Displacement system polynomial order
- set Polynomial degree = 2
+ set Polynomial degree = 1
# Gauss quadrature order
- set Quadrature order = 3
+ set Quadrature order = 2
end
@@ -14,7 +14,7 @@ subsection Geometry
set Dimension = 2
# Number of global refinements
- set Global refinement = 3
+ set Global refinement = 4
# Testcase to compute
set Testcase = partitioned_heat_neumann
diff --git a/examples/partitioned-heat-conduction/precice-config.xml b/examples/partitioned-heat-conduction/precice-config.xml
index 4789f5d..243add5 100644
--- a/examples/partitioned-heat-conduction/precice-config.xml
+++ b/examples/partitioned-heat-conduction/precice-config.xml
@@ -71,8 +71,8 @@
from="Neumann"
to="Dirichlet"
initialize="true" />
-
-
+
+
diff --git a/include/heat_transfer/cuda_laplace_operator.h b/include/heat_transfer/cuda_laplace_operator.h
index 6551018..ccae218 100644
--- a/include/heat_transfer/cuda_laplace_operator.h
+++ b/include/heat_transfer/cuda_laplace_operator.h
@@ -2,6 +2,12 @@
#ifdef DEAL_II_COMPILER_CUDA_AWARE
+# include
+# include
+# include
+
+# include
+
# include
# include
# include
@@ -10,6 +16,7 @@
# include
# include
+# include
namespace Heat_Transfer
{
@@ -19,188 +26,398 @@ namespace Heat_Transfer
class Coefficient;
- template
- class LaplaceOperator
- : public MatrixFreeOperators::
- Base>
+ template
+ class VaryingCoefficientFunctor
{
public:
- using FECellIntegrator =
- FECellIntegrators>;
- using FEFaceIntegrator =
- FEFaceIntegrators>;
- using VectorType = LinearAlgebra::distributed::Vector;
+ VaryingCoefficientFunctor(double *coefficient)
+ : coef(coefficient)
+ {}
- LaplaceOperator();
+ __device__ void
+ operator()(
+ const unsigned int cell,
+ const typename CUDAWrappers::MatrixFree::Data *gpu_data);
- void
- clear() override;
- void
- evaluate_coefficient(const Coefficient &coefficient_function);
+ static const unsigned int n_dofs_1d = fe_degree + 1;
+ static const unsigned int n_local_dofs = ::Utilities::pow(n_dofs_1d, dim);
+ static const unsigned int n_q_points = ::Utilities::pow(n_dofs_1d, dim);
- void
- set_delta_t(const double delta_t_)
- {
- delta_t = delta_t_;
- }
+ private:
+ double *coef;
+ };
+
+
+ template
+ __device__ void
+ VaryingCoefficientFunctor::operator()(
+ const unsigned int cell,
+ const typename CUDAWrappers::MatrixFree::Data *gpu_data)
+ {
+ const unsigned int pos = CUDAWrappers::local_q_point_id(
+ cell, gpu_data, n_dofs_1d, n_q_points);
+
+ coef[pos] = 1.;
+ }
+
+ template
+ class LaplaceOperatorQuad
+ {
+ public:
+ __device__
+ LaplaceOperatorQuad(double coef, double delta_t)
+ : coef(coef)
+ , delta_t(delta_t)
+ {}
- virtual void
- compute_diagonal() override;
+ __device__ void
+ operator()(
+ CUDAWrappers::FEEvaluation
+ *fe_eval) const;
private:
- virtual void
- apply_add(VectorType &dst, const VectorType &src) const override;
+ double coef;
+ // TODO: Maybe remove from this class
+ double delta_t;
+ };
+
+
+
+ template
+ __device__ void
+ LaplaceOperatorQuad::operator()(
+ CUDAWrappers::FEEvaluation
+ *fe_eval) const
+ {
+ fe_eval->submit_value(fe_eval->get_value());
+ fe_eval->submit_gradient(coef * fe_eval->get_gradient() * delta_t);
+ }
+
+ template
+ class LocalLaplaceOperator
+ {
+ public:
+ LocalLaplaceOperator(double *coefficient, double delta_t)
+ : coef(coefficient)
+ , delta_t(delta_t)
+ {}
+
+ __device__ void
+ operator()(
+ const unsigned int cell,
+ const typename CUDAWrappers::MatrixFree::Data *gpu_data,
+ CUDAWrappers::SharedData *shared_data,
+ const double *src,
+ double *dst) const;
+
+ static const unsigned int n_dofs_1d = fe_degree + 1;
+ static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim);
+ static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim);
+
+ private:
+ double *coef;
+ double delta_t;
+ };
+
+
+ template
+ __device__ void
+ LocalLaplaceOperator::operator()(
+ const unsigned int cell,
+ const typename CUDAWrappers::MatrixFree::Data *gpu_data,
+ CUDAWrappers::SharedData *shared_data,
+ const double *src,
+ double *dst) const
+ {
+ const unsigned int pos = CUDAWrappers::local_q_point_id(
+ cell, gpu_data, n_dofs_1d, n_q_points);
+
+ CUDAWrappers::FEEvaluation
+ fe_eval(cell, gpu_data, shared_data);
+
+ fe_eval.read_dof_values(src);
+ fe_eval.evaluate(true, true);
+ fe_eval.apply_for_each_quad_point(
+ LaplaceOperatorQuad(coef[pos], delta_t));
+ fe_eval.integrate(true, true);
+
+ fe_eval.distribute_local_to_global(dst);
+ }
+
+ template
+ class CUDALaplaceOperator
+ {
+ public:
+ using VectorType =
+ LinearAlgebra::distributed::Vector;
+
+ CUDALaplaceOperator();
+
+ // and initialize the coefficient
+ void
+ initialize(const DoFHandler &dof_handler,
+ AffineConstraints &constraints);
void
- local_apply(const MatrixFree & data,
- VectorType & dst,
- const VectorType & src,
- const std::pair &cell_range) const;
+ set_delta_t(double dt);
void
- do_operation_on_cell(FECellIntegrator &phi) const;
+ vmult(VectorType &dst, const VectorType &src) const;
- Table<2, VectorizedArray> coefficient;
- double delta_t = 0;
+ void
+ initialize_dof_vector(VectorType &vec) const;
+
+ private:
+ CUDAWrappers::MatrixFree mf_data;
+ LinearAlgebra::CUDAWrappers::Vector coef;
+ double delta_t;
};
- template
- LaplaceOperator::LaplaceOperator()
- : MatrixFreeOperators::Base()
+ template
+ CUDALaplaceOperator::CUDALaplaceOperator()
{}
- template
+ template
void
- LaplaceOperator::clear()
+ CUDALaplaceOperator::initialize(
+ const DoFHandler &dof_handler,
+ AffineConstraints &constraints)
{
- coefficient.reinit(0, 0);
- MatrixFreeOperators::Base::clear();
- }
+ const int fe_degree = 1;
+ MappingQ mapping(fe_degree);
+ typename CUDAWrappers::MatrixFree::AdditionalData
+ additional_data;
+ additional_data.mapping_update_flags =
+ (update_values | update_JxW_values | update_gradients |
+ update_normal_vectors | update_quadrature_points);
+ const QGauss<1> quad(fe_degree + 1);
+ mf_data.reinit(mapping, dof_handler, constraints, quad, additional_data);
- template
- void
- LaplaceOperator::evaluate_coefficient(
- const Coefficient &coefficient_function)
- {
- const unsigned int n_cells = this->data->n_cell_batches();
- FECellIntegrator phi(*this->data);
-
- coefficient.reinit(n_cells, phi.n_q_points);
- for (unsigned int cell = 0; cell < n_cells; ++cell)
- {
- phi.reinit(cell);
- for (unsigned int q = 0; q < phi.n_q_points; ++q)
- coefficient(cell, q) =
- coefficient_function.value(phi.quadrature_point(q), 0);
- }
- }
+ const unsigned int n_owned_cells =
+ dynamic_cast *>(
+ &dof_handler.get_triangulation())
+ ->n_locally_owned_active_cells();
+ coef.reinit(Utilities::pow(fe_degree + 1, dim) * n_owned_cells);
-
- template
- void
- LaplaceOperator::local_apply(
- const MatrixFree & data,
- VectorType & dst,
- const VectorType & src,
- const std::pair &cell_range) const
- {
- FECellIntegrator phi(data);
-
- for (unsigned int cell = cell_range.first; cell < cell_range.second; ++cell)
- {
- AssertDimension(coefficient.size(0), data.n_cell_batches());
- AssertDimension(coefficient.size(1), phi.n_q_points);
-
- phi.reinit(cell);
- phi.read_dof_values(src);
- do_operation_on_cell(phi);
- phi.distribute_local_to_global(dst);
- }
+ const VaryingCoefficientFunctor functor(coef.get_values());
+ mf_data.evaluate_coefficients(functor);
}
-
- template
+ template
void
- LaplaceOperator::apply_add(
- VectorType & dst,
- const VectorType &src) const
+ CUDALaplaceOperator::set_delta_t(double dt)
{
- this->data->cell_loop(&LaplaceOperator::local_apply, this, dst, src);
+ delta_t = dt;
}
-
- template
+ template
void
- LaplaceOperator::compute_diagonal()
+ CUDALaplaceOperator::vmult(VectorType &dst,
+ const VectorType &src) const
{
- this->inverse_diagonal_entries.reset(new DiagonalMatrix());
- VectorType &inverse_diagonal = this->inverse_diagonal_entries->get_vector();
- this->data->initialize_dof_vector(inverse_diagonal);
-
- MatrixFreeTools::compute_diagonal(*(this->data),
- inverse_diagonal,
- &LaplaceOperator::do_operation_on_cell,
- this);
-
- this->set_constrained_entries_to_one(inverse_diagonal);
-
- for (unsigned int i = 0; i < inverse_diagonal.locally_owned_size(); ++i)
- {
- Assert(inverse_diagonal.local_element(i) > 0.,
- ExcMessage("No diagonal entry in a positive definite operator "
- "should be zero"));
- inverse_diagonal.local_element(i) =
- 1. / inverse_diagonal.local_element(i);
- }
+ dst = 0.;
+ const int fe_degree = 1;
+ LocalLaplaceOperator local_operator(coef.get_values(),
+ delta_t);
+ mf_data.cell_loop(local_operator, src, dst);
+ // We handle here only homogeneous constraints, so the copy here can
+ // probably ne omitted
+ mf_data.copy_constrained_values(src, dst);
}
-
- template
+ template
void
- LaplaceOperator::do_operation_on_cell(
- FECellIntegrator &phi) const
+ CUDALaplaceOperator::initialize_dof_vector(VectorType &vec) const
{
- Assert(delta_t > 0, ExcNotInitialized());
- const unsigned int cell = phi.get_current_cell_index();
- phi.evaluate(EvaluationFlags::values | EvaluationFlags::gradients);
- for (unsigned int q = 0; q < phi.n_q_points; ++q)
- {
- phi.submit_value(phi.get_value(q), q);
- phi.submit_gradient(coefficient(cell, q) * delta_t *
- phi.get_gradient(q),
- q);
- }
- phi.integrate(EvaluationFlags::values | EvaluationFlags::gradients);
+ mf_data.initialize_dof_vector(vec);
}
- // Helper function in order to evaluate the vectorized point
- template
- VectorizedArray
- evaluate_function(const Function & function,
- const Point> &p_vectorized,
- const unsigned int component = 0)
- {
- VectorizedArray result;
- for (unsigned int v = 0; v < VectorizedArray::size(); ++v)
- {
- Point p;
- for (unsigned int d = 0; d < dim; ++d)
- p[d] = p_vectorized[d][v];
- result[v] = function.value(p, component);
- }
- return result;
- }
+
+ /**
+ * Partial template specialization for CUDA
+ */
+ // template
+ // class LaplaceOperator
+ // : public MatrixFreeOperators::
+ // Base>
+ // {
+ // public:
+ // using FECellIntegrator =
+ // FECellIntegrators>;
+ // using FEFaceIntegrator =
+ // FEFaceIntegrators>;
+ // using VectorType =
+ // LinearAlgebra::distributed::Vector;
+
+ // LaplaceOperator();
+
+ // void
+ // clear() override;
+
+ // void
+ // evaluate_coefficient(const Coefficient &coefficient_function);
+
+ // void
+ // set_delta_t(const double delta_t_)
+ // {
+ // delta_t = delta_t_;
+ // }
+
+ // virtual void
+ // compute_diagonal() override;
+
+ // private:
+ // virtual void
+ // apply_add(VectorType &dst, const VectorType &src) const override;
+
+ // void
+ // local_apply(const MatrixFree &data,
+ // VectorType &dst,
+ // const VectorType &src,
+ // const std::pair &cell_range)
+ // const;
+
+ // void
+ // do_operation_on_cell(FECellIntegrator &phi) const;
+
+ // Table<2, VectorizedArray> coefficient;
+ // LinearAlgebra::CUDAWrappers::Vector coef;
+ // double delta_t = 0;
+ // };
+
+
+
+ // template
+ // LaplaceOperator::LaplaceOperator()
+ // : MatrixFreeOperators::Base()
+ // {}
+
+
+
+ // template
+ // void
+ // LaplaceOperator::clear()
+ // {
+ // coefficient.reinit(0, 0);
+ // MatrixFreeOperators::Base::clear();
+ // }
+
+
+
+ // template
+ // void
+ // LaplaceOperator::evaluate_coefficient(
+ // const Coefficient &coefficient_function)
+ // {
+ // const unsigned int n_cells = this->data->n_cell_batches();
+ // FECellIntegrator phi(*this->data);
+
+ // coefficient.reinit(n_cells, phi.n_q_points);
+ // for (unsigned int cell = 0; cell < n_cells; ++cell)
+ // {
+ // phi.reinit(cell);
+ // for (unsigned int q = 0; q < phi.n_q_points; ++q)
+ // coefficient(cell, q) =
+ // coefficient_function.value(phi.quadrature_point(q), 0);
+ // }
+ // }
+
+
+
+ // template
+ // void
+ // LaplaceOperator::local_apply(
+ // const MatrixFree &data,
+ // VectorType &dst,
+ // const VectorType &src,
+ // const std::pair &cell_range) const
+ // {
+ // FECellIntegrator phi(data);
+
+ // for (unsigned int cell = cell_range.first; cell < cell_range.second;
+ // ++cell)
+ // {
+ // AssertDimension(coefficient.size(0), data.n_cell_batches());
+ // AssertDimension(coefficient.size(1), phi.n_q_points);
+
+ // phi.reinit(cell);
+ // phi.read_dof_values(src);
+ // do_operation_on_cell(phi);
+ // phi.distribute_local_to_global(dst);
+ // }
+ // }
+
+
+
+ // template
+ // void
+ // LaplaceOperator::apply_add(
+ // VectorType &dst,
+ // const VectorType &src) const
+ // {
+ // this->data->cell_loop(&LaplaceOperator::local_apply, this, dst, src);
+ // }
+
+
+
+ // template
+ // void
+ // LaplaceOperator::compute_diagonal()
+ // {
+ // this->inverse_diagonal_entries.reset(new DiagonalMatrix());
+ // VectorType &inverse_diagonal =
+ // this->inverse_diagonal_entries->get_vector();
+ // this->data->initialize_dof_vector(inverse_diagonal);
+
+ // MatrixFreeTools::compute_diagonal(*(this->data),
+ // inverse_diagonal,
+ // &LaplaceOperator::do_operation_on_cell,
+ // this);
+
+ // this->set_constrained_entries_to_one(inverse_diagonal);
+
+ // for (unsigned int i = 0; i < inverse_diagonal.locally_owned_size(); ++i)
+ // {
+ // Assert(inverse_diagonal.local_element(i) > 0.,
+ // ExcMessage("No diagonal entry in a positive definite operator
+ // "
+ // "should be zero"));
+ // inverse_diagonal.local_element(i) =
+ // 1. / inverse_diagonal.local_element(i);
+ // }
+ // }
+
+
+
+ // template
+ // void
+ // LaplaceOperator::do_operation_on_cell(
+ // FECellIntegrator &phi) const
+ // {
+ // Assert(delta_t > 0, ExcNotInitialized());
+ // const unsigned int cell = phi.get_current_cell_index();
+ // phi.evaluate(EvaluationFlags::values | EvaluationFlags::gradients);
+ // for (unsigned int q = 0; q < phi.n_q_points; ++q)
+ // {
+ // phi.submit_value(phi.get_value(q), q);
+ // phi.submit_gradient(coefficient(cell, q) * delta_t *
+ // phi.get_gradient(q),
+ // q);
+ // }
+ // phi.integrate(EvaluationFlags::values | EvaluationFlags::gradients);
+ // }
} // namespace Heat_Transfer
#endif
\ No newline at end of file
diff --git a/include/heat_transfer/heat_transfer.h b/include/heat_transfer/heat_transfer.h
index 498bb45..ebc220b 100644
--- a/include/heat_transfer/heat_transfer.h
+++ b/include/heat_transfer/heat_transfer.h
@@ -92,6 +92,9 @@ namespace Heat_Transfer
using VectorType = LinearAlgebra::distributed::Vector;
using LevelVectorType = LinearAlgebra::distributed::Vector;
+ using DeviceVector =
+ LinearAlgebra::distributed::Vector;
+
LaplaceProblem(const Parameters::HeatParameters ¶meters);
void
run(std::shared_ptr> testcase_);
@@ -184,13 +187,15 @@ namespace Heat_Transfer
Adapter::Adapter>>
precice_adapter;
+ std::unique_ptr> cuda_operator;
+
ConditionalOStream pcout;
mutable TimerOutput timer;
unsigned long int total_n_cg_iterations;
unsigned int total_n_cg_solve;
// Valid options are none, jacobi and gmg
- std::string preconditioner_type = "gmg";
+ std::string preconditioner_type = "none";
Time time;
};
@@ -293,6 +298,10 @@ namespace Heat_Transfer
system_matrix.evaluate_coefficient(Coefficient());
system_matrix.set_delta_t(time.get_delta_t());
+ cuda_operator.reset(new CUDALaplaceOperator());
+ cuda_operator->initialize(dof_handler, constraints);
+ cuda_operator->set_delta_t(time.get_delta_t());
+
// ... the second matrix-free operator for inhomogenous BCs
AffineConstraints no_constraints;
no_constraints.close();
@@ -536,11 +545,23 @@ namespace Heat_Transfer
}
else if (preconditioner_type == "none")
{
- SolverCG cg(solver_control);
- cg.solve(system_matrix,
- solution_update,
- system_rhs,
- PreconditionIdentity());
+ SolverCG cg(solver_control);
+ DeviceVector update, rhs;
+ cuda_operator->initialize_dof_vector(update);
+ cuda_operator->initialize_dof_vector(rhs);
+ update = 0;
+
+ LinearAlgebra::ReadWriteVector rw_vector(
+ dof_handler.locally_owned_dofs());
+ constraints.set_zero(system_rhs);
+
+ rw_vector.import(system_rhs, VectorOperation::insert);
+ rhs.import(rw_vector, VectorOperation::insert);
+
+ cg.solve(*cuda_operator.get(), update, rhs, PreconditionIdentity());
+
+ rw_vector.import(update, VectorOperation::insert);
+ solution_update.import(rw_vector, VectorOperation::insert);
}
else if (preconditioner_type == "gmg")
{