numerical-collection-cpp 0.10.0
A collection of algorithms in numerical analysis implemented in C++
Loading...
Searching...
No Matches
parallel_symmetric_successive_over_relaxation.h
Go to the documentation of this file.
1/*
2 * Copyright 2023 MusicScience37 (Kenta Kabashima)
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
20#pragma once
21
22#include <algorithm>
23#include <cmath>
24
25#include <Eigen/Core>
26#include <omp.h>
27
38
39namespace num_collect::linear {
40
41template <base::concepts::sparse_matrix Matrix>
42class parallel_symmetric_successive_over_relaxation;
43
47 "num_collect::linear::parallel_symmetric_successive_over_relaxation");
48
49namespace impl {
50
56template <base::concepts::sparse_matrix Matrix>
62
63} // namespace impl
64
71template <base::concepts::sparse_matrix Matrix>
73 : public iterative_solver_base<
74 parallel_symmetric_successive_over_relaxation<Matrix>>,
76 static_assert(Matrix::IsRowMajor == 1, "Row major matrix is required.");
78 "Complex matrices are not supported.");
79
80public:
84
85 using typename base_type::matrix_type;
86 using typename base_type::real_scalar_type;
87 using typename base_type::scalar_type;
88 using typename base_type::storage_index_type;
89
90protected:
91 using base_type::coeff;
92
93public:
95 using vector_type = Eigen::VectorX<scalar_type>;
96
103
109 void compute(const matrix_type& coeff) {
111 diag_ = coeff.diagonal();
112 inv_diag_ = diag_.cwiseInverse();
113 intermidiate_solution_.resize(coeff.cols());
114 NUM_COLLECT_PRECONDITION(inv_diag_.array().isFinite().all(),
115 "All diagonal elements of the coefficient matrix must not be "
116 "zero.");
118 (coeff.nonZeros() / omp_get_max_threads() > 1000); // NOLINT
119 }
120
129 template <base::concepts::dense_vector_of<scalar_type> Right,
130 base::concepts::dense_vector_of<scalar_type> Solution>
131 void solve_vector_in_place(const Right& right, Solution& solution) const {
132 const auto& coeff_ref = coeff();
133
134 NUM_COLLECT_PRECONDITION(coeff_ref.rows() == coeff_ref.cols(),
135 "Coefficient matrix must be a square matrix.");
136 NUM_COLLECT_PRECONDITION(right.rows() == coeff_ref.cols(),
137 "Right-hand-side vector must have the number of elements same as "
138 "the size of the coefficient matrix.");
139 NUM_COLLECT_PRECONDITION(solution.rows() == coeff_ref.cols(),
140 "Solution vector must have the number of elements same as the size "
141 "of the coefficient matrix.");
142
143 iterations_ = 0;
144 const scalar_type right_norm = right.squaredNorm();
146 while (iterations_ < max_iterations) {
147 if (run_parallel_) {
148 iterate_parallel(coeff_ref, right, solution);
149 } else {
150 iterate_no_parallel(coeff_ref, right, solution);
151 }
152 if (!std::isfinite(residual_)) {
154 "Failure in "
155 "parallel_symmetric_successive_over_relaxation.");
156 }
157 ++iterations_;
158 using std::sqrt;
159 residual_rate_ = sqrt(residual_ / right_norm);
161 break;
162 }
163 }
164 }
165
173 [[nodiscard]] auto iterations() const noexcept -> index_type {
174 return iterations_;
175 }
176
184 [[nodiscard]] auto residual_rate() const noexcept -> scalar_type {
185 return residual_rate_;
186 }
187
196 NUM_COLLECT_PRECONDITION(static_cast<scalar_type>(0) < val &&
197 val < static_cast<scalar_type>(2),
198 "Relaxation coefficient must be in the range (0, 2).");
199 relaxation_coeff_ = val;
200 return *this;
201 }
202
211 auto run_parallel(bool val)
213 run_parallel_ = val;
214 return *this;
215 }
216
217private:
227 template <base::concepts::dense_vector_of<scalar_type> Right,
228 base::concepts::dense_vector_of<scalar_type> Solution>
229 void iterate_parallel(const matrix_type& coeff_ref, const Right& right,
230 Solution& solution) const {
231 const index_type size = coeff_ref.rows();
232 const scalar_type prev_sol_coeff =
233 static_cast<scalar_type>(1) - relaxation_coeff_;
234 residual_ = static_cast<scalar_type>(0);
235
236#pragma omp parallel
237 {
238 const index_type num_threads = omp_get_num_threads();
239 const index_type thread_id = omp_get_thread_num();
240 const index_type rows_per_thread =
241 (size + num_threads - 1) / num_threads;
242 const index_type my_start_row = thread_id * rows_per_thread;
243 const index_type my_last_row =
244 std::min(my_start_row + rows_per_thread, size);
245
246 if (iterations_ == 0) {
248 "my rows: {} - {} (whole rows: {})", my_start_row,
249 my_last_row, size);
250 }
251
252 auto my_residual = static_cast<scalar_type>(0);
253
254 // Forward update.
255 for (index_type i = my_start_row; i < my_last_row; ++i) {
256 my_residual += process_row_forward(coeff_ref, right, solution,
257 i, my_start_row, prev_sol_coeff);
258 }
259
260#pragma omp barrier
261
262 // Backward update.
263 for (index_type i = my_last_row - 1; i >= my_start_row; --i) {
265 coeff_ref, right, solution, i, my_last_row, prev_sol_coeff);
266 }
267
268#pragma omp critical
269 residual_ += my_residual;
270 }
271 }
272
282 template <base::concepts::dense_vector_of<scalar_type> Right,
283 base::concepts::dense_vector_of<scalar_type> Solution>
284 void iterate_no_parallel(const matrix_type& coeff_ref, const Right& right,
285 Solution& solution) const {
286 const index_type size = coeff_ref.rows();
287 const scalar_type prev_sol_coeff =
288 static_cast<scalar_type>(1) - relaxation_coeff_;
289 residual_ = static_cast<scalar_type>(0);
290
291 // Forward update.
292 for (index_type i = 0; i < size; ++i) {
294 coeff_ref, right, solution, i, 0, prev_sol_coeff);
295 }
296
297 // Backward update.
298 for (index_type i = size - 1; i >= 0; --i) {
300 coeff_ref, right, solution, i, size, prev_sol_coeff);
301 }
302 }
303
317 template <base::concepts::dense_vector_of<scalar_type> Right,
318 base::concepts::dense_vector_of<scalar_type> Solution>
319 auto process_row_forward(const matrix_type& coeff_ref, const Right& right,
320 Solution& solution, index_type i, index_type start_row,
321 const scalar_type& prev_sol_coeff) const -> scalar_type {
322 scalar_type numerator = right(i);
323 for (typename matrix_type::InnerIterator iter(coeff_ref, i); iter;
324 ++iter) {
325 if (start_row <= iter.index() && iter.index() < i) {
326 numerator -=
327 iter.value() * intermidiate_solution_(iter.index());
328 } else if (iter.index() != i) {
329 numerator -= iter.value() * solution(iter.index());
330 }
331 }
332 const scalar_type row_residual = numerator - diag_(i) * solution(i);
334 relaxation_coeff_ * numerator * inv_diag_(i) +
335 prev_sol_coeff * solution(i);
336 return row_residual * row_residual;
337 }
338
351 template <base::concepts::dense_vector_of<scalar_type> Right,
352 base::concepts::dense_vector_of<scalar_type> Solution>
353 void process_row_backward(const matrix_type& coeff_ref, const Right& right,
354 Solution& solution, index_type i, index_type end_row,
355 const scalar_type& prev_sol_coeff) const {
356 scalar_type numerator = right(i);
357 for (typename matrix_type::InnerIterator iter(coeff_ref, i); iter;
358 ++iter) {
359 if (i < iter.index() && iter.index() < end_row) {
360 numerator -= iter.value() * solution(iter.index());
361 } else if (iter.index() != i) {
362 numerator -=
363 iter.value() * intermidiate_solution_(iter.index());
364 }
365 }
366 solution(i) = relaxation_coeff_ * numerator * inv_diag_(i) +
367 prev_sol_coeff * intermidiate_solution_(i);
368 }
369
371 bool run_parallel_{true};
372
375
378
381
384
387
390
393};
394
395} // namespace num_collect::linear
Class of exception on failure in algorithm.
Definition exception.h:93
auto compute(const matrix_type &coeff) -> parallel_symmetric_successive_over_relaxation< Matrix > &
Class to solve linear equations using symmetric successive over-relaxation using threads golub2013.
auto iterations() const noexcept -> index_type
Get the number of iterations.
void solve_vector_in_place(const Right &right, Solution &solution) const
Iterate repeatedly until stop criterion is satisfied for a vector.
auto relaxation_coeff(const scalar_type &val) -> parallel_symmetric_successive_over_relaxation &
Set the relaxation coefficient.
void iterate_no_parallel(const matrix_type &coeff_ref, const Right &right, Solution &solution) const
Iterate once in single thread.
auto run_parallel(bool val) -> parallel_symmetric_successive_over_relaxation &
Set whether to run in parallel.
auto process_row_forward(const matrix_type &coeff_ref, const Right &right, Solution &solution, index_type i, index_type start_row, const scalar_type &prev_sol_coeff) const -> scalar_type
Process a row in the forward update.
void iterate_parallel(const matrix_type &coeff_ref, const Right &right, Solution &solution) const
Iterate once in parallel.
void process_row_backward(const matrix_type &coeff_ref, const Right &right, Solution &solution, index_type i, index_type end_row, const scalar_type &prev_sol_coeff) const
Process a row in the backward update.
auto residual_rate() const noexcept -> scalar_type
Get the rate of the last residual.
Class of tags of logs without memory management.
Class to incorporate logging in algorithms.
logging_mixin(log_tag_view tag)
Constructor.
auto logger() const noexcept -> const num_collect::logging::logger &
Access to the logger.
Definition of dense_vector_of concept.
Definition of exceptions.
Definition of index_type type.
Definition of iterative_solver_base class.
Definition of log_tag_view class.
Definition of macros for logging.
#define NUM_COLLECT_LOG_TRACE(LOGGER,...)
Write a trace log.
#define NUM_COLLECT_LOG_AND_THROW(EXCEPTION_TYPE,...)
Write an error log and throw an exception for an error.
Definition of logging_mixin class.
std::ptrdiff_t index_type
Type of indices in this library.
Definition index_type.h:33
Namespace of solvers of linear equations.
constexpr auto parallel_symmetric_successive_over_relaxation_tag
Log tag of parallel_symmetric_successive_over_relaxation.
Definition of NUM_COLLECT_PRECONDITION macro.
#define NUM_COLLECT_PRECONDITION(CONDITION,...)
Check whether a precondition is satisfied and throw an exception if not.
Definition of real_scalar concept.
Definition of sparse_matrix concept.