From ff125ae3dfeecf71683f9d8ad7fb6f960a1df349 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Wed, 18 Feb 2026 21:35:49 +0100 Subject: [PATCH 01/20] extract --- .../directSolverGive.h | 16 +- .../directSolverTake.h | 16 +- include/DirectSolver/directSolver.h | 1 + .../LinearAlgebra/Solvers/coo_mumps_solver.h | 224 ++++++++++++++++++ src/CMakeLists.txt | 10 +- .../applySymmetryShift.cpp | 17 +- .../buildSolverMatrix.cpp | 2 +- .../directSolverGive.cpp | 10 +- .../initializeMumps.cpp | 125 ---------- .../applySymmetryShift.cpp | 17 +- .../directSolverTake.cpp | 10 +- .../initializeMumps.cpp | 125 ---------- 12 files changed, 246 insertions(+), 327 deletions(-) create mode 100644 include/LinearAlgebra/Solvers/coo_mumps_solver.h delete mode 100644 src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp delete mode 100644 src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h index 4417e578..8bd3e86f 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h @@ -12,14 +12,12 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - ~DirectSolver_COO_MUMPS_Give() override; // Note: The rhs (right-hand side) vector gets overwritten during the solution process. void solveInPlace(Vector solution) override; private: - // Solver matrix and MUMPS solver structure - SparseMatrixCOO solver_matrix_; - DMUMPS_STRUC_C mumps_solver_; + // MUMPS solver structure with the solver matrix initialized in the constructor. 
+ CooMumpsSolver mumps_solver_; // clang-format off const Stencil stencil_interior_ = { @@ -54,10 +52,6 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); void buildSolverMatrixRadialSection(const int i_theta, SparseMatrixCOO& solver_matrix); - // Initializes the MUMPS solver with the specified matrix. - // Converts to 1-based indexing. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Adjusts the right-hand side vector for symmetry corrections. // This modifies the system from // A * solution = rhs @@ -69,12 +63,6 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver void applySymmetryShiftInnerBoundary(Vector x) const; void applySymmetryShiftOuterBoundary(Vector x) const; - // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - void solveWithMumps(Vector solution); - - // Finalizes the MUMPS solver, releasing any allocated resources. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); - // Returns the total number of non-zero elements in the solver matrix. int getNonZeroCountSolverMatrix() const; diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h index 55ae6067..1d494bdf 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h @@ -12,14 +12,12 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - ~DirectSolver_COO_MUMPS_Take() override; // Note: The rhs (right-hand side) vector gets overwritten during the solution process. 
void solveInPlace(Vector solution) override; private: - // Solver matrix and MUMPS solver structure - SparseMatrixCOO solver_matrix_; - DMUMPS_STRUC_C mumps_solver_; + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver mumps_solver_; // clang-format off const Stencil stencil_interior_ = { @@ -54,10 +52,6 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); void buildSolverMatrixRadialSection(const int i_theta, SparseMatrixCOO& solver_matrix); - // Initializes the MUMPS solver with the specified matrix. - // Converts to 1-based indexing. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Adjusts the right-hand side vector for symmetry corrections. // This modifies the system from // A * solution = rhs @@ -69,12 +63,6 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver void applySymmetryShiftInnerBoundary(Vector x) const; void applySymmetryShiftOuterBoundary(Vector x) const; - // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - void solveWithMumps(Vector solution); - - // Finalizes the MUMPS solver, releasing any allocated resources. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); - // Returns the total number of non-zero elements in the solver matrix. 
int getNonZeroCountSolverMatrix() const; diff --git a/include/DirectSolver/directSolver.h b/include/DirectSolver/directSolver.h index 252e03d3..f0edd0cf 100644 --- a/include/DirectSolver/directSolver.h +++ b/include/DirectSolver/directSolver.h @@ -17,6 +17,7 @@ class Level; #include "../LinearAlgebra/Matrix/coo_matrix.h" #include "../LinearAlgebra/Matrix/csr_matrix.h" #include "../LinearAlgebra/Solvers/csr_lu_solver.h" +#include "../LinearAlgebra/Solvers/coo_mumps_solver.h" #include "../Stencil/stencil.h" #ifdef GMGPOLAR_USE_MUMPS diff --git a/include/LinearAlgebra/Solvers/coo_mumps_solver.h b/include/LinearAlgebra/Solvers/coo_mumps_solver.h new file mode 100644 index 00000000..12bd356e --- /dev/null +++ b/include/LinearAlgebra/Solvers/coo_mumps_solver.h @@ -0,0 +1,224 @@ +#pragma once + +#ifdef GMGPOLAR_USE_MUMPS + + #include + #include + + #include "dmumps_c.h" + + #include "../../LinearAlgebra/Matrix/coo_matrix.h" + #include "../../LinearAlgebra/Vector/vector.h" + +/* + * Wraps MUMPS for solving sparse linear systems given in COO format. + * For general matrices, all non-zero entries must be provided. + * For symmetric matrices (is_symmetric = true), only the upper or lower + * triangular entries should be provided, and the matrix is assumed to be + * positive definite. In GMGPolar this holds true since the domain mapping is invertible. + */ +class CooMumpsSolver +{ +public: + explicit CooMumpsSolver(SparseMatrixCOO matrix) + : matrix_(std::move(matrix)) + { + initialize(); + } + + ~CooMumpsSolver() + { + finalize(); + } + + // rhs is overwritten in-place with the solution on return. 
+ void solve(Vector& rhs) + { + assert(std::ssize(rhs) == mumps_solver_.n); + + mumps_solver_.job = JOB_COMPUTE_SOLUTION; + mumps_solver_.nrhs = 1; + mumps_solver_.lrhs = mumps_solver_.n; // leading dimension: must equal n for dense centralized RHS + mumps_solver_.rhs = rhs.data(); // in: RHS, out: solution (overwritten in-place) + + dmumps_c(&mumps_solver_); + + if (INFOG(1) != 0) { + throw std::runtime_error("MUMPS reported an error during solution phase " + "(INFOG(1) = " + + std::to_string(INFOG(1)) + ")."); + } + } + +private: + void initialize() + { + assert(matrix_.rows() == matrix_.columns()); + + /* + * MUMPS uses 1-based indexing; our COO matrix uses 0-based indexing. + * Adjust row and column indices to match MUMPS' requirements. + */ + for (int i = 0; i < matrix_.non_zero_size(); i++) { + matrix_.row_index(i) += 1; + matrix_.col_index(i) += 1; + } + + mumps_solver_.job = JOB_INIT; + mumps_solver_.par = PAR_PARALLEL; + mumps_solver_.sym = matrix_.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC; + mumps_solver_.comm_fortran = USE_COMM_WORLD; + dmumps_c(&mumps_solver_); + + configureICNTL(); + configureCNTL(); + + mumps_solver_.job = JOB_ANALYSIS_AND_FACTORIZATION; + mumps_solver_.n = matrix_.rows(); + mumps_solver_.nz = matrix_.non_zero_size(); + mumps_solver_.irn = matrix_.row_indices_data(); + mumps_solver_.jcn = matrix_.column_indices_data(); + mumps_solver_.a = matrix_.values_data(); + dmumps_c(&mumps_solver_); + + if (INFOG(1) != 0) { + throw std::runtime_error("MUMPS reported an error during analysis/factorization " + "(INFOG(1) = " + + std::to_string(INFOG(1)) + ")."); + } + + if (mumps_solver_.sym == SYM_POSITIVE_DEFINITE && INFOG(12) != 0) { + throw std::runtime_error("Matrix declared positive definite, " + "but negative pivots were encountered during factorization " + "(INFOG(12) = " + + std::to_string(INFOG(12)) + ")."); + } + } + + void finalize() + { + mumps_solver_.job = JOB_END; + dmumps_c(&mumps_solver_); + } + + void 
configureICNTL() + { + // All ICNTL values are left at their defaults unless noted below. + // ICNTL(1) = 0: suppress error message output + // ICNTL(3) = 0: suppress global information output + // ICNTL(6) = 7: automatically choose permutation/scaling strategy + // ICNTL(7) = 5: use METIS for fill-reducing ordering + // ICNTL(48) = 0: disable tree parallelism (conflicts with OpenMP in newer MUMPS releases) + + ICNTL(1) = 0; // Output stream for error messages + ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process + ICNTL(3) = 0; // Output stream for global information, collected on the host + ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages + ICNTL(5) = 0; // Controls the matrix input format + ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scales the matrix + ICNTL(7) = 5; // Symmetric permutation (ordering) to determine pivot order for sequential analysis + ICNTL(8) = 77; // Scaling strategy + ICNTL(9) = 1; // Computes the solution using A or A^T + ICNTL(10) = 0; // Iterative refinement steps applied to the computed solution + ICNTL(11) = 0; // Error analysis statistics + ICNTL(12) = 0; // Ordering strategy for symmetric matrices + ICNTL(13) = 0; // Controls the parallelism of the root node + ICNTL(14) = matrix_.is_symmetric() ? 
5 : 20; // Percentage increase in estimated working space + ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + // ICNTL(17) does not exist + ICNTL(18) = 0; // Strategy for the distributed input matrix + ICNTL(19) = 0; // Computes the Schur complement matrix + ICNTL(20) = 0; // Format of the right-hand sides (dense, sparse, or distributed) + ICNTL(21) = 0; // Distribution of the solution vectors (centralized or distributed) + ICNTL(22) = 0; // In-core/out-of-core (OOC) factorization and solve + ICNTL(23) = 0; // Maximum working memory in MegaBytes per working process + ICNTL(24) = 0; // Detection of null pivot rows + ICNTL(25) = 0; // Solution of a deficient matrix and null space basis computation + ICNTL(26) = 0; // Solution phase when Schur complement has been computed + ICNTL(27) = -32; // Blocking size for multiple right-hand sides + ICNTL(28) = 0; // Sequential or parallel computation of the ordering + ICNTL(29) = 0; // Parallel ordering tool when ICNTL(28)=1 + ICNTL(30) = 0; // User-specified entries in the inverse A^-1 + ICNTL(31) = 0; // Which factors may be discarded during factorization + ICNTL(32) = 0; // Forward elimination of the right-hand sides during factorization + ICNTL(33) = 0; // Computes the determinant of the input matrix + ICNTL(34) = 0; // Conservation of OOC files during JOB=-3 + ICNTL(35) = 0; // Activation of the BLR feature + ICNTL(36) = 0; // Choice of BLR factorization variant + ICNTL(37) = 0; // BLR compression of the contribution blocks + ICNTL(38) = 600; // Estimated compression rate of LU factors + ICNTL(39) = 500; // Estimated compression rate of contribution blocks + // ICNTL(40-47) do not exist + ICNTL(48) = 0; // Multithreading with tree parallelism + ICNTL(49) = 0; // Compact workarray id%S at end of factorization phase + // ICNTL(50-55) do not exist + ICNTL(56) = 0; // Detects pseudo-singularities; rank-revealing 
factorization of root node + // ICNTL(57) does not exist + ICNTL(58) = 2; // Options for symbolic factorization + // ICNTL(59-60) do not exist + } + + void configureCNTL() + { + // All CNTL values are left at their defaults unless noted below. + + CNTL(1) = -1.0; // Relative threshold for numerical pivoting + CNTL(2) = -1.0; // Stopping criterion for iterative refinement + CNTL(3) = 0.0; // Threshold for null pivot row detection + CNTL(4) = -1.0; // Threshold for static pivoting + CNTL(5) = 0.0; // Fixation for null pivots (effective only when null pivot detection is active) + // CNTL(6) does not exist + CNTL(7) = 0.0; // Dropping parameter precision for BLR compression + // CNTL(8-15) do not exist + } + +private: + SparseMatrixCOO matrix_; + DMUMPS_STRUC_C mumps_solver_ = {}; + + /* ------------------------------------------------ */ + /* MUMPS uses 1-based indexing in the documentation */ + /* ------------------------------------------------ */ + int& ICNTL(int i) + { + return mumps_solver_.icntl[i - 1]; + } + double& CNTL(int i) + { + return mumps_solver_.cntl[i - 1]; + } + int& INFOG(int i) + { + return mumps_solver_.infog[i - 1]; + } + + /* ----------------------------------- */ + /* MUMPS jobs and constant definitions */ + /* ----------------------------------- */ + static constexpr int USE_COMM_WORLD = -987654; + static constexpr int PAR_NOT_PARALLEL = 0; + static constexpr int PAR_PARALLEL = 1; + + static constexpr int JOB_INIT = -1; + static constexpr int JOB_END = -2; + static constexpr int JOB_REMOVE_SAVED_DATA = -3; + static constexpr int JOB_FREE_INTERNAL_DATA = -4; + static constexpr int JOB_SUPPRESS_OOC_FILES = -200; + + static constexpr int JOB_ANALYSIS_PHASE = 1; + static constexpr int JOB_FACTORIZATION_PHASE = 2; + static constexpr int JOB_COMPUTE_SOLUTION = 3; + static constexpr int JOB_ANALYSIS_AND_FACTORIZATION = 4; + static constexpr int JOB_FACTORIZATION_AND_SOLUTION = 5; + static constexpr int JOB_ANALYSIS_FACTORIZATION_SOLUTION = 6; + 
static constexpr int JOB_SAVE_INTERNAL_DATA = 7; + static constexpr int JOB_RESTORE_INTERNAL_DATA = 8; + static constexpr int JOB_DISTRIBUTE_RHS = 9; + + static constexpr int SYM_UNSYMMETRIC = 0; + static constexpr int SYM_POSITIVE_DEFINITE = 1; + static constexpr int SYM_GENERAL_SYMMETRIC = 2; +}; + +#endif // GMGPOLAR_USE_MUMPS \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0b00e56f..c1b9967c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -66,26 +66,24 @@ set(DIRECT_SOLVER_SOURCES # Main DirectSolver files ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/directSolver.cpp - # DirectSolverGive + # DirectSolver-COO-MUMPS-Give ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Give/matrixStencil.cpp - # DirectSolverGiveCustomLU + # DirectSolver-CSR-LU-Give ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-CSR-LU-Give/buildSolverMatrix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-CSR-LU-Give/directSolverGive.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-CSR-LU-Give/matrixStencil.cpp - # DirectSolverTake + # DirectSolver-COO-MUMPS-Take ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-COO-MUMPS-Take/matrixStencil.cpp - # DirectSolverTakeCustomLU + # 
DirectSolver-CSR-LU-Take ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-CSR-LU-Take/directSolverTake.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DirectSolver/DirectSolver-CSR-LU-Take/matrixStencil.cpp diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.cpp index b1dc034f..c91a8129 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/applySymmetryShift.cpp @@ -125,20 +125,11 @@ void DirectSolver_COO_MUMPS_Give::applySymmetryShift(Vector x) const assert(std::ssize(x) == grid_.numberOfNodes()); assert(grid_.nr() >= 4); - #pragma omp parallel sections num_threads(num_omp_threads_) - { - #pragma omp section - { - if (DirBC_Interior_) { - applySymmetryShiftInnerBoundary(x); - } - } - - #pragma omp section - { - applySymmetryShiftOuterBoundary(x); - } + if (DirBC_Interior_) { + applySymmetryShiftInnerBoundary(x); } + + applySymmetryShiftOuterBoundary(x); } #endif diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.cpp index 7cd0d75a..240804ad 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/buildSolverMatrix.cpp @@ -920,7 +920,7 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Give::buildSolverMatrix() if (row <= col) { symmetric_solver_matrix.row_index(current_nz) = row; symmetric_solver_matrix.col_index(current_nz) = col; - symmetric_solver_matrix.value(current_nz) = std::move(solver_matrix.value(nz_index)); + symmetric_solver_matrix.value(current_nz) = solver_matrix.value(nz_index); current_nz++; } } diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp index 
0a3e025c..509ba004 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp @@ -7,9 +7,8 @@ DirectSolver_COO_MUMPS_Give::DirectSolver_COO_MUMPS_Give(const PolarGrid& grid, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , mumps_solver_(buildSolverMatrix()) { - solver_matrix_ = buildSolverMatrix(); - initializeMumpsSolver(mumps_solver_, solver_matrix_); } void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector solution) @@ -21,12 +20,7 @@ void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector solution) // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - solveWithMumps(solution); -} - -DirectSolver_COO_MUMPS_Give::~DirectSolver_COO_MUMPS_Give() -{ - finalizeMumpsSolver(mumps_solver_); + mumps_solver_.solve(solution); } #endif diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp deleted file mode 100644 index 2ebf5530..00000000 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/initializeMumps.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "../../../include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h" - -#ifdef GMGPOLAR_USE_MUMPS - -void DirectSolver_COO_MUMPS_Give::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. 
- */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 
5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. 
- ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - 
- if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout - << "Warning: DirectSolver matrix is not positive definite: Negative pivots in the factorization phase." - << std::endl; - } -} - -void DirectSolver_COO_MUMPS_Give::solveWithMumps(Vector result_rhs) -{ - mumps_solver_.job = JOB_COMPUTE_SOLUTION; - mumps_solver_.nrhs = 1; - mumps_solver_.nz_rhs = result_rhs.size(); - mumps_solver_.rhs = result_rhs.data(); - mumps_solver_.lrhs = result_rhs.size(); - dmumps_c(&mumps_solver_); - if (mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the direct system: " << mumps_solver_.info[0] << std::endl; - } -} - -void DirectSolver_COO_MUMPS_Give::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.cpp index 2b7d8e6e..c81352e0 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/applySymmetryShift.cpp @@ -83,20 +83,11 @@ void DirectSolver_COO_MUMPS_Take::applySymmetryShift(Vector x) const assert(std::ssize(x) == grid_.numberOfNodes()); assert(grid_.nr() >= 4); - #pragma omp parallel sections num_threads(num_omp_threads_) - { - #pragma omp section - { - if (DirBC_Interior_) { - applySymmetryShiftInnerBoundary(x); - } - } - - #pragma omp section - { - applySymmetryShiftOuterBoundary(x); - } + if (DirBC_Interior_) { + applySymmetryShiftInnerBoundary(x); } + + applySymmetryShiftOuterBoundary(x); } #endif diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp index acf8a942..3408fbb7 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp @@ -7,9 +7,8 @@ 
DirectSolver_COO_MUMPS_Take::DirectSolver_COO_MUMPS_Take(const PolarGrid& grid, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , mumps_solver_(buildSolverMatrix()) { - solver_matrix_ = buildSolverMatrix(); - initializeMumpsSolver(mumps_solver_, solver_matrix_); } void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector solution) @@ -21,12 +20,7 @@ void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector solution) // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - solveWithMumps(solution); -} - -DirectSolver_COO_MUMPS_Take::~DirectSolver_COO_MUMPS_Take() -{ - finalizeMumpsSolver(mumps_solver_); + mumps_solver_.solve(solution); } #endif diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp deleted file mode 100644 index c19743d7..00000000 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/initializeMumps.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "../../../include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h" - -#ifdef GMGPOLAR_USE_MUMPS - -void DirectSolver_COO_MUMPS_Take::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. 
*/ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 
5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. 
- ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - 
- if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout - << "Warning: DirectSolver matrix is not positive definite: Negative pivots in the factorization phase." - << std::endl; - } -} - -void DirectSolver_COO_MUMPS_Take::solveWithMumps(Vector result_rhs) -{ - mumps_solver_.job = JOB_COMPUTE_SOLUTION; - mumps_solver_.nrhs = 1; - mumps_solver_.nz_rhs = result_rhs.size(); - mumps_solver_.rhs = result_rhs.data(); - mumps_solver_.lrhs = result_rhs.size(); - dmumps_c(&mumps_solver_); - if (mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the direct system: " << mumps_solver_.info[0] << std::endl; - } -} - -void DirectSolver_COO_MUMPS_Take::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif From 8bb865c5834e4412c2c5c5f4451ba5ca9a70dc01 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Wed, 18 Feb 2026 21:39:21 +0100 Subject: [PATCH 02/20] ... --- .../DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp index 42566e19..f0886fc1 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp @@ -547,7 +547,7 @@ SparseMatrixCOO DirectSolver_COO_MUMPS_Take::buildSolverMatrix() if (row <= col) { symmetric_solver_matrix.row_index(current_nz) = row; symmetric_solver_matrix.col_index(current_nz) = col; - symmetric_solver_matrix.value(current_nz) = std::move(solver_matrix.value(nz_index)); + symmetric_solver_matrix.value(current_nz) = solver_matrix.value(nz_index); current_nz++; } } From 83f0815e293e05881f316871a384d7d5d12e69ef Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Thu, 19 Feb 2026 00:28:32 +0100 Subject: [PATCH 03/20] 
Reorder MUMPS solver structure declaration Moved MUMPS solver structure declaration below stencils for proper initialization. --- .../DirectSolver-COO-MUMPS-Give/directSolverGive.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h index 8bd3e86f..7d532bd5 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h @@ -16,9 +16,6 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver void solveInPlace(Vector solution) override; private: - // MUMPS solver structure with the solver matrix initialized in the constructor. - CooMumpsSolver mumps_solver_; - // clang-format off const Stencil stencil_interior_ = { 7, 4, 8, @@ -47,6 +44,10 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver }; // clang-format on + // MUMPS solver structure with the solver matrix initialized in the constructor. + // Defined below stencils to ensure that the solver matrix is built after the stencils are defined. + CooMumpsSolver mumps_solver_; + // Constructs a symmetric solver matrix. SparseMatrixCOO buildSolverMatrix(); void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); @@ -77,4 +78,4 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver double detDF, double coeff_beta); }; -#endif \ No newline at end of file +#endif From 489375ce1fe5cefcfbac88592a25a8086e3cb461 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Thu, 19 Feb 2026 00:28:49 +0100 Subject: [PATCH 04/20] Reorganize MUMPS solver structure declaration Reordered MUMPS solver structure declaration to follow stencils. 
--- .../DirectSolver-COO-MUMPS-Take/directSolverTake.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h index 1d494bdf..0151fe61 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h @@ -16,9 +16,6 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver void solveInPlace(Vector solution) override; private: - // MUMPS solver structure with the solver matrix initialized in the constructor. - CooMumpsSolver mumps_solver_; - // clang-format off const Stencil stencil_interior_ = { 7, 4, 8, @@ -47,6 +44,10 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver }; // clang-format on + // MUMPS solver structure with the solver matrix initialized in the constructor. + // Defined below stencils to ensure that the solver matrix is built after the stencils are defined. + CooMumpsSolver mumps_solver_; + // Constructs a symmetric solver matrix. 
SparseMatrixCOO buildSolverMatrix(); void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); @@ -78,4 +79,4 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver ConstVector& coeff_beta); }; -#endif \ No newline at end of file +#endif From ff962852fb183c867dc99073d8fd1a925b2f8481 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Fri, 20 Feb 2026 00:01:26 +0100 Subject: [PATCH 05/20] Implement CooMumpsSolver for sparse matrix solving --- .../Solvers/coo_mumps_solver.cpp | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 src/LinearAlgebra/Solvers/coo_mumps_solver.cpp diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp new file mode 100644 index 00000000..b3ac63a3 --- /dev/null +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -0,0 +1,157 @@ +#include "../../../include/LinearAlgebra/Solvers/coo_mumps_solver.h" + +#ifdef GMGPOLAR_USE_MUMPS + + #include + #include + #include + +CooMumpsSolver::CooMumpsSolver(SparseMatrixCOO matrix) + : matrix_(std::move(matrix)) +{ + initialize(); +} + +CooMumpsSolver::~CooMumpsSolver() +{ + finalize(); +} + +void CooMumpsSolver::solve(Vector& rhs) +{ + assert(std::ssize(rhs) == mumps_solver_.n); + + mumps_solver_.job = JOB_COMPUTE_SOLUTION; + mumps_solver_.nrhs = 1; + mumps_solver_.lrhs = mumps_solver_.n; // leading dimension: must equal n for dense centralized RHS + mumps_solver_.rhs = rhs.data(); // in: RHS, out: solution (overwritten in-place) + dmumps_c(&mumps_solver_); + + if (INFOG(1) != 0) { + std::cerr << "MUMPS reported an error during solution phase " + << "(INFOG(1) = " << INFOG(1) << ").\n"; + } +} + +void CooMumpsSolver::initialize() +{ + assert(matrix_.rows() == matrix_.columns()); + + /* + * MUMPS uses 1-based indexing; our COO matrix uses 0-based indexing. + * Adjust row and column indices to match MUMPS' requirements. 
+ */ + for (int i = 0; i < matrix_.non_zero_size(); i++) { + matrix_.row_index(i) += 1; + matrix_.col_index(i) += 1; + } + + mumps_solver_.job = JOB_INIT; + mumps_solver_.par = PAR_PARALLEL; + mumps_solver_.sym = matrix_.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC; + mumps_solver_.comm_fortran = USE_COMM_WORLD; + dmumps_c(&mumps_solver_); + + configureICNTL(); + configureCNTL(); + + mumps_solver_.job = JOB_ANALYSIS_AND_FACTORIZATION; + mumps_solver_.n = matrix_.rows(); + mumps_solver_.nz = matrix_.non_zero_size(); + mumps_solver_.irn = matrix_.row_indices_data(); + mumps_solver_.jcn = matrix_.column_indices_data(); + mumps_solver_.a = matrix_.values_data(); + dmumps_c(&mumps_solver_); + + if (INFOG(1) != 0) { + std::cerr << "MUMPS reported an error during analysis/factorization " + << "(INFOG(1) = " << INFOG(1) << ").\n"; + return; + } + + if (mumps_solver_.sym == SYM_POSITIVE_DEFINITE && INFOG(12) != 0) { + std::cerr << "Matrix declared positive definite, " + << "but negative pivots were encountered during factorization " + << "(INFOG(12) = " << INFOG(12) << ").\n"; + } +} + +void CooMumpsSolver::finalize() +{ + mumps_solver_.job = JOB_END; + dmumps_c(&mumps_solver_); +} + +void CooMumpsSolver::configureICNTL() +{ + // All ICNTL values are left at their defaults unless noted below. 
+ // ICNTL(1) = 0: suppress error message output + // ICNTL(3) = 0: suppress global information output + // ICNTL(6) = 7: automatically choose permutation/scaling strategy + // ICNTL(7) = 5: use METIS for fill-reducing ordering + // ICNTL(48) = 0: disable tree parallelism (conflicts with OpenMP in newer MUMPS releases) + + ICNTL(1) = 0; // Output stream for error messages + ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process + ICNTL(3) = 0; // Output stream for global information, collected on the host + ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages + ICNTL(5) = 0; // Controls the matrix input format + ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scales the matrix + ICNTL(7) = 5; // Symmetric permutation (ordering) to determine pivot order for sequential analysis + ICNTL(8) = 77; // Scaling strategy + ICNTL(9) = 1; // Computes the solution using A or A^T + ICNTL(10) = 0; // Iterative refinement steps applied to the computed solution + ICNTL(11) = 0; // Error analysis statistics + ICNTL(12) = 0; // Ordering strategy for symmetric matrices + ICNTL(13) = 0; // Controls the parallelism of the root node + ICNTL(14) = matrix_.is_symmetric() ? 
5 : 20; // Percentage increase in estimated working space + ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format + ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads + // ICNTL(17) does not exist + ICNTL(18) = 0; // Strategy for the distributed input matrix + ICNTL(19) = 0; // Computes the Schur complement matrix + ICNTL(20) = 0; // Format of the right-hand sides (dense, sparse, or distributed) + ICNTL(21) = 0; // Distribution of the solution vectors (centralized or distributed) + ICNTL(22) = 0; // In-core/out-of-core (OOC) factorization and solve + ICNTL(23) = 0; // Maximum working memory in MegaBytes per working process + ICNTL(24) = 0; // Detection of null pivot rows + ICNTL(25) = 0; // Solution of a deficient matrix and null space basis computation + ICNTL(26) = 0; // Solution phase when Schur complement has been computed + ICNTL(27) = -32; // Blocking size for multiple right-hand sides + ICNTL(28) = 0; // Sequential or parallel computation of the ordering + ICNTL(29) = 0; // Parallel ordering tool when ICNTL(28)=1 + ICNTL(30) = 0; // User-specified entries in the inverse A^-1 + ICNTL(31) = 0; // Which factors may be discarded during factorization + ICNTL(32) = 0; // Forward elimination of the right-hand sides during factorization + ICNTL(33) = 0; // Computes the determinant of the input matrix + ICNTL(34) = 0; // Conservation of OOC files during JOB=-3 + ICNTL(35) = 0; // Activation of the BLR feature + ICNTL(36) = 0; // Choice of BLR factorization variant + ICNTL(37) = 0; // BLR compression of the contribution blocks + ICNTL(38) = 600; // Estimated compression rate of LU factors + ICNTL(39) = 500; // Estimated compression rate of contribution blocks + // ICNTL(40-47) do not exist + ICNTL(48) = 0; // Multithreading with tree parallelism + ICNTL(49) = 0; // Compact workarray id%S at end of factorization phase + // ICNTL(50-55) do not exist + ICNTL(56) = 0; // Detects pseudo-singularities; rank-revealing 
factorization of root node + // ICNTL(57) does not exist + ICNTL(58) = 2; // Options for symbolic factorization + // ICNTL(59-60) do not exist +} + +void CooMumpsSolver::configureCNTL() +{ + // All CNTL values are left at their defaults unless noted below. + + CNTL(1) = -1.0; // Relative threshold for numerical pivoting + CNTL(2) = -1.0; // Stopping criterion for iterative refinement + CNTL(3) = 0.0; // Threshold for null pivot row detection + CNTL(4) = -1.0; // Threshold for static pivoting + CNTL(5) = 0.0; // Fixation for null pivots (effective only when null pivot detection is active) + // CNTL(6) does not exist + CNTL(7) = 0.0; // Dropping parameter precision for BLR compression + // CNTL(8-15) do not exist +} + +#endif // GMGPOLAR_USE_MUMPS From f2b8165b5396a4c5e9e2c658d25d64a4a2cf37bf Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Fri, 20 Feb 2026 00:02:01 +0100 Subject: [PATCH 06/20] Refactor CooMumpsSolver class interface --- .../LinearAlgebra/Solvers/coo_mumps_solver.h | 168 ++---------------- 1 file changed, 11 insertions(+), 157 deletions(-) diff --git a/include/LinearAlgebra/Solvers/coo_mumps_solver.h b/include/LinearAlgebra/Solvers/coo_mumps_solver.h index 12bd356e..2521f8ff 100644 --- a/include/LinearAlgebra/Solvers/coo_mumps_solver.h +++ b/include/LinearAlgebra/Solvers/coo_mumps_solver.h @@ -2,11 +2,7 @@ #ifdef GMGPOLAR_USE_MUMPS - #include - #include - #include "dmumps_c.h" - #include "../../LinearAlgebra/Matrix/coo_matrix.h" #include "../../LinearAlgebra/Vector/vector.h" @@ -20,162 +16,17 @@ class CooMumpsSolver { public: - explicit CooMumpsSolver(SparseMatrixCOO matrix) - : matrix_(std::move(matrix)) - { - initialize(); - } - - ~CooMumpsSolver() - { - finalize(); - } + explicit CooMumpsSolver(SparseMatrixCOO matrix); + ~CooMumpsSolver(); // rhs is overwritten in-place with the solution on return. 
- void solve(Vector& rhs) - { - assert(std::ssize(rhs) == mumps_solver_.n); - - mumps_solver_.job = JOB_COMPUTE_SOLUTION; - mumps_solver_.nrhs = 1; - mumps_solver_.lrhs = mumps_solver_.n; // leading dimension: must equal n for dense centralized RHS - mumps_solver_.rhs = rhs.data(); // in: RHS, out: solution (overwritten in-place) - - dmumps_c(&mumps_solver_); - - if (INFOG(1) != 0) { - throw std::runtime_error("MUMPS reported an error during solution phase " - "(INFOG(1) = " + - std::to_string(INFOG(1)) + ")."); - } - } - -private: - void initialize() - { - assert(matrix_.rows() == matrix_.columns()); - - /* - * MUMPS uses 1-based indexing; our COO matrix uses 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < matrix_.non_zero_size(); i++) { - matrix_.row_index(i) += 1; - matrix_.col_index(i) += 1; - } - - mumps_solver_.job = JOB_INIT; - mumps_solver_.par = PAR_PARALLEL; - mumps_solver_.sym = matrix_.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC; - mumps_solver_.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver_); - - configureICNTL(); - configureCNTL(); - - mumps_solver_.job = JOB_ANALYSIS_AND_FACTORIZATION; - mumps_solver_.n = matrix_.rows(); - mumps_solver_.nz = matrix_.non_zero_size(); - mumps_solver_.irn = matrix_.row_indices_data(); - mumps_solver_.jcn = matrix_.column_indices_data(); - mumps_solver_.a = matrix_.values_data(); - dmumps_c(&mumps_solver_); - - if (INFOG(1) != 0) { - throw std::runtime_error("MUMPS reported an error during analysis/factorization " - "(INFOG(1) = " + - std::to_string(INFOG(1)) + ")."); - } - - if (mumps_solver_.sym == SYM_POSITIVE_DEFINITE && INFOG(12) != 0) { - throw std::runtime_error("Matrix declared positive definite, " - "but negative pivots were encountered during factorization " - "(INFOG(12) = " + - std::to_string(INFOG(12)) + ")."); - } - } - - void finalize() - { - mumps_solver_.job = JOB_END; - dmumps_c(&mumps_solver_); - } - - void 
configureICNTL() - { - // All ICNTL values are left at their defaults unless noted below. - // ICNTL(1) = 0: suppress error message output - // ICNTL(3) = 0: suppress global information output - // ICNTL(6) = 7: automatically choose permutation/scaling strategy - // ICNTL(7) = 5: use METIS for fill-reducing ordering - // ICNTL(48) = 0: disable tree parallelism (conflicts with OpenMP in newer MUMPS releases) - - ICNTL(1) = 0; // Output stream for error messages - ICNTL(2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process - ICNTL(3) = 0; // Output stream for global information, collected on the host - ICNTL(4) = 0; // Level of printing for error, warning, and diagnostic messages - ICNTL(5) = 0; // Controls the matrix input format - ICNTL(6) = 7; // Permutes the matrix to a zero-free diagonal and/or scales the matrix - ICNTL(7) = 5; // Symmetric permutation (ordering) to determine pivot order for sequential analysis - ICNTL(8) = 77; // Scaling strategy - ICNTL(9) = 1; // Computes the solution using A or A^T - ICNTL(10) = 0; // Iterative refinement steps applied to the computed solution - ICNTL(11) = 0; // Error analysis statistics - ICNTL(12) = 0; // Ordering strategy for symmetric matrices - ICNTL(13) = 0; // Controls the parallelism of the root node - ICNTL(14) = matrix_.is_symmetric() ? 
5 : 20; // Percentage increase in estimated working space - ICNTL(15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) does not exist - ICNTL(18) = 0; // Strategy for the distributed input matrix - ICNTL(19) = 0; // Computes the Schur complement matrix - ICNTL(20) = 0; // Format of the right-hand sides (dense, sparse, or distributed) - ICNTL(21) = 0; // Distribution of the solution vectors (centralized or distributed) - ICNTL(22) = 0; // In-core/out-of-core (OOC) factorization and solve - ICNTL(23) = 0; // Maximum working memory in MegaBytes per working process - ICNTL(24) = 0; // Detection of null pivot rows - ICNTL(25) = 0; // Solution of a deficient matrix and null space basis computation - ICNTL(26) = 0; // Solution phase when Schur complement has been computed - ICNTL(27) = -32; // Blocking size for multiple right-hand sides - ICNTL(28) = 0; // Sequential or parallel computation of the ordering - ICNTL(29) = 0; // Parallel ordering tool when ICNTL(28)=1 - ICNTL(30) = 0; // User-specified entries in the inverse A^-1 - ICNTL(31) = 0; // Which factors may be discarded during factorization - ICNTL(32) = 0; // Forward elimination of the right-hand sides during factorization - ICNTL(33) = 0; // Computes the determinant of the input matrix - ICNTL(34) = 0; // Conservation of OOC files during JOB=-3 - ICNTL(35) = 0; // Activation of the BLR feature - ICNTL(36) = 0; // Choice of BLR factorization variant - ICNTL(37) = 0; // BLR compression of the contribution blocks - ICNTL(38) = 600; // Estimated compression rate of LU factors - ICNTL(39) = 500; // Estimated compression rate of contribution blocks - // ICNTL(40-47) do not exist - ICNTL(48) = 0; // Multithreading with tree parallelism - ICNTL(49) = 0; // Compact workarray id%S at end of factorization phase - // ICNTL(50-55) do not exist - ICNTL(56) = 0; // Detects pseudo-singularities; rank-revealing 
factorization of root node - // ICNTL(57) does not exist - ICNTL(58) = 2; // Options for symbolic factorization - // ICNTL(59-60) do not exist - } - - void configureCNTL() - { - // All CNTL values are left at their defaults unless noted below. - - CNTL(1) = -1.0; // Relative threshold for numerical pivoting - CNTL(2) = -1.0; // Stopping criterion for iterative refinement - CNTL(3) = 0.0; // Threshold for null pivot row detection - CNTL(4) = -1.0; // Threshold for static pivoting - CNTL(5) = 0.0; // Fixation for null pivots (effective only when null pivot detection is active) - // CNTL(6) does not exist - CNTL(7) = 0.0; // Dropping parameter precision for BLR compression - // CNTL(8-15) do not exist - } + void solve(Vector& rhs); private: - SparseMatrixCOO matrix_; - DMUMPS_STRUC_C mumps_solver_ = {}; + void initialize(); + void finalize(); + void configureICNTL(); + void configureCNTL(); /* ------------------------------------------------ */ /* MUMPS uses 1-based indexing in the documentation */ @@ -219,6 +70,9 @@ class CooMumpsSolver static constexpr int SYM_UNSYMMETRIC = 0; static constexpr int SYM_POSITIVE_DEFINITE = 1; static constexpr int SYM_GENERAL_SYMMETRIC = 2; + + SparseMatrixCOO matrix_; + DMUMPS_STRUC_C mumps_solver_ = {}; }; -#endif // GMGPOLAR_USE_MUMPS \ No newline at end of file +#endif // GMGPOLAR_USE_MUMPS From a47f84b0fdcbe188b071ae610e24777fab969db0 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Fri, 20 Feb 2026 00:02:31 +0100 Subject: [PATCH 07/20] Add linear algebra sources to CMakeLists --- src/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1b9967c..87b9c7fb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,13 @@ set(POLAR_GRID_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/PolarGrid/anisotropic_division.cpp ) +# Gather all source files +# file(GLOB_RECURSE LINEAR_ALGEBRA_SOURCES +# 
${CMAKE_CURRENT_SOURCE_DIR}/LinearAlgebra/Solvers/*.cpp) +set(LINEAR_ALGEBRA_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/LinearAlgebra/Solvers/coo_mumps_solver.cpp +) + # file(GLOB_RECURSE GMG_POLAR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/GMGPolar/*.cpp) set(GMG_POLAR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/GMGPolar/gmgpolar.cpp @@ -166,6 +173,7 @@ set(CONFIG_PARSER_SOURCES # Create the main library add_library(GMGPolarLib STATIC ${POLAR_GRID_SOURCES} + ${LINEAR_ALGEBRA_SOURCES} ${GMG_POLAR_SOURCES} ${MULTIGRID_METHODS_SOURCES} ${LEVEL_SOURCES} From 88eb7924e9b9db2c2b227a05014ac85d054a44cd Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sun, 22 Feb 2026 00:05:08 +0100 Subject: [PATCH 08/20] Expand COO_MUMPS_Solver --- .../directSolverGive.h | 8 +- .../directSolverTake.h | 8 +- .../extrapolatedSmootherGive.h | 17 +-- .../extrapolatedSmootherTake.h | 17 +-- .../extrapolatedSmoother.h | 1 + include/Smoother/SmootherGive/smootherGive.h | 17 +-- include/Smoother/SmootherTake/smootherTake.h | 17 +-- include/Smoother/smoother.h | 1 + src/CMakeLists.txt | 4 - .../directSolverGive.cpp | 5 +- .../directSolverTake.cpp | 5 +- .../buildAscMatrices.cpp | 3 - .../extrapolatedSmootherGive.cpp | 13 +- .../initializeMumps.cpp | 112 ------------------ .../solveAscSystem.cpp | 28 ++--- .../buildAscMatrices.cpp | 3 - .../extrapolatedSmootherTake.cpp | 13 +- .../initializeMumps.cpp | 112 ------------------ .../solveAscSystem.cpp | 28 ++--- src/Smoother/SmootherGive/buildMatrix.cpp | 3 - src/Smoother/SmootherGive/initializeMumps.cpp | 111 ----------------- src/Smoother/SmootherGive/smootherGive.cpp | 13 +- src/Smoother/SmootherGive/solveAscSystem.cpp | 28 ++--- src/Smoother/SmootherTake/buildMatrix.cpp | 3 - src/Smoother/SmootherTake/initializeMumps.cpp | 111 ----------------- src/Smoother/SmootherTake/smootherTake.cpp | 13 +- src/Smoother/SmootherTake/solveAscSystem.cpp | 28 ++--- tests/CMakeLists.txt | 1 + .../Solvers/coo_mumps_solver.cpp | 111 +++++++++++++++++ 29 files changed, 192 insertions(+), 
642 deletions(-) delete mode 100644 src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp delete mode 100644 src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp delete mode 100644 src/Smoother/SmootherGive/initializeMumps.cpp delete mode 100644 src/Smoother/SmootherTake/initializeMumps.cpp create mode 100644 tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h index 7d532bd5..4fbeda06 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h @@ -16,6 +16,10 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver void solveInPlace(Vector solution) override; private: + // MUMPS solver structure with the solver matrix initialized in the constructor. + // std::optional is used because CooMumpsSolver cannot be default-constructed. + std::optional mumps_solver_; + // clang-format off const Stencil stencil_interior_ = { 7, 4, 8, @@ -44,10 +48,6 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver }; // clang-format on - // MUMPS solver structure with the solver matrix initialized in the constructor. - // Defined below stencils to ensure that the solver matrix is built after the stencils are defined. - CooMumpsSolver mumps_solver_; - // Constructs a symmetric solver matrix. 
SparseMatrixCOO buildSolverMatrix(); void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h index 0151fe61..fbfa6cd2 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h @@ -16,6 +16,10 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver void solveInPlace(Vector solution) override; private: + // MUMPS solver structure with the solver matrix initialized in the constructor. + // std::optional is used because CooMumpsSolver cannot be default-constructed. + std::optional mumps_solver_; + // clang-format off const Stencil stencil_interior_ = { 7, 4, 8, @@ -44,10 +48,6 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver }; // clang-format on - // MUMPS solver structure with the solver matrix initialized in the constructor. - // Defined below stencils to ensure that the solver matrix is built after the stencils are defined. - CooMumpsSolver mumps_solver_; - // Constructs a symmetric solver matrix. SparseMatrixCOO buildSolverMatrix(); void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h index 9d664335..6d753945 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h @@ -61,9 +61,6 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. 
- ~ExtrapolatedSmootherGive() override; - // Performs one full coupled extrapolated smoothing sweep: // BC -> WC -> BR -> WR // Parallel implementation using OpenMP: @@ -89,7 +86,9 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother // When using the in-house solver, the matrix is stored in CSR format. #ifdef GMGPOLAR_USE_MUMPS using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; + // MUMPS solver structure with the solver matrix initialized in the constructor. + // std::optional is used because CooMumpsSolver cannot be default-constructed. + std::optional inner_boundary_mumps_solver_; #else using MatrixType = SparseMatrixCSR; SparseLUSolver inner_boundary_lu_solver_; @@ -179,14 +178,4 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. 
- void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h index b400e7d4..b94863ec 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h @@ -61,9 +61,6 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. - ~ExtrapolatedSmootherTake() override; - // Performs one full coupled extrapolated smoothing sweep: // BC -> WC -> BR -> WR // using temp as RHS workspace. @@ -87,7 +84,9 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother // When using the in-house solver, the matrix is stored in CSR format. #ifdef GMGPOLAR_USE_MUMPS using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; + // MUMPS solver structure with the solver matrix initialized in the constructor. + // std::optional is used because CooMumpsSolver cannot be default-constructed. + std::optional inner_boundary_mumps_solver_; #else using MatrixType = SparseMatrixCSR; SparseLUSolver inner_boundary_lu_solver_; @@ -176,14 +175,4 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. 
- void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; diff --git a/include/ExtrapolatedSmoother/extrapolatedSmoother.h b/include/ExtrapolatedSmoother/extrapolatedSmoother.h index e6c4f5eb..544d1c4c 100644 --- a/include/ExtrapolatedSmoother/extrapolatedSmoother.h +++ b/include/ExtrapolatedSmoother/extrapolatedSmoother.h @@ -18,6 +18,7 @@ class Level; #include "../LinearAlgebra/Matrix/coo_matrix.h" #include "../LinearAlgebra/Matrix/csr_matrix.h" #include "../LinearAlgebra/Solvers/csr_lu_solver.h" +#include "../LinearAlgebra/Solvers/coo_mumps_solver.h" #include "../Stencil/stencil.h" #ifdef GMGPOLAR_USE_MUMPS diff --git a/include/Smoother/SmootherGive/smootherGive.h b/include/Smoother/SmootherGive/smootherGive.h index 0d656c6d..c413ba52 100644 --- a/include/Smoother/SmootherGive/smootherGive.h +++ b/include/Smoother/SmootherGive/smootherGive.h @@ -53,9 +53,6 @@ class SmootherGive : public Smoother const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. - ~SmootherGive() override; - // Performs one full coupled smoothing sweep: // BC -> WC -> BR -> WR // Parallel implementation using OpenMP: @@ -81,7 +78,9 @@ class SmootherGive : public Smoother // When using the in-house solver, the matrix is stored in CSR format. #ifdef GMGPOLAR_USE_MUMPS using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; + // MUMPS solver structure with the solver matrix initialized in the constructor. + // std::optional is used because CooMumpsSolver cannot be default-constructed. 
+ std::optional inner_boundary_mumps_solver_; #else using MatrixType = SparseMatrixCSR; SparseLUSolver inner_boundary_lu_solver_; @@ -172,14 +171,4 @@ class SmootherGive : public Smoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 20107bdc..1f5c7179 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -53,9 +53,6 @@ class SmootherTake : public Smoother const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads); - // If MUMPS is enabled, this cleans up the inner boundary solver. - ~SmootherTake() override; - // Performs one full coupled smoothing sweep: // BC -> WC -> BR -> WR // using temp as RHS workspace. @@ -79,7 +76,9 @@ class SmootherTake : public Smoother // When using the in-house solver, the matrix is stored in CSR format. #ifdef GMGPOLAR_USE_MUMPS using MatrixType = SparseMatrixCOO; - DMUMPS_STRUC_C inner_boundary_mumps_solver_; + // MUMPS solver structure with the solver matrix initialized in the constructor. + // std::optional is used because CooMumpsSolver cannot be default-constructed. 
+ std::optional inner_boundary_mumps_solver_; #else using MatrixType = SparseMatrixCSR; SparseLUSolver inner_boundary_lu_solver_; @@ -168,14 +167,4 @@ class SmootherTake : public Smoother void solveWhiteCircleSection(Vector x, Vector temp); void solveBlackRadialSection(Vector x, Vector temp); void solveWhiteRadialSection(Vector x, Vector temp); - - /* ----------------------------------- */ - /* Initialize and destroy MUMPS solver */ - /* ----------------------------------- */ -#ifdef GMGPOLAR_USE_MUMPS - // Initialize sparse MUMPS solver with assembled COO matrix. - void initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix); - // Release MUMPS internal memory and MPI structures. - void finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver); -#endif }; diff --git a/include/Smoother/smoother.h b/include/Smoother/smoother.h index ba2fb9ed..2d47de74 100644 --- a/include/Smoother/smoother.h +++ b/include/Smoother/smoother.h @@ -18,6 +18,7 @@ class Level; #include "../LinearAlgebra/Matrix/coo_matrix.h" #include "../LinearAlgebra/Matrix/csr_matrix.h" #include "../LinearAlgebra/Solvers/csr_lu_solver.h" +#include "../LinearAlgebra/Solvers/coo_mumps_solver.h" #include "../Stencil/stencil.h" #ifdef GMGPOLAR_USE_MUMPS diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87b9c7fb..429f0a8d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -124,7 +124,6 @@ set(SMOOTHER_SOURCES # SmootherGive ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/applyAscOrtho.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/buildMatrix.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/matrixStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/smootherGive.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/solveAscSystem.cpp @@ -132,7 +131,6 @@ set(SMOOTHER_SOURCES # SmootherTake ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/applyAscOrtho.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/buildMatrix.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/matrixStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/smootherTake.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/solveAscSystem.cpp @@ -150,7 +148,6 @@ set(EXTRAPOLATED_SMOOTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp @@ -158,7 +155,6 @@ set(EXTRAPOLATED_SMOOTHER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/smootherStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp ) diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp index 509ba004..d4f1c97b 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp @@ -7,8 +7,9 @@ DirectSolver_COO_MUMPS_Give::DirectSolver_COO_MUMPS_Give(const 
PolarGrid& grid, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) - , mumps_solver_(buildSolverMatrix()) { + SparseMatrixCOO solver_matrix = buildSolverMatrix(); + mumps_solver_.emplace(std::move(solver_matrix)); } void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector solution) @@ -20,7 +21,7 @@ void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector solution) // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. - mumps_solver_.solve(solution); + mumps_solver_->solve(solution); } #endif diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp index 3408fbb7..71543f0a 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp @@ -7,8 +7,9 @@ DirectSolver_COO_MUMPS_Take::DirectSolver_COO_MUMPS_Take(const PolarGrid& grid, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) - , mumps_solver_(buildSolverMatrix()) { + SparseMatrixCOO solver_matrix = buildSolverMatrix(); + mumps_solver_.emplace(std::move(solver_matrix)); } void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector solution) @@ -20,7 +21,7 @@ void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector solution) // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. 
- mumps_solver_.solve(solution); + mumps_solver_->solve(solution); } #endif diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp index ccd0cf46..983b5ec6 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp @@ -1332,9 +1332,6 @@ void ExtrapolatedSmootherGive::buildAscMatrices() } } - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - #ifdef GMGPOLAR_USE_MUMPS /* ------------------------------------------------------------------- */ /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */ diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp index c3c50019..1a34f678 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp @@ -10,20 +10,17 @@ ExtrapolatedSmootherGive::ExtrapolatedSmootherGive(const PolarGrid& grid, const , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) { buildAscMatrices(); + + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); + #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif } -ExtrapolatedSmootherGive::~ExtrapolatedSmootherGive() -{ -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif -} - // The smoothing solves linear systems of the form: // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho // where: diff --git 
a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp deleted file mode 100644 index 69889eaa..00000000 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/initializeMumps.cpp +++ /dev/null @@ -1,112 +0,0 @@ -#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h" - -#ifdef GMGPOLAR_USE_MUMPS - -void ExtrapolatedSmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. 
- ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. 
- ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. 
- ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - 
- if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: ExtrapolatedSmoother inner boundary matrix is not positive definite: Negative pivots in " - "the factorization phase." - << std::endl; - } -} - -void ExtrapolatedSmootherGive::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp index 68df770e..a438d610 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp @@ -18,18 +18,12 @@ void ExtrapolatedSmootherGive::solveBlackCircleSection(Vector x, Vector< int batch_stride = 2; circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride); + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } @@ -61,18 +55,12 @@ void ExtrapolatedSmootherGive::solveWhiteCircleSection(Vector x, Vector< int batch_stride = 2; circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride); + Vector 
inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp index cfb1fa24..7636df8a 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp @@ -690,9 +690,6 @@ void ExtrapolatedSmootherTake::buildAscMatrices() } } - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - #ifdef GMGPOLAR_USE_MUMPS /* ------------------------------------------------------------------- */ /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */ diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp index 0c822f5c..525d7007 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp @@ -10,20 +10,17 @@ ExtrapolatedSmootherTake::ExtrapolatedSmootherTake(const PolarGrid& grid, const , 
radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) { buildAscMatrices(); + + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); + #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif } -ExtrapolatedSmootherTake::~ExtrapolatedSmootherTake() -{ -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif -} - // The smoothing solves linear systems of the form: // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho // where: diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp deleted file mode 100644 index 4ce13919..00000000 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/initializeMumps.cpp +++ /dev/null @@ -1,112 +0,0 @@ -#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h" - -#ifdef GMGPOLAR_USE_MUMPS - -void ExtrapolatedSmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, - SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? 
SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 
5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. 
- ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - 
- if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: ExtrapolatedSmoother inner boundary matrix is not positive definite: Negative pivots in " - "the factorization phase." - << std::endl; - } -} - -void ExtrapolatedSmootherTake::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp index cd0ed6bd..d8b49742 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp @@ -18,18 +18,12 @@ void ExtrapolatedSmootherTake::solveBlackCircleSection(Vector x, Vector< int batch_stride = 2; circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride); + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } @@ -61,18 +55,12 @@ void ExtrapolatedSmootherTake::solveWhiteCircleSection(Vector x, Vector< int batch_stride = 2; circle_tridiagonal_solver_.solve_diagonal(circle_section, batch_offset, batch_stride); + Vector 
inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } diff --git a/src/Smoother/SmootherGive/buildMatrix.cpp b/src/Smoother/SmootherGive/buildMatrix.cpp index d563beaf..8ac7e859 100644 --- a/src/Smoother/SmootherGive/buildMatrix.cpp +++ b/src/Smoother/SmootherGive/buildMatrix.cpp @@ -750,9 +750,6 @@ void SmootherGive::buildAscMatrices() } } - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - #ifdef GMGPOLAR_USE_MUMPS /* ------------------------------------------------------------------ */ /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ diff --git a/src/Smoother/SmootherGive/initializeMumps.cpp b/src/Smoother/SmootherGive/initializeMumps.cpp deleted file mode 100644 index 7b96b1c4..00000000 --- a/src/Smoother/SmootherGive/initializeMumps.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "../../../include/Smoother/SmootherGive/smootherGive.h" - -#ifdef GMGPOLAR_USE_MUMPS - -void SmootherGive::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. 
- * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. - ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 
5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. - ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. 
- ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - 
- if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: Smoother inner boundary matrix is not positive definite: Negative pivots in the " - "factorization phase." - << std::endl; - } -} - -void SmootherGive::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/src/Smoother/SmootherGive/smootherGive.cpp b/src/Smoother/SmootherGive/smootherGive.cpp index db458ad0..7b3eeb7a 100644 --- a/src/Smoother/SmootherGive/smootherGive.cpp +++ b/src/Smoother/SmootherGive/smootherGive.cpp @@ -8,20 +8,17 @@ SmootherGive::SmootherGive(const PolarGrid& grid, const LevelCache& level_cache, , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) { buildAscMatrices(); + + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); + #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif } -SmootherGive::~SmootherGive() -{ -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif -} - // The smoothing solves linear systems of the form: // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho // where: diff --git a/src/Smoother/SmootherGive/solveAscSystem.cpp b/src/Smoother/SmootherGive/solveAscSystem.cpp index b84b9e80..ef2a779e 100644 --- a/src/Smoother/SmootherGive/solveAscSystem.cpp +++ b/src/Smoother/SmootherGive/solveAscSystem.cpp @@ -13,18 +13,12 @@ void SmootherGive::solveBlackCircleSection(Vector x, Vector temp circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_black) { + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = 
JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } @@ -51,18 +45,12 @@ void SmootherGive::solveWhiteCircleSection(Vector x, Vector temp circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_white) { + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } diff --git a/src/Smoother/SmootherTake/buildMatrix.cpp b/src/Smoother/SmootherTake/buildMatrix.cpp index 4a50bcac..703b939e 100644 --- a/src/Smoother/SmootherTake/buildMatrix.cpp +++ b/src/Smoother/SmootherTake/buildMatrix.cpp @@ -436,9 +436,6 @@ void 
SmootherTake::buildAscMatrices() } } - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - #ifdef GMGPOLAR_USE_MUMPS /* ------------------------------------------------------------------ */ /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ diff --git a/src/Smoother/SmootherTake/initializeMumps.cpp b/src/Smoother/SmootherTake/initializeMumps.cpp deleted file mode 100644 index d9bbbdb2..00000000 --- a/src/Smoother/SmootherTake/initializeMumps.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "../../../include/Smoother/SmootherTake/smootherTake.h" - -#ifdef GMGPOLAR_USE_MUMPS - -void SmootherTake::initializeMumpsSolver(DMUMPS_STRUC_C& mumps_solver, SparseMatrixCOO& solver_matrix) -{ - /* - * MUMPS (a parallel direct solver) uses 1-based indexing, - * whereas the input matrix follows 0-based indexing. - * Adjust row and column indices to match MUMPS' requirements. - */ - for (int i = 0; i < solver_matrix.non_zero_size(); i++) { - solver_matrix.row_index(i) += 1; - solver_matrix.col_index(i) += 1; - } - - mumps_solver.job = JOB_INIT; - mumps_solver.par = PAR_PARALLEL; - /* The matrix is positive definite for invertible mappings. */ - /* Therefore we use SYM_POSITIVE_DEFINITE instead of SYM_GENERAL_SYMMETRIC. */ - mumps_solver.sym = (solver_matrix.is_symmetric() ? SYM_POSITIVE_DEFINITE : SYM_UNSYMMETRIC); - mumps_solver.comm_fortran = USE_COMM_WORLD; - dmumps_c(&mumps_solver); - - ICNTL(mumps_solver, 1) = 0; // Output stream for error messages. - ICNTL(mumps_solver, 2) = 0; // Output stream for diagnostic printing and statistics local to each MPI process. - ICNTL(mumps_solver, 3) = 0; // Output stream for global information, collected on the host - ICNTL(mumps_solver, 4) = 0; // Level of printing for error, warning, and diagnostic messages. 
- ICNTL(mumps_solver, 5) = 0; // Controls the matrix input format - ICNTL(mumps_solver, 6) = 7; // Permutes the matrix to a zero-free diagonal and/or scale the matrix - ICNTL(mumps_solver, 7) = - 5; // Computes a symmetric permutation (ordering) to determine the pivot order to be used for the factorization in case of sequential analysis - ICNTL(mumps_solver, 8) = 77; // Describes the scaling strategy - ICNTL(mumps_solver, 9) = 1; // Computes the solution using A or A^T - ICNTL(mumps_solver, 10) = 0; // Applies the iterative refinement to the computed solution - ICNTL(mumps_solver, 11) = 0; // Computes statistics related to an error analysis of the linear system solved - ICNTL(mumps_solver, 12) = 0; // Defines an ordering strategy for symmetric matrices and is used - ICNTL(mumps_solver, 13) = 0; // Controls the parallelism of the root node - ICNTL(mumps_solver, 14) = // Controls the percentage increase in the estimated working space - (solver_matrix.is_symmetric() ? 5 : 20); - ICNTL(mumps_solver, 15) = 0; // Exploits compression of the input matrix resulting from a block format - ICNTL(mumps_solver, 16) = 0; // Controls the setting of the number of OpenMP threads - // ICNTL(17) Doesn't exist - ICNTL(mumps_solver, 18) = 0; // Defines the strategy for the distributed input matrix - ICNTL(mumps_solver, 19) = 0; // Computes the Schur complement matrix - ICNTL(mumps_solver, 20) = 0; // Determines the format (dense, sparse, or distributed) of the right-hand sides - ICNTL(mumps_solver, 21) = 0; // Determines the distribution (centralized or distributed) of the solution vectors. - ICNTL(mumps_solver, 22) = 0; // Controls the in-core/out-of-core (OOC) factorization and solve. - ICNTL(mumps_solver, 23) = 0; // Corresponds to the maximum size of the working memory in MegaBytes that MUMPS can - // allocate per working process - ICNTL(mumps_solver, 24) = 0; // Controls the detection of “null pivot rows”. 
- ICNTL(mumps_solver, 25) = - 0; // Allows the computation of a solution of a deficient matrix and also of a null space basis - ICNTL(mumps_solver, 26) = 0; // Drives the solution phase if a Schur complement matrix has been computed - ICNTL(mumps_solver, 27) = -32; // Controls the blocking size for multiple right-hand sides. - ICNTL(mumps_solver, 28) = 0; // Determines whether a sequential or parallel computation of the ordering is performed - ICNTL(mumps_solver, 29) = - 0; // Defines the parallel ordering tool (when ICNTL(28)=1) to be used to compute the fill-in reducing permutation. - ICNTL(mumps_solver, 30) = 0; // Computes a user-specified set of entries in the inverse A^−1 of the original matrix - ICNTL(mumps_solver, 31) = 0; // Indicates which factors may be discarded during the factorization. - ICNTL(mumps_solver, 32) = 0; // Performs the forward elimination of the right-hand sides during the factorization - ICNTL(mumps_solver, 33) = 0; // Computes the determinant of the input matrix. 
- ICNTL(mumps_solver, 34) = 0; // Controls the conservation of the OOC files during JOB= –3 - ICNTL(mumps_solver, 35) = 0; // Controls the activation of the BLR feature - ICNTL(mumps_solver, 36) = 0; // Controls the choice of BLR factorization variant - ICNTL(mumps_solver, 37) = 0; // Controls the BLR compression of the contribution blocks - ICNTL(mumps_solver, 38) = 600; // Estimates compression rate of LU factors - ICNTL(mumps_solver, 39) = 500; // Estimates compression rate of contribution blocks - // ICNTL(40-47) Don't exist - ICNTL(mumps_solver, 48) = 0; // Multithreading with tree parallelism - ICNTL(mumps_solver, 49) = 0; // Compact workarray id%S at the end of factorization phase - // ICNTL(50-55) Don't exist - ICNTL(mumps_solver, 56) = - 0; // Detects pseudo-singularities during factorization and factorizes the root node with a rankrevealing method - // ICNTL(57) Doesn't exist - ICNTL(mumps_solver, 58) = 2; // Defines options for symbolic factorization - // ICNTL(59-60) Don't exist - - CNTL(mumps_solver, 1) = -1.0; // Relative threshold for numerical pivoting - CNTL(mumps_solver, 2) = -1.0; // Stopping criterion for iterative refinement - CNTL(mumps_solver, 3) = 0.0; // Determine null pivot rows - CNTL(mumps_solver, 4) = -1.0; // Determines the threshold for static pivoting - CNTL(mumps_solver, 5) = - 0.0; // Defines the fixation for null pivots and is effective only when null pivot row detection is active - // CNTL(6) Doesn't exist - CNTL(mumps_solver, 7) = 0.0; // Defines the precision of the dropping parameter used during BLR compression - // CNTL(8-15) Don't exist - - mumps_solver.job = JOB_ANALYSIS_AND_FACTORIZATION; - assert(solver_matrix.rows() == solver_matrix.columns()); - mumps_solver.n = solver_matrix.rows(); - mumps_solver.nz = solver_matrix.non_zero_size(); - mumps_solver.irn = solver_matrix.row_indices_data(); - mumps_solver.jcn = solver_matrix.column_indices_data(); - mumps_solver.a = solver_matrix.values_data(); - dmumps_c(&mumps_solver); - 
- if (mumps_solver.sym == SYM_POSITIVE_DEFINITE && INFOG(mumps_solver, 12) != 0) { - std::cout << "Warning: Smoother inner boundary matrix is not positive definite: Negative pivots in the " - "factorization phase." - << std::endl; - } -} - -void SmootherTake::finalizeMumpsSolver(DMUMPS_STRUC_C& mumps_solver) -{ - mumps_solver.job = JOB_END; - dmumps_c(&mumps_solver); -} - -#endif diff --git a/src/Smoother/SmootherTake/smootherTake.cpp b/src/Smoother/SmootherTake/smootherTake.cpp index 9bdff112..8ba590e6 100644 --- a/src/Smoother/SmootherTake/smootherTake.cpp +++ b/src/Smoother/SmootherTake/smootherTake.cpp @@ -8,20 +8,17 @@ SmootherTake::SmootherTake(const PolarGrid& grid, const LevelCache& level_cache, , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) { buildAscMatrices(); + + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); + #ifdef GMGPOLAR_USE_MUMPS - initializeMumpsSolver(inner_boundary_mumps_solver_, inner_boundary_circle_matrix_); + inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif } -SmootherTake::~SmootherTake() -{ -#ifdef GMGPOLAR_USE_MUMPS - finalizeMumpsSolver(inner_boundary_mumps_solver_); -#endif -} - // The smoothing solves linear systems of the form: // A_sc * u_sc = f_sc − A_sc^ortho * u_sc^ortho // where: diff --git a/src/Smoother/SmootherTake/solveAscSystem.cpp b/src/Smoother/SmootherTake/solveAscSystem.cpp index 9917ac83..64cd249c 100644 --- a/src/Smoother/SmootherTake/solveAscSystem.cpp +++ b/src/Smoother/SmootherTake/solveAscSystem.cpp @@ -13,18 +13,12 @@ void SmootherTake::solveBlackCircleSection(Vector x, Vector temp circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_black) { + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = 
JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } @@ -51,18 +45,12 @@ void SmootherTake::solveWhiteCircleSection(Vector x, Vector temp circle_tridiagonal_solver_.solve(circle_section, batch_offset, batch_stride); if (is_inner_circle_white) { + Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); + #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.job = JOB_COMPUTE_SOLUTION; - inner_boundary_mumps_solver_.nrhs = 1; // single rhs vector - inner_boundary_mumps_solver_.nz_rhs = grid_.ntheta(); // non-zeros in rhs - inner_boundary_mumps_solver_.rhs = circle_section.data(); - inner_boundary_mumps_solver_.lrhs = grid_.ntheta(); // leading dimension of rhs - dmumps_c(&inner_boundary_mumps_solver_); - if (inner_boundary_mumps_solver_.info[0] != 0) { - std::cerr << "Error solving the system: " << inner_boundary_mumps_solver_.info[0] << std::endl; - } + inner_boundary_mumps_solver_->solve(inner_boundary); #else - inner_boundary_lu_solver_.solveInPlace(circle_section.data()); + inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f5699a4e..f8192ee2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -12,6 +12,7 @@ add_executable(gmgpolar_tests LinearAlgebra/Matrix/coo_matrix.cpp LinearAlgebra/Matrix/csr_matrix.cpp 
LinearAlgebra/Solvers/csr_lu_solver.cpp + LinearAlgebra/Solvers/coo_mumps_solver.cpp LinearAlgebra/Solvers/tridiagonal_solver.cpp PolarGrid/polargrid.cpp Interpolation/prolongation.cpp diff --git a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp new file mode 100644 index 00000000..b320fd43 --- /dev/null +++ b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -0,0 +1,111 @@ +#pragma once +#ifdef GMGPOLAR_USE_MUMPS + + #include + #include "coo_mumps_solver.h" + +// ----------------------------------------------------------------------- +// Test 1: General (non-symmetric) 4x4 system +// +// Matrix A (non-symmetric): +// [ 4 1 0 0 ] +// [ 2 5 1 0 ] +// [ 0 1 6 2 ] +// [ 0 0 3 7 ] +// +// RHS b = [5, 8, 9, 10]^T +// +// Expected solution computed from A*x = b. +// ----------------------------------------------------------------------- +TEST(CooMumpsSolverTest, GeneralNonSymmetric4x4) +{ + using triplet = SparseMatrixCOO::triplet_type; + + // All non-zero entries (0-based row/col indices) + std::vector entries = {{0, 0, 4.0}, {0, 1, 1.0}, {1, 0, 2.0}, {1, 1, 5.0}, {1, 2, 1.0}, + {2, 1, 1.0}, {2, 2, 6.0}, {2, 3, 2.0}, {3, 2, 3.0}, {3, 3, 7.0}}; + + SparseMatrixCOO mat(4, 4, entries); + mat.is_symmetric(false); + + CooMumpsSolver solver(std::move(mat)); + + Vector rhs("rhs", 4); + rhs(0) = 5.0; + rhs(1) = 8.0; + rhs(2) = 9.0; + rhs(3) = 10.0; + + solver.solve(rhs); + + // Verify A*x = b by back-substitution check + // Reference solution (computed analytically / via numpy): + // x ~ [0.9526, 0.2105, 0.9298, 0.8319] + const double tol = 1e-10; + EXPECT_NEAR(rhs(0), 4.0 * 0.9526 + 1.0 * 0.2105, 1e-3); + + // More robust: re-multiply and check residual + std::vector x = {rhs(0), rhs(1), rhs(2), rhs(3)}; + + // A*x + double Ax0 = 4 * x[0] + 1 * x[1]; + double Ax1 = 2 * x[0] + 5 * x[1] + 1 * x[2]; + double Ax2 = 1 * x[1] + 6 * x[2] + 2 * x[3]; + double Ax3 = 3 * x[2] + 7 * x[3]; + + EXPECT_NEAR(Ax0, 5.0, tol); + 
EXPECT_NEAR(Ax1, 8.0, tol); + EXPECT_NEAR(Ax2, 9.0, tol); + EXPECT_NEAR(Ax3, 10.0, tol); +} + +// ----------------------------------------------------------------------- +// Test 2: Symmetric positive-definite 4x4 system (lower triangle only) +// +// Matrix A (SPD): +// [ 4 2 0 0 ] +// [ 2 5 1 0 ] +// [ 0 1 6 2 ] +// [ 0 0 2 7 ] +// +// Only lower triangular entries provided (including diagonal). +// RHS b = [6, 8, 9, 11]^T +// ----------------------------------------------------------------------- +TEST(CooMumpsSolverTest, SymmetricPositiveDefinite4x4) +{ + using triplet = SparseMatrixCOO::triplet_type; + + // Lower triangular entries only (0-based row/col indices) + std::vector entries = {{0, 0, 4.0}, {1, 0, 2.0}, {1, 1, 5.0}, {2, 1, 1.0}, + {2, 2, 6.0}, {3, 2, 2.0}, {3, 3, 7.0}}; + + SparseMatrixCOO mat(4, 4, entries); + mat.is_symmetric(true); // SPD, half-entries only + + CooMumpsSolver solver(std::move(mat)); + + Vector rhs("rhs", 4); + rhs(0) = 6.0; + rhs(1) = 8.0; + rhs(2) = 9.0; + rhs(3) = 11.0; + + solver.solve(rhs); + + // Verify residual: A*x = b using the full (symmetric) matrix + std::vector x = {rhs(0), rhs(1), rhs(2), rhs(3)}; + + const double tol = 1e-10; + + double Ax0 = 4 * x[0] + 2 * x[1]; + double Ax1 = 2 * x[0] + 5 * x[1] + 1 * x[2]; + double Ax2 = 1 * x[1] + 6 * x[2] + 2 * x[3]; + double Ax3 = 2 * x[2] + 7 * x[3]; + + EXPECT_NEAR(Ax0, 6.0, tol); + EXPECT_NEAR(Ax1, 8.0, tol); + EXPECT_NEAR(Ax2, 9.0, tol); + EXPECT_NEAR(Ax3, 11.0, tol); +} + +#endif // GMGPOLAR_USE_MUMPS \ No newline at end of file From 697740a9e63916467ea8065fca658020fe51ff68 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sun, 22 Feb 2026 00:09:05 +0100 Subject: [PATCH 09/20] Formatting --- src/LinearAlgebra/Solvers/coo_mumps_solver.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp index b3ac63a3..75aba2eb 100644 --- 
a/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/src/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -28,8 +28,7 @@ void CooMumpsSolver::solve(Vector& rhs) dmumps_c(&mumps_solver_); if (INFOG(1) != 0) { - std::cerr << "MUMPS reported an error during solution phase " - << "(INFOG(1) = " << INFOG(1) << ").\n"; + std::cerr << "MUMPS reported an error during solution phase " << "(INFOG(1) = " << INFOG(1) << ").\n"; } } @@ -64,15 +63,14 @@ void CooMumpsSolver::initialize() dmumps_c(&mumps_solver_); if (INFOG(1) != 0) { - std::cerr << "MUMPS reported an error during analysis/factorization " - << "(INFOG(1) = " << INFOG(1) << ").\n"; + std::cerr << "MUMPS reported an error during analysis/factorization " << "(INFOG(1) = " << INFOG(1) << ").\n"; return; } if (mumps_solver_.sym == SYM_POSITIVE_DEFINITE && INFOG(12) != 0) { std::cerr << "Matrix declared positive definite, " - << "but negative pivots were encountered during factorization " - << "(INFOG(12) = " << INFOG(12) << ").\n"; + << "but negative pivots were encountered during factorization " << "(INFOG(12) = " << INFOG(12) + << ").\n"; } } From 7421a1e88c6aadfb19c90946c571e3b0455debfd Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sun, 22 Feb 2026 00:38:11 +0100 Subject: [PATCH 10/20] Fix mumps solver test --- .../Solvers/coo_mumps_solver.cpp | 98 +++++++++---------- 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp index b320fd43..02cb0a52 100644 --- a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -2,28 +2,33 @@ #ifdef GMGPOLAR_USE_MUMPS #include - #include "coo_mumps_solver.h" + + #include "../../../include/LinearAlgebra/Vector/vector.h" + #include "../../../include/LinearAlgebra/Matrix/coo_matrix.h" + #include "../../../include/LinearAlgebra/Solvers/coo_mumps_solver.h" // 
----------------------------------------------------------------------- // Test 1: General (non-symmetric) 4x4 system // // Matrix A (non-symmetric): -// [ 4 1 0 0 ] -// [ 2 5 1 0 ] -// [ 0 1 6 2 ] -// [ 0 0 3 7 ] +// [ 1 0 2 0 ] +// [ 3 0 4 5 ] +// [ 0 6 7 0 ] +// [ 0 8 0 9 ] // -// RHS b = [5, 8, 9, 10]^T +// RHS b = [2, 4, 6, 8] // // Expected solution computed from A*x = b. +// +// x = [140.0 / 43.0, 149.0 / 86.0, -27.0/43.0, -28.0/43.0] // ----------------------------------------------------------------------- TEST(CooMumpsSolverTest, GeneralNonSymmetric4x4) { using triplet = SparseMatrixCOO::triplet_type; // All non-zero entries (0-based row/col indices) - std::vector entries = {{0, 0, 4.0}, {0, 1, 1.0}, {1, 0, 2.0}, {1, 1, 5.0}, {1, 2, 1.0}, - {2, 1, 1.0}, {2, 2, 6.0}, {2, 3, 2.0}, {3, 2, 3.0}, {3, 3, 7.0}}; + std::vector entries = {{0, 0, 1.0}, {0, 2, 2.0}, {1, 0, 3.0}, {1, 2, 4.0}, {1, 3, 5.0}, + {2, 1, 6.0}, {2, 2, 7.0}, {3, 1, 8.0}, {3, 3, 9.0}}; SparseMatrixCOO mat(4, 4, entries); mat.is_symmetric(false); @@ -31,42 +36,34 @@ TEST(CooMumpsSolverTest, GeneralNonSymmetric4x4) CooMumpsSolver solver(std::move(mat)); Vector rhs("rhs", 4); - rhs(0) = 5.0; - rhs(1) = 8.0; - rhs(2) = 9.0; - rhs(3) = 10.0; + rhs(0) = 2.0; + rhs(1) = 4.0; + rhs(2) = 6.0; + rhs(3) = 8.0; solver.solve(rhs); - // Verify A*x = b by back-substitution check - // Reference solution (computed analytically / via numpy): - // x ~ [0.9526, 0.2105, 0.9298, 0.8319] - const double tol = 1e-10; - EXPECT_NEAR(rhs(0), 4.0 * 0.9526 + 1.0 * 0.2105, 1e-3); - - // More robust: re-multiply and check residual - std::vector x = {rhs(0), rhs(1), rhs(2), rhs(3)}; + Vector solution("solution", 4); + solution(0) = 140.0 / 43.0; + solution(1) = 149.0 / 86.0; + solution(2) = -27.0 / 43.0; + solution(3) = -28.0 / 43.0; - // A*x - double Ax0 = 4 * x[0] + 1 * x[1]; - double Ax1 = 2 * x[0] + 5 * x[1] + 1 * x[2]; - double Ax2 = 1 * x[1] + 6 * x[2] + 2 * x[3]; - double Ax3 = 3 * x[2] + 7 * x[3]; - - EXPECT_NEAR(Ax0, 
5.0, tol); - EXPECT_NEAR(Ax1, 8.0, tol); - EXPECT_NEAR(Ax2, 9.0, tol); - EXPECT_NEAR(Ax3, 10.0, tol); + const double tol = 1e-10; + EXPECT_NEAR(rhs(0), solution(0), tol); + EXPECT_NEAR(rhs(1), solution(1), tol); + EXPECT_NEAR(rhs(2), solution(2), tol); + EXPECT_NEAR(rhs(3), solution(3), tol); } // ----------------------------------------------------------------------- // Test 2: Symmetric positive-definite 4x4 system (lower triangle only) // // Matrix A (SPD): -// [ 4 2 0 0 ] -// [ 2 5 1 0 ] -// [ 0 1 6 2 ] -// [ 0 0 2 7 ] +// [ 4 0 2 0 ] +// [ 0 5 1 3 ] +// [ 2 1 6 2 ] +// [ 0 3 2 7 ] // // Only lower triangular entries provided (including diagonal). // RHS b = [6, 8, 9, 11]^T @@ -76,8 +73,8 @@ TEST(CooMumpsSolverTest, SymmetricPositiveDefinite4x4) using triplet = SparseMatrixCOO::triplet_type; // Lower triangular entries only (0-based row/col indices) - std::vector entries = {{0, 0, 4.0}, {1, 0, 2.0}, {1, 1, 5.0}, {2, 1, 1.0}, - {2, 2, 6.0}, {3, 2, 2.0}, {3, 3, 7.0}}; + std::vector entries = {{0, 0, 4.0}, {1, 1, 5.0}, {2, 0, 2.0}, {2, 1, 1.0}, + {2, 2, 6.0}, {3, 1, 3.0}, {3, 2, 2.0}, {3, 3, 7.0}}; SparseMatrixCOO mat(4, 4, entries); mat.is_symmetric(true); // SPD, half-entries only @@ -85,27 +82,24 @@ TEST(CooMumpsSolverTest, SymmetricPositiveDefinite4x4) CooMumpsSolver solver(std::move(mat)); Vector rhs("rhs", 4); - rhs(0) = 6.0; - rhs(1) = 8.0; - rhs(2) = 9.0; - rhs(3) = 11.0; + rhs(0) = 2.0; + rhs(1) = 4.0; + rhs(2) = 6.0; + rhs(3) = 8.0; solver.solve(rhs); - // Verify residual: A*x = b using the full (symmetric) matrix - std::vector x = {rhs(0), rhs(1), rhs(2), rhs(3)}; + Vector solution("solution", 4); + solution(0) = 9.0 / 46.0; + solution(1) = 3.0 / 23.0; + solution(2) = 14.0 / 23.0; + solution(3) = 21.0 / 23.0; const double tol = 1e-10; - - double Ax0 = 4 * x[0] + 2 * x[1]; - double Ax1 = 2 * x[0] + 5 * x[1] + 1 * x[2]; - double Ax2 = 1 * x[1] + 6 * x[2] + 2 * x[3]; - double Ax3 = 2 * x[2] + 7 * x[3]; - - EXPECT_NEAR(Ax0, 6.0, tol); - EXPECT_NEAR(Ax1, 
8.0, tol); - EXPECT_NEAR(Ax2, 9.0, tol); - EXPECT_NEAR(Ax3, 11.0, tol); + EXPECT_NEAR(rhs(0), solution(0), tol); + EXPECT_NEAR(rhs(1), solution(1), tol); + EXPECT_NEAR(rhs(2), solution(2), tol); + EXPECT_NEAR(rhs(3), solution(3), tol); } #endif // GMGPOLAR_USE_MUMPS \ No newline at end of file From d93ccaf733440a906065e55e1ddcdb032dedf544 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Sun, 22 Feb 2026 17:01:42 +0100 Subject: [PATCH 11/20] Update coo_mumps_solver.cpp --- tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp index 02cb0a52..9ce60ab8 100644 --- a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -16,11 +16,7 @@ // [ 0 6 7 0 ] // [ 0 8 0 9 ] // -// RHS b = [2, 4, 6, 8] -// -// Expected solution computed from A*x = b. -// -// x = [140.0 / 43.0, 149.0 / 86.0, -27.0/43.0, -28.0/43.0] +// RHS b = [2, 4, 6, 8]^T // ----------------------------------------------------------------------- TEST(CooMumpsSolverTest, GeneralNonSymmetric4x4) { @@ -66,7 +62,7 @@ TEST(CooMumpsSolverTest, GeneralNonSymmetric4x4) // [ 0 3 2 7 ] // // Only lower triangular entries provided (including diagonal). 
-// RHS b = [6, 8, 9, 11]^T +// RHS b = [2, 4, 6, 8]^T // ----------------------------------------------------------------------- TEST(CooMumpsSolverTest, SymmetricPositiveDefinite4x4) { From 8f88e53a389f08ec5772ba87c8a45ea04f852310 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:42:58 +0100 Subject: [PATCH 12/20] Update smootherGive.cpp --- src/Smoother/SmootherGive/smootherGive.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Smoother/SmootherGive/smootherGive.cpp b/src/Smoother/SmootherGive/smootherGive.cpp index 7b3eeb7a..595aeee3 100644 --- a/src/Smoother/SmootherGive/smootherGive.cpp +++ b/src/Smoother/SmootherGive/smootherGive.cpp @@ -13,7 +13,7 @@ SmootherGive::SmootherGive(const PolarGrid& grid, const LevelCache& level_cache, radial_tridiagonal_solver_.setup(); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); + inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif From 6dcf5d09b8f02dc56d90598b9face1ec38b0bd40 Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:43:24 +0100 Subject: [PATCH 13/20] Update smootherTake.cpp --- src/Smoother/SmootherTake/smootherTake.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Smoother/SmootherTake/smootherTake.cpp b/src/Smoother/SmootherTake/smootherTake.cpp index 8ba590e6..76dc8b89 100644 --- a/src/Smoother/SmootherTake/smootherTake.cpp +++ b/src/Smoother/SmootherTake/smootherTake.cpp @@ -13,7 +13,7 @@ SmootherTake::SmootherTake(const PolarGrid& grid, const LevelCache& level_cache, radial_tridiagonal_solver_.setup(); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); + 
inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif From 7f623725f59a67bb5e4348632893f96d8401e4df Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:43:48 +0100 Subject: [PATCH 14/20] Update extrapolatedSmootherGive.cpp --- .../ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp index 1a34f678..f7a27781 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp @@ -15,7 +15,7 @@ ExtrapolatedSmootherGive::ExtrapolatedSmootherGive(const PolarGrid& grid, const radial_tridiagonal_solver_.setup(); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); + inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif From fdc909f3dd9aa61f010358eb929371a68d7c231e Mon Sep 17 00:00:00 2001 From: Julian Litz <91479202+julianlitz@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:44:11 +0100 Subject: [PATCH 15/20] Update extrapolatedSmootherTake.cpp --- .../ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp index 525d7007..dfa4b963 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp +++ 
b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp @@ -15,7 +15,7 @@ ExtrapolatedSmootherTake::ExtrapolatedSmootherTake(const PolarGrid& grid, const radial_tridiagonal_solver_.setup(); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(std::move(inner_boundary_circle_matrix_)); + inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); #else inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); #endif From 7509f20e2fcb24e156b408f55d6c571e0069a2ec Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 6 Mar 2026 21:08:02 +0100 Subject: [PATCH 16/20] pragma once in .cpp tests file removed --- tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp index 9ce60ab8..047fc565 100644 --- a/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp +++ b/tests/LinearAlgebra/Solvers/coo_mumps_solver.cpp @@ -1,4 +1,3 @@ -#pragma once #ifdef GMGPOLAR_USE_MUMPS #include From dd9eba42426d08460639c9ed9937459a2caed5be Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 6 Mar 2026 21:16:41 +0100 Subject: [PATCH 17/20] avoid std::optional in DirectSolver --- .../DirectSolver-COO-MUMPS-Take/directSolverTake.h | 8 +++++--- .../DirectSolver-COO-MUMPS-Take/directSolverTake.cpp | 5 ++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h index fbfa6cd2..05c255bc 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h @@ -16,9 +16,8 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver void solveInPlace(Vector solution) override; private: - // MUMPS solver structure with the solver matrix initialized in the constructor. 
- // std::optional is used because CooMumpsSolver cannot be default-constructed. - std::optional mumps_solver_; + // The stencil definitions must be defined before the declaration of the mumps_solver, + // since the mumps solver will be build in the member initializer. // clang-format off const Stencil stencil_interior_ = { @@ -48,6 +47,9 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver }; // clang-format on + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver mumps_solver_; + // Constructs a symmetric solver matrix. SparseMatrixCOO buildSolverMatrix(); void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp index 71543f0a..3408fbb7 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.cpp @@ -7,9 +7,8 @@ DirectSolver_COO_MUMPS_Take::DirectSolver_COO_MUMPS_Take(const PolarGrid& grid, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , mumps_solver_(buildSolverMatrix()) { - SparseMatrixCOO solver_matrix = buildSolverMatrix(); - mumps_solver_.emplace(std::move(solver_matrix)); } void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector solution) @@ -21,7 +20,7 @@ void DirectSolver_COO_MUMPS_Take::solveInPlace(Vector solution) // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. 
- mumps_solver_->solve(solution); + mumps_solver_.solve(solution); } #endif From ac690557523c1ce760ce0ea2ffa5c4bb937e873c Mon Sep 17 00:00:00 2001 From: julianlitz Date: Fri, 6 Mar 2026 21:18:26 +0100 Subject: [PATCH 18/20] avoid std::optional in DirectSolver --- .../DirectSolver-COO-MUMPS-Give/directSolverGive.h | 8 +++++--- .../DirectSolver-COO-MUMPS-Give/directSolverGive.cpp | 5 ++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h index 4fbeda06..62c38fc9 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h @@ -16,9 +16,8 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver void solveInPlace(Vector solution) override; private: - // MUMPS solver structure with the solver matrix initialized in the constructor. - // std::optional is used because CooMumpsSolver cannot be default-constructed. - std::optional mumps_solver_; + // The stencil definitions must be defined before the declaration of the mumps_solver, + // since the mumps solver will be build in the member initializer. // clang-format off const Stencil stencil_interior_ = { @@ -48,6 +47,9 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver }; // clang-format on + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver mumps_solver_; + // Constructs a symmetric solver matrix. 
SparseMatrixCOO buildSolverMatrix(); void buildSolverMatrixCircleSection(const int i_r, SparseMatrixCOO& solver_matrix); diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp index d4f1c97b..509ba004 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.cpp @@ -7,9 +7,8 @@ DirectSolver_COO_MUMPS_Give::DirectSolver_COO_MUMPS_Give(const PolarGrid& grid, const DensityProfileCoefficients& density_profile_coefficients, bool DirBC_Interior, int num_omp_threads) : DirectSolver(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) + , mumps_solver_(buildSolverMatrix()) { - SparseMatrixCOO solver_matrix = buildSolverMatrix(); - mumps_solver_.emplace(std::move(solver_matrix)); } void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector solution) @@ -21,7 +20,7 @@ void DirectSolver_COO_MUMPS_Give::solveInPlace(Vector solution) // ensuring that the solution at the boundary is correctly adjusted and maintains the required symmetry. applySymmetryShift(solution); // Solves the adjusted system symmetric(matrixA) * solution = rhs using the MUMPS solver. 
- mumps_solver_->solve(solution); + mumps_solver_.solve(solution); } #endif From 7582b610a2255125abfce9addaf843f9705905d8 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Sun, 8 Mar 2026 22:32:52 +0100 Subject: [PATCH 19/20] Remove std::optional und separate inner matrix construction --- include/Definitions/geometry_helper.h | 6 +- .../directSolverGive.h | 2 +- .../directSolverTake.h | 2 +- .../extrapolatedSmootherGive.h | 112 +++-- .../extrapolatedSmootherTake.h | 109 ++-- include/GMGPolar/build_rhs_f.h | 8 +- include/Smoother/SmootherGive/smootherGive.h | 115 +++-- include/Smoother/SmootherTake/smootherTake.h | 109 ++-- src/CMakeLists.txt | 14 +- .../buildSolverMatrix.cpp | 37 +- .../buildSolverMatrix.cpp | 14 +- .../applyAscOrtho.cpp | 282 ++++++++++- .../buildInnerBoundaryAsc.cpp | 327 ++++++++++++ ...scMatrices.cpp => buildTridiagonalAsc.cpp} | 348 +++---------- .../extrapolatedSmootherGive.cpp | 101 +--- .../solveAscSystem.cpp | 4 +- .../applyAscOrtho.cpp | 98 +++- .../buildInnerBoundaryAsc.cpp | 212 ++++++++ ...scMatrices.cpp => buildTridiagonalAsc.cpp} | 282 ++--------- .../extrapolatedSmootherTake.cpp | 99 ++-- .../solveAscSystem.cpp | 4 +- src/Smoother/SmootherGive/applyAscOrtho.cpp | 276 ++++++++++- .../SmootherGive/buildInnerBoundaryAsc.cpp | 300 +++++++++++ ...uildMatrix.cpp => buildTridiagonalAsc.cpp} | 341 +++---------- src/Smoother/SmootherGive/smootherGive.cpp | 100 +--- src/Smoother/SmootherGive/solveAscSystem.cpp | 4 +- src/Smoother/SmootherTake/applyAscOrtho.cpp | 150 ++++-- .../SmootherTake/buildInnerBoundaryAsc.cpp | 189 +++++++ src/Smoother/SmootherTake/buildMatrix.cpp | 466 ------------------ .../SmootherTake/buildTridiagonalAsc.cpp | 327 ++++++++++++ src/Smoother/SmootherTake/smootherTake.cpp | 67 +-- src/Smoother/SmootherTake/solveAscSystem.cpp | 4 +- 32 files changed, 2615 insertions(+), 1894 deletions(-) create mode 100644 src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp rename 
src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/{buildAscMatrices.cpp => buildTridiagonalAsc.cpp} (76%) create mode 100644 src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.cpp rename src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/{buildAscMatrices.cpp => buildTridiagonalAsc.cpp} (64%) create mode 100644 src/Smoother/SmootherGive/buildInnerBoundaryAsc.cpp rename src/Smoother/SmootherGive/{buildMatrix.cpp => buildTridiagonalAsc.cpp} (59%) create mode 100644 src/Smoother/SmootherTake/buildInnerBoundaryAsc.cpp delete mode 100644 src/Smoother/SmootherTake/buildMatrix.cpp create mode 100644 src/Smoother/SmootherTake/buildTridiagonalAsc.cpp diff --git a/include/Definitions/geometry_helper.h b/include/Definitions/geometry_helper.h index d9b2f7f1..2ebdbc49 100644 --- a/include/Definitions/geometry_helper.h +++ b/include/Definitions/geometry_helper.h @@ -23,9 +23,9 @@ inline void compute_jacobian_elements(const DomainGeometry& domain_geometry, dou /* which is represented by: */ /* [arr, 0.5*art] */ /* [0.5*atr, att] */ - arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / fabs(detDF); - att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / fabs(detDF); - art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / fabs(detDF); + arr = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_alpha / std::fabs(detDF); + att = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_alpha / std::fabs(detDF); + art = (-Jtt * Jtr - Jrt * Jrr) * coeff_alpha / std::fabs(detDF); /* Note that the inverse Jacobian matrix DF^{-1} is: */ /* 1.0 / det(DF) * */ /* [Jtt, -Jrt] */ diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h index 62c38fc9..e523bc14 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Give/directSolverGive.h @@ -17,7 +17,7 @@ class DirectSolver_COO_MUMPS_Give : public DirectSolver private: // The stencil definitions 
must be defined before the declaration of the mumps_solver, - // since the mumps solver will be build in the member initializer. + // since the mumps solver will be built in the member initializer of the DirectSolver class. // clang-format off const Stencil stencil_interior_ = { diff --git a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h index 05c255bc..6ab04d16 100644 --- a/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h +++ b/include/DirectSolver/DirectSolver-COO-MUMPS-Take/directSolverTake.h @@ -17,7 +17,7 @@ class DirectSolver_COO_MUMPS_Take : public DirectSolver private: // The stencil definitions must be defined before the declaration of the mumps_solver, - // since the mumps solver will be build in the member initializer. + // since the mumps solver will be built in the member initializer of the DirectSolver class. // clang-format off const Stencil stencil_interior_ = { diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h index 6d753945..cc6ced0e 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h @@ -69,42 +69,13 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother void extrapolatedSmoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. - BatchedTridiagonalSolver circle_tridiagonal_solver_; - - // Batched solver for tridiagonal radial circle line A_sc matrices.
- BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - // MUMPS solver structure with the solver matrix initialized in the constructor. - // std::optional is used because CooMumpsSolver cannot be default-constructed. - std::optional inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be built in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. @@ -126,6 +97,45 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother }; // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices.
+ BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it potentially includes across-origin coupling. Therefore, it is assembled + // into a sparse matrix and solved using a general-purpose sparse solver. + // When using the MUMPS solver, the matrix is assembled in COO format. + // When using the in-house solver, the matrix is stored in CSR format. + +#ifdef GMGPOLAR_USE_MUMPS + // When using the MUMPS solver, the matrix is assembled in COO format. + using MatrixType = SparseMatrixCOO; + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver inner_boundary_mumps_solver_; +#else + // When using the in-house solver, the matrix is stored in CSR format. + using MatrixType = SparseMatrixCSR; + // Sparse matrix for the non-tridiagonal inner boundary circle block. + MatrixType inner_boundary_circle_matrix_; + // LU solver for the inner boundary circle block. + SparseLUSolver inner_boundary_lu_solver_; +#endif + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -137,19 +147,25 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. 
- void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. - void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, - double art, double detDF, double coeff_beta); + void buildTridiagonalSolverMatrices(); + void buildTridiagonalCircleSection(int i_r); + void buildTridiagonalRadialSection(int i_theta); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, + double att, double art, double detDF, double coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + MatrixType buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. 
+ void nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, double arr, double att, double art, + double detDF, double coeff_beta); + void nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, double arr, double att, double art, + double detDF, double coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -157,10 +173,10 @@ class ExtrapolatedSmootherGive : public ExtrapolatedSmoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp); + void applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ /* Line-wise solvers */ diff --git a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h index b94863ec..c9845824 100644 --- a/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h +++ b/include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h @@ -67,42 +67,13 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother void extrapolatedSmoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. 
- BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - // MUMPS solver structure with the solver matrix initialized in the constructor. - // std::optional is used because CooMumpsSolver cannot be default-constructed. - std::optional inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be built in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices.
@@ -124,6 +95,45 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother }; // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it potentially includes across-origin coupling. Therefore, it is assembled + // into a sparse matrix and solved using a general-purpose sparse solver. + // When using the MUMPS solver, the matrix is assembled in COO format. + // When using the in-house solver, the matrix is stored in CSR format. + +#ifdef GMGPOLAR_USE_MUMPS + // When using the MUMPS solver, the matrix is assembled in COO format. + using MatrixType = SparseMatrixCOO; + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver inner_boundary_mumps_solver_; +#else + // When using the in-house solver, the matrix is stored in CSR format. + using MatrixType = SparseMatrixCSR; + // Sparse matrix for the non-tridiagonal inner boundary circle block. + MatrixType inner_boundary_circle_matrix_; + // LU solver for the inner boundary circle block. + SparseLUSolver inner_boundary_lu_solver_; +#endif + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r, int i_theta) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. 
@@ -135,20 +145,23 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. - void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. - void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, - ConstVector& att, ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); + void buildTridiagonalSolverMatrices(); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, + ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + MatrixType buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. 
+ void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -156,8 +169,10 @@ class ExtrapolatedSmootherTake : public ExtrapolatedSmoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ /* Line-wise solvers */ diff --git a/include/GMGPolar/build_rhs_f.h b/include/GMGPolar/build_rhs_f.h index c37e2aa8..40292cff 100644 --- a/include/GMGPolar/build_rhs_f.h +++ b/include/GMGPolar/build_rhs_f.h @@ -25,7 +25,7 @@ void GMGPolar::discretize_rhs_f(cons double k1 = grid.angularSpacing(i_theta - 1); double k2 = grid.angularSpacing(i_theta); const double detDF = detDF_cache[grid.index(i_r, i_theta)]; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; @@ -50,7 +50,7 @@ void GMGPolar::discretize_rhs_f(cons double k1 = grid.angularSpacing(i_theta - 1); double k2 = grid.angularSpacing(i_theta); const double detDF = detDF_cache[grid.index(i_r, i_theta)]; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, 
i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; @@ -91,7 +91,7 @@ void GMGPolar::discretize_rhs_f(cons double Jtt = domain_geometry_.dFy_dt(r, theta); /* Compute the determinant of the Jacobian matrix */ double detDF = Jrr * Jtt - Jrt * Jtr; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; @@ -126,7 +126,7 @@ void GMGPolar::discretize_rhs_f(cons double Jtt = domain_geometry_.dFy_dt(r, theta); /* Compute the determinant of the Jacobian matrix */ double detDF = Jrr * Jtt - Jrt * Jtr; - rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * fabs(detDF); + rhs_f[grid.index(i_r, i_theta)] *= 0.25 * (h1 + h2) * (k1 + k2) * std::fabs(detDF); } else if (i_r == 0 && DirBC_Interior_) { rhs_f[grid.index(i_r, i_theta)] *= 1.0; diff --git a/include/Smoother/SmootherGive/smootherGive.h b/include/Smoother/SmootherGive/smootherGive.h index c413ba52..f6ae1faf 100644 --- a/include/Smoother/SmootherGive/smootherGive.h +++ b/include/Smoother/SmootherGive/smootherGive.h @@ -61,42 +61,13 @@ class SmootherGive : public Smoother void smoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. - BatchedTridiagonalSolver circle_tridiagonal_solver_; - - // Batched solver for tridiagonal radial circle line A_sc matrices. - BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. 
- // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - // MUMPS solver structure with the solver matrix initialized in the constructor. - // std::optional is used because CooMumpsSolver cannot be default-constructed. - std::optional inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the mumps solver will be built in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. @@ -113,12 +84,50 @@ class SmootherGive : public Smoother }; const Stencil circle_stencil_across_origin_ = { -1, 3, -1, - 1, 0, -1, + 1, 0, -1, -1, 2, -1 }; - // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. + BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead.
+ // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it potentially includes across-origin coupling. Therefore, it is assembled + // into a sparse matrix and solved using a general-purpose sparse solver. + // When using the MUMPS solver, the matrix is assembled in COO format. + // When using the in-house solver, the matrix is stored in CSR format. + +#ifdef GMGPOLAR_USE_MUMPS + // When using the MUMPS solver, the matrix is assembled in COO format. + using MatrixType = SparseMatrixCOO; + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver inner_boundary_mumps_solver_; +#else + // When using the in-house solver, the matrix is stored in CSR format. + using MatrixType = SparseMatrixCSR; + // Sparse matrix for the non-tridiagonal inner boundary circle block. + MatrixType inner_boundary_circle_matrix_; + // LU solver for the inner boundary circle block. + SparseLUSolver inner_boundary_lu_solver_; +#endif + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -130,19 +139,25 @@ class SmootherGive : public Smoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. - void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. 
- void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, - double art, double detDF, double coeff_beta); + void buildTridiagonalSolverMatrices(); + void buildTridiagonalCircleSection(int i_r); + void buildTridiagonalRadialSection(int i_theta); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, + double att, double art, double detDF, double coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + MatrixType buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. 
+ void nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, double arr, double att, double art, + double detDF, double coeff_beta); + void nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, double arr, double att, double art, + double detDF, double coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -150,10 +165,10 @@ class SmootherGive : public Smoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, const SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, const SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp); + void applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ /* Line-wise solvers */ diff --git a/include/Smoother/SmootherTake/smootherTake.h b/include/Smoother/SmootherTake/smootherTake.h index 1f5c7179..9286da75 100644 --- a/include/Smoother/SmootherTake/smootherTake.h +++ b/include/Smoother/SmootherTake/smootherTake.h @@ -59,42 +59,13 @@ class SmootherTake : public Smoother void smoothing(Vector x, ConstVector rhs, Vector temp) override; private: - /* ------------------- */ - /* Tridiagonal solvers */ - /* ------------------- */ - - // Batched solver for cyclic-tridiagonal circle line A_sc matrices. - BatchedTridiagonalSolver circle_tridiagonal_solver_; - - // Batched solver for tridiagonal radial circle line A_sc matrices. 
- BatchedTridiagonalSolver radial_tridiagonal_solver_; - - // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because - // it potentially includes across-origin coupling. Therefore, it is assembled - // into a sparse matrix and solved using a general-purpose sparse solver. - // When using the MUMPS solver, the matrix is assembled in COO format. - // When using the in-house solver, the matrix is stored in CSR format. -#ifdef GMGPOLAR_USE_MUMPS - using MatrixType = SparseMatrixCOO; - // MUMPS solver structure with the solver matrix initialized in the constructor. - // std::optional is used because CooMumpsSolver cannot be default-constructed. - std::optional inner_boundary_mumps_solver_; -#else - using MatrixType = SparseMatrixCSR; - SparseLUSolver inner_boundary_lu_solver_; -#endif - // Sparse matrix for the non-tridiagonal inner boundary circle block. - MatrixType inner_boundary_circle_matrix_; - - // Note: - // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. - // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. - // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. - /* ------------------- */ /* Stencil definitions */ /* ------------------- */ + // The stencil definitions must be defined before the declaration of the inner_boundary_mumps_solver_, + // since the MUMPS solver will be built in the member initializer of the Smoother class. + // Stencils encode neighborhood connectivity for A_sc matrix assembly. // It is only used in the construction of COO/CSR matrices. // Thus it is only used for the interior boundary matrix and not needed for the tridiagonal matrices. @@ -116,6 +87,45 @@ class SmootherTake : public Smoother }; // clang-format on + /* ------------------- */ + /* Tridiagonal solvers */ + /* ------------------- */ + + // Batched solver for cyclic-tridiagonal circle line A_sc matrices. 
+ BatchedTridiagonalSolver circle_tridiagonal_solver_; + + // Batched solver for tridiagonal radial line A_sc matrices. + BatchedTridiagonalSolver radial_tridiagonal_solver_; + + // Note: + // - circle_tridiagonal_solver_[batch=0] is unused. Use the COO/CSR matrix instead. + // - circle_tridiagonal_solver_[batch=i_r] solves circle line i_r. + // - radial_tridiagonal_solver_[batch=i_theta] solves radial line i_theta. + + /* ------------------------ */ + /* Interior boundary solver */ + /* ------------------------ */ + + // The A_sc matrix on i_r = 0 (inner circle) is NOT tridiagonal because + // it potentially includes across-origin coupling. Therefore, it is assembled + // into a sparse matrix and solved using a general-purpose sparse solver. + // When using the MUMPS solver, the matrix is assembled in COO format. + // When using the in-house solver, the matrix is stored in CSR format. + +#ifdef GMGPOLAR_USE_MUMPS + // When using the MUMPS solver, the matrix is assembled in COO format. + using MatrixType = SparseMatrixCOO; + // MUMPS solver structure with the solver matrix initialized in the constructor. + CooMumpsSolver inner_boundary_mumps_solver_; +#else + // When using the in-house solver, the matrix is stored in CSR format. + using MatrixType = SparseMatrixCSR; + // Sparse matrix for the non-tridiagonal inner boundary circle block. + MatrixType inner_boundary_circle_matrix_; + // LU solver for the inner boundary circle block. + SparseLUSolver inner_boundary_lu_solver_; +#endif + // Select correct stencil depending on the grid position. const Stencil& getStencil(int i_r) const; /* Only i_r = 0 implemented */ // Number of nonzero A_sc entries. @@ -127,20 +137,23 @@ class SmootherTake : public Smoother /* --------------- */ /* Matrix assembly */ /* --------------- */ - // Build all A_sc matrices for circle and radial smoothers. - void buildAscMatrices(); - // Build A_sc matrix block for a single circular line. 
- void buildAscCircleSection(int i_r); - // Build A_sc matrix block for a single radial line. - void buildAscRadialSection(int i_theta); - // Build A_sc for a specific node (i_r, i_theta) - void nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, - ConstVector& att, ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta); + void buildTridiagonalSolverMatrices(); + // Build the tridiagonal solver matrices for a specific node (i_r, i_theta) + void nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, + ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); + + // Build the solver matrix for the interior boundary (i_r = 0) which is non-tridiagonal due to across-origin coupling. + MatrixType buildInteriorBoundarySolverMatrix(); + // Build the solver matrix for a specific node (i_r = 0, i_theta) on the interior boundary. 
+ void nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta); /* ---------------------- */ /* Orthogonal application */ @@ -148,8 +161,10 @@ class SmootherTake : public Smoother // Compute temp = f_sc − A_sc^ortho * u_sc^ortho (precomputed right-hand side) // where x = u_sc and rhs = f_sc - void applyAscOrthoCircleSection(int i_r, ConstVector x, ConstVector rhs, Vector temp); - void applyAscOrthoRadialSection(int i_theta, ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp); + void applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp); /* ----------------- */ /* Line-wise solvers */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 429f0a8d..b5e35756 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -123,14 +123,16 @@ set(SMOOTHER_SOURCES # SmootherGive ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/applyAscOrtho.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/buildMatrix.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/buildInnerBoundaryAsc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/buildTridiagonalAsc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/matrixStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/smootherGive.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherGive/solveAscSystem.cpp # SmootherTake ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/applyAscOrtho.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/buildMatrix.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/buildInnerBoundaryAsc.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/buildTridiagonalAsc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/matrixStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/smootherTake.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Smoother/SmootherTake/solveAscSystem.cpp @@ -146,14 +148,16 @@ set(EXTRAPOLATED_SMOOTHER_SOURCES # ExtrapolatedSmootherGive ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp # ExtrapolatedSmootherTake ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/smootherStencil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp @@ -197,7 +201,7 @@ find_package(OpenMP REQUIRED) if(OpenMP_CXX_FOUND) target_link_libraries(GMGPolarLib PUBLIC OpenMP::OpenMP_CXX) endif() -find_package(Kokkos 4.4.1...<5 QUIET REQUIRED) +find_package(Kokkos 4.4.1...<5.1 QUIET REQUIRED) 
target_link_libraries(GMGPolarLib PUBLIC Kokkos::kokkos) diff --git a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp index f0886fc1..549817c8 100644 --- a/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp +++ b/src/DirectSolver/DirectSolver-COO-MUMPS-Take/buildSolverMatrix.cpp @@ -54,7 +54,7 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake(int i_r, int i_theta double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ + (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */ - left_value /* Center: (Left) */ - right_value /* Center: (Right) */ - bottom_value /* Center: (Bottom) */ @@ -178,13 +178,12 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake(int i_r, int i_theta double bottom_value = -coeff3 * (att(center_index) + att(bottom_index)); /* Bottom */ double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ - double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ - - left_value /* Center: (Left) */ - - right_value /* Center: (Right) */ - - bottom_value /* Center: (Bottom) */ - - top_value /* Center: (Top) */ - ); + double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * + std::fabs(detDF(center_index)) /* beta_{i,j} */ + - left_value /* Center: (Left) */ + - right_value /* Center: (Right) */ + - bottom_value /* Center: (Bottom) */ + - top_value; /* Center: (Top) */ double bottom_right_value = +0.25 * (art(right_index) + art(bottom_index)); /* Bottom Right */ double top_right_value = -0.25 * (art(right_index) + art(top_index)); /* Top Right */ @@ -270,12 +269,11 @@ void 
DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake(int i_r, int i_theta double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ - - left_value /* Center: (Left) */ - - right_value /* Center: (Right) */ - - bottom_value /* Center: (Bottom) */ - - top_value /* Center: (Top) */ - ); + 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */ + - left_value /* Center: (Left) */ + - right_value /* Center: (Right) */ + - bottom_value /* Center: (Bottom) */ + - top_value; /* Center: (Top) */ double bottom_left_value = -0.25 * (art(left_index) + art(bottom_index)); /* Bottom Left */ double bottom_right_value = +0.25 * (art(right_index) + art(bottom_index)); /* Bottom Right */ @@ -377,12 +375,11 @@ void DirectSolver_COO_MUMPS_Take::nodeBuildSolverMatrixTake(int i_r, int i_theta double top_value = -coeff4 * (att(center_index) + att(top_index)); /* Top */ double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF(center_index)) /* beta_{i,j} */ - - left_value /* Center: (Left) */ - - right_value /* Center: (Right) */ - - bottom_value /* Center: (Bottom) */ - - top_value /* Center: (Top) */ - ); + 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * std::fabs(detDF(center_index)) /* beta_{i,j} */ + - left_value /* Center: (Left) */ + - right_value /* Center: (Right) */ + - bottom_value /* Center: (Bottom) */ + - top_value; /* Center: (Top) */ double bottom_left_value = -0.25 * (art(left_index) + art(bottom_index)); /* Bottom Left */ double bottom_right_value = +0.25 * (art(right_index) + art(bottom_index)); /* Bottom Right */ diff --git a/src/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.cpp b/src/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.cpp index 0e9b6ac4..bff1d3bb 100644 --- 
a/src/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.cpp +++ b/src/DirectSolver/DirectSolver-CSR-LU-Take/buildSolverMatrix.cpp @@ -166,13 +166,13 @@ void DirectSolver_CSR_LU_Take::nodeBuildSolverMatrixTake(int i_r, int i_theta, c double bottom_value = -coeff3 * (att[center_index] + att[bottom_index]); /* Bottom */ double top_value = -coeff4 * (att[center_index] + att[top_index]); /* Top */ - double center_value = - (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * fabs(detDF[center_index]) /* beta_{i,j} */ - - left_value /* Center: (Left) */ - - right_value /* Center: (Right) */ - - bottom_value /* Center: (Bottom) */ - - top_value /* Center: (Top) */ - ); + double center_value = (+0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center_index] * + std::fabs(detDF[center_index]) /* beta_{i,j} */ + - left_value /* Center: (Left) */ + - right_value /* Center: (Right) */ + - bottom_value /* Center: (Bottom) */ + - top_value /* Center: (Top) */ + ); double bottom_right_value = +0.25 * (art[right_index] + art[bottom_index]); /* Bottom Right */ double top_right_value = -0.25 * (art[right_index] + art[top_index]); /* Top Right */ diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp index eab23224..a0d46f5a 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/applyAscOrtho.cpp @@ -884,42 +884,270 @@ static inline void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const Polar } } -void ExtrapolatedSmootherGive::applyAscOrthoCircleSection(int i_r, SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp) +void ExtrapolatedSmootherGive::applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, + Vector temp) { - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles() + 1); + const int num_smoother_circles = grid_.numberSmootherCircles(); + const SmootherColor 
smoother_color = SmootherColor::Black; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 2) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + + /* Outside Black Section (Part 1)*/ +#pragma omp for + for (int circle_task = -1; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); - const double r = grid_.radius(i_r); + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - const double theta = grid_.theta(i_theta); - const int index = grid_.index(i_r, i_theta); + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); - double coeff_beta, arr, att, art, detDF; - level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } - // Apply Asc Ortho at the current node - nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, 
art, - detDF, coeff_beta); + /* Outside Black Section (Part 2)*/ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } } } -void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(int i_theta, SmootherColor smoother_color, - ConstVector x, ConstVector rhs, - Vector temp) +void ExtrapolatedSmootherGive::applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, + Vector temp) { - const double theta = grid_.theta(i_theta); - - /* !!! i_r = grid_.numberSmootherCircles()-1 !!! 
*/ - for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { - const double r = grid_.radius(i_r); - const int index = grid_.index(i_r, i_theta); + const int num_smoother_circles = grid_.numberSmootherCircles(); + const SmootherColor smoother_color = SmootherColor::White; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside White Section */ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 2) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside White Section (Part 1)*/ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside White Section (Part 2)*/ +#pragma omp for + for (int circle_task = 2; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = 
grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + } +} - double coeff_beta, arr, att, art, detDF; - level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); +void ExtrapolatedSmootherGive::applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, + Vector temp) +{ + const int num_radial_lines = grid_.ntheta(); + const SmootherColor smoother_color = SmootherColor::Black; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 2) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside Black Section (Part 1) */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! 
i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside Black Section (Part 2) */ +#pragma omp for + for (int i_theta = 3; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + } +} - // Apply Asc Ortho at the current node - nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, - detDF, coeff_beta); +void ExtrapolatedSmootherGive::applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, + Vector temp) +{ + const int num_radial_lines = grid_.ntheta(); + const SmootherColor smoother_color = SmootherColor::White; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside White Section */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 2) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions 
from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside White Section (Part 1) */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside White Section (Part 2) */ +#pragma omp for + for (int i_theta = 2; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! 
*/ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } } -} \ No newline at end of file +} diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp new file mode 100644 index 00000000..6dc767db --- /dev/null +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp @@ -0,0 +1,327 @@ +#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.h" + +#include "../../../include/Definitions/geometry_helper.h" + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) += value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. 
+static inline void updateMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) += value; +} +#endif + +void ExtrapolatedSmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, + bool DirBC_Interior, MatrixType& matrix, + double arr, double att, double art, + double detDF, double coeff_beta) +{ + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 0; + + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + /* Fill result(i,j) */ + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff2 = 0.5 * (k1 + k2) / h2; + + int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + int center_index = i_theta; + int right_index = i_theta; + int bottom_index = i_theta_M1; + int top_index = i_theta_P1; + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = getCircleAscIndex(i_r, i_theta); + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). + // Some more adjustments from changing the 9-point stencil to the artificial 7-point stencil.
+ double h1 = 2.0 * grid.radius(0); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int right_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); + const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); + const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); + + int nz_index; + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + if (i_theta & 1) { + /* i_theta % 2 == 1 */ + /* -| x | o | x | */ + /* -| | | | */ + /* -| O | o | o | */ + /* -| | | | */ + /* -| x | o | x | */ + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = -coeff1 * arr; /* Left */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i-1,j) */ + /* 
From the view of the across-origin node, */ + /* the directions are rotated by 180 degrees in the stencil! */ + row = left_index; + ptr = left_nz_index; + + const Stencil& LeftStencil = CenterStencil; + + offset = LeftStencil[StencilPosition::Left]; + column = center_index; + value = -coeff1 * arr; /* Right -> Left*/ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* i_theta % 2 == 0 */ + /* -| o | o | o | */ + /* -| | | | */ + /* -| X | o | x | */ + /* -| | | | */ + /* -| o | o | o | */ + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + updateMatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i,j-1) */ + row = bottom_index; + ptr = bottom_nz_index; + + const Stencil& BottomStencil = CenterStencil; + + offset = BottomStencil[StencilPosition::Center]; + column = bottom_index; + value = +coeff3 * att; /* Center: (Top) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i,j+1) */ + row = top_index; + ptr = top_nz_index; + + const Stencil& TopStencil = CenterStencil; + + offset = TopStencil[StencilPosition::Center]; + column = top_index; + value = +coeff4 * att; /* Center: (Bottom) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + } +} + +void ExtrapolatedSmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, + bool DirBC_Interior, MatrixType& matrix, + double arr, double att, double art, + double detDF, double coeff_beta) +{ + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 1; + + const double h1 =
grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left_index = i_theta; + + /* -------------------------- */ + /* Cyclic Tridiagonal Section */ + /* i_r % 2 == 1 */ + if (i_r & 1) { + if (i_theta & 1) { + /* i_theta % 2 == 1 */ + /* | x | o | x | */ + /* | | | | */ + /* | o | O | o | */ + /* | | | | */ + /* | x | o | x | */ + + /* Fill matrix row of (i-1,j) */ + + /* Only in the case of AcrossOrigin */ + if (!DirBC_Interior) { + row = left_index; + ptr = getCircleAscIndex(i_r - 1, i_theta); + + const Stencil& LeftStencil = getStencil(i_r - 1, i_theta); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = +coeff1 * arr; /* Center: (Right) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + } + } +} + +ExtrapolatedSmootherGive::MatrixType ExtrapolatedSmootherGive::buildInteriorBoundarySolverMatrix() +{ + const int ntheta = grid_.ntheta(); + +#ifdef GMGPOLAR_USE_MUMPS + // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. + const int nnz = getNonZeroCountCircleAsc(0); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(false); +#else + std::function nnz_per_row = [&](int i_theta) { + if (DirBC_Interior_) + return 1; + else + return i_theta % 2 == 0 ? 
1 : 2; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + { + const int i_r = 0; + const double r = grid_.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid_.index(i_r, i_theta); + const double theta = grid_.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_0( + i_theta, grid_, DirBC_Interior_, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + } + } + } + + { + const int i_r = 1; + const double r = grid_.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid_.index(i_r, i_theta); + const double theta = grid_.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_1( + i_theta, grid_, DirBC_Interior_, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + } + } + } + +#ifdef GMGPOLAR_USE_MUMPS + /* Mumps: In the case of symmetric matrices, only half of the matrix should be provided. 
*/ + const bool construct_symmetric = true; + if (!construct_symmetric) { + return inner_boundary_solver_matrix; + } + + const int full_nnz = inner_boundary_solver_matrix.non_zero_size(); + const int numRows = inner_boundary_solver_matrix.rows(); + const int numColumns = inner_boundary_solver_matrix.columns(); + const int symmetric_nnz = full_nnz - (full_nnz - numRows) / 2; + + SparseMatrixCOO inner_boundary_solver_matrix_symmetric(numRows, numColumns, symmetric_nnz); + inner_boundary_solver_matrix_symmetric.is_symmetric(true); + + int current_nz = 0; // Current non-zero index in the symmetric matrix + for (int nz_index = 0; nz_index < full_nnz; nz_index++) { + const int current_row = inner_boundary_solver_matrix.row_index(nz_index); + const int current_column = inner_boundary_solver_matrix.col_index(nz_index); + if (current_row <= current_column) { + inner_boundary_solver_matrix_symmetric.row_index(current_nz) = current_row; + inner_boundary_solver_matrix_symmetric.col_index(current_nz) = current_column; + inner_boundary_solver_matrix_symmetric.value(current_nz) = inner_boundary_solver_matrix.value(nz_index); + current_nz++; + } + } + return inner_boundary_solver_matrix_symmetric; +#else + return inner_boundary_solver_matrix; +#endif +} diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.cpp similarity index 76% rename from src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp rename to src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.cpp index 983b5ec6..12a4231d 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildAscMatrices.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildTridiagonalAsc.cpp @@ -14,29 +14,11 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, solver.cyclic_corner(batch) += value; } -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static 
inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) += val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) += val; -} -#endif - -void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, - double att, double art, double detDF, double coeff_beta) +void ExtrapolatedSmootherGive::nodeBuildTridiagonalSolverMatrices( + int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, double art, double detDF, + double coeff_beta) { assert(i_r >= 0 && i_r < grid.nr()); assert(i_theta >= 0 && i_theta < grid.ntheta()); @@ -48,8 +30,8 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const Pola assert(lengthSmootherRadial >= 3); int ptr, offset; - int row, column, col; - double value, val; + int row, column; + double value; /* ------------------------------------------ */ /* Node in the interior of the Circle Section */ /* ------------------------------------------ */ @@ -101,7 +83,7 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const Pola /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, 
center_batch, row, column, value); row = center_index; @@ -150,21 +132,8 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const Pola /* | x | o | x | */ /* Fill matrix row of (i-1,j) */ - if (i_r == 1) { - /* Only in the case of AcrossOrigin */ - if (!DirBC_Interior) { - row = left_index; - ptr = getCircleAscIndex(i_r - 1, i_theta); - - const Stencil& LeftStencil = getStencil(i_r - 1, i_theta); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = +coeff1 * arr; /* Center: (Right) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - } - } - else { + // The inner boundary circle line (i_r - 1 == 0) is handled by the dedicated inner boundary solver (MUMPS or sparse LU), so its row is not updated here. + if (i_r > 1) { row = left_index; column = left_index; value = coeff1 * arr; /* Center: (Right) */ @@ -295,7 +264,7 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const Pola /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -426,177 +395,38 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const Pola /* Circle Section: Node in the inner boundary */ /* ------------------------------------------ */ else if (i_r == 0) { - auto& right_solver = circle_tridiagonal_solver; - int right_batch = i_r + 1; - - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - /* Fill result(i,j) */ - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff2 = 0.5 *
(k1 + k2) / h2; - - int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - int center_index = i_theta; - int right_index = i_theta; - int bottom_index = i_theta_M1; - int top_index = i_theta_P1; - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = getCircleAscIndex(i_r, i_theta); - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. 
- double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); - const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); - const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); - - int nz_index; - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - if (i_theta & 1) { - /* i_theta % 2 == 1 */ - /* -| x | o | x | */ - /* -| | | | */ - /* -| O | o | o | */ - /* -| | | | */ - /* -| x | o | x | */ - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = -coeff1 * arr; /* Left */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, 
row, col, val); - - /* Fill matrix row of (i-1,j) */ - /* From view the view of the across origin node, */ - /* the directions are roatated by 180 degrees in the stencil! */ - row = left_index; - ptr = left_nz_index; - - const Stencil& LeftStencil = CenterStencil; - - offset = LeftStencil[StencilPosition::Left]; - col = center_index; - val = -coeff1 * arr; /* Right -> Left*/ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - else { - /* i_theta % 2 == 0 */ - /* -| o | o | o | */ - /* -| | | | */ - /* -| X | o | x | */ - /* -| | | | */ - /* -| o | o | o | */ - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i,j-1) */ - row = bottom_index; - ptr = bottom_nz_index; - - const Stencil& BottomStencil = CenterStencil; + // The inner boundary circle line is handled by the dedicated inner boundary solver (MUMPS or sparse LU), so we fill in the identity matrix here.
+ auto& center_solver = circle_tridiagonal_solver; + int center_batch = i_r; + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; - offset = BottomStencil[StencilPosition::Center]; - col = bottom_index; - val = +coeff3 * att; /* Center: (Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + /* Fill result(i,j) */ + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); - /* Fill matrix row of (i,j+1) */ - row = top_index; - ptr = top_nz_index; + double coeff2 = 0.5 * (k1 + k2) / h2; - const Stencil& TopStencil = CenterStencil; + int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - offset = TopStencil[StencilPosition::Center]; - col = top_index; - val = +coeff4 * att; /* Center: (Bottom) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + int center_index = i_theta; + int right_index = i_theta; + int bottom_index = i_theta_M1; + int top_index = i_theta_P1; - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - } + /* Fill matrix row of (i,j) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(center_solver, center_batch, row, column, value); + + /* Fill matrix row of (i+1,j) */ + row = right_index; + column = right_index; + value = coeff2 * arr; /* Center: (Left) */ + updateMatrixElement(right_solver, right_batch, row, column, value); } /* ------------------------------------------- */ /* Circle Section: Node next to radial section */ @@ -1203,7 +1033,7 @@ void ExtrapolatedSmootherGive::nodeBuildAscGive(int i_r, int i_theta, const Pola } } -void ExtrapolatedSmootherGive::buildAscCircleSection(const int i_r) +void 
ExtrapolatedSmootherGive::buildTridiagonalCircleSection(int i_r) { const double r = grid_.radius(i_r); for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { @@ -1214,12 +1044,12 @@ void ExtrapolatedSmootherGive::buildAscCircleSection(const int i_r) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } -void ExtrapolatedSmootherGive::buildAscRadialSection(const int i_theta) +void ExtrapolatedSmootherGive::buildTridiagonalRadialSection(int i_theta) { const double theta = grid_.theta(i_theta); for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { @@ -1230,46 +1060,13 @@ void ExtrapolatedSmootherGive::buildAscRadialSection(const int i_theta) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } -void ExtrapolatedSmootherGive::buildAscMatrices() +void ExtrapolatedSmootherGive::buildTridiagonalSolverMatrices() { - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. 
Use inner_boundary_circle_matrix_ instead. - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - if (DirBC_Interior_) - return 1; - else - return i_theta % 2 == 0 ? 1 : 2; - }; - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); - - for (int i = 0; i < inner_boundary_circle_matrix_.non_zero_size(); i++) { - inner_boundary_circle_matrix_.values_data()[i] = 0.0; - } - -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ - /* Multi-threaded execution: */ const int num_smoother_circles = grid_.numberSmootherCircles(); const int additional_radial_tasks = grid_.ntheta() % 3; @@ -1279,30 +1076,30 @@ void ExtrapolatedSmootherGive::buildAscMatrices() { #pragma omp for for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); + buildTridiagonalCircleSection(i_r); } #pragma omp for for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); + buildTridiagonalCircleSection(i_r); } #pragma omp for for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); + buildTridiagonalCircleSection(i_r); } #pragma omp for for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 0) { int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); + buildTridiagonalRadialSection(i_theta); } else { if (additional_radial_tasks == 0) { - 
buildAscRadialSection(0); + buildTridiagonalRadialSection(0); } else if (additional_radial_tasks >= 1) { - buildAscRadialSection(0); - buildAscRadialSection(1); + buildTridiagonalRadialSection(0); + buildTridiagonalRadialSection(1); } } } @@ -1310,54 +1107,25 @@ void ExtrapolatedSmootherGive::buildAscMatrices() for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 1) { int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); + buildTridiagonalRadialSection(i_theta); } else { if (additional_radial_tasks == 0) { - buildAscRadialSection(1); + buildTridiagonalRadialSection(1); } else if (additional_radial_tasks == 1) { - buildAscRadialSection(2); + buildTridiagonalRadialSection(2); } else if (additional_radial_tasks == 2) { - buildAscRadialSection(2); - buildAscRadialSection(3); + buildTridiagonalRadialSection(2); + buildTridiagonalRadialSection(3); } } } #pragma omp for for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - } - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------- */ - /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */ - /* ------------------------------------------------------------------- */ - - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = 
full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; + buildTridiagonalRadialSection(i_theta); } } -#endif } -// clang-format on diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp index f7a27781..9b108a4b 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/extrapolatedSmootherGive.cpp @@ -8,17 +8,17 @@ ExtrapolatedSmootherGive::ExtrapolatedSmootherGive(const PolarGrid& grid, const num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) +#ifdef GMGPOLAR_USE_MUMPS + , inner_boundary_mumps_solver_(buildInteriorBoundarySolverMatrix()) +#else + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_lu_solver_(inner_boundary_circle_matrix_) +#endif { - buildAscMatrices(); + buildTridiagonalSolverMatrices(); circle_tridiagonal_solver_.setup(); radial_tridiagonal_solver_.setup(); - -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); -#else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); -#endif } // The smoothing solves linear systems of the form: @@ -65,110 +65,31 @@ void ExtrapolatedSmootherGive::extrapolatedSmoothing(Vector x, ConstVect } } } - - /* Multi-threaded execution */ - const int num_smoother_circles = grid_.numberSmootherCircles(); - const int num_radial_lines = grid_.ntheta(); - /* ----------------------------------------------- */ /* 1. 
Black-Circle update (u_bc): */ /* A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho */ /* ----------------------------------------------- */ -#pragma omp parallel num_threads(num_omp_threads_) - { - /* Inside Black Section */ -#pragma omp for - for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 2) { - int i_r = num_smoother_circles - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - /* Outside Black Section (Part 1)*/ -#pragma omp for - for (int circle_task = -1; circle_task < num_smoother_circles; circle_task += 4) { - int i_r = num_smoother_circles - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - /* Outside Black Section (Part 2)*/ -#pragma omp for - for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 4) { - int i_r = num_smoother_circles - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::Black, x, rhs, temp); - } - } + applyAscOrthoBlackCircleSection(x, rhs, temp); solveBlackCircleSection(x, temp); /* ----------------------------------------------- */ /* 2. 
White-Circle update (u_wc): */ /* A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho */ /* ----------------------------------------------- */ -#pragma omp parallel num_threads(num_omp_threads_) - { - /* Inside White Section */ -#pragma omp for - for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 2) { - int i_r = num_smoother_circles - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* Outside White Section (Part 1)*/ -#pragma omp for - for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 4) { - int i_r = num_smoother_circles - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - /* Outside White Section (Part 2)*/ -#pragma omp for - for (int circle_task = 2; circle_task < num_smoother_circles; circle_task += 4) { - int i_r = num_smoother_circles - circle_task - 1; - applyAscOrthoCircleSection(i_r, SmootherColor::White, x, rhs, temp); - } - } + applyAscOrthoWhiteCircleSection(x, rhs, temp); solveWhiteCircleSection(x, temp); /* ----------------------------------------------- */ /* 3. 
Black-Radial update (u_br): */ /* A_br * u_br = f_br − A_br^ortho * u_br^ortho */ /* ----------------------------------------------- */ -#pragma omp parallel num_threads(num_omp_threads_) - { - /* Inside Black Section */ -#pragma omp for - for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 2) { - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - /* Outside Black Section (Part 1) */ -#pragma omp for - for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 4) { - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - /* Outside Black Section (Part 2) */ -#pragma omp for - for (int i_theta = 3; i_theta < num_radial_lines; i_theta += 4) { - applyAscOrthoRadialSection(i_theta, SmootherColor::Black, x, rhs, temp); - } - } + applyAscOrthoBlackRadialSection(x, rhs, temp); solveBlackRadialSection(x, temp); /* ----------------------------------------------- */ /* 4. White-Radial update (u_wr): */ /* A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho */ /* ----------------------------------------------- */ -#pragma omp parallel num_threads(num_omp_threads_) - { - /* Inside Black Section */ -#pragma omp for - for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 2) { - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - /* Outside Black Section (Part 1) */ -#pragma omp for - for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 4) { - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - /* Outside Black Section (Part 2) */ -#pragma omp for - for (int i_theta = 2; i_theta < num_radial_lines; i_theta += 4) { - applyAscOrthoRadialSection(i_theta, SmootherColor::White, x, rhs, temp); - } - } + applyAscOrthoWhiteRadialSection(x, rhs, temp); solveWhiteRadialSection(x, temp); } diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp index a438d610..00078262 
100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/solveAscSystem.cpp @@ -21,7 +21,7 @@ void ExtrapolatedSmootherGive::solveBlackCircleSection(Vector x, Vector< Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif @@ -58,7 +58,7 @@ void ExtrapolatedSmootherGive::solveWhiteCircleSection(Vector x, Vector< Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.cpp index 95bf3bc4..ac416bdc 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/applyAscOrtho.cpp @@ -455,42 +455,92 @@ static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const Polar } } -void ExtrapolatedSmootherTake::applyAscOrthoCircleSection(int i_r, ConstVector x, ConstVector rhs, - Vector temp) +void ExtrapolatedSmootherTake::applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, + Vector temp) { - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles()); - assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); - const auto& arr = level_cache_.arr(); - const auto& att = level_cache_.att(); - const auto& art = level_cache_.art(); - const auto& detDF = level_cache_.detDF(); - const auto& coeff_beta = level_cache_.coeff_beta(); + ConstVector arr = 
level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + + /* The outer most circle next to the radial section is defined to be black. */ + const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, - coeff_beta); +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } } -void ExtrapolatedSmootherTake::applyAscOrthoRadialSection(int i_theta, ConstVector x, ConstVector rhs, - Vector temp) +void ExtrapolatedSmootherTake::applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, + Vector temp) { - assert(i_theta >= 0 && i_theta < grid_.ntheta()); + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + + /* The outer most circle next to the radial section is defined to be black. */ + const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 
0 : 1; + +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} +void ExtrapolatedSmootherTake::applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, + Vector temp) +{ assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); - const auto& arr = level_cache_.arr(); - const auto& att = level_cache_.att(); - const auto& art = level_cache_.art(); - const auto& detDF = level_cache_.detDF(); - const auto& coeff_beta = level_cache_.coeff_beta(); + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} + +void ExtrapolatedSmootherTake::applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, + Vector temp) +{ + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, - coeff_beta); + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + +#pragma omp parallel for 
num_threads(num_omp_threads_) + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } -} \ No newline at end of file +} diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.cpp new file mode 100644 index 00000000..3321305f --- /dev/null +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildInnerBoundaryAsc.cpp @@ -0,0 +1,212 @@ +#include "../../../include/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.h" + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) = value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. 
+static inline void updateMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) = value; +} +#endif + +void ExtrapolatedSmootherTake::nodeBuildInteriorBoundarySolverMatrix( + int i_theta, const PolarGrid& grid, bool DirBC_Interior, MatrixType& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, ConstVector& detDF, ConstVector& coeff_beta) +{ + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + /* ------------------------------------------ */ + /* Circle Section: Node in the inner boundary */ + /* ------------------------------------------ */ + const int i_r = 0; + + int ptr, offset; + int row, column; + double value; + + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + const int center_index = i_theta; + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). + // Some more adjustments from changing the 9-point stencil to the artificial 7-point stencil. 
+ double h1 = 2.0 * grid.radius(0); + double h2 = grid.radialSpacing(i_r); + double k1 = grid.angularSpacing(i_theta - 1); + double k2 = grid.angularSpacing(i_theta); + + double coeff1 = 0.5 * (k1 + k2) / h1; + double coeff2 = 0.5 * (k1 + k2) / h2; + double coeff3 = 0.5 * (h1 + h2) / k1; + double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int right_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); + const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); + const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); + + int nz_index; + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + if (i_theta & 1) { + /* i_theta % 2 == 1 */ + /* -| x | o | x | */ + /* -| | | | */ + /* -| O | o | o | */ + /* -| | | | */ + /* -| x | o | x | */ + + const int left = grid.index(i_r, i_theta_AcrossOrigin); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); + const double left_value = -coeff1 * (arr[center] + arr[left]); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = 
CenterStencil[StencilPosition::Center]; + column = center_index; + value = center_value; + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = left_value; + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* i_theta % 2 == 0 */ + /* -| o | o | o | */ + /* -| | | | */ + /* -| X | o | x | */ + /* -| | | | */ + /* -| o | o | o | */ + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r, i_theta); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + } +} + +ExtrapolatedSmootherTake::MatrixType ExtrapolatedSmootherTake::buildInteriorBoundarySolverMatrix() +{ + const int i_r = 0; + const int ntheta = grid_.ntheta(); + +#ifdef GMGPOLAR_USE_MUMPS + // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. + const int nnz = getNonZeroCountCircleAsc(i_r); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(false); +#else + std::function nnz_per_row = [&](int i_theta) { + if (DirBC_Interior_) + return 1; + else + return i_theta % 2 == 0 ? 
1 : 2; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + nodeBuildInteriorBoundarySolverMatrix(i_theta, grid_, DirBC_Interior_, inner_boundary_solver_matrix, arr, att, + art, detDF, coeff_beta); + } + +#ifdef GMGPOLAR_USE_MUMPS + /* Mumps: In the case of symmetric matrices, only half of the matrix should be provided. */ + const bool construct_symmetric = true; + if (!construct_symmetric) { + return inner_boundary_solver_matrix; + } + + const int full_nnz = inner_boundary_solver_matrix.non_zero_size(); + const int numRows = inner_boundary_solver_matrix.rows(); + const int numColumns = inner_boundary_solver_matrix.columns(); + const int symmetric_nnz = full_nnz - (full_nnz - numRows) / 2; + + SparseMatrixCOO inner_boundary_solver_matrix_symmetric(numRows, numColumns, symmetric_nnz); + inner_boundary_solver_matrix_symmetric.is_symmetric(true); + + int current_nz = 0; // Current non-zero index in the symmetric matrix + for (int nz_index = 0; nz_index < full_nnz; nz_index++) { + const int current_row = inner_boundary_solver_matrix.row_index(nz_index); + const int current_column = inner_boundary_solver_matrix.col_index(nz_index); + if (current_row <= current_column) { + inner_boundary_solver_matrix_symmetric.row_index(current_nz) = current_row; + inner_boundary_solver_matrix_symmetric.col_index(current_nz) = current_column; + inner_boundary_solver_matrix_symmetric.value(current_nz) = inner_boundary_solver_matrix.value(nz_index); + current_nz++; + } + } + return 
inner_boundary_solver_matrix_symmetric; +#else + return inner_boundary_solver_matrix; +#endif +} diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.cpp similarity index 64% rename from src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp rename to src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.cpp index 7636df8a..b2e719f3 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildAscMatrices.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/buildTridiagonalAsc.cpp @@ -12,31 +12,11 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, solver.cyclic_corner(batch) = value; } -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) = val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) = val; -} -#endif - -void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, - ConstVector& arr, ConstVector& att, - ConstVector& art, ConstVector& detDF, - ConstVector& coeff_beta) +void ExtrapolatedSmootherTake::nodeBuildTridiagonalSolverMatrices( + int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& 
detDF, ConstVector& coeff_beta) { assert(i_r >= 0 && i_r < grid.nr()); assert(i_theta >= 0 && i_theta < grid.ntheta()); @@ -48,8 +28,8 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola assert(lengthSmootherRadial >= 3); int ptr, offset; - int row, column, col; - double value, val; + int row, column; + double value; /* ------------------------------------------ */ /* Node in the interior of the Circle Section */ @@ -102,7 +82,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -141,7 +121,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -161,117 +141,34 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Circle Section: Node in the inner boundary */ /* ------------------------------------------ */ else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - auto& 
matrix = inner_boundary_circle_matrix; - const int center_index = i_theta; - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. - double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + (grid.ntheta() / 2)); - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); - const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); - const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); - - auto& matrix = inner_boundary_circle_matrix; - - int nz_index; - const Stencil& CenterStencil = getStencil(i_r, i_theta); + // 
The inner boundary circle line is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - if (i_theta & 1) { - /* i_theta % 2 == 1 */ - /* -| x | o | x | */ - /* -| | | | */ - /* -| O | o | o | */ - /* -| | | | */ - /* -| x | o | x | */ - - const int left = grid.index(i_r, i_theta_AcrossOrigin); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - const double left_value = -coeff1 * (arr[center] + arr[left]); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = center_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = left_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - else { - /* i_theta % 2 == 0 */ - /* -| o | o | o | */ - /* -| | | | */ - /* -| X | o | x | */ - /* -| | | | */ - /* -| o | o | o | */ - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r, i_theta); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - } + auto& solver = circle_tridiagonal_solver; + const int batch = i_r; + + const int center_index = i_theta; + const int bottom_index 
= i_theta_M1; + const int top_index = i_theta_P1; + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Bottom */ + row = center_index; + column = bottom_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Top */ + row = center_index; + column = top_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); } /* ------------------------------------------ */ /* Node in the interior of the Radial Section */ @@ -328,7 +225,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -372,7 +269,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -436,7 +333,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 
+ h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -459,7 +356,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -525,7 +422,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -553,7 +450,7 @@ void ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola /* Center: (Left, Right, Bottom, Top) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) + coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); updateMatrixElement(solver, batch, row, column, value); @@ -611,25 +508,7 @@ void 
ExtrapolatedSmootherTake::nodeBuildAscTake(int i_r, int i_theta, const Pola } } -void ExtrapolatedSmootherTake::buildAscCircleSection(int i_r) -{ - assert(level_cache_.cacheDensityProfileCoefficients()); - assert(level_cache_.cacheDomainGeometry()); - - ConstVector arr = level_cache_.arr(); - ConstVector att = level_cache_.att(); - ConstVector art = level_cache_.art(); - ConstVector detDF = level_cache_.detDF(); - ConstVector coeff_beta = level_cache_.coeff_beta(); - - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } -} - -void ExtrapolatedSmootherTake::buildAscRadialSection(int i_theta) +void ExtrapolatedSmootherTake::buildTridiagonalSolverMatrices() { assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); @@ -640,81 +519,22 @@ void ExtrapolatedSmootherTake::buildAscRadialSection(int i_theta) ConstVector detDF = level_cache_.detDF(); ConstVector coeff_beta = level_cache_.coeff_beta(); - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } -} - -void ExtrapolatedSmootherTake::buildAscMatrices() -{ - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. 
- const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - if (DirBC_Interior_) - return 1; - else - return i_theta % 2 == 0 ? 1 : 2; - }; - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ - #pragma omp parallel num_threads(num_omp_threads_) { #pragma omp for nowait for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { - buildAscCircleSection(i_r); + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } } #pragma omp for nowait for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - buildAscRadialSection(i_theta); - } - } - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------- */ - /* Part 3: Convert inner_boundary_circle_matrix_ to a symmetric matrix */ - /* ------------------------------------------------------------------- */ - - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < 
full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } } } -#endif } diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp index dfa4b963..3d5f6a33 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/extrapolatedSmootherTake.cpp @@ -8,17 +8,17 @@ ExtrapolatedSmootherTake::ExtrapolatedSmootherTake(const PolarGrid& grid, const num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) +#ifdef GMGPOLAR_USE_MUMPS + , inner_boundary_mumps_solver_(buildInteriorBoundarySolverMatrix()) +#else + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_lu_solver_(inner_boundary_circle_matrix_) +#endif { - buildAscMatrices(); + buildTridiagonalSolverMatrices(); circle_tridiagonal_solver_.setup(); radial_tridiagonal_solver_.setup(); - -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); -#else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); -#endif } // The smoothing solves linear systems of the form: @@ -49,43 +49,68 @@ void 
ExtrapolatedSmootherTake::extrapolatedSmoothing(Vector x, ConstVect assert(x.size() == rhs.size()); assert(temp.size() == rhs.size()); - assert(level_cache_.cacheDensityProfileCoefficients()); - assert(level_cache_.cacheDomainGeometry()); + /* ----------------------------------------------- */ + /* 1. Black-Circle update (u_bc): */ + /* A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoBlackCircleSection(x, rhs, temp); + solveBlackCircleSection(x, temp); + + /* ----------------------------------------------- */ + /* 2. White-Circle update (u_wc): */ + /* A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoWhiteCircleSection(x, rhs, temp); + solveWhiteCircleSection(x, temp); - /* The outer most circle next to the radial section is defined to be black. */ - /* Priority: Black -> White. */ - const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; - const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; + /* ----------------------------------------------- */ + /* 3. Black-Radial update (u_br): */ + /* A_br * u_br = f_br − A_br^ortho * u_br^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoBlackRadialSection(x, rhs, temp); + solveBlackRadialSection(x, temp); - /* Black Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ + /* ----------------------------------------------- */ + /* 4. 
White-Radial update (u_wr): */ + /* A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoWhiteRadialSection(x, rhs, temp); + solveWhiteRadialSection(x, temp); - solveBlackCircleSection(x, temp); + // /* The outer most circle next to the radial section is defined to be black. */ + // /* Priority: Black -> White. */ + // const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; + // const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; - /* White Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ + // /* Black Circle Section */ + // #pragma omp parallel for num_threads(num_omp_threads_) + // for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + // applyAscOrthoCircleSection(i_r, x, rhs, temp); + // } /* Implicit barrier */ - solveWhiteCircleSection(x, temp); + // solveBlackCircleSection(x, temp); - /* Black Radial Section */ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ + // /* White Circle Section */ + // #pragma omp parallel for num_threads(num_omp_threads_) + // for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + // applyAscOrthoCircleSection(i_r, x, rhs, temp); + // } /* Implicit barrier */ - solveBlackRadialSection(x, temp); + // solveWhiteCircleSection(x, temp); - /* White Radial Section*/ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ + // /* Black Radial Section */ + // #pragma omp 
parallel for num_threads(num_omp_threads_) + // for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + // applyAscOrthoRadialSection(i_theta, x, rhs, temp); + // } /* Implicit barrier */ - solveWhiteRadialSection(x, temp); + // solveBlackRadialSection(x, temp); + + // /* White Radial Section*/ + // #pragma omp parallel for num_threads(num_omp_threads_) + // for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + // applyAscOrthoRadialSection(i_theta, x, rhs, temp); + // } /* Implicit barrier */ + + // solveWhiteRadialSection(x, temp); } diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp index d8b49742..f08ec571 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherTake/solveAscSystem.cpp @@ -21,7 +21,7 @@ void ExtrapolatedSmootherTake::solveBlackCircleSection(Vector x, Vector< Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif @@ -58,7 +58,7 @@ void ExtrapolatedSmootherTake::solveWhiteCircleSection(Vector x, Vector< Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif diff --git a/src/Smoother/SmootherGive/applyAscOrtho.cpp b/src/Smoother/SmootherGive/applyAscOrtho.cpp index d4ef5f1b..a8b20d70 100644 --- a/src/Smoother/SmootherGive/applyAscOrtho.cpp +++ b/src/Smoother/SmootherGive/applyAscOrtho.cpp @@ -422,42 +422,266 @@ static inline void nodeApplyAscOrthoRadialGive(int i_r, int i_theta, const Polar } 
} -void SmootherGive::applyAscOrthoCircleSection(const int i_r, const SmootherColor smoother_color, ConstVector x, - ConstVector rhs, Vector temp) +void SmootherGive::applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp) { - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles() + 1); + const int num_smoother_circles = grid_.numberSmootherCircles(); + const SmootherColor smoother_color = SmootherColor::Black; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 2) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + + /* Outside Black Section (Part 1)*/ +#pragma omp for + for (int circle_task = -1; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); - const double r = grid_.radius(i_r); + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - const double theta = grid_.theta(i_theta); - const int index = grid_.index(i_r, i_theta); + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); - double coeff_beta, arr, att, art, detDF; - 
level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } - // Apply Asc Ortho at the current node - nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, - detDF, coeff_beta); + /* Outside Black Section (Part 2)*/ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } } } -void SmootherGive::applyAscOrthoRadialSection(const int i_theta, const SmootherColor smoother_color, - ConstVector x, ConstVector rhs, Vector temp) +void SmootherGive::applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp) { - const double theta = grid_.theta(i_theta); - - /* We need to obtain left contributions from the circular section for AscOrtho. */ - /* !!! i_r = grid_.numberSmootherCircles()-1 !!! 
*/ - for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { - const double r = grid_.radius(i_r); - const int index = grid_.index(i_r, i_theta); + const int num_smoother_circles = grid_.numberSmootherCircles(); + const SmootherColor smoother_color = SmootherColor::White; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside White Section */ +#pragma omp for + for (int circle_task = 1; circle_task < num_smoother_circles; circle_task += 2) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside White Section (Part 1)*/ +#pragma omp for + for (int circle_task = 0; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside White Section (Part 2)*/ +#pragma omp for + for (int circle_task = 2; circle_task < num_smoother_circles; circle_task += 4) { + const int i_r = num_smoother_circles - circle_task - 1; + const double r = 
grid_.radius(i_r); + + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + const double theta = grid_.theta(i_theta); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoCircleGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + } +} - double coeff_beta, arr, att, art, detDF; - level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); +void SmootherGive::applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp) +{ + const int num_radial_lines = grid_.ntheta(); + const SmootherColor smoother_color = SmootherColor::Black; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 2) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside Black Section (Part 1) */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! 
i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside Black Section (Part 2) */ +#pragma omp for + for (int i_theta = 3; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + } +} - // Apply Asc Ortho at the current node - nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, - detDF, coeff_beta); +void SmootherGive::applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp) +{ + const int num_radial_lines = grid_.ntheta(); + const SmootherColor smoother_color = SmootherColor::White; + +#pragma omp parallel num_threads(num_omp_threads_) + { + /* Inside Black Section */ +#pragma omp for + for (int i_theta = 1; i_theta < num_radial_lines; i_theta += 2) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the 
circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside Black Section (Part 1) */ +#pragma omp for + for (int i_theta = 0; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! */ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } + /* Outside Black Section (Part 2) */ +#pragma omp for + for (int i_theta = 2; i_theta < num_radial_lines; i_theta += 4) { + const double theta = grid_.theta(i_theta); + + /* We need to obtain left contributions from the circular section for AscOrtho. */ + /* !!! i_r = grid_.numberSmootherCircles()-1 !!! 
*/ + for (int i_r = grid_.numberSmootherCircles() - 1; i_r < grid_.nr(); i_r++) { + const double r = grid_.radius(i_r); + const int index = grid_.index(i_r, i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); + + // Apply Asc Ortho at the current node + nodeApplyAscOrthoRadialGive(i_r, i_theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, + att, art, detDF, coeff_beta); + } + } } } \ No newline at end of file diff --git a/src/Smoother/SmootherGive/buildInnerBoundaryAsc.cpp b/src/Smoother/SmootherGive/buildInnerBoundaryAsc.cpp new file mode 100644 index 00000000..9926852a --- /dev/null +++ b/src/Smoother/SmootherGive/buildInnerBoundaryAsc.cpp @@ -0,0 +1,300 @@ +#include "../../../include/Smoother/SmootherGive/smootherGive.h" + +#include "../../../include/Definitions/geometry_helper.h" + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) += value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. 
+static inline void updateMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) += value; +} +#endif + +void SmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, double arr, double att, double art, + double detDF, double coeff_beta) +{ + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 0; + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + /* Fill result(i,j) */ + const int center_index = i_theta; + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = getCircleAscIndex(i_r, i_theta); + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()>>1)). + // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. 
+ const double h1 = 2 * grid.radius(0); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + /* left_matrix (across-the origin), center_matrix, right_matrix */ + /* -| x | o | x | */ + /* -| | | | */ + /* -| O | o | o | */ + /* -| | | | */ + /* -| x | o | x | */ + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); + const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); + const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); + + int nz_index; /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* beta_{i,j} */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = -coeff1 * arr; /* Left */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Bottom]; + column = bottom_index; + value = -coeff3 * att; /* Bottom */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Top]; + column = top_index; + value = 
-coeff4 * att; /* Top */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + /* Fill matrix row of (i-1,j) */ + /* From view the view of the across origin node, */ /* the directions are roatated by 180 degrees in the stencil! */ + row = left_index; + ptr = left_nz_index; + + const Stencil& LeftStencil = CenterStencil; + + offset = LeftStencil[StencilPosition::Left]; + column = center_index; + value = -coeff1 * arr; /* Right -> Left*/ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + /* Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + + /* Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + + /* Fill matrix row of (i,j-1) */ + row = bottom_index; + ptr = bottom_nz_index; + + const Stencil& BottomStencil = CenterStencil; + + offset = BottomStencil[StencilPosition::Top]; + column = center_index; + value = -coeff3 * att; /* Top */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = BottomStencil[StencilPosition::Center]; + column = bottom_index; + value = +coeff3 * att; /* Center: (Top) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + /* TopLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + + /* Fill matrix row of (i,j+1) */ + row = top_index; + ptr = top_nz_index; + + const Stencil& TopStencil = CenterStencil; + + offset = TopStencil[StencilPosition::Bottom]; + column = center_index; + value = -coeff4 * att; /* Bottom */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = 
TopStencil[StencilPosition::Center]; + column = top_index; + value = +coeff4 * att; /* Center: (Bottom) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + + /* BottomLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ + } +} + +void SmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_1(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, double arr, double att, double art, + double detDF, double coeff_beta) +{ + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + int ptr, offset; + int row, column; + double value; + + const int i_r = 1; + + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left_index = i_theta; + + /* Fill matrix row of (i-1,j) */ + if (!DirBC_Interior) { + row = left_index; + ptr = getCircleAscIndex(i_r - 1, i_theta); + + const Stencil& LeftStencil = getStencil(i_r - 1); + + offset = LeftStencil[StencilPosition::Center]; + column = left_index; + value = coeff1 * arr; /* Center: (Right) */ + updateMatrixElement(matrix, ptr, offset, row, column, value); + } +} + +SmootherGive::MatrixType SmootherGive::buildInteriorBoundarySolverMatrix() +{ + const int ntheta = grid_.ntheta(); + +#ifdef GMGPOLAR_USE_MUMPS + // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. + const int nnz = getNonZeroCountCircleAsc(0); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(false); +#else + // The stencils size for the inner boundary matrix is either 1 (Dirichlet BC) or 4 (across-origin discretization). + std::function nnz_per_row = [&](int i_theta) { + return DirBC_Interior_ ? 
1 : 4; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + { + const int i_r = 0; + const double r = grid_.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid_.index(i_r, i_theta); + const double theta = grid_.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_0( + i_theta, grid_, DirBC_Interior_, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + } + } + } + + { + const int i_r = 1; + const double r = grid_.radius(i_r); + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + { + const int global_index = grid_.index(i_r, i_theta); + const double theta = grid_.theta(i_theta); + + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); + + nodeBuildInteriorBoundarySolverMatrix_i_r_1( + i_theta, grid_, DirBC_Interior_, inner_boundary_solver_matrix, arr, att, art, detDF, coeff_beta); + } + } + } + +#ifdef GMGPOLAR_USE_MUMPS + /* Mumps: In the case of symmetric matrices, only half of the matrix should be provided. 
*/ + const bool construct_symmetric = true; + if (!construct_symmetric) { + return inner_boundary_solver_matrix; + } + + const int full_nnz = inner_boundary_solver_matrix.non_zero_size(); + const int numRows = inner_boundary_solver_matrix.rows(); + const int numColumns = inner_boundary_solver_matrix.columns(); + const int symmetric_nnz = full_nnz - (full_nnz - numRows) / 2; + + SparseMatrixCOO inner_boundary_solver_matrix_symmetric(numRows, numColumns, symmetric_nnz); + inner_boundary_solver_matrix_symmetric.is_symmetric(true); + + int current_nz = 0; // Current non-zero index in the symmetric matrix + for (int nz_index = 0; nz_index < full_nnz; nz_index++) { + const int current_row = inner_boundary_solver_matrix.row_index(nz_index); + const int current_column = inner_boundary_solver_matrix.col_index(nz_index); + if (current_row <= current_column) { + inner_boundary_solver_matrix_symmetric.row_index(current_nz) = current_row; + inner_boundary_solver_matrix_symmetric.col_index(current_nz) = current_column; + inner_boundary_solver_matrix_symmetric.value(current_nz) = inner_boundary_solver_matrix.value(nz_index); + current_nz++; + } + } + return inner_boundary_solver_matrix_symmetric; +#else + return inner_boundary_solver_matrix; +#endif +} diff --git a/src/Smoother/SmootherGive/buildMatrix.cpp b/src/Smoother/SmootherGive/buildTridiagonalAsc.cpp similarity index 59% rename from src/Smoother/SmootherGive/buildMatrix.cpp rename to src/Smoother/SmootherGive/buildTridiagonalAsc.cpp index 8ac7e859..29e7f73d 100644 --- a/src/Smoother/SmootherGive/buildMatrix.cpp +++ b/src/Smoother/SmootherGive/buildTridiagonalAsc.cpp @@ -14,29 +14,11 @@ static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, solver.cyclic_corner(batch) += value; } -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; 
- matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) += val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) += val; -} -#endif - -void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, double arr, double att, - double art, double detDF, double coeff_beta) +void SmootherGive::nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, + double arr, double att, double art, double detDF, + double coeff_beta) { assert(i_r >= 0 && i_r < grid.nr()); assert(i_theta >= 0 && i_theta < grid.ntheta()); @@ -98,7 +80,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -139,17 +121,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, updateMatrixElement(center_solver, center_batch, row, column, value); /* Fill matrix row of (i-1,j) */ - if (!DirBC_Interior && i_r == 1) { - row = left_index; - ptr = getCircleAscIndex(i_r - 1, i_theta); - - const Stencil& LeftStencil = getStencil(i_r - 1); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = coeff1 * arr; /* Center: (Right) */ - 
updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - } + // The inner boundary circle line is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. if (i_r > 1) { row = left_index; column = left_index; @@ -259,180 +231,39 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, /* Circle Section: Node in the inner boundary */ /* ------------------------------------------ */ else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the inner boundary */ - /* ------------------------------------------------ */ - if (DirBC_Interior) { - /* Fill result(i,j) */ - const double h2 = grid.radialSpacing(i_r); - const double k1 = grid.angularSpacing(i_theta - 1); - const double k2 = grid.angularSpacing(i_theta); - - const double coeff2 = 0.5 * (k1 + k2) / h2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - auto& right_solver = circle_tridiagonal_solver; - int right_batch = i_r + 1; - - const int center_index = i_theta; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = getCircleAscIndex(i_r, i_theta); - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */
- // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()>>1)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. - const double h1 = 2 * grid.radius(0); - const double h2 = grid.radialSpacing(i_r); - const double k1 = grid.angularSpacing(i_theta - 1); - const double k2 = grid.angularSpacing(i_theta); - - const double coeff1 = 0.5 * (k1 + k2) / h1; - const double coeff2 = 0.5 * (k1 + k2) / h2; - const double coeff3 = 0.5 * (h1 + h2) / k1; - const double coeff4 = 0.5 * (h1 + h2) / k2; - - /* left_matrix (across-the origin), center_matrix, right_matrix */ - /* -| x | o | x | */ - /* -| | | | */ - /* -| O | o | o | */ - /* -| | | | */ - /* -| x | o | x | */ - auto& right_solver = circle_tridiagonal_solver; - int right_batch = i_r + 1; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int right_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - const int bottom_nz_index = getCircleAscIndex(i_r, i_theta_M1); - const int top_nz_index = getCircleAscIndex(i_r, i_theta_P1); - const int left_nz_index = getCircleAscIndex(i_r, i_theta_AcrossOrigin); - - int nz_index; /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* beta_{i,j} */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = -coeff1 * arr; /* 
Left */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Bottom]; - col = bottom_index; - val = -coeff3 * att; /* Bottom */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Top]; - col = top_index; - val = -coeff4 * att; /* Top */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = (coeff1 + coeff2) * arr + (coeff3 + coeff4) * att; /* Center: (Left, Right, Bottom, Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Fill matrix row of (i-1,j) */ - /* From view the view of the across origin node, */ /* the directions are roatated by 180 degrees in the stencil! */ - row = left_index; - ptr = left_nz_index; - - const Stencil& LeftStencil = CenterStencil; - - offset = LeftStencil[StencilPosition::Left]; - col = center_index; - val = -coeff1 * arr; /* Right -> Left*/ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = LeftStencil[StencilPosition::Center]; - col = left_index; - val = +coeff1 * arr; /* Center: (Right) -> Center: (Left) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* Top Right -> Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - - /* Bottom Right -> Top Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - - /* Fill matrix row of (i+1,j) */ - row = right_index; - column = right_index; - value = coeff2 * arr; /* Center: (Left) */ - updateMatrixElement(right_solver, right_batch, row, column, value); - - /* Fill matrix row of (i,j-1) */ - row = bottom_index; - ptr = bottom_nz_index; - - const Stencil& BottomStencil = CenterStencil; - - offset = BottomStencil[StencilPosition::Top]; - col = center_index; - val = -coeff3 * att; /* Top 
*/ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - offset = BottomStencil[StencilPosition::Center]; - col = bottom_index; - val = +coeff3 * att; /* Center: (Top) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); - - /* TopLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - - /* Fill matrix row of (i,j+1) */ - row = top_index; - ptr = top_nz_index; - - const Stencil& TopStencil = CenterStencil; - - offset = TopStencil[StencilPosition::Bottom]; - col = center_index; - val = -coeff4 * att; /* Bottom */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + // The inner boundary circle line is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. + + /* Fill result(i,j) */ + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - offset = TopStencil[StencilPosition::Center]; - col = top_index; - val = +coeff4 * att; /* Center: (Bottom) */ - updateCOOCSRMatrixElement(inner_boundary_circle_matrix, ptr, offset, row, col, val); + const double coeff2 = 0.5 * (k1 + k2) / h2; - /* BottomLeft: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ - } + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + auto& center_solver = circle_tridiagonal_solver; + int center_batch = i_r; + auto& right_solver = circle_tridiagonal_solver; + int right_batch = i_r + 1; + + const int center_index = i_theta; + const int right_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + /* Fill matrix row of (i,j) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(center_solver, center_batch, row, column, value); + + /* Fill matrix row of (i+1,j) */ + row = right_index; + column = right_index; + value = coeff2 * arr; /* Center:
(Left) */ + updateMatrixElement(right_solver, right_batch, row, column, value); } /* --------------------------------------------- */ /* Radial Section: Node next to circular section */ @@ -474,7 +305,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -557,7 +388,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, /* ---------------------------- */ /* Fill matrix row of (i,j) */ row = center_index; column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * fabs(detDF); /* Center: beta_{i,j} */ + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF); /* Center: beta_{i,j} */ updateMatrixElement(center_solver, center_batch, row, column, value); row = center_index; @@ -629,7 +460,7 @@ void SmootherGive::nodeBuildAscGive(int i_r, int i_theta, const PolarGrid& grid, } } -void SmootherGive::buildAscCircleSection(const int i_r) +void SmootherGive::buildTridiagonalCircleSection(int i_r) { const double r = grid_.radius(i_r); for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { @@ -640,12 +471,12 @@ void SmootherGive::buildAscCircleSection(const int i_r) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, 
coeff_beta); } } -void SmootherGive::buildAscRadialSection(const int i_theta) +void SmootherGive::buildTridiagonalRadialSection(int i_theta) { const double theta = grid_.theta(i_theta); for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { @@ -656,38 +487,13 @@ void SmootherGive::buildAscRadialSection(const int i_theta) level_cache_.obtainValues(i_r, i_theta, global_index, r, theta, coeff_beta, arr, att, art, detDF); // Build Asc at the current node - nodeBuildAscGive(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); } } -void SmootherGive::buildAscMatrices() +void SmootherGive::buildTridiagonalSolverMatrices() { - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - return DirBC_Interior_ ? 
1 : 4; - }; - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ - /* Multi-threaded execution: */ const int num_smoother_circles = grid_.numberSmootherCircles(); const int additional_radial_tasks = grid_.ntheta() % 3; @@ -697,30 +503,30 @@ void SmootherGive::buildAscMatrices() { #pragma omp for for (int i_r = 0; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); + buildTridiagonalCircleSection(i_r); } #pragma omp for for (int i_r = 1; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); + buildTridiagonalCircleSection(i_r); } #pragma omp for for (int i_r = 2; i_r < num_smoother_circles; i_r += 3) { - buildAscCircleSection(i_r); + buildTridiagonalCircleSection(i_r); } #pragma omp for for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 0) { int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); + buildTridiagonalRadialSection(i_theta); } else { if (additional_radial_tasks == 0) { - buildAscRadialSection(0); + buildTridiagonalRadialSection(0); } else if (additional_radial_tasks >= 1) { - buildAscRadialSection(0); - buildAscRadialSection(1); + buildTridiagonalRadialSection(0); + buildTridiagonalRadialSection(1); } } } @@ -728,52 +534,25 @@ void SmootherGive::buildAscMatrices() for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { if (radial_task > 1) { int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); + buildTridiagonalRadialSection(i_theta); } else { if (additional_radial_tasks == 0) { - buildAscRadialSection(1); + buildTridiagonalRadialSection(1); } else if (additional_radial_tasks == 1) { - buildAscRadialSection(2); + buildTridiagonalRadialSection(2); } else if 
(additional_radial_tasks == 2) { - buildAscRadialSection(2); - buildAscRadialSection(3); + buildTridiagonalRadialSection(2); + buildTridiagonalRadialSection(3); } } } #pragma omp for for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { int i_theta = radial_task + additional_radial_tasks; - buildAscRadialSection(i_theta); - } - } - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------ */ - /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ - /* ------------------------------------------------------------------ */ - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; + buildTridiagonalRadialSection(i_theta); } } -#endif } diff --git a/src/Smoother/SmootherGive/smootherGive.cpp b/src/Smoother/SmootherGive/smootherGive.cpp index 595aeee3..67714faf 100644 --- a/src/Smoother/SmootherGive/smootherGive.cpp +++ b/src/Smoother/SmootherGive/smootherGive.cpp @@ -6,17 +6,17 @@ SmootherGive::SmootherGive(const PolarGrid& grid, const LevelCache& level_cache, : Smoother(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, 
num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) +#ifdef GMGPOLAR_USE_MUMPS + , inner_boundary_mumps_solver_(buildInteriorBoundarySolverMatrix()) +#else + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_lu_solver_(inner_boundary_circle_matrix_) +#endif { - buildAscMatrices(); + buildTridiagonalSolverMatrices(); circle_tridiagonal_solver_.setup(); radial_tridiagonal_solver_.setup(); - -#ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); -#else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); -#endif } // The smoothing solves linear systems of the form: @@ -48,109 +48,31 @@ void SmootherGive::smoothing(Vector x, ConstVector rhs, Vector x, Vector temp Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif @@ -48,7 +48,7 @@ void SmootherGive::solveWhiteCircleSection(Vector x, Vector temp Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif diff --git a/src/Smoother/SmootherTake/applyAscOrtho.cpp b/src/Smoother/SmootherTake/applyAscOrtho.cpp index 854e6a76..e89d224d 100644 --- a/src/Smoother/SmootherTake/applyAscOrtho.cpp +++ b/src/Smoother/SmootherTake/applyAscOrtho.cpp @@ -9,15 +9,15 @@ static inline void nodeApplyAscOrthoCircleTake(int i_r, int i_theta, const Polar assert(i_r >= 0 && i_r < grid.numberSmootherCircles()); if (i_r > 0 && i_r < grid.numberSmootherCircles()) 
{ - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -53,15 +53,15 @@ static inline void nodeApplyAscOrthoCircleTake(int i_r, int i_theta, const Polar // h1 gets replaced with 2 * R0. // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. 
- double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h1 = 2.0 * grid.radius(0); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -99,15 +99,15 @@ static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const Polar /* Node in the interior */ /* -------------------- */ if (i_r > grid.numberSmootherCircles() && i_r < grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -132,15 +132,15 @@ static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const Polar ); } else if (i_r == 
grid.numberSmootherCircles()) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -166,15 +166,15 @@ static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const Polar ); } else if (i_r == grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -209,11 +209,31 @@ static inline void nodeApplyAscOrthoRadialTake(int i_r, int i_theta, const Polar } } -void SmootherTake::applyAscOrthoCircleSection(int i_r, 
ConstVector x, ConstVector rhs, - Vector temp) +void SmootherTake::applyAscOrthoBlackCircleSection(ConstVector x, ConstVector rhs, Vector temp) { - assert(i_r >= 0 && i_r < grid_.numberSmootherCircles()); + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + /* The outer most circle next to the radial section is defined to be black. */ + const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 1 : 0; + +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} + +void SmootherTake::applyAscOrthoWhiteCircleSection(ConstVector x, ConstVector rhs, Vector temp) +{ assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); @@ -223,17 +243,40 @@ void SmootherTake::applyAscOrthoCircleSection(int i_r, ConstVector x, Co ConstVector detDF = level_cache_.detDF(); ConstVector coeff_beta = level_cache_.coeff_beta(); - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, - coeff_beta); + /* The outer most circle next to the radial section is defined to be black. */ + const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 
0 : 1; + +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + nodeApplyAscOrthoCircleTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } } -void SmootherTake::applyAscOrthoRadialSection(int i_theta, ConstVector x, ConstVector rhs, - Vector temp) +void SmootherTake::applyAscOrthoBlackRadialSection(ConstVector x, ConstVector rhs, Vector temp) { - assert(i_theta >= 0 && i_theta < grid_.ntheta()); + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } + } +} + +void SmootherTake::applyAscOrthoWhiteRadialSection(ConstVector x, ConstVector rhs, Vector temp) +{ assert(level_cache_.cacheDensityProfileCoefficients()); assert(level_cache_.cacheDomainGeometry()); @@ -243,8 +286,11 @@ void SmootherTake::applyAscOrthoRadialSection(int i_theta, ConstVector x ConstVector detDF = level_cache_.detDF(); ConstVector coeff_beta = level_cache_.coeff_beta(); - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, - coeff_beta); +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { + for (int i_r = 
grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + nodeApplyAscOrthoRadialTake(i_r, i_theta, grid_, DirBC_Interior_, x, rhs, temp, arr, att, art, detDF, + coeff_beta); + } } } \ No newline at end of file diff --git a/src/Smoother/SmootherTake/buildInnerBoundaryAsc.cpp b/src/Smoother/SmootherTake/buildInnerBoundaryAsc.cpp new file mode 100644 index 00000000..f41b112c --- /dev/null +++ b/src/Smoother/SmootherTake/buildInnerBoundaryAsc.cpp @@ -0,0 +1,189 @@ +#include "../../../include/Smoother/SmootherTake/smootherTake.h" + +#ifdef GMGPOLAR_USE_MUMPS +// When using the MUMPS solver, the matrix is assembled in COO format. +static inline void updateMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_index(ptr + offset) = row; + matrix.col_index(ptr + offset) = column; + matrix.value(ptr + offset) = value; +} +#else +// When using the in-house solver, the matrix is stored in CSR format. +static inline void updateMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int column, + double value) +{ + matrix.row_nz_index(row, offset) = column; + matrix.row_nz_entry(row, offset) = value; +} +#endif + +void SmootherTake::nodeBuildInteriorBoundarySolverMatrix(int i_theta, const PolarGrid& grid, bool DirBC_Interior, + MatrixType& matrix, ConstVector& arr, + ConstVector& att, ConstVector& art, + ConstVector& detDF, ConstVector& coeff_beta) +{ + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + /* ------------------------------------------ */ + /* Circle Section: Node in the inner boundary */ + /* ------------------------------------------ */ + const int i_r = 0; + + int ptr, offset; + int row, column; + double value; + + /* ------------------------------------------------ */ + /* Case 1: Dirichlet boundary on the inner boundary */ + /* ------------------------------------------------ */ + if (DirBC_Interior) { + const int center_index = i_theta; + const int center_nz_index = 
getCircleAscIndex(i_r, i_theta); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = 1.0; + updateMatrixElement(matrix, ptr, offset, row, column, value); + } + else { + /* ------------------------------------------------------------- */ + /* Case 2: Across origin discretization on the interior boundary */ + /* ------------------------------------------------------------- */ + // h1 gets replaced with 2 * R0. + // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). + // Some more adjustments arise from changing the 9-point stencil to the artificial 7-point stencil. + const double h1 = 2.0 * grid.radius(0); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); + + const int left = grid.index(i_r, i_theta_AcrossOrigin); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + const int center_index = i_theta; + const int left_index = i_theta_AcrossOrigin; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const int center_nz_index = getCircleAscIndex(i_r, i_theta); + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 *
(att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + + /* Fill matrix row of (i,j) */ + row = center_index; + ptr = center_nz_index; + + const Stencil& CenterStencil = getStencil(i_r); + + offset = CenterStencil[StencilPosition::Center]; + column = center_index; + value = center_value; + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Left]; + column = left_index; + value = left_value; + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Bottom]; + column = bottom_index; + value = bottom_value; + updateMatrixElement(matrix, ptr, offset, row, column, value); + + offset = CenterStencil[StencilPosition::Top]; + column = top_index; + value = top_value; + updateMatrixElement(matrix, ptr, offset, row, column, value); + } +} + +SmootherTake::MatrixType SmootherTake::buildInteriorBoundarySolverMatrix() +{ + const int i_r = 0; + const int ntheta = grid_.ntheta(); + +#ifdef GMGPOLAR_USE_MUMPS + // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. + const int nnz = getNonZeroCountCircleAsc(i_r); + SparseMatrixCOO inner_boundary_solver_matrix(ntheta, ntheta, nnz); + inner_boundary_solver_matrix.is_symmetric(false); +#else + // The stencils size for the inner boundary matrix is either 1 (Dirichlet BC) or 4 (across-origin discretization). + std::function nnz_per_row = [&](int i_theta) { + return DirBC_Interior_ ? 
1 : 4; + }; + SparseMatrixCSR inner_boundary_solver_matrix(ntheta, ntheta, nnz_per_row); +#endif + + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + +#pragma omp parallel for num_threads(num_omp_threads_) + for (int i_theta = 0; i_theta < ntheta; i_theta++) { + nodeBuildInteriorBoundarySolverMatrix(i_theta, grid_, DirBC_Interior_, inner_boundary_solver_matrix, arr, att, + art, detDF, coeff_beta); + } + +#ifdef GMGPOLAR_USE_MUMPS + /* Mumps: In the case of symmetric matrices, only half of the matrix should be provided. */ + const bool construct_symmetric = true; + if (!construct_symmetric) { + return inner_boundary_solver_matrix; + } + + const int full_nnz = inner_boundary_solver_matrix.non_zero_size(); + const int numRows = inner_boundary_solver_matrix.rows(); + const int numColumns = inner_boundary_solver_matrix.columns(); + const int symmetric_nnz = full_nnz - (full_nnz - numRows) / 2; + + SparseMatrixCOO inner_boundary_solver_matrix_symmetric(numRows, numColumns, symmetric_nnz); + inner_boundary_solver_matrix_symmetric.is_symmetric(true); + + int current_nz = 0; // Current non-zero index in the symmetric matrix + for (int nz_index = 0; nz_index < full_nnz; nz_index++) { + const int current_row = inner_boundary_solver_matrix.row_index(nz_index); + const int current_column = inner_boundary_solver_matrix.col_index(nz_index); + if (current_row <= current_column) { + inner_boundary_solver_matrix_symmetric.row_index(current_nz) = current_row; + inner_boundary_solver_matrix_symmetric.col_index(current_nz) = current_column; + inner_boundary_solver_matrix_symmetric.value(current_nz) = inner_boundary_solver_matrix.value(nz_index); + current_nz++; + } + } + return 
inner_boundary_solver_matrix_symmetric; +#else + return inner_boundary_solver_matrix; +#endif +} diff --git a/src/Smoother/SmootherTake/buildMatrix.cpp b/src/Smoother/SmootherTake/buildMatrix.cpp deleted file mode 100644 index 703b939e..00000000 --- a/src/Smoother/SmootherTake/buildMatrix.cpp +++ /dev/null @@ -1,466 +0,0 @@ -#include "../../../include/Smoother/SmootherTake/smootherTake.h" - -/* Tridiagonal matrices */ -static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, - double value) -{ - if (row == column) - solver.main_diagonal(batch, row) = value; - else if (row == column - 1) - solver.sub_diagonal(batch, row) = value; - else if (row == 0 && column == solver.matrixDimension() - 1) - solver.cyclic_corner(batch) = value; -} - -/* Inner Boundary COO/CSR matrix */ -#ifdef GMGPOLAR_USE_MUMPS -static inline void updateCOOCSRMatrixElement(SparseMatrixCOO& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_index(ptr + offset) = row; - matrix.col_index(ptr + offset) = col; - matrix.value(ptr + offset) = val; -} -#else -static inline void updateCOOCSRMatrixElement(SparseMatrixCSR& matrix, int ptr, int offset, int row, int col, - double val) -{ - matrix.row_nz_index(row, offset) = col; - matrix.row_nz_entry(row, offset) = val; -} -#endif - -void SmootherTake::nodeBuildAscTake(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, - MatrixType& inner_boundary_circle_matrix, - BatchedTridiagonalSolver& circle_tridiagonal_solver, - BatchedTridiagonalSolver& radial_tridiagonal_solver, - ConstVector& arr, ConstVector& att, ConstVector& art, - ConstVector& detDF, ConstVector& coeff_beta) -{ - assert(i_r >= 0 && i_r < grid.nr()); - assert(i_theta >= 0 && i_theta < grid.ntheta()); - - const int numberSmootherCircles = grid.numberSmootherCircles(); - const int lengthSmootherRadial = grid.lengthSmootherRadial(); - - assert(numberSmootherCircles >= 2); - assert(lengthSmootherRadial >= 3); - - 
int row, column; - double value; - /* ------------------------------------------ */ - /* Node in the interior of the Circle Section */ - /* ------------------------------------------ */ - if (i_r > 0 && i_r < numberSmootherCircles) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = circle_tridiagonal_solver; - int batch = i_r; - - const int center_index = i_theta; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Bottom */ - row = center_index; - column = bottom_index; - value = -coeff3 * (att[center] + att[bottom]); - updateMatrixElement(solver, batch, row, column, value); - - /* Top */ - row = center_index; - column = top_index; - value = -coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - } - /* ------------------------------------------ */ - /* Node in the interior of the Radial Section */ - /* ------------------------------------------ */ - else if 
(i_r > numberSmootherCircles && i_r < grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int left_index = i_r - numberSmootherCircles - 1; - const int right_index = i_r - numberSmootherCircles + 1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Left */ - row = center_index; - column = left_index; - value = -coeff1 * (arr[center] + arr[left]); - updateMatrixElement(solver, batch, row, column, value); - - /* Right */ - row = center_index; - column = right_index; - value = -coeff2 * (arr[center] + arr[right]); - updateMatrixElement(solver, batch, row, column, value); - } - /* ------------------------------------------ */ - /* Circle Section: Node in the inner boundary */ - /* ------------------------------------------ */ - else if (i_r == 0) { - /* ------------------------------------------------ */ - /* Case 1: Dirichlet boundary on the 
inner boundary */ - /* ------------------------------------------------ */ - int ptr, offset; - int row, col; - double val; - if (DirBC_Interior) { - auto& matrix = inner_boundary_circle_matrix; - const int center_index = i_theta; - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = 1.0; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - else { - /* ------------------------------------------------------------- */ - /* Case 2: Across origin discretization on the interior boundary */ - /* ------------------------------------------------------------- */ - // h1 gets replaced with 2 * R0. - // (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). - // Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. 
- const double h1 = 2.0 * grid.radius(0); - const double h2 = grid.radialSpacing(i_r); - const double k1 = grid.angularSpacing(i_theta - 1); - const double k2 = grid.angularSpacing(i_theta); - - const double coeff1 = 0.5 * (k1 + k2) / h1; - const double coeff2 = 0.5 * (k1 + k2) / h2; - const double coeff3 = 0.5 * (h1 + h2) / k1; - const double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - const int i_theta_AcrossOrigin = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); - - const int left = grid.index(i_r, i_theta_AcrossOrigin); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& matrix = inner_boundary_circle_matrix; - - const int center_index = i_theta; - const int left_index = i_theta_AcrossOrigin; - const int bottom_index = i_theta_M1; - const int top_index = i_theta_P1; - - const int center_nz_index = getCircleAscIndex(i_r, i_theta); - - const double center_value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - const double left_value = -coeff1 * (arr[center] + arr[left]); - const double bottom_value = -coeff3 * (att[center] + att[bottom]); - const double top_value = -coeff4 * (att[center] + att[top]); - - /* Fill matrix row of (i,j) */ - row = center_index; - ptr = center_nz_index; - - const Stencil& CenterStencil = getStencil(i_r); - - offset = CenterStencil[StencilPosition::Center]; - col = center_index; - val = center_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Left]; - col = left_index; - val = left_value; - updateCOOCSRMatrixElement(matrix, ptr, 
offset, row, col, val); - - offset = CenterStencil[StencilPosition::Bottom]; - col = bottom_index; - val = bottom_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - - offset = CenterStencil[StencilPosition::Top]; - col = top_index; - val = top_value; - updateCOOCSRMatrixElement(matrix, ptr, offset, row, col, val); - } - } - /* --------------------------------------------- */ - /* Radial Section: Node next to circular section */ - /* --------------------------------------------- */ - else if (i_r == numberSmootherCircles) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int right_index = i_r - numberSmootherCircles + 1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Right */ - row = center_index; - column = right_index; - value = -coeff2 * (arr[center] + arr[right]); - updateMatrixElement(solver, batch, row, column, value); - } - 
/* ------------------------------------------- */ - /* Radial Section: Node next to outer boundary */ - /* ------------------------------------------- */ - else if (i_r == grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int center = grid.index(i_r, i_theta); - const int top = grid.index(i_r, i_theta_P1); - const int right = grid.index(i_r + 1, i_theta); - - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int left_index = i_r - numberSmootherCircles - 1; - const int right_index = i_r - numberSmootherCircles + 1; - - /* Center: (Left, Right, Bottom, Top) */ - row = center_index; - column = center_index; - value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * fabs(detDF[center]) + - coeff1 * (arr[center] + arr[left]) + coeff2 * (arr[center] + arr[right]) + - coeff3 * (att[center] + att[bottom]) + coeff4 * (att[center] + att[top]); - updateMatrixElement(solver, batch, row, column, value); - - /* Left */ - row = center_index; - column = left_index; - value = -coeff1 * (arr[center] + arr[left]); - updateMatrixElement(solver, batch, row, column, value); - - /* Right: NOT INCLUDED! 
*/ - row = center_index; - column = right_index; - value = 0.0; - updateMatrixElement(solver, batch, row, column, value); - } - /* ------------------------------------------ */ - /* Radial Section: Node on the outer boundary */ - /* ------------------------------------------ */ - else if (i_r == grid.nr() - 1) { - auto& solver = radial_tridiagonal_solver; - int batch = i_theta; - - const int center_index = i_r - numberSmootherCircles; - const int left_index = i_r - numberSmootherCircles - 1; - - /* Fill matrix row of (i,j) */ - row = center_index; - column = center_index; - value = 1.0; - updateMatrixElement(solver, batch, row, column, value); - - /* Left: NOT INCLUDED */ - row = center_index; - column = left_index; - value = 0.0; - updateMatrixElement(solver, batch, row, column, value); - } -} - -void SmootherTake::buildAscCircleSection(int i_r) -{ - assert(level_cache_.cacheDensityProfileCoefficients()); - assert(level_cache_.cacheDomainGeometry()); - - ConstVector arr = level_cache_.arr(); - ConstVector att = level_cache_.att(); - ConstVector art = level_cache_.art(); - ConstVector detDF = level_cache_.detDF(); - ConstVector coeff_beta = level_cache_.coeff_beta(); - - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } -} - -void SmootherTake::buildAscRadialSection(int i_theta) -{ - assert(level_cache_.cacheDensityProfileCoefficients()); - assert(level_cache_.cacheDomainGeometry()); - - ConstVector arr = level_cache_.arr(); - ConstVector att = level_cache_.att(); - ConstVector art = level_cache_.art(); - ConstVector detDF = level_cache_.detDF(); - ConstVector coeff_beta = level_cache_.coeff_beta(); - - for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { - // Build Asc at the current node - nodeBuildAscTake(i_r, 
i_theta, grid_, DirBC_Interior_, inner_boundary_circle_matrix_, - circle_tridiagonal_solver_, radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); - } -} - -void SmootherTake::buildAscMatrices() -{ - /* -------------------------------------- */ - /* Part 1: Allocate Asc Smoother matrices */ - /* -------------------------------------- */ - // BatchedTridiagonalSolvers allocations are handled in the SmootherTake constructor. - // circle_tridiagonal_solver_[batch_index=0] is unitialized. Use inner_boundary_circle_matrix_ instead. - -#ifdef GMGPOLAR_USE_MUMPS - // Although the matrix is symmetric, we need to store all its entries, so we disable the symmetry. - const int inner_i_r = 0; - const int inner_nnz = getNonZeroCountCircleAsc(inner_i_r); - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCOO(num_circle_nodes, num_circle_nodes, inner_nnz); - inner_boundary_circle_matrix_.is_symmetric(false); -#else - std::function nnz_per_row = [&](int i_theta) { - return DirBC_Interior_ ? 
1 : 4; - }; - const int num_circle_nodes = grid_.ntheta(); - inner_boundary_circle_matrix_ = SparseMatrixCSR(num_circle_nodes, num_circle_nodes, nnz_per_row); -#endif - - /* ---------------------------------- */ - /* Part 2: Fill Asc Smoother matrices */ - /* ---------------------------------- */ -#pragma omp parallel num_threads(num_omp_threads_) - { -#pragma omp for nowait - for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { - buildAscCircleSection(i_r); - } - -#pragma omp for nowait - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { - buildAscRadialSection(i_theta); - } - } - -#ifdef GMGPOLAR_USE_MUMPS - /* ------------------------------------------------------------------ */ - /* Part 3: Convert inner_boundary_circle_matrix to a symmetric matrix */ - /* ------------------------------------------------------------------ */ - SparseMatrixCOO full_matrix = std::move(inner_boundary_circle_matrix_); - - const int nnz = full_matrix.non_zero_size(); - const int numRows = full_matrix.rows(); - const int numColumns = full_matrix.columns(); - const int symmetric_nnz = nnz - (nnz - numRows) / 2; - - inner_boundary_circle_matrix_ = SparseMatrixCOO(numRows, numColumns, symmetric_nnz); - inner_boundary_circle_matrix_.is_symmetric(true); - - int current_nz = 0; - for (int nz_index = 0; nz_index < full_matrix.non_zero_size(); nz_index++) { - int current_row = full_matrix.row_index(nz_index); - int current_col = full_matrix.col_index(nz_index); - if (current_row <= current_col) { - inner_boundary_circle_matrix_.row_index(current_nz) = current_row; - inner_boundary_circle_matrix_.col_index(current_nz) = current_col; - inner_boundary_circle_matrix_.value(current_nz) = std::move(full_matrix.value(nz_index)); - current_nz++; - } - } -#endif -} -// clang-format on diff --git a/src/Smoother/SmootherTake/buildTridiagonalAsc.cpp b/src/Smoother/SmootherTake/buildTridiagonalAsc.cpp new file mode 100644 index 00000000..d0a05838 --- /dev/null +++ 
b/src/Smoother/SmootherTake/buildTridiagonalAsc.cpp @@ -0,0 +1,327 @@ +#include "../../../include/Smoother/SmootherTake/smootherTake.h" + +static inline void updateMatrixElement(BatchedTridiagonalSolver& solver, int batch, int row, int column, + double value) +{ + if (row == column) + solver.main_diagonal(batch, row) = value; + else if (row == column - 1) + solver.sub_diagonal(batch, row) = value; + else if (row == 0 && column == solver.matrixDimension() - 1) + solver.cyclic_corner(batch) = value; +} + +void SmootherTake::nodeBuildTridiagonalSolverMatrices(int i_r, int i_theta, const PolarGrid& grid, bool DirBC_Interior, + BatchedTridiagonalSolver& circle_tridiagonal_solver, + BatchedTridiagonalSolver& radial_tridiagonal_solver, + ConstVector& arr, ConstVector& att, + ConstVector& art, ConstVector& detDF, + ConstVector& coeff_beta) +{ + assert(i_r >= 0 && i_r < grid.nr()); + assert(i_theta >= 0 && i_theta < grid.ntheta()); + + const int numberSmootherCircles = grid.numberSmootherCircles(); + const int lengthSmootherRadial = grid.lengthSmootherRadial(); + + assert(numberSmootherCircles >= 2); + assert(lengthSmootherRadial >= 3); + + int row, column; + double value; + /* ------------------------------------------ */ + /* Node in the interior of the Circle Section */ + /* ------------------------------------------ */ + if (i_r > 0 && i_r < numberSmootherCircles) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const 
int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = circle_tridiagonal_solver; + const int batch = i_r; + + const int center_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Bottom */ + row = center_index; + column = bottom_index; + value = bottom_value; + updateMatrixElement(solver, batch, row, column, value); + + /* Top */ + row = center_index; + column = top_index; + value = top_value; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------ */ + /* Node in the interior of the Radial Section */ + /* ------------------------------------------ */ + else if (i_r > numberSmootherCircles && i_r < grid.nr() - 2) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const 
int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int left_index = i_r - numberSmootherCircles - 1; + const int right_index = i_r - numberSmootherCircles + 1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Left */ + row = center_index; + column = left_index; + value = left_value; + updateMatrixElement(solver, batch, row, column, value); + + /* Right */ + row = center_index; + column = right_index; + value = right_value; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------ */ + /* Circle Section: Node in the inner boundary */ + /* ------------------------------------------ */ + else if (i_r == 0) { + // The inner boundary circle line is handled by the inner_boundary_mumps_solver, so we fill in the identity matrix. 
+ const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + auto& solver = circle_tridiagonal_solver; + const int batch = i_r; + + const int center_index = i_theta; + const int bottom_index = i_theta_M1; + const int top_index = i_theta_P1; + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Bottom */ + row = center_index; + column = bottom_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Top */ + row = center_index; + column = top_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + } + /* --------------------------------------------- */ + /* Radial Section: Node next to circular section */ + /* --------------------------------------------- */ + else if (i_r == numberSmootherCircles) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int right_index = i_r - numberSmootherCircles + 1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * 
(arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Right */ + row = center_index; + column = right_index; + value = right_value; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------- */ + /* Radial Section: Node next to outer boundary */ + /* ------------------------------------------- */ + else if (i_r == grid.nr() - 2) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + + const int left = grid.index(i_r - 1, i_theta); + const int bottom = grid.index(i_r, i_theta_M1); + const int center = grid.index(i_r, i_theta); + const int top = grid.index(i_r, i_theta_P1); + const int right = grid.index(i_r + 1, i_theta); + + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int left_index = i_r - numberSmootherCircles - 1; + const int right_index = i_r - numberSmootherCircles + 1; + + const double left_value = -coeff1 * (arr[center] + arr[left]); + const double right_value = -coeff2 * (arr[center] + arr[right]); + const double bottom_value = -coeff3 * (att[center] + att[bottom]); + 
const double top_value = -coeff4 * (att[center] + att[top]); + + /* Center: (Left, Right, Bottom, Top) */ + row = center_index; + column = center_index; + value = 0.25 * (h1 + h2) * (k1 + k2) * coeff_beta[center] * std::fabs(detDF[center]) - + (left_value + right_value + bottom_value + top_value); + updateMatrixElement(solver, batch, row, column, value); + + /* Left */ + row = center_index; + column = left_index; + value = left_value; + updateMatrixElement(solver, batch, row, column, value); + + /* Right: NOT INCLUDED! */ + row = center_index; + column = right_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + } + /* ------------------------------------------ */ + /* Radial Section: Node on the outer boundary */ + /* ------------------------------------------ */ + else if (i_r == grid.nr() - 1) { + auto& solver = radial_tridiagonal_solver; + const int batch = i_theta; + + const int center_index = i_r - numberSmootherCircles; + const int left_index = i_r - numberSmootherCircles - 1; + + /* Fill matrix row of (i,j) */ + row = center_index; + column = center_index; + value = 1.0; + updateMatrixElement(solver, batch, row, column, value); + + /* Left: NOT INCLUDED */ + row = center_index; + column = left_index; + value = 0.0; + updateMatrixElement(solver, batch, row, column, value); + } +} + +void SmootherTake::buildTridiagonalSolverMatrices() +{ + assert(level_cache_.cacheDensityProfileCoefficients()); + assert(level_cache_.cacheDomainGeometry()); + + ConstVector arr = level_cache_.arr(); + ConstVector att = level_cache_.att(); + ConstVector art = level_cache_.art(); + ConstVector detDF = level_cache_.detDF(); + ConstVector coeff_beta = level_cache_.coeff_beta(); + +#pragma omp parallel num_threads(num_omp_threads_) + { +#pragma omp for nowait + for (int i_r = 0; i_r < grid_.numberSmootherCircles(); i_r++) { + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, 
DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } + } + +#pragma omp for nowait + for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta++) { + for (int i_r = grid_.numberSmootherCircles(); i_r < grid_.nr(); i_r++) { + nodeBuildTridiagonalSolverMatrices(i_r, i_theta, grid_, DirBC_Interior_, circle_tridiagonal_solver_, + radial_tridiagonal_solver_, arr, att, art, detDF, coeff_beta); + } + } + } +} \ No newline at end of file diff --git a/src/Smoother/SmootherTake/smootherTake.cpp b/src/Smoother/SmootherTake/smootherTake.cpp index 76dc8b89..6555b7d7 100644 --- a/src/Smoother/SmootherTake/smootherTake.cpp +++ b/src/Smoother/SmootherTake/smootherTake.cpp @@ -6,17 +6,16 @@ SmootherTake::SmootherTake(const PolarGrid& grid, const LevelCache& level_cache, : Smoother(grid, level_cache, domain_geometry, density_profile_coefficients, DirBC_Interior, num_omp_threads) , circle_tridiagonal_solver_(grid.ntheta(), grid.numberSmootherCircles(), true) , radial_tridiagonal_solver_(grid.lengthSmootherRadial(), grid.ntheta(), false) -{ - buildAscMatrices(); - - circle_tridiagonal_solver_.setup(); - radial_tridiagonal_solver_.setup(); - #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_.emplace(inner_boundary_circle_matrix_); + , inner_boundary_mumps_solver_(buildInteriorBoundarySolverMatrix()) #else - inner_boundary_lu_solver_ = SparseLUSolver(inner_boundary_circle_matrix_); + , inner_boundary_circle_matrix_(buildInteriorBoundarySolverMatrix()) + , inner_boundary_lu_solver_(inner_boundary_circle_matrix_) #endif +{ + buildTridiagonalSolverMatrices(); + circle_tridiagonal_solver_.setup(); + radial_tridiagonal_solver_.setup(); } // The smoothing solves linear systems of the form: @@ -46,43 +45,31 @@ void SmootherTake::smoothing(Vector x, ConstVector rhs, Vector White. */ - const int start_black_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 
1 : 0; - const int start_white_circles = (grid_.numberSmootherCircles() % 2 == 0) ? 0 : 1; - - /* Black Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_r = start_black_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ - + /* ----------------------------------------------- */ + /* 1. Black-Circle update (u_bc): */ + /* A_bc * u_bc = f_bc − A_bc^ortho * u_bc^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoBlackCircleSection(x, rhs, temp); solveBlackCircleSection(x, temp); - /* White Circle Section */ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_r = start_white_circles; i_r < grid_.numberSmootherCircles(); i_r += 2) { - applyAscOrthoCircleSection(i_r, x, rhs, temp); - } /* Implicit barrier */ - + /* ----------------------------------------------- */ + /* 2. White-Circle update (u_wc): */ + /* A_wc * u_wc = f_wc − A_wc^ortho * u_wc^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoWhiteCircleSection(x, rhs, temp); solveWhiteCircleSection(x, temp); - /* Black Radial Section */ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_theta = 0; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ - + /* ----------------------------------------------- */ + /* 3. Black-Radial update (u_br): */ + /* A_br * u_br = f_br − A_br^ortho * u_br^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoBlackRadialSection(x, rhs, temp); solveBlackRadialSection(x, temp); - /* White Radial Section*/ -#pragma omp parallel for num_threads(num_omp_threads_) - for (int i_theta = 1; i_theta < grid_.ntheta(); i_theta += 2) { - applyAscOrthoRadialSection(i_theta, x, rhs, temp); - } /* Implicit barrier */ - + /* ----------------------------------------------- */ + /* 4. 
White-Radial update (u_wr): */ + /* A_wr * u_wr = f_wr − A_wr^ortho * u_wr^ortho */ + /* ----------------------------------------------- */ + applyAscOrthoWhiteRadialSection(x, rhs, temp); solveWhiteRadialSection(x, temp); } diff --git a/src/Smoother/SmootherTake/solveAscSystem.cpp b/src/Smoother/SmootherTake/solveAscSystem.cpp index 64cd249c..bf5d52fd 100644 --- a/src/Smoother/SmootherTake/solveAscSystem.cpp +++ b/src/Smoother/SmootherTake/solveAscSystem.cpp @@ -16,7 +16,7 @@ void SmootherTake::solveBlackCircleSection(Vector x, Vector temp Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif @@ -48,7 +48,7 @@ void SmootherTake::solveWhiteCircleSection(Vector x, Vector temp Vector inner_boundary = Kokkos::subview(temp, Kokkos::make_pair(0, grid_.ntheta())); #ifdef GMGPOLAR_USE_MUMPS - inner_boundary_mumps_solver_->solve(inner_boundary); + inner_boundary_mumps_solver_.solve(inner_boundary); #else inner_boundary_lu_solver_.solveInPlace(inner_boundary); #endif From 12c7c3fe8d35e8c598c98ad17a2ca97916ed8b40 Mon Sep 17 00:00:00 2001 From: julianlitz Date: Mon, 9 Mar 2026 20:56:28 +0100 Subject: [PATCH 20/20] Remove unused variables --- .../buildInnerBoundaryAsc.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp index 6dc767db..ad3722fe 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/buildInnerBoundaryAsc.cpp @@ -39,19 +39,7 @@ void ExtrapolatedSmootherGive::nodeBuildInteriorBoundarySolverMatrix_i_r_0(int i /* ------------------------------------------------ */ if 
(DirBC_Interior) { /* Fill result(i,j) */ - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff2 = 0.5 * (k1 + k2) / h2; - - int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - int center_index = i_theta; - int right_index = i_theta; - int bottom_index = i_theta_M1; - int top_index = i_theta_P1; /* Fill matrix row of (i,j) */ row = center_index;