Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 0 additions & 152 deletions .github/workflows/compile_on_aws.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/static_checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
run: |
git config --global --add safe.directory /__w/OGL/OGL
# Create list of all source files belonging to this repository
git ls-files | grep -E "\.(C)" > pattern
git ls-files | grep -E "\.(cpp)" > pattern
# Create list of .cpp files that are in this repository and part of the
# compilation database
# also filters out " at the begin and end of the filename
Expand Down
12 changes: 12 additions & 0 deletions include/OGL/CommunicationPattern.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@ struct AllToAllPattern {
std::vector<int> recv_offsets;
};

/* @brief Computes the AllToAllPattern for a repartitioned communicator from
 * the global allToAll pattern by discarding all zero-size communication
 * before and after the repartitioner scope.
 *
 * @param exec_handler The executor handler
 * @param allToAll The original allToAll pattern on the global communicator
 * @param start_rank The original comm_world rank at which the repartitioned
 *                   scope starts
 * @return The reduced AllToAllPattern valid on the repartitioned communicator
 */
AllToAllPattern compute_repart_allToall(const ExecutorHandler &exec_handler,
                                        const AllToAllPattern allToAll,
                                        label start_rank);

/* @brief This function computes the send and recv counts vectors and the send
* and recv offsets vectors for scattering from an owner to all ranks, including
* owner itself
Expand Down
22 changes: 20 additions & 2 deletions include/OGL/DevicePersistent/ExecutorHandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ struct DeviceIdHandler {
return device_global_id % num_devices_per_node;
}

/* @brief Returns the owning rank on the global comm world communicator,
 * i.e. this rank's global rank rounded down to the nearest multiple of
 * ranks_per_gpu.
 */
label global_owner() const
{
    const label my_rank = Pstream::myProcNo();
    return (my_rank / ranks_per_gpu) * ranks_per_gpu;
}

/* @brief check if rank is an owning rank
*/
bool is_owner() const
Expand Down Expand Up @@ -270,15 +278,16 @@ class ExecutorHandler
// gko comm
label group = device_id_handler_.compute_group();
MPI_Comm gko_comm;
label host_rank = 0;
label host_rank = Pstream::myProcNo();
MPI_Comm_split(MPI_COMM_WORLD, group, host_rank, &gko_comm);
device_comm_ =
std::make_shared<gko::experimental::mpi::communicator>(
gko_comm, gko_force_host_buffer_);

// repart comm
MPI_Comm repart_comm;
label device_id = device_id_handler_.compute_device_id(4);
label global_rank = Pstream::myProcNo();
label device_id = global_rank / device_id_handler_.ranks_per_gpu;
MPI_Comm_split(MPI_COMM_WORLD, device_id, host_rank, &repart_comm);
repart_comm_ =
std::make_shared<gko::experimental::mpi::communicator>(
Expand All @@ -300,6 +309,15 @@ class ExecutorHandler
* */
bool get_non_orig_device_comm() const { return non_orig_device_comm_; }

label get_ranks_per_gpu() const { return device_id_handler_.ranks_per_gpu; }

/* @brief Sets the number of ranks sharing one GPU.
 *
 * @param ranks_per_gpu The new number of ranks per GPU
 */
void set_ranks_per_gpu(label ranks_per_gpu)
{
    device_id_handler_.ranks_per_gpu = ranks_per_gpu;
}

label get_owner_rank() const { return device_id_handler_.global_owner(); }

const std::shared_ptr<gko::Executor> get_device_exec() const
{
return this->get_persistent_object();
Expand Down
38 changes: 33 additions & 5 deletions include/OGL/DevicePersistent/Vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ struct VectorInitFunctor {
//// TODO store
auto comm_pattern = compute_gather_to_owner_counts(
exec_, repartitioner->get_ranks_per_gpu(), host_size);
bool host_buffer = !exec_.get_non_orig_device_comm();
bool host_buffer = exec_.get_gko_force_host_buffer();

communicate_values(ref_exec, exec, comm, comm_pattern,
host_view.get_const_data(),
Expand Down Expand Up @@ -177,7 +177,8 @@ class PersistentVector
auto rank = exec_.get_host_rank();
auto ref_exec = exec_.get_ref_exec();
auto comm = exec_.get_host_comm();
bool host_buffer = !exec_.get_non_orig_device_comm();
auto repart_comm = exec_.get_repart_comm();
bool host_buffer = exec_.get_gko_force_host_buffer();

auto repartitioner = dist_matrix_->get_repartitioner();
auto host_size = repartitioner->get_orig_size();
Expand All @@ -186,9 +187,36 @@ class PersistentVector
auto comm_pattern = compute_scatter_from_owner_counts(
exec_, repartitioner->get_ranks_per_gpu(), host_size);

communicate_values(exec, ref_exec, comm, comm_pattern,
get_vector()->get_local_values(),
const_cast<T *>(memory_), host_buffer);
label owner_rank = exec_.get_owner_rank();
auto repartAllToAll =
compute_repart_allToall(exec_, comm_pattern, owner_rank);

// if (owner_rank != Pstream::myProcNo()){
// label recv_count = repartAllToAll.recv_counts[0];
// repartAllToAll.recv_counts[Pstream::myProcNo()] = recv_count;
// repartAllToAll.recv_counts[0] = 0;
// }

// NOTE instead of all_to_all_v based communication MPI_Iscatterv
// seems to be preferable
// communicate_values(exec, ref_exec, comm, comm_pattern,
// get_vector()->get_local_values(),
// const_cast<T *>(memory_), host_buffer);

label send_size = comm_pattern.send_offsets.back();
auto send_view = gko::array<scalar>::const_view(
exec, send_size, get_vector()->get_local_values());
auto tmp = gko::array<scalar>(exec, send_size);

tmp = send_view;
tmp.set_executor(ref_exec);

MPI_Request copy_back_req;
MPI_Iscatterv(tmp.get_data(), repartAllToAll.send_counts.data(),
repartAllToAll.send_offsets.data(), MPI_DOUBLE,
const_cast<T *>(memory_), repartAllToAll.recv_counts[0],
MPI_DOUBLE, 0, repart_comm->get(), &copy_back_req);
MPI_Wait(&copy_back_req, MPI_STATUS_IGNORE);
}

/** Writes the content of the distributed vector to disk
Expand Down
23 changes: 18 additions & 5 deletions include/OGL/StoppingCriterion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ class StoppingCriterion {

const label frequency_;

const word frequencyMode_;

const scalar relaxationFactor_;

const bool adapt_minIter_;
Expand All @@ -172,6 +174,8 @@ class StoppingCriterion {
norm_eval_limit_(
controlDict.lookupOrDefault("normEvalLimit", label(100))),
frequency_(controlDict.lookupOrDefault("evalFrequency", label(1))),
frequencyMode_(controlDict.lookupOrDefault(
"evalFrequencyMode", word("relative"))), // optimizer, fixed
relaxationFactor_(
controlDict.lookupOrDefault("relaxationFactor", scalar(0.6))),
adapt_minIter_(
Expand All @@ -197,21 +201,30 @@ class StoppingCriterion {
bool export_res, label prev_solve_iters,
scalar prev_rel_cost) const
{
word frequencyMode = "optimizer";
Comment thread
greole marked this conversation as resolved.
label minIter = minIter_;
label frequency = frequency_;
// in case of export_res all residuals need to be computed
if (!export_res) {
if (prev_solve_iters > 0 && adapt_minIter_ && prev_rel_cost > 0) {
minIter = prev_solve_iters * relaxationFactor_;
auto alpha =
sqrt(1.0 / (prev_solve_iters * (1.0 - relaxationFactor_)) *
prev_rel_cost);
frequency = min(norm_eval_limit_, max(1, label(1 / alpha)));
if (frequencyMode == "optimizer") {
auto alpha = sqrt(
1.0 / (prev_solve_iters * (1.0 - relaxationFactor_)) *
prev_rel_cost);
frequency = min(norm_eval_limit_, max(1, label(1 / alpha)));
}
if (frequencyMode == "relative") {
frequency = label(prev_solve_iters * 0.075) + 1;
}
}
}

word msg = "Creating stopping criterion with minIter " +
std::to_string(minIter) + " frequency " +
std::to_string(frequency);
std::to_string(frequency) + " prev_solve_iters " +
std::to_string(prev_solve_iters) + " adapt_minIter_ " +
std::to_string(adapt_minIter_) + " prev_rel_cost ";

MLOG_0(verbose, msg)

Expand Down
5 changes: 3 additions & 2 deletions include/OGL/lduLduBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ class lduLduBase : public OGL_Info,
solverPerformance &solverPerf) const
{
bool fused = solver_controls_.lookupOrDefault<Switch>("fuse", true);
exec_handler_.init_device_comm();

auto repartitioner = std::make_shared<Repartitioner>(
host_matrix_wrapper_->get_local_nrows(), ranks_per_gpu_, verbose_,
Expand Down Expand Up @@ -334,8 +335,8 @@ class lduLduBase : public OGL_Info,
std::to_string(time_per_dof) + std::string(" [ns]") +
std::string("\n\tTime per iteration and DOF: ") +
std::to_string(time_per_iter_and_dof) + std::string(" [ns]") +
std::string("\n\tRetrieve results bandwidth ") +
std::to_string(bandwidth_copy_back) + std::string(" [GByte/s]");
std::string("\n\tRetrieve results bandwidth "); // +
std::to_string(bandwidth_copy_back) + std::string(" [GByte/s]");
MLOG_0(verbose_, msg)

return solverPerf;
Expand Down
Loading
Loading