From ae3471bcc55c5435ee55fefe4fc2c719b771385e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Aug 2023 15:02:37 +0100 Subject: [PATCH] Add gcc unroll optimizations in advPosHalf and calcEnergy Unrolling the small loops in advPosHalf and calcEnergy leads to an improved cycles-per-retired-instruction rate in these loops and hence slightly faster compute times. On an i9-12900, using test/sedovbig/sedovbig.pnt, I'm seeing improvements in the hydro cycle run time of: 2 threads: 0.2% 4 threads: 0.5% 8 threads: 1.6% 16 threads: 2.1% Signed-off-by: Colin Ian King --- src/Hydro.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Hydro.cc b/src/Hydro.cc index 3e60a7a..9048a7e 100644 --- a/src/Hydro.cc +++ b/src/Hydro.cc @@ -318,6 +318,7 @@ void Hydro::advPosHalf( double dth = 0.5 * dt; #pragma ivdep + #pragma GCC unroll 2 for (int p = pfirst; p < plast; ++p) { pxp[p] = px0[p] + pu0[p] * dth; } @@ -478,6 +479,7 @@ void Hydro::calcEnergy( const double fuzz = 1.e-99; #pragma ivdep + #pragma GCC unroll 2 for (int z = zfirst; z < zlast; ++z) { ze[z] = zetot[z] / (zm[z] + fuzz); }