diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 92ef52a5d..528bcd52e 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -25,5 +25,5 @@ | Pierre-siddall | Pierre Siddall | Met Office | 2026-01-29 | | mo-lucy-gordon | Lucy Gordon | Met Office | 2026-03-18 | | shreybh1 | Shrey Bhardwaj | Met Office | 2026-03-26 | - +| mattatmet | Matthew Walker | Met Office | 2026-04-21 | diff --git a/infrastructure/build/psyclone/psyclone_tools.py b/infrastructure/build/psyclone/psyclone_tools.py index 14060c3e2..829733e90 100644 --- a/infrastructure/build/psyclone/psyclone_tools.py +++ b/infrastructure/build/psyclone/psyclone_tools.py @@ -13,20 +13,32 @@ from psyclone.domain.lfric import LFRicConstants from psyclone.psyGen import InvokeSchedule -from psyclone.psyir.nodes import Loop, Routine, Directive +from psyclone.psyir.nodes import ( + Loop, + Routine, + Directive, + Container, + OMPParallelDirective, + OMPParallelDoDirective, + OMPDoDirective, + FileContainer, + ProfileNode +) from psyclone.transformations import ( Dynamo0p3ColourTrans, Dynamo0p3OMPLoopTrans, Dynamo0p3RedundantComputationTrans, OMPParallelTrans, + TransformationError ) +from psyclone.psyir.transformations import ProfileTrans # List of allowed 'setval_*' built-ins for redundant computation transformation SETVAL_BUILTINS = ["setval_c"] # ----------------------------------------------------------------------------- -def redundant_computation_setval(psyir): +def redundant_computation_setval(psyir: FileContainer): """ Applies the redundant computation transformation to loops over DoFs for the initialision built-ins, 'setval_*'. @@ -68,13 +80,14 @@ def redundant_computation_setval(psyir): # ----------------------------------------------------------------------------- -def colour_loops(psyir, enable_tiling=False): +def colour_loops(psyir: FileContainer, enable_tiling=False): """ Applies the colouring transformation to all applicable loops and optionally enables tiling. 
It creates the instance of `Dynamo0p3ColourTrans` only once. :param psyir: the PSyIR of the PSy-layer. + :param enable_tiling: a bool to enable tiling. Default False. :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer` """
@@ -86,6 +99,12 @@ def colour_loops(psyir, enable_tiling=False):
         # Colour loops over cells unless they are on discontinuous
         # spaces or over DoFs
         for child in subroutine.children:
+            # Check if the profiling calipers have been added before the
+            # colouring.
+            if isinstance(child, ProfileNode):
+                raise TransformationError(
+                    "Must apply colour_loops BEFORE profile_loops function "
+                    "in optimisation script.")
             if (
                 isinstance(child, Loop)
                 and child.iteration_space.endswith("cell_column")
@@ -94,9 +113,77 @@ def colour_loops(psyir, enable_tiling=False):
             ):
                 ctrans.apply(child, options={"tiling": enable_tiling})
 
+
+# -----------------------------------------------------------------------------
+def profile_loops(psyir: FileContainer, colours_only=True):
+    """
+    Applies timing calipers to kernels during the psyclone build. The default
+    is to only profile coloured loops but colours_only can be set to False to
+    profile every instance of a coded kernel.
+
+    Must be called after 'colour_loops' and before
+    'openmp_parallelise_loops' in the optimisation script.
+
+    :param psyir: the PSyIR of the PSy-layer.
+    :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer`
+    :param colours_only: profile only the coloured kernels. Default True.
+    :type colours_only: bool
+
+    :raises TransformationError: if a loop to be profiled is already inside
+        an OpenMP parallel or do region.
+
+    """
+    profile_trans = ProfileTrans()
+    # Loop types that are never profiled directly, even when colours_only
+    # is False.
+    leave_loops = ("cells_in_colour",
+                   "tiles_in_colour",
+                   "cells_in_tile")
+    omp_directives = (OMPParallelDirective,
+                      OMPParallelDoDirective,
+                      OMPDoDirective)
+
+    # Loop over all the InvokeSchedule in the PSyIR object
+    for subroutine in psyir.walk(InvokeSchedule):
+        # Add timing calipers to coloured loops. This should be done
+        # before the application of the openmp transformation.
+        count = 0
+        for loop in subroutine.loops():
+            kernels = loop.coded_kernels()
+            if not kernels:
+                continue
+            # Profile loops over colours and, when colours_only is False,
+            # every other loop whose type is not in leave_loops.
+            if not (loop.loop_type == "colours"
+                    or (not colours_only
+                        and loop.loop_type not in leave_loops)):
+                continue
+            # First check that the transformation is not being made inside
+            # an OMP region.
+            if loop.ancestor(omp_directives):
+                raise TransformationError(
+                    "Must apply profile_loops BEFORE "
+                    "openmp_parallelise_loops function in optimisation "
+                    "script.")
+            # Construct a unique caliper name from the invoke name, the kernel
+            # name and a running count. The kernel is taken from this loop
+            # (the first one, if it holds several) rather than indexed from
+            # the invoke-wide kernel list: loops that hold kernels but are
+            # not profiled would otherwise shift the index and label the
+            # region with the wrong kernel.
+            k_name = kernels[0].name
+            invoke_name = subroutine.invoke.name
+            file_name = loop.ancestor(Container).name
+            region_name = f"{invoke_name}:{k_name}_k{count}"
+            options = {"region_name": (file_name, region_name)}
+            profile_trans.apply(loop, options=options)
+            # Count here is to distinguish kernels of the same name
+            # in the same invoke.
+            count += 1
+
 
 # -----------------------------------------------------------------------------
-def openmp_parallelise_loops(psyir):
+def openmp_parallelise_loops(psyir: FileContainer):
     """
     Applies OpenMP Loop transformation to each applicable loop.
 
@@ -120,7 +207,7 @@ def openmp_parallelise_loops(psyir):
 
 
 # -----------------------------------------------------------------------------
-def view_transformed_schedule(psyir):
+def view_transformed_schedule(psyir: FileContainer):
     """
     Provides view of transformed Invoke schedule in the PSy-layer.
 