diff --git a/devito/__init__.py b/devito/__init__.py index dc6110c65a..bb310e9733 100644 --- a/devito/__init__.py +++ b/devito/__init__.py @@ -42,7 +42,7 @@ from devito.core import * # noqa from devito.logger import logger_registry, _set_log_level # noqa from devito.mpi.routines import mpi_registry -from devito.operator import profiler_registry, operator_registry +from devito.operator import NcuProfiling, profiler_registry, operator_registry # Apply monkey-patching while we wait for our patches to be upstreamed and released from devito.mpatches import * # noqa @@ -159,8 +159,17 @@ def autotune_callback(val): # noqa configuration.add('opt', 'advanced', list(operator_registry._accepted), deprecate='dle') configuration.add('opt-options', {}, deprecate='dle-options') + # Setup Operator profiling -configuration.add('profiling', 'basic', list(profiler_registry), impacts_jit=False) +def profiling_preprocessor(i): + if isinstance(i, dict): + return NcuProfiling(i['ncu']) + + return i + + +configuration.add('profiling', 'basic', list(profiler_registry), + preprocessor=profiling_preprocessor, impacts_jit=False) # Initialize `configuration` init_configuration() diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index 3166bf255a..ea4025a3ab 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -21,8 +21,9 @@ __all__ = [ # noqa: RUF022 'platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices', 'get_nvidia_cc', 'get_cuda_path', 'get_cuda_version', 'get_hip_path', - 'check_cuda_runtime', 'get_m1_llvm_path', 'get_advisor_path', 'Platform', - 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power', 'Device', + 'check_cuda_runtime', 'load_cudart', 'get_m1_llvm_path', 'get_advisor_path', + 'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power', + 'Device', 'NvidiaDevice', 'AmdDevice', 'IntelDevice', # Brand-agnostic 'ANYCPU', 'ANYGPU', @@ -646,13 +647,25 @@ def get_m1_llvm_path(language): @memoized_func -def check_cuda_runtime(): +def load_cudart(): + """ + Load the CUDA runtime library. + """ libname = ctypes.util.find_library("cudart") if not libname: + raise RuntimeError("Unable to find CUDA runtime library `libcudart`") + + return ctypes.CDLL(libname) + + +@memoized_func +def check_cuda_runtime(): + try: + cuda = load_cudart() + except RuntimeError: warning("Unable to check compatibility of NVidia driver and runtime") return - cuda = ctypes.CDLL(libname) driver_version = ctypes.c_int() runtime_version = ctypes.c_int() @@ -1115,11 +1128,10 @@ def max_shm_per_block(self): """ Get the maximum amount of shared memory per thread block """ - # Load libcudart - libname = ctypes.util.find_library("cudart") - if not libname: + try: + lib = load_cudart() + except RuntimeError: return 64 * 1024 # 64 KB default - lib = ctypes.CDLL(libname) cudaDevAttrMaxSharedMemoryPerBlockOptin = 97 # get current device diff --git a/devito/operator/__init__.py b/devito/operator/__init__.py index c11ecb0b75..2a78f2fc6c 100644 --- a/devito/operator/__init__.py +++ b/devito/operator/__init__.py @@ -1,3 +1,3 @@ from .operator import Operator # noqa -from .profiling import profiler_registry # noqa +from .profiling import NcuProfiling, profiler_registry # noqa from .registry import operator_registry # noqa diff --git a/devito/operator/operator.py b/devito/operator/operator.py index 95b4db0a33..bfba76b593 100644 --- a/devito/operator/operator.py +++ b/devito/operator/operator.py @@ -1158,7 +1158,12 @@ def lower_perfentry(v): perf(f"{indent*2}+ {n} ran in {fround(v1.time):.2f} s " f"[{fround(v1.time/v.time*100):.2f}%] {metrics}") - # Emit performance mode and arguments + self._emit_perf_args(args) + + return summary + + def _emit_perf_args(self, args): + """Emit performance mode and runtime performance arguments.""" perf_args = {} for i in self.input + self.dimensions: if not i.is_PerfKnob: @@ -1176,8 +1181,6 @@ def lower_perfentry(v): perf_args = {k: perf_args[k] for k in sorted(perf_args)} perf(f"Performance[mode={self._mode}] arguments: {perf_args}") - return summary - # Pickling support def __getstate__(self): diff --git a/devito/operator/profiling.py b/devito/operator/profiling.py index 49e4417742..835e8b1b36 100644 --- a/devito/operator/profiling.py +++ b/devito/operator/profiling.py @@ -19,7 +19,7 @@ from devito.symbolics import subs_op_args from devito.tools import DefaultOrderedDict, flatten -__all__ = ['create_profile'] +__all__ = ['NcuProfiling', 'create_profile'] SectionData = namedtuple('SectionData', 'ops sops points traffic itermaps') @@ -28,6 +28,28 @@ PerfEntry = namedtuple('PerfEntry', 'time gflopss gpointss oi ops itershapes') +class NcuProfiling(str): + + """ + String-like profiling mode carrying the Operator selected for NCU. + + The string value is ``'ncu'`` so profiler construction can use this object + directly as a key into ``profiler_registry``. The selected Operator is kept + in ``operator_name``. + """ + + def __new__(cls, operator_name): + if not isinstance(operator_name, str) or not operator_name: + raise ValueError("Expected DEVITO_PROFILING=ncu:op_name") + if ',' in operator_name: + raise ValueError("NCU profiling supports one Operator at a time") + + obj = str.__new__(cls, 'ncu') + obj.operator_name = operator_name + + return obj + + class Profiler: _default_includes = [] @@ -532,6 +554,7 @@ def create_profile(name): 'advanced': AdvancedProfiler, 'advanced1': AdvancedProfilerVerbose1, 'advanced2': AdvancedProfilerVerbose2, + 'ncu': AdvancedProfilerVerbose2, 'advisor': AdvisorProfiler } """Profiling levels.""" diff --git a/devito/parameters.py b/devito/parameters.py index 0412380533..2aba1112ad 100644 --- a/devito/parameters.py +++ b/devito/parameters.py @@ -199,7 +199,14 @@ def init_configuration(configuration=configuration, env_vars_mapper=env_vars_map # Env variable format: 'var=k1:v1;k2:v2:k3:v3:...' keys, values = zip(*[i.split(':') for i in items], strict=True) # Casting - values = [eval(i) for i in values] + processed = [] + for i in values: + try: + processed.append(eval(i)) + except (NameError, SyntaxError): + # Allow unquoted strings as `k:v` values. + processed.append(i) + values = processed except AttributeError: # Env variable format: 'var=v', 'v' is not a string keys = [v]