Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ jobs:
module purge
module unuse $MODULEPATH
module use /cvmfs/software.eessi.io/init/modules/
# Set CPU override for the stack to query
export EESSI_ARCHDETECT_OPTIONS_OVERRIDE="x86_64/intel/icelake"
# Set GPU overrides for the stack to query (we have no GPU, so also override the GPU check)
export EESSI_ACCELERATOR_TARGET_OVERRIDE="accel/nvidia/cc90"
export EESSI_OVERRIDE_GPU_CHECK=1
# First do 2023.06 for EB 4
( module load EESSI/2023.06 && module load EasyBuild/4 && module load EESSI-extend && python scripts/generate_data_files.py --eessi-version=2023.06 ) &
# then 2023.06 for EB 5
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/prs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ jobs:
module purge
module unuse $MODULEPATH
module use /cvmfs/software.eessi.io/init/modules/
# Set CPU override for the stack to query
export EESSI_ARCHDETECT_OPTIONS_OVERRIDE="x86_64/intel/icelake"
# Set GPU overrides for the stack to query (we have no GPU, so also override the GPU check)
export EESSI_ACCELERATOR_TARGET_OVERRIDE="accel/nvidia/cc90"
export EESSI_OVERRIDE_GPU_CHECK=1
# Only do 2023.06 for EB 5 since this is just a test
( module load EESSI/2023.06 && module load EasyBuild/5 && module load EESSI-extend && python scripts/generate_data_files.py --eessi-version=2023.06 ) &
# Merge all these results together
Expand Down
81 changes: 56 additions & 25 deletions scripts/generate_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@
from collections import defaultdict, OrderedDict
from datetime import datetime, timezone
from easybuild.tools.version import VERSION as EASYBUILD_VERSION
from easybuild.framework.easyconfig.easyconfig import process_easyconfig, get_toolchain_hierarchy
from easybuild.framework.easyconfig.easyconfig import (
process_easyconfig,
get_toolchain_hierarchy,
)
from easybuild.tools.options import set_up_configuration
from easybuild.tools.include import include_easyblocks
from contextlib import contextmanager

VALID_EESSI_VERSIONS = ["2025.06", "2023.06"]

EESSI_REFERENCE_ARCHITECTURE = "x86_64/intel/icelake"

# Give order to my toolchains so I can easily figure out what "latest" means
EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS = OrderedDict(
{
Expand Down Expand Up @@ -49,7 +50,11 @@ def suppress_stdout():

def module_dict_from_module_string(module):
    """Split a full module name into its name and version parts.

    Args:
        module: Full module name of the form ``<name>/<version>``. Only the
            first ``/`` is treated as the separator, so the version part may
            itself contain slashes.

    Returns:
        A dict with the keys ``module_name``, ``module_version`` and
        ``full_module_name`` (the unmodified input string).

    Raises:
        ValueError: If ``module`` contains no ``/`` separator.
    """
    module_name, separator, module_version = module.partition("/")
    if not separator:
        # Same exception type the tuple-unpacking of split() raised, but with
        # a message that names the offending module string.
        raise ValueError(f"Expected a module string of the form '<name>/<version>', got {module!r}")

    return {
        "module_name": module_name,
        "module_version": module_version,
        "full_module_name": module,
    }

Expand Down Expand Up @@ -182,6 +187,16 @@ def collect_eb_files(base_path):
return dict(eb_files_by_version)


def merge_dicts(d1, d2, *more):
    """Merge dicts of lists into one new dict, concatenating values per key.

    Args:
        d1: First mapping; each value must be an iterable of items.
        d2: Second mapping, same shape as ``d1``.
        *more: Optional further mappings of the same shape.

    Returns:
        A plain ``dict`` whose keys appear in first-seen order and whose
        values are new lists holding the items from every input mapping,
        in argument order. The input mappings and their lists are not
        modified.
    """
    merged = {}

    for mapping in (d1, d2, *more):
        for key, values in mapping.items():
            # setdefault creates a fresh list on first sight of a key, so the
            # result never aliases a list owned by one of the inputs.
            merged.setdefault(key, []).extend(values)

    return merged


if __name__ == "__main__":
# The EESSI version is provided as an argument
parser = argparse.ArgumentParser(description="EESSI version to scan.")
Expand All @@ -199,10 +214,23 @@ def collect_eb_files(base_path):
print(f"Using EESSI version: {eessi_version}")

# We use a single architecture path to gather information about the software versions
base_path = (
f"/cvmfs/software.eessi.io/versions/{eessi_version}/software/linux/{EESSI_REFERENCE_ARCHITECTURE}/software/"
)
result = collect_eb_files(base_path)
eessi_reference_architecture = os.getenv("EESSI_ARCHDETECT_OPTIONS_OVERRIDE", False)
if not eessi_reference_architecture:
print("You must have selected a CPU architecture via EESSI_ARCHDETECT_OPTIONS_OVERRIDE")
exit()
base_path = f"/cvmfs/software.eessi.io/versions/{eessi_version}/software/linux/{eessi_reference_architecture}"
cpu_easyconfig_files_dict = collect_eb_files(os.path.join(base_path, "software"))
# We also gather all the accelerator installations for NVIDIA-enabled packages
# We're not typically running this script on a node with a GPU so an override must have been set
eessi_reference_nvidia_architecture = os.getenv("EESSI_ACCELERATOR_TARGET_OVERRIDE", False)
if not eessi_reference_nvidia_architecture:
print("You must have selected a GPU architecture via EESSI_ACCELERATOR_TARGET_OVERRIDE")
exit()
accel_base_path = os.path.join(base_path, eessi_reference_nvidia_architecture)
accel_easyconfig_files_dict = collect_eb_files(os.path.join(accel_base_path, "software"))

# Merge the easyconfig files
easyconfig_files_dict = merge_dicts(cpu_easyconfig_files_dict, accel_easyconfig_files_dict)

set_up_configuration(args="")
tmpdir = tempfile.mkdtemp()
Expand All @@ -224,23 +252,23 @@ def collect_eb_files(base_path):
{"name": "system", "version": "system"}
] + get_toolchain_hierarchy(top_level_toolchain)

for eb_version_of_install, files in sorted(result.items()):
for eb_version_of_install, easyconfigs in sorted(easyconfig_files_dict.items()):
print(f"Major version {eb_version_of_install}:")
if eb_version_of_install == str(EASYBUILD_VERSION.version[0]):
total_files = len(files)
for i, file in enumerate(files, start=1):
percent = (i / total_files) * 100
print(f"{percent:.1f}% - {file}")
total_easyconfigs = len(easyconfigs)
for i, easyconfig in enumerate(easyconfigs, start=1):
percent = (i / total_easyconfigs) * 100
print(f"{percent:.1f}% - {easyconfig}")

# Don't try to parse an EasyBuild easyconfig that is not the same major release
if "/software/EasyBuild/" in file and f"/EasyBuild/{eb_version_of_install}" not in file:
if "/software/EasyBuild/" in easyconfig and f"/EasyBuild/{eb_version_of_install}" not in easyconfig:
continue
# print(process_easyconfig(path)[0]['ec'].asdict())

eb_hooks_path = use_timestamped_reprod_if_exists(f"{os.path.dirname(file)}/reprod/easyblocks")
eb_hooks_path = use_timestamped_reprod_if_exists(f"{os.path.dirname(easyconfig)}/reprod/easyblocks")
easyblocks_dir = include_easyblocks(tmpdir, [eb_hooks_path + "/*.py"])
with suppress_stdout():
parsed_ec = process_easyconfig(file)[0]
parsed_ec = process_easyconfig(easyconfig)[0]
# included easyblocks are the first entry in sys.path, so just pop them but keep a list of what was used
sys.path.pop(0)
easyblocks_used = [
Expand All @@ -252,26 +280,29 @@ def collect_eb_files(base_path):

# Store everything we now know about the installation as a dict
# Use the path as the key since we know it is unique
eessi_software["eessi_version"][eessi_version][file] = parsed_ec["ec"].asdict()
eessi_software["eessi_version"][eessi_version][file]["mtime"] = os.path.getmtime(file)
eessi_software["eessi_version"][eessi_version][easyconfig] = parsed_ec["ec"].asdict()
eessi_software["eessi_version"][eessi_version][easyconfig]["mtime"] = os.path.getmtime(easyconfig)

# Make sure we can load the module before adding its information to the main dict
try:
eessi_software["eessi_version"][eessi_version][file]["required_modules"] = load_and_list_modules(
parsed_ec["full_mod_name"]
eessi_software["eessi_version"][eessi_version][easyconfig]["required_modules"] = (
load_and_list_modules(parsed_ec["full_mod_name"])
)
except RuntimeError as e:
print(f"Ignoring {file} due to error processing module: {e}")
eessi_software["eessi_version"][eessi_version].pop(file)
print(f"Ignoring {easyconfig} due to error processing module: {e}")
eessi_software["eessi_version"][eessi_version].pop(easyconfig)
continue

# Add important data that is related to the module environment
eessi_software["eessi_version"][eessi_version][file]["module"] = module_dict_from_module_string(
eessi_software["eessi_version"][eessi_version][easyconfig]["module"] = module_dict_from_module_string(
parsed_ec["full_mod_name"]
)
# Retain the easyblocks used so we can use a heuristic to figure out the type of extensions (R, Python, Perl)
eessi_software["eessi_version"][eessi_version][file]["easyblocks"] = easyblocks_used
eessi_software["eessi_version"][eessi_version][easyconfig]["easyblocks"] = easyblocks_used

# Store the result
with open(f"eessi_software_{eessi_version}-eb{str(EASYBUILD_VERSION.version[0])}.yaml", "w") as f:
with open(
f"eessi_software_{eessi_version}-eb{str(EASYBUILD_VERSION.version[0])}.yaml",
"w",
) as f:
yaml.dump(eessi_software, f)
40 changes: 34 additions & 6 deletions scripts/process_eessi_software_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@
"x86_64/intel/cascadelake",
]

NVIDIA_ARCHITECTURES = [
"accel/nvidia/cc70",
"accel/nvidia/cc80",
"accel/nvidia/cc90",
"accel/nvidia/cc100",
"accel/nvidia/cc120",
]

TOOLCHAIN_FAMILIES = [
"2025b_foss",
"2025a_foss",
Expand Down Expand Up @@ -59,13 +67,25 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
if f"/{arch}/" in original_path:
detected_arch = arch
break

if detected_arch is None:
raise RuntimeError("No known architecture matched in the input path.")

# also detect the GPU arch (this one may not exist)
# needs to be a dict as we can filter on associated cpu arch
base_version_dict["gpu_arch"] = {}
detected_accel_arch = None
for accel_arch in NVIDIA_ARCHITECTURES:
if f"/{accel_arch}/" in original_path:
detected_accel_arch = accel_arch
break
if detected_accel_arch is None:
# Not having a GPU is not an error (we can just leave it empty, which is falsey)
detected_accel_arch = ""

# 2) Construct the modulefile path
before_arch, _, _ = original_path.partition(detected_arch)
modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["module"]["full_module_name"] + ".lua"
# Remember, detected_accel_arch can be an empty string
modulefile = os.path.join(before_arch, detected_arch, detected_accel_arch, "modules/all", file_metadata["module"]["full_module_name"] + ".lua")
spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua"

# 3) Substitute each architecture and test module file existence in spider cache
Expand All @@ -76,13 +96,21 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
found = subprocess.run(["grep", "-q", substituted_modulefile, substituted_spider_cache]).returncode == 0
if found:
base_version_dict["cpu_arch"].append(arch)
# If we have an accelerator module let's check which architectures are supported
if detected_accel_arch:
base_version_dict["gpu_arch"][arch] = []
for accel_arch in NVIDIA_ARCHITECTURES:
accel_substituted_modulefile = substituted_modulefile.replace(detected_accel_arch, accel_arch)
found = subprocess.run(["grep", "-q", accel_substituted_modulefile, substituted_spider_cache]).returncode == 0
if found:
base_version_dict["gpu_arch"][arch].append(accel_arch)
else:
print(f"No module {accel_substituted_modulefile}...not adding software for architecture {arch}/{accel_arch}")
continue
else:
print(f"No module {substituted_modulefile}...not adding software for archtecture {arch}")
print(f"No module {substituted_modulefile}...not adding software for architecture {arch}")
continue

# TODO: Handle GPU arch later, but it is going to need to be a dict as we will filter on cpu arch
base_version_dict["gpu_arch"] = {}

# Now we can cycle through the possibilities
# - software application itself
software = {}
Expand Down