From 24a85ce1f984aa2e8367f9fb59a056a1052c4929 Mon Sep 17 00:00:00 2001
From: nnasiri <nnasiri@nvidia.com>
Date: Tue, 28 Apr 2026 14:28:22 -0700
Subject: [PATCH 1/4] Add per-benchmark Energy/GBS/epochs/RCP columns to the
 schema of mlperf training summary results

---
 .../result_summarizer/result_summarizer.py    | 288 +++++++-----------
 1 file changed, 107 insertions(+), 181 deletions(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 1198236..9e3eb31 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -1,6 +1,9 @@
 '''
 Summarizes a set of results.
 '''
+
+from __future__ import print_function
+
 import argparse
 import glob
 import json
@@ -10,15 +13,12 @@
 import itertools
 import pandas as pd
 import yaml
-import hashlib
-import math
-import operator
-import uuid as uuidlib
+import numpy as np
 
 from ..compliance_checker import mlp_compliance
 from ..compliance_checker.mlp_compliance import usage_choices, rule_choices
 from ..compliance_checker.mlp_parser import parse_file
-
+from ..rcp_checker import rcp_checker
 from ..benchmark_meta import get_allowed_benchmarks, get_result_file_counts
 
 
@@ -262,6 +262,14 @@ def _get_weak_scaling_metric_schema():
         'time_to_train_all': float,
     }
 
+def _get_strong_scaling_metric_schema():
+    return {
+        'time_to_train': float,
+        'Energy': float,
+        'GBS': float,
+        'epochs': float,
+        'RCP': str,
+    }
 
 def _get_empty_summary(usage, ruleset, weak_scaling=False):
     return Summary(
@@ -281,7 +289,6 @@ def _get_column_schema(usage, ruleset, weak_scaling=False):
         'accelerators_count': int,
         'framework': str,
         'notes': str,
-        'private_id': str
     }
     if weak_scaling == True:
         benchmarks = get_allowed_benchmarks(usage, ruleset)
@@ -289,10 +296,14 @@ def _get_column_schema(usage, ruleset, weak_scaling=False):
             for metric, dtype in _get_weak_scaling_metric_schema().items():
                 schema['{}:{}'.format(benchmark, metric)] = dtype
     else:
-        schema.update(
-            {b: float
-             for b in get_allowed_benchmarks(usage, ruleset)})
-    schema.update({'details_url': str, 'code_url': str})
+        #schema.update(
+        #    {b: float
+        #     for b in get_allowed_benchmarks(usage, ruleset)})
+        benchmarks = get_allowed_benchmarks(usage, ruleset)
+        for benchmark in benchmarks:
+            for metric, dtype in _get_strong_scaling_metric_schema().items():
+                schema['{}:{}'.format(benchmark, metric)] = dtype
+    schema.update({'notes': str, 'details_url': str, 'code_url': str})
     return schema
 
 
@@ -338,6 +349,8 @@ def _compute_strong_score_standalone(
 ):
     pattern = "{folder}/result_*.txt".format(folder=benchmark_folder)
     result_files = glob.glob(pattern, recursive=True)
+    benchmark_scores = {}
+    benchmark_power_scores = {}
     scores = []
     scores_track = {}
     power_scores = []
@@ -474,7 +487,7 @@ def _compute_weak_score_standalone(benchmark, system, has_power, benchmark_folde
 
 
 
-def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset):
+def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division, rcp_bypass=False):
     # Collect scores for benchmarks.
     benchmark_scores = {}
     benchmark_power_scores = {}
@@ -489,16 +502,82 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset):
         has_power = _has_power(benchmark_folder)
         benchmark = _benchmark_alias(folder_parts[-1])
         system = folder_parts[-3] if usage == 'hpc' else folder_parts[-2]
-        # Read scores from result files.
-        score, power_score = _compute_strong_score_standalone(benchmark, system, has_power, benchmark_folder, usage, ruleset, desc)
+        # Compute base perf/power scores
+        score, power_score = _compute_strong_score_standalone(
+            benchmark, system, has_power, benchmark_folder, usage, ruleset, desc
+        )
+
+        # RCP/GBS/Epochs additions for closed division
+        benchmark_gbs = None
+        benchmark_epochs = None
+        benchmark_rcp = None
+        if division == 'closed':
+            pattern = '{folder}/result_*.txt'.format(folder=benchmark_folder)
+            result_files = glob.glob(pattern, recursive=True)
+            try:
+                # RCP check
+                verbose = False
+                bert_train_samples = False
+                rcp_pass, rcp_msg, rcp_norm_factor = rcp_checker.check_directory(
+                    benchmark_folder,
+                    usage,
+                    ruleset,
+                    verbose,
+                    bert_train_samples,
+                    rcp_file=None,
+                    rcp_pass='pruned_rcps',
+                    rcp_bypass=rcp_bypass,
+                    set_scaling=True,
+                )
+                if not rcp_pass:
+                    print(
+                        'ERROR: RCP Test Failed on {}/{}/{} with message: {}.'.format(
+                            desc['submitter'], system, benchmark, rcp_msg
+                        )
+                    )
+                    if rcp_msg == 'RCP found':
+                        benchmark_rcp = 'Fail'
+                    elif rcp_msg == 'RCP Interpolation':
+                        benchmark_rcp = 'Interp. Fail'
+                    elif 'Missing' in rcp_msg:
+                        benchmark_rcp = 'Missing'
+                    elif rcp_msg == 'Cannot find any RCPs':
+                        benchmark_rcp = 'No RCP'
+                    else:
+                        benchmark_rcp = 'Unknown state'
+                else:
+                    benchmark_rcp = 'Pass'
+
+                # GBS and epochs
+                benchmark_gbs, subm_epochs, _ = rcp_checker.get_submission_epochs(
+                    result_files, ruleset, bert_train_samples=False
+                )
+                subm_epochs.sort()
+                samples_rejected = 4 if benchmark == 'unet3d' else 1
+                if len(subm_epochs) >= 2 * samples_rejected + 1:
+                    benchmark_epochs = float(
+                        np.mean(
+                            subm_epochs[
+                                samples_rejected : len(subm_epochs) - samples_rejected
+                            ]
+                        )
+                    )
+            except Exception as e:
+                print(f"WARNING: RCP/GBS computation failed for {benchmark_folder}: {e}")
+
+        # Map into metric-suffixed keys for schema
         if score is not None:
-            benchmark_scores[benchmark] = score
+            benchmark_scores[f"{benchmark}:time_to_train"] = score
+        if benchmark_gbs is not None:
+            benchmark_scores[f"{benchmark}:GBS"] = float(benchmark_gbs)
+        if benchmark_epochs is not None:
+            benchmark_scores[f"{benchmark}:epochs"] = float(benchmark_epochs)
+        if benchmark_rcp is not None:
+            benchmark_scores[f"{benchmark}:RCP"] = benchmark_rcp
         if power_score is not None:
-            benchmark_power_scores[benchmark] = power_score
+            benchmark_scores[f"{benchmark}:Energy"] = power_score
     _fill_empty_benchmark_scores(benchmark_scores, usage, ruleset)
-    if len(benchmark_power_scores) > 0:
-        _fill_empty_benchmark_scores(benchmark_power_scores, usage, ruleset)
-    return benchmark_scores, benchmark_power_scores
+    return benchmark_scores, {}
 
 
 def _compute_weak_scaling_scores(desc, system_folder, usage, ruleset):
@@ -674,19 +753,6 @@ def _load_system_desc(folder, system):
         raise FileNotFoundError('ERROR: Missing {}'.format(system_file))
     return _read_json_file(system_file)
 
-def _get_id_file(folder, system):
-    systems_folder = os.path.join(folder, 'results/',system)
-    system_file = os.path.join(systems_folder, 'privateid.json'.format(system))
-    if not os.path.exists(system_file):
-        return {}
-    return _read_json_file(system_file)
-
-def _update_id_file(folder, system, id):
-    systems_folder = os.path.join(folder, 'results/',system)
-    system_file = os.path.join(systems_folder, 'privateid.json'.format(system))
-    id = {'private_id': id}
-    with open(system_file, 'w') as f:
-        json.dump(id, f, indent=4)
 
 def _fill_empty_benchmark_scores(
     benchmark_scores,
@@ -702,130 +768,10 @@ def _fill_empty_benchmark_scores(
                     benchmark_scores[k] = None
 
         else:
-            if benchmark not in benchmark_scores:
-                benchmark_scores[benchmark] = None
-
-
-def _get_id_from_sysinfo(summary):
-    """Generate private id from system information.
-
-    Args:
-        summary (dictionary): Sysinfo Dictionary
-    """
-
-
-    # Code from humanhash3, which is public domain.
-    DEFAULT_WORDLIST = (
-    'ack', 'alabama', 'alanine', 'alaska', 'alpha', 'angel', 'apart', 'april',
-    'arizona', 'arkansas', 'artist', 'asparagus', 'aspen', 'august', 'autumn',
-    'avocado', 'bacon', 'bakerloo', 'batman', 'beer', 'berlin', 'beryllium',
-    'black', 'blossom', 'blue', 'bluebird', 'bravo', 'bulldog', 'burger',
-    'butter', 'california', 'carbon', 'cardinal', 'carolina', 'carpet', 'cat',
-    'ceiling', 'charlie', 'chicken', 'coffee', 'cola', 'cold', 'colorado',
-    'comet', 'connecticut', 'crazy', 'cup', 'dakota', 'december', 'delaware',
-    'delta', 'diet', 'don', 'double', 'early', 'earth', 'east', 'echo',
-    'edward', 'eight', 'eighteen', 'eleven', 'emma', 'enemy', 'equal',
-    'failed', 'fanta', 'fifteen', 'fillet', 'finch', 'fish', 'five', 'fix',
-    'floor', 'florida', 'football', 'four', 'fourteen', 'foxtrot', 'freddie',
-    'friend', 'fruit', 'gee', 'georgia', 'glucose', 'golf', 'green', 'grey',
-    'hamper', 'happy', 'harry', 'hawaii', 'helium', 'high', 'hot', 'hotel',
-    'hydrogen', 'idaho', 'illinois', 'india', 'indigo', 'ink', 'iowa',
-    'island', 'item', 'jersey', 'jig', 'johnny', 'juliet', 'july', 'jupiter',
-    'kansas', 'kentucky', 'kilo', 'king', 'kitten', 'lactose', 'lake', 'lamp',
-    'lemon', 'leopard', 'lima', 'lion', 'lithium', 'london', 'louisiana',
-    'low', 'magazine', 'magnesium', 'maine', 'mango', 'march', 'mars',
-    'maryland', 'massachusetts', 'may', 'mexico', 'michigan', 'mike',
-    'minnesota', 'mirror', 'mississippi', 'missouri', 'mobile', 'mockingbird',
-    'monkey', 'montana', 'moon', 'mountain', 'muppet', 'music', 'nebraska',
-    'neptune', 'network', 'nevada', 'nine', 'nineteen', 'nitrogen', 'north',
-    'november', 'nuts', 'october', 'ohio', 'oklahoma', 'one', 'orange',
-    'oranges', 'oregon', 'oscar', 'oven', 'oxygen', 'papa', 'paris', 'pasta',
-    'pennsylvania', 'pip', 'pizza', 'pluto', 'potato', 'princess', 'purple',
-    'quebec', 'queen', 'quiet', 'red', 'river', 'robert', 'robin', 'romeo',
-    'rugby', 'sad', 'salami', 'saturn', 'september', 'seven', 'seventeen',
-    'shade', 'sierra', 'single', 'sink', 'six', 'sixteen', 'skylark', 'snake',
-    'social', 'sodium', 'solar', 'south', 'spaghetti', 'speaker', 'spring',
-    'stairway', 'steak', 'stream', 'summer', 'sweet', 'table', 'tango', 'ten',
-    'tennessee', 'tennis', 'texas', 'thirteen', 'three', 'timing', 'triple',
-    'twelve', 'twenty', 'two', 'uncle', 'undress', 'uniform', 'uranus', 'utah',
-    'vegan', 'venus', 'vermont', 'victor', 'video', 'violet', 'virginia',
-    'washington', 'west', 'whiskey', 'white', 'william', 'winner', 'winter',
-    'wisconsin', 'wolfram', 'wyoming', 'xray', 'yankee', 'yellow', 'zebra',
-    'zulu')
-
-    class HumanHasher(object):
-
-        def __init__(self, wordlist=DEFAULT_WORDLIST):
-            self.wordlist = wordlist
-
-        def humanize_list(self, hexdigest, words=4):
-            # Gets a list of byte values between 0-255.
-            bytes_ = map(lambda x: int(x, 16),
-                        map(''.join, zip(hexdigest[::2], hexdigest[1::2])))
-            # Compress an arbitrary number of bytes to `words`.
-            compressed = self.compress(bytes_, words)
-
-            return [str(self.wordlist[byte]) for byte in compressed]
-
-        def humanize(self, hexdigest, words=4, separator='-'):
-            # Map the compressed byte values through the word list.
-            return separator.join(self.humanize_list(hexdigest, words))
-
-        @staticmethod
-        def compress(bytes_, target):
-            bytes_list = list(bytes_)
-
-            length = len(bytes_list)
-            # If there are less than the target number bytes, return input bytes
-            if target >= length:
-                return bytes_
-
-            # Split `bytes` evenly into `target` segments
-            # Each segment hashes `seg_size` bytes, rounded down for some
-            seg_size = float(length) / float(target)
-            # Initialize `target` number of segments
-            segments = [0] * target
-            seg_num = 0
-
-            # Use a simple XOR checksum-like function for compression
-            for i, byte in enumerate(bytes_list):
-                # Divide the byte index by the segment size to assign its segment
-                # Floor to create a valid segment index
-                # Min to ensure the index is within `target`
-                seg_num = min(int(math.floor(i / seg_size)), target-1)
-                # Apply XOR to the existing segment and the byte
-                segments[seg_num] = operator.xor(segments[seg_num], byte)
-
-            return segments
-
-        def uuid(self, **params):
-            digest = str(uuidlib.uuid4()).replace('-', '')
-            return self.humanize(digest, **params), digest
-
-
-
-    def get_hash(row):
-        columns_for_hashing = [    
-            'division',
-            'submitter',
-            'system_name',
-            'number_of_nodes',
-            'host_processor_model_name',
-            'host_processors_per_node',
-            'accelerator_model_name',
-            'accelerators_per_node',
-            'framework'
-        ]
-        to_hash = ''.join(str(row[c]) for c in columns_for_hashing)
-        return hashlib.sha256(to_hash.encode('utf-8')).hexdigest()
-    
-    hash = get_hash(summary)
-    humanhasha = HumanHasher()
-    summary = humanhasha.humanize(hash)
-
-    return summary
-
-     
+            for metric in _get_strong_scaling_metric_schema().keys():
+                k = '{}:{}'.format(benchmark, metric)
+                if k not in benchmark_scores:
+                    benchmark_scores[k] = None
 
 
 def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
@@ -843,22 +789,13 @@ def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
                                               weak_scaling=True)
     power_summary = _get_empty_summary(usage, ruleset)
     power_weak_scaling_summary = _get_empty_summary(usage, ruleset, weak_scaling=True)
+
     for system_folder in _get_sub_folders(results_folder):
         folder_parts = system_folder.split('/')
         system = folder_parts[-1]
         # Load corresponding system description.
         try:
             desc = _load_system_desc(folder, system)
-            id = _get_id_file(folder, system)
-            # Generate private id and update system desc to match
-            if kwargs.get('generate_private_ids') and 'private_id' not in id:
-                id['private_id'] = _get_id_from_sysinfo(desc)
-                _update_id_file(folder, system, desc['private_id'])
-            elif 'private_id' not in id:
-                # Ensure private_id field exists in desc for consistent processing later, even if it's empty
-                id['private_id'] = '' 
-            desc['private_id']  = id['private_id']
-
         except (json.JSONDecodeError, FileNotFoundError) as e:
             print(e)
             continue
@@ -875,7 +812,6 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
         # Construct prefix portion of the row.
         try:
             _check_and_update_system_specs('division', 'division')
-            _check_and_update_system_specs('private_id', 'private_id')
             # Map availability if requested
             if "availability" in kwargs:
                 _check_and_update_system_specs('status', 'availability', lambda desc: _map_availability(desc["status"], kwargs["availability"]))
@@ -925,7 +861,7 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
 
         # Compute the scores.
         strong_scaling_scores, power_scores = _compute_strong_scaling_scores(
-            desc, system_folder, usage, ruleset)
+            desc, system_folder, usage, ruleset, system_specs["division"], rcp_bypass=False)
         if usage == 'hpc':
             weak_scaling_scores, power_scores_weak_scaling = _compute_weak_scaling_scores(
                 desc, system_folder, usage, ruleset)
@@ -984,8 +920,6 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
     return strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary
 
 
-
-
 def get_parser():
     parser = argparse.ArgumentParser(
         prog='mlperf_logging.result_summarizer',
@@ -1006,11 +940,6 @@ def get_parser():
                         type=str,
                         choices=rule_choices(),
                         help='the ruleset such as 0.6.0, 0.7.0, or 1.0.0')
-    
-    parser.add_argument('--generate_private_ids',
-                        action='store_true',
-                        help='Generate private IDs for each run.')
-
     parser.add_argument('--werror',
                         action='store_true',
                         help='Treat warnings as errors')
@@ -1028,7 +957,6 @@ def get_parser():
         '--xlsx',
         type=str,
         help='Exports a xlsx of the results to the path specified')
-    
 
     return parser
 
@@ -1051,15 +979,13 @@ def _update_summaries(folder):
                 folder,
                 args.usage,
                 args.ruleset,
-                availability = config["availability"],
-                generate_private_ids = args.generate_private_ids,
+                availability = config["availability"]
             )
         else:
             strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
                 folder,
                 args.usage,
                 args.ruleset,
-                generate_private_ids = args.generate_private_ids,
             )
         strong_scaling_summaries.append(strong_scaling_summary)
         if len(weak_scaling_summary) > 0:
@@ -1178,7 +1104,7 @@ def _summaries_to_xlsx(summaries: pd.DataFrame, path, version):
                     start += len(section)
                     index += len(section)
 
-        writer.save()
+        writer.close()
     # Print and write back results.
     def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
         if len(summaries) > 0:
@@ -1199,7 +1125,7 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
 
             # Sort rows by their values
             summaries = summaries.sort_values(by=cols)
-
+            print(summaries)
             if args.csv is not None:
                 csv = args.csv
                 assert csv.endswith(".csv")
@@ -1229,4 +1155,4 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
 
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file

From 2ebc543c6b41467eefbcf5abfc835b4fc5bb0532 Mon Sep 17 00:00:00 2001
From: nnasiri <nnasiri@nvidia.com>
Date: Tue, 28 Apr 2026 14:58:43 -0700
Subject: [PATCH 2/4] Added more fields to the schema of mlperf training
 summary results

---
 .../result_summarizer/result_summarizer.py    | 229 +++++++++++++++---
 1 file changed, 198 insertions(+), 31 deletions(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 9e3eb31..9636911 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -1,23 +1,26 @@
 '''
 Summarizes a set of results.
 '''
-
-from __future__ import print_function
-
 import argparse
 import glob
 import json
 import os
 import re
 import sys
+import traceback
 import itertools
 import pandas as pd
 import yaml
 import numpy as np
+import hashlib
+import math
+import operator
+import uuid as uuidlib
 
 from ..compliance_checker import mlp_compliance
 from ..compliance_checker.mlp_compliance import usage_choices, rule_choices
 from ..compliance_checker.mlp_parser import parse_file
+
 from ..rcp_checker import rcp_checker
 from ..benchmark_meta import get_allowed_benchmarks, get_result_file_counts
 
@@ -262,6 +265,7 @@ def _get_weak_scaling_metric_schema():
         'time_to_train_all': float,
     }
 
+
 def _get_strong_scaling_metric_schema():
     return {
         'time_to_train': float,
@@ -269,8 +273,10 @@ def _get_strong_scaling_metric_schema():
         'GBS': float,
         'epochs': float,
         'RCP': str,
+        'rcp_scaling_factor': float,
     }
 
+
 def _get_empty_summary(usage, ruleset, weak_scaling=False):
     return Summary(
         _get_column_schema(usage, ruleset, weak_scaling=weak_scaling).keys())
@@ -289,6 +295,7 @@ def _get_column_schema(usage, ruleset, weak_scaling=False):
         'accelerators_count': int,
         'framework': str,
         'notes': str,
+        'private_id': str
     }
     if weak_scaling == True:
         benchmarks = get_allowed_benchmarks(usage, ruleset)
@@ -296,14 +303,11 @@ def _get_column_schema(usage, ruleset, weak_scaling=False):
             for metric, dtype in _get_weak_scaling_metric_schema().items():
                 schema['{}:{}'.format(benchmark, metric)] = dtype
     else:
-        #schema.update(
-        #    {b: float
-        #     for b in get_allowed_benchmarks(usage, ruleset)})
         benchmarks = get_allowed_benchmarks(usage, ruleset)
         for benchmark in benchmarks:
             for metric, dtype in _get_strong_scaling_metric_schema().items():
                 schema['{}:{}'.format(benchmark, metric)] = dtype
-    schema.update({'notes': str, 'details_url': str, 'code_url': str})
+    schema.update({'details_url': str, 'code_url': str})
     return schema
 
 
@@ -349,8 +353,6 @@ def _compute_strong_score_standalone(
 ):
     pattern = "{folder}/result_*.txt".format(folder=benchmark_folder)
     result_files = glob.glob(pattern, recursive=True)
-    benchmark_scores = {}
-    benchmark_power_scores = {}
     scores = []
     scores_track = {}
     power_scores = []
@@ -417,8 +419,8 @@ def _compute_strong_score_standalone(
             power_score = olympic_avg
             power_score *= scaling_factor
     if return_full_scores:
-        return scores_track, power_scores_track, score, power_score
-    return score, power_score
+        return scores_track, power_scores_track, score, power_score, scaling_factor
+    return score, power_score, scaling_factor
 
 
 def _compute_weak_score_standalone(benchmark, system, has_power, benchmark_folder, usage, ruleset, desc = {"submitter": None}):
@@ -490,12 +492,10 @@ def _compute_weak_score_standalone(benchmark, system, has_power, benchmark_folde
 def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division, rcp_bypass=False):
     # Collect scores for benchmarks.
     benchmark_scores = {}
-    benchmark_power_scores = {}
-    has_power = None
     benchmark_folder_parent = os.path.join(
         system_folder, 'strong') if usage == 'hpc' else system_folder
     if not os.path.isdir(benchmark_folder_parent):
-        return benchmark_scores, benchmark_power_scores
+        return benchmark_scores, {}
     for benchmark_folder in _get_sub_folders(benchmark_folder_parent):
         folder_parts = benchmark_folder.split('/')
         # Check if this benchmark has power results
@@ -503,7 +503,7 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division
         benchmark = _benchmark_alias(folder_parts[-1])
         system = folder_parts[-3] if usage == 'hpc' else folder_parts[-2]
         # Compute base perf/power scores
-        score, power_score = _compute_strong_score_standalone(
+        score, power_score, rcp_scaling_factor = _compute_strong_score_standalone(
             benchmark, system, has_power, benchmark_folder, usage, ruleset, desc
         )
 
@@ -518,7 +518,7 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division
                 # RCP check
                 verbose = False
                 bert_train_samples = False
-                rcp_pass, rcp_msg, rcp_norm_factor = rcp_checker.check_directory(
+                rcp_pass, rcp_msg, _ = rcp_checker.check_directory(
                     benchmark_folder,
                     usage,
                     ruleset,
@@ -553,7 +553,7 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division
                     result_files, ruleset, bert_train_samples=False
                 )
                 subm_epochs.sort()
-                samples_rejected = 4 if benchmark == 'unet3d' else 1
+                samples_rejected = 1
                 if len(subm_epochs) >= 2 * samples_rejected + 1:
                     benchmark_epochs = float(
                         np.mean(
@@ -563,9 +563,15 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division
                         )
                     )
             except Exception as e:
-                print(f"WARNING: RCP/GBS computation failed for {benchmark_folder}: {e}")
+                print(
+                    f"WARNING: RCP/GBS computation failed for {benchmark_folder}: {e}"
+                )
+                traceback.print_exc()
 
         # Map into metric-suffixed keys for schema
+        benchmark_scores[f"{benchmark}:rcp_scaling_factor"] = float(
+            rcp_scaling_factor
+        )
         if score is not None:
             benchmark_scores[f"{benchmark}:time_to_train"] = score
         if benchmark_gbs is not None:
@@ -753,6 +759,19 @@ def _load_system_desc(folder, system):
         raise FileNotFoundError('ERROR: Missing {}'.format(system_file))
     return _read_json_file(system_file)
 
+def _get_id_file(folder, system):
+    # Read results/<system>/privateid.json; a missing file yields an empty dict.
+    system_file = os.path.join(folder, 'results/', system, 'privateid.json')
+    if not os.path.exists(system_file):
+        return {}
+    return _read_json_file(system_file)
+
+def _update_id_file(folder, system, id):
+    # Write {"private_id": id} to results/<system>/privateid.json (overwrites).
+    system_file = os.path.join(folder, 'results/', system, 'privateid.json')
+    payload = {'private_id': id}
+    with open(system_file, 'w') as f:
+        json.dump(payload, f, indent=4)
 
 def _fill_empty_benchmark_scores(
     benchmark_scores,
@@ -768,10 +787,137 @@ def _fill_empty_benchmark_scores(
                     benchmark_scores[k] = None
 
         else:
-            for metric in _get_strong_scaling_metric_schema().keys():
+            strong_schema = _get_strong_scaling_metric_schema()
+            for metric, dtype in strong_schema.items():
                 k = '{}:{}'.format(benchmark, metric)
-                if k not in benchmark_scores:
-                    benchmark_scores[k] = None
+                if dtype is str:
+                    if k not in benchmark_scores or benchmark_scores[k] is None:
+                        benchmark_scores[k] = ''
+                else:
+                    if k not in benchmark_scores:
+                        benchmark_scores[k] = None
+
+
+def _get_id_from_sysinfo(summary):
+    """Generate a human-readable private id from system information.
+
+    Args:
+        summary (dict): system description holding every key hashed below.
+    Returns:
+        str: four DEFAULT_WORDLIST words joined by '-'.
+    """
+    # Code from humanhash3, which is public domain.
+    DEFAULT_WORDLIST = (
+    'ack', 'alabama', 'alanine', 'alaska', 'alpha', 'angel', 'apart', 'april',
+    'arizona', 'arkansas', 'artist', 'asparagus', 'aspen', 'august', 'autumn',
+    'avocado', 'bacon', 'bakerloo', 'batman', 'beer', 'berlin', 'beryllium',
+    'black', 'blossom', 'blue', 'bluebird', 'bravo', 'bulldog', 'burger',
+    'butter', 'california', 'carbon', 'cardinal', 'carolina', 'carpet', 'cat',
+    'ceiling', 'charlie', 'chicken', 'coffee', 'cola', 'cold', 'colorado',
+    'comet', 'connecticut', 'crazy', 'cup', 'dakota', 'december', 'delaware',
+    'delta', 'diet', 'don', 'double', 'early', 'earth', 'east', 'echo',
+    'edward', 'eight', 'eighteen', 'eleven', 'emma', 'enemy', 'equal',
+    'failed', 'fanta', 'fifteen', 'fillet', 'finch', 'fish', 'five', 'fix',
+    'floor', 'florida', 'football', 'four', 'fourteen', 'foxtrot', 'freddie',
+    'friend', 'fruit', 'gee', 'georgia', 'glucose', 'golf', 'green', 'grey',
+    'hamper', 'happy', 'harry', 'hawaii', 'helium', 'high', 'hot', 'hotel',
+    'hydrogen', 'idaho', 'illinois', 'india', 'indigo', 'ink', 'iowa',
+    'island', 'item', 'jersey', 'jig', 'johnny', 'juliet', 'july', 'jupiter',
+    'kansas', 'kentucky', 'kilo', 'king', 'kitten', 'lactose', 'lake', 'lamp',
+    'lemon', 'leopard', 'lima', 'lion', 'lithium', 'london', 'louisiana',
+    'low', 'magazine', 'magnesium', 'maine', 'mango', 'march', 'mars',
+    'maryland', 'massachusetts', 'may', 'mexico', 'michigan', 'mike',
+    'minnesota', 'mirror', 'mississippi', 'missouri', 'mobile', 'mockingbird',
+    'monkey', 'montana', 'moon', 'mountain', 'muppet', 'music', 'nebraska',
+    'neptune', 'network', 'nevada', 'nine', 'nineteen', 'nitrogen', 'north',
+    'november', 'nuts', 'october', 'ohio', 'oklahoma', 'one', 'orange',
+    'oranges', 'oregon', 'oscar', 'oven', 'oxygen', 'papa', 'paris', 'pasta',
+    'pennsylvania', 'pip', 'pizza', 'pluto', 'potato', 'princess', 'purple',
+    'quebec', 'queen', 'quiet', 'red', 'river', 'robert', 'robin', 'romeo',
+    'rugby', 'sad', 'salami', 'saturn', 'september', 'seven', 'seventeen',
+    'shade', 'sierra', 'single', 'sink', 'six', 'sixteen', 'skylark', 'snake',
+    'social', 'sodium', 'solar', 'south', 'spaghetti', 'speaker', 'spring',
+    'stairway', 'steak', 'stream', 'summer', 'sweet', 'table', 'tango', 'ten',
+    'tennessee', 'tennis', 'texas', 'thirteen', 'three', 'timing', 'triple',
+    'twelve', 'twenty', 'two', 'uncle', 'undress', 'uniform', 'uranus', 'utah',
+    'vegan', 'venus', 'vermont', 'victor', 'video', 'violet', 'virginia',
+    'washington', 'west', 'whiskey', 'white', 'william', 'winner', 'winter',
+    'wisconsin', 'wolfram', 'wyoming', 'xray', 'yankee', 'yellow', 'zebra',
+    'zulu')
+
+    class HumanHasher(object):
+        """Maps a hex digest onto a short list of words from a wordlist."""
+        def __init__(self, wordlist=DEFAULT_WORDLIST):
+            self.wordlist = wordlist
+
+        def humanize_list(self, hexdigest, words=4):
+            # Gets a list of byte values between 0-255.
+            bytes_ = map(lambda x: int(x, 16),
+                        map(''.join, zip(hexdigest[::2], hexdigest[1::2])))
+            # Compress an arbitrary number of bytes to `words`.
+            compressed = self.compress(bytes_, words)
+
+            return [str(self.wordlist[byte]) for byte in compressed]
+
+        def humanize(self, hexdigest, words=4, separator='-'):
+            # Map the compressed byte values through the word list.
+            return separator.join(self.humanize_list(hexdigest, words))
+
+        @staticmethod
+        def compress(bytes_, target):
+            bytes_list = list(bytes_)
+            length = len(bytes_list)
+            # If there are less than the target number bytes, return input bytes.
+            # Return the materialized list: `bytes_` may be a one-shot iterator
+            # (humanize_list passes a map) that list() above already exhausted.
+            if target >= length:
+                return bytes_list
+
+            # Split `bytes` evenly into `target` segments
+            # Each segment hashes `seg_size` bytes, rounded down for some
+            seg_size = float(length) / float(target)
+            # Initialize `target` number of segments
+            segments = [0] * target
+
+            # Use a simple XOR checksum-like function for compression
+            for i, byte in enumerate(bytes_list):
+                # Divide the byte index by the segment size to assign its segment
+                # Floor to create a valid segment index
+                # Min to ensure the index is within `target`
+                seg_num = min(int(math.floor(i / seg_size)), target-1)
+                # Apply XOR to the existing segment and the byte
+                segments[seg_num] = operator.xor(segments[seg_num], byte)
+
+            return segments
+
+        def uuid(self, **params):
+            digest = str(uuidlib.uuid4()).replace('-', '')
+            return self.humanize(digest, **params), digest
+
+
+    # SHA-256 hex digest over the identifying system fields, in this order.
+    def get_hash(row):
+        columns_for_hashing = [
+            'division',
+            'submitter',
+            'system_name',
+            'number_of_nodes',
+            'host_processor_model_name',
+            'host_processors_per_node',
+            'accelerator_model_name',
+            'accelerators_per_node',
+            'framework'
+        ]
+        to_hash = ''.join(str(row[c]) for c in columns_for_hashing)
+        return hashlib.sha256(to_hash.encode('utf-8')).hexdigest()
+
+    digest = get_hash(summary)
+    humanhasha = HumanHasher()
+    summary = humanhasha.humanize(digest)
+
+    return summary
+
+     
 
 
 def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
@@ -789,13 +935,22 @@ def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
                                               weak_scaling=True)
     power_summary = _get_empty_summary(usage, ruleset)
     power_weak_scaling_summary = _get_empty_summary(usage, ruleset, weak_scaling=True)
-
     for system_folder in _get_sub_folders(results_folder):
         folder_parts = system_folder.split('/')
         system = folder_parts[-1]
         # Load corresponding system description.
         try:
             desc = _load_system_desc(folder, system)
+            id = _get_id_file(folder, system)
+            # Generate a private id when requested and persist that same id to the id file
+            if kwargs.get('generate_private_ids') and 'private_id' not in id:
+                id['private_id'] = _get_id_from_sysinfo(desc)
+                _update_id_file(folder, system, id['private_id'])
+            elif 'private_id' not in id:
+                # Ensure private_id field exists in desc for consistent processing later, even if it's empty
+                id['private_id'] = ''
+            desc['private_id'] = id['private_id']
+
         except (json.JSONDecodeError, FileNotFoundError) as e:
             print(e)
             continue
@@ -812,6 +967,7 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
         # Construct prefix portion of the row.
         try:
             _check_and_update_system_specs('division', 'division')
+            _check_and_update_system_specs('private_id', 'private_id')
             # Map availability if requested
             if "availability" in kwargs:
                 _check_and_update_system_specs('status', 'availability', lambda desc: _map_availability(desc["status"], kwargs["availability"]))
@@ -892,11 +1048,12 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
                     power_scores.items(),
                     urls.items(),
             ):
-                power_summary.push(column_name, value)
-                if column_name in strong_scaling_scores:
-                    power_summary.push(column_name, strong_scaling_scores[column_name])
-                else:
-                    power_summary.push(column_name, value)
+                merged = (
+                    strong_scaling_scores[column_name]
+                    if column_name in strong_scaling_scores
+                    else value
+                )
+                power_summary.push(column_name, merged)
         if usage == 'hpc' and len(power_scores_weak_scaling) > 0:
             for column_name, value in itertools.chain(
                     system_specs.items(),
@@ -920,6 +1077,8 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
     return strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary
 
 
+
+
 def get_parser():
     parser = argparse.ArgumentParser(
         prog='mlperf_logging.result_summarizer',
@@ -940,6 +1099,11 @@ def get_parser():
                         type=str,
                         choices=rule_choices(),
                         help='the ruleset such as 0.6.0, 0.7.0, or 1.0.0')
+    
+    parser.add_argument('--generate_private_ids',
+                        action='store_true',
+                        help='Generate private IDs for each run.')
+
     parser.add_argument('--werror',
                         action='store_true',
                         help='Treat warnings as errors')
@@ -957,6 +1121,7 @@ def get_parser():
         '--xlsx',
         type=str,
         help='Exports a xlsx of the results to the path specified')
+    
 
     return parser
 
@@ -979,13 +1144,15 @@ def _update_summaries(folder):
                 folder,
                 args.usage,
                 args.ruleset,
-                availability = config["availability"]
+                availability = config["availability"],
+                generate_private_ids = args.generate_private_ids,
             )
         else:
             strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
                 folder,
                 args.usage,
                 args.ruleset,
+                generate_private_ids = args.generate_private_ids,
             )
         strong_scaling_summaries.append(strong_scaling_summary)
         if len(weak_scaling_summary) > 0:
@@ -1104,7 +1271,7 @@ def _summaries_to_xlsx(summaries: pd.DataFrame, path, version):
                     start += len(section)
                     index += len(section)
 
-        writer.close()
+        writer.save()
     # Print and write back results.
     def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
         if len(summaries) > 0:
@@ -1125,7 +1292,7 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
 
             # Sort rows by their values
             summaries = summaries.sort_values(by=cols)
-            print(summaries)
+
             if args.csv is not None:
                 csv = args.csv
                 assert csv.endswith(".csv")
@@ -1155,4 +1322,4 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()

From 12d733d0edc0e945c1861ae87f596c2ec183134b Mon Sep 17 00:00:00 2001
From: Pablo Gonzalez <pablo.gonzalez@factored.ai>
Date: Wed, 6 May 2026 19:42:44 -0500
Subject: [PATCH 3/4] Output both summary and summary_detailed

---
 .../result_summarizer/result_summarizer.py    | 104 +++++++++++-------
 1 file changed, 63 insertions(+), 41 deletions(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 9636911..1947d2b 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -16,6 +16,7 @@
 import math
 import operator
 import uuid as uuidlib
+import copy
 
 from ..compliance_checker import mlp_compliance
 from ..compliance_checker.mlp_compliance import usage_choices, rule_choices
@@ -277,12 +278,12 @@ def _get_strong_scaling_metric_schema():
     }
 
 
-def _get_empty_summary(usage, ruleset, weak_scaling=False):
+def _get_empty_summary(usage, ruleset, weak_scaling=False, detailed=False):
     return Summary(
-        _get_column_schema(usage, ruleset, weak_scaling=weak_scaling).keys())
+        _get_column_schema(usage, ruleset, weak_scaling=weak_scaling, detailed=detailed).keys())
 
 
-def _get_column_schema(usage, ruleset, weak_scaling=False):
+def _get_column_schema(usage, ruleset, weak_scaling=False, detailed=False):
     schema = {
         'division': str,
         'availability': str,
@@ -303,10 +304,17 @@ def _get_column_schema(usage, ruleset, weak_scaling=False):
             for metric, dtype in _get_weak_scaling_metric_schema().items():
                 schema['{}:{}'.format(benchmark, metric)] = dtype
     else:
-        benchmarks = get_allowed_benchmarks(usage, ruleset)
-        for benchmark in benchmarks:
-            for metric, dtype in _get_strong_scaling_metric_schema().items():
-                schema['{}:{}'.format(benchmark, metric)] = dtype
+        if detailed:
+            benchmarks = get_allowed_benchmarks(usage, ruleset)
+            for benchmark in benchmarks:
+                for metric, dtype in _get_strong_scaling_metric_schema().items():
+                    schema['{}:{}'.format(benchmark, metric)] = dtype
+        else:
+            schema.update(
+                {
+                    b: float for b in get_allowed_benchmarks(usage, ruleset)
+                }
+            )
     schema.update({'details_url': str, 'code_url': str})
     return schema
 
@@ -492,6 +500,7 @@ def _compute_weak_score_standalone(benchmark, system, has_power, benchmark_folde
 def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division, rcp_bypass=False):
     # Collect scores for benchmarks.
     benchmark_scores = {}
+    detailed_bechmark_scores = {}
     benchmark_folder_parent = os.path.join(
         system_folder, 'strong') if usage == 'hpc' else system_folder
     if not os.path.isdir(benchmark_folder_parent):
@@ -569,21 +578,25 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division
                 traceback.print_exc()
 
         # Map into metric-suffixed keys for schema
-        benchmark_scores[f"{benchmark}:rcp_scaling_factor"] = float(
+        detailed_bechmark_scores[f"{benchmark}:rcp_scaling_factor"] = float(
             rcp_scaling_factor
         )
         if score is not None:
-            benchmark_scores[f"{benchmark}:time_to_train"] = score
+            detailed_bechmark_scores[f"{benchmark}:time_to_train"] = score
         if benchmark_gbs is not None:
-            benchmark_scores[f"{benchmark}:GBS"] = float(benchmark_gbs)
+            detailed_bechmark_scores[f"{benchmark}:GBS"] = float(benchmark_gbs)
         if benchmark_epochs is not None:
-            benchmark_scores[f"{benchmark}:epochs"] = float(benchmark_epochs)
+            detailed_bechmark_scores[f"{benchmark}:epochs"] = float(benchmark_epochs)
         if benchmark_rcp is not None:
-            benchmark_scores[f"{benchmark}:RCP"] = benchmark_rcp
+            detailed_bechmark_scores[f"{benchmark}:RCP"] = benchmark_rcp
         if power_score is not None:
-            benchmark_scores[f"{benchmark}:Energy"] = power_score
-    _fill_empty_benchmark_scores(benchmark_scores, usage, ruleset)
-    return benchmark_scores, {}
+            detailed_bechmark_scores[f"{benchmark}:Energy"] = power_score
+        # The plain summary column reports the benchmark score, not the RCP scaling factor.
+        # NOTE(review): assumes `score` is already the final reported time; confirm.
+        benchmark_scores[benchmark] = score
+    _fill_empty_benchmark_scores(benchmark_scores, usage, ruleset, detailed=False)
+    _fill_empty_benchmark_scores(detailed_bechmark_scores, usage, ruleset, detailed=True)
+    return benchmark_scores, detailed_bechmark_scores
 
 
 def _compute_weak_scaling_scores(desc, system_folder, usage, ruleset):
@@ -778,6 +791,7 @@ def _fill_empty_benchmark_scores(
     usage,
     ruleset,
     weak_scaling=False,
+    detailed=False,
 ):
     for benchmark in get_allowed_benchmarks(usage, ruleset):
         if weak_scaling:
@@ -787,15 +801,19 @@ def _fill_empty_benchmark_scores(
                     benchmark_scores[k] = None
 
         else:
-            strong_schema = _get_strong_scaling_metric_schema()
-            for metric, dtype in strong_schema.items():
-                k = '{}:{}'.format(benchmark, metric)
-                if dtype is str:
-                    if k not in benchmark_scores or benchmark_scores[k] is None:
-                        benchmark_scores[k] = ''
-                else:
-                    if k not in benchmark_scores:
-                        benchmark_scores[k] = None
+            if detailed:
+                strong_schema = _get_strong_scaling_metric_schema()
+                for metric, dtype in strong_schema.items():
+                    k = '{}:{}'.format(benchmark, metric)
+                    if dtype is str:
+                        if k not in benchmark_scores or benchmark_scores[k] is None:
+                            benchmark_scores[k] = ''
+                    else:
+                        if k not in benchmark_scores:
+                            benchmark_scores[k] = None
+            else:
+                if benchmark not in benchmark_scores:
+                    benchmark_scores[benchmark] = None
 
 
 def _get_id_from_sysinfo(summary):
@@ -933,7 +951,7 @@ def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
     weak_scaling_summary = _get_empty_summary(usage,
                                               ruleset,
                                               weak_scaling=True)
-    power_summary = _get_empty_summary(usage, ruleset)
+    detailed_strong_scaling_summary = _get_empty_summary(usage, ruleset, detailed=True)
     power_weak_scaling_summary = _get_empty_summary(usage, ruleset, weak_scaling=True)
     for system_folder in _get_sub_folders(results_folder):
         folder_parts = system_folder.split('/')
@@ -1016,7 +1034,7 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
             continue
 
         # Compute the scores.
-        strong_scaling_scores, power_scores = _compute_strong_scaling_scores(
+        strong_scaling_scores, detailed_strong_scaling_scores = _compute_strong_scaling_scores(
             desc, system_folder, usage, ruleset, system_specs["division"], rcp_bypass=False)
         if usage == 'hpc':
             weak_scaling_scores, power_scores_weak_scaling = _compute_weak_scaling_scores(
@@ -1042,18 +1060,18 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
                     urls.items(),
             ):
                 weak_scaling_summary.push(column_name, value)
-        if len(power_scores) > 0:
+        if len(detailed_strong_scaling_scores) > 0:
             for column_name, value in itertools.chain(
                     system_specs.items(),
-                    power_scores.items(),
+                    detailed_strong_scaling_scores.items(),
                     urls.items(),
             ):
                 merged = (
-                    strong_scaling_scores[column_name]
-                    if column_name in strong_scaling_scores
+                    detailed_strong_scaling_scores[column_name]
+                    if column_name in detailed_strong_scaling_scores
                     else value
                 )
-                power_summary.push(column_name, merged)
+                detailed_strong_scaling_summary.push(column_name, merged)
         if usage == 'hpc' and len(power_scores_weak_scaling) > 0:
             for column_name, value in itertools.chain(
                     system_specs.items(),
@@ -1068,13 +1086,13 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
     if len(weak_scaling_summary) > 0:
         weak_scaling_summary = weak_scaling_summary.to_dataframe().sort_values(
             _get_sort_by_column_names()).reset_index(drop=True)
-    if len(power_summary) > 0:
-        power_summary = power_summary.to_dataframe().sort_values(
+    if len(detailed_strong_scaling_summary) > 0:
+        detailed_strong_scaling_summary = detailed_strong_scaling_summary.to_dataframe().sort_values(
             _get_sort_by_column_names()).reset_index(drop=True)
     if len(power_weak_scaling_summary) > 0:
         power_weak_scaling_summary = power_weak_scaling_summary.to_dataframe().sort_values(
             _get_sort_by_column_names()).reset_index(drop=True)
-    return strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary
+    return strong_scaling_summary, weak_scaling_summary, detailed_strong_scaling_summary, power_weak_scaling_summary
 
 
 
@@ -1132,7 +1150,7 @@ def main():
 
     strong_scaling_summaries = []
     weak_scaling_summaries = []
-    power_summaries = []
+    detailed_strong_scaling_summaries = []
     power_weak_scaling_summaries = []
 
     def _update_summaries(folder):
@@ -1140,7 +1158,7 @@ def _update_summaries(folder):
             config_path = os.path.join(os.path.dirname(__file__), "config.yaml")
             with open(config_path, "r") as f:
                 config = yaml.safe_load(f)
-            strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
+            strong_scaling_summary, weak_scaling_summary, detailed_strong_scaling_summary, power_weak_scaling_summary = summarize_results(
                 folder,
                 args.usage,
                 args.ruleset,
@@ -1148,7 +1166,7 @@ def _update_summaries(folder):
                 generate_private_ids = args.generate_private_ids,
             )
         else:
-            strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
+            strong_scaling_summary, weak_scaling_summary, detailed_strong_scaling_summary, power_weak_scaling_summary = summarize_results(
                 folder,
                 args.usage,
                 args.ruleset,
@@ -1157,8 +1175,8 @@ def _update_summaries(folder):
         strong_scaling_summaries.append(strong_scaling_summary)
         if len(weak_scaling_summary) > 0:
             weak_scaling_summaries.append(weak_scaling_summary)
-        if len(power_summary) > 0:
-            power_summaries.append(power_summary)
+        if len(detailed_strong_scaling_summary) > 0:
+            detailed_strong_scaling_summaries.append(detailed_strong_scaling_summary)
         if len(power_weak_scaling_summary) > 0:
             power_weak_scaling_summaries.append(power_weak_scaling_summary)
 
@@ -1273,13 +1291,14 @@ def _summaries_to_xlsx(summaries: pd.DataFrame, path, version):
 
         writer.save()
     # Print and write back results.
-    def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
+    def _print_and_write(summaries, weak_scaling=False, mode='w', power = False, detailed = False):
         if len(summaries) > 0:
             summaries = pd.concat(summaries).astype(
                 _get_column_schema(
                     args.usage,
                     args.ruleset,
                     weak_scaling=weak_scaling,
+                    detailed=detailed
                 )
             )
             if weak_scaling:
@@ -1301,6 +1320,9 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
                     specs_and_notes = [c for c in summaries.columns if c not in benchmarks]
                     csv = csv.replace(".csv", "_power.csv")
                     summaries.groupby(specs_and_notes).apply(lambda x: agg_columns_fn(x, benchmarks)).to_csv(csv, mode=mode)
+                elif detailed:
+                    csv = csv.replace(".csv", "_detailed.csv")
+                    summaries.to_csv(csv, index=False, mode=mode)
                 else:
                     summaries.to_csv(csv, index=False, mode=mode)
             json_path = "summary.json" if args.csv is None else f"""{csv.replace(".csv", ".json")}"""
@@ -1317,7 +1339,7 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):
                            None, 'display.max_colwidth', None):
         _print_and_write(strong_scaling_summaries)
         _print_and_write(weak_scaling_summaries, weak_scaling=True, mode='a')
-        _print_and_write(power_summaries, mode='a', power=True)
+        _print_and_write(detailed_strong_scaling_summaries, mode='a', detailed=True)
         _print_and_write(power_weak_scaling_summaries, weak_scaling=True, mode='a', power=True)
 
 

From d9250f60e8af0e8bd994459384c06bcabc164531 Mon Sep 17 00:00:00 2001
From: Pablo Gonzalez <pablo.gonzalez@factored.ai>
Date: Thu, 7 May 2026 10:57:52 -0500
Subject: [PATCH 4/4] Rename column: epochs -> samples_to_converge

---
 mlperf_logging/result_summarizer/result_summarizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlperf_logging/result_summarizer/result_summarizer.py b/mlperf_logging/result_summarizer/result_summarizer.py
index 1947d2b..131d208 100644
--- a/mlperf_logging/result_summarizer/result_summarizer.py
+++ b/mlperf_logging/result_summarizer/result_summarizer.py
@@ -586,7 +586,7 @@ def _compute_strong_scaling_scores(desc, system_folder, usage, ruleset, division
         if benchmark_gbs is not None:
             detailed_bechmark_scores[f"{benchmark}:GBS"] = float(benchmark_gbs)
         if benchmark_epochs is not None:
-            detailed_bechmark_scores[f"{benchmark}:epochs"] = float(benchmark_epochs)
+            detailed_bechmark_scores[f"{benchmark}:samples_to_converge"] = float(benchmark_epochs)
         if benchmark_rcp is not None:
             detailed_bechmark_scores[f"{benchmark}:RCP"] = benchmark_rcp
         if power_score is not None: