diff --git a/scripts/plot_overhead_pdg.py b/scripts/plot_overhead_pdg.py new file mode 100644 index 0000000..cc16628 --- /dev/null +++ b/scripts/plot_overhead_pdg.py @@ -0,0 +1,81 @@ +""" +Per-track overhead distribution overlay by particle species. +Usage: python3 plot_overhead_pdg.py [output.png] +""" +import sys +import uproot +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +RFILE = sys.argv[1] if len(sys.argv) > 1 else '/tmp/latest_histos_overhead.root' +OUTFILE = sys.argv[2] if len(sys.argv) > 2 else '/tmp/overhead_pdg.png' + +species = [ + ('m_overhead_dist_pdg11', 'e⁻', '#1f77b4'), + ('m_overhead_dist_pdg22', 'γ', '#ff7f0e'), + ('m_overhead_dist_pdg-22', 'opt-γ', '#d62728'), + ('m_overhead_dist_pdg-11', 'e⁺', '#9467bd'), + ('m_overhead_dist_pdg2112', 'n', '#2ca02c'), + ('m_overhead_dist_pdg2212', 'p', '#8c564b'), + ('m_overhead_dist_pdg211', 'π⁺', '#e377c2'), + ('m_overhead_dist_pdg-211', 'π⁻', '#bcbd22'), +] + +XMIN, XMAX, SMOOTH = 2.0, 100.0, 7 + +fig, axes = plt.subplots(1, 2, figsize=(13, 5)) +fig.patch.set_facecolor('white') + +with uproot.open(RFILE) as f: + for hname, label, color in species: + if hname not in f: + continue + h = f[hname] + vals, edges = h.to_numpy() + centers = (edges[:-1] + edges[1:]) / 2 + mask = (centers >= XMIN) & (centers <= XMAX) + x = centers[mask] + y = vals[mask].astype(float) + total = y.sum() + if total == 0: + continue + y_norm = y / total + y_smooth = np.convolve(y_norm, np.ones(SMOOTH) / SMOOTH, mode='same') + n_tracks = int(vals.sum()) + mean_us = (centers * vals).sum() / vals.sum() if vals.sum() > 0 else 0 + full_label = f'{label} ({n_tracks/1e6:.1f}M, mean={mean_us:.0f}µs)' + for ax in axes: + ax.plot(x, y_smooth, label=full_label, color=color, linewidth=1.8) + +axes[0].set_xlim(XMIN, 40) +axes[0].set_ylim(bottom=0) +axes[0].set_xlabel('per-track overhead [µs]', fontsize=11) +axes[0].set_ylabel('fraction of tracks (normalized)', fontsize=10) +axes[0].set_title('Linear scale', fontsize=11) +axes[0].legend(fontsize=8, framealpha=0.95) +axes[0].grid(True, color='#dddddd', linewidth=0.5) + +axes[1].set_yscale('log') +axes[1].set_xlim(XMIN, XMAX) +axes[1].set_ylim(1e-6, 0.2) +axes[1].set_xlabel('per-track overhead [µs]', fontsize=11) +axes[1].set_ylabel('fraction of tracks (normalized)', fontsize=10) +axes[1].set_title('Log scale', fontsize=11) +axes[1].legend(fontsize=8, framealpha=0.95) +axes[1].grid(True, color='#dddddd', linewidth=0.5, which='both') + +for ax in axes: + ax.set_facecolor('white') + for sp in ax.spines.values(): + sp.set_edgecolor('#aaaaaa') + +fig.suptitle( + 'Per-track overhead by species (normalized, smoothed, <2µs excluded)\n' + f'{RFILE}', + fontsize=10, y=1.01 +) +plt.tight_layout() +plt.savefig(OUTFILE, dpi=150, bbox_inches='tight', facecolor='white') +print(f'saved {OUTFILE}') diff --git a/scripts/plot_overhead_spatial.py b/scripts/plot_overhead_spatial.py new file mode 100644 index 0000000..6fe9f80 --- /dev/null +++ b/scripts/plot_overhead_spatial.py @@ -0,0 +1,63 @@ +""" +Per-track overhead spatial heatmaps (xy and zr). +Usage: python3 plot_overhead_spatial.py [output.png] +""" +import sys +import uproot +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from matplotlib.colors import LogNorm + +RFILE = sys.argv[1] if len(sys.argv) > 1 else '/tmp/latest_histos_overhead.root' +OUTFILE = sys.argv[2] if len(sys.argv) > 2 else '/tmp/overhead_spatial.png' + +with uproot.open(RFILE) as f: + hxy = f['m_overhead_xy'] + xy_vals, xy_xe, xy_ye = hxy.to_numpy() + hzr = f['m_overhead_zr'] + zr_vals, zr_xe, zr_ye = hzr.to_numpy() + +# mm -> cm +xy_xe /= 10; xy_ye /= 10 +zr_xe /= 10; zr_ye /= 10 + +# zoom zr: z in [-500, 500] cm, r in [0, 200] cm +zmask = (zr_xe >= -500) & (zr_xe <= 500) +rmask = (zr_ye >= 0) & (zr_ye <= 200) +zi = np.where(zmask)[0] +ri = np.where(rmask)[0] +zr_zoom = zr_vals[zi[0]:zi[-1], ri[0]:ri[-1]] +zr_xe_z = zr_xe[zi[0]:zi[-1]+1] +zr_ye_z = zr_ye[ri[0]:ri[-1]+1] + +fig, axes = plt.subplots(1, 2, figsize=(15, 6)) +fig.patch.set_facecolor('white') + +def heatmap(ax, xe, ye, vals, xlabel, ylabel, title): + vp = np.where(vals > 0, vals, np.nan) + vmin = np.nanpercentile(vp, 20) + vmax = np.nanpercentile(vp, 99.5) + im = ax.pcolormesh(xe, ye, vp.T, norm=LogNorm(vmin=vmin, vmax=vmax), cmap='plasma') + cb = plt.colorbar(im, ax=ax) + cb.set_label('overhead [ns·tracks]', fontsize=10) + ax.set_xlabel(xlabel, fontsize=11) + ax.set_ylabel(ylabel, fontsize=11) + ax.set_title(title, fontsize=12) + ax.set_facecolor('#f0f0f0') + for sp in ax.spines.values(): + sp.set_edgecolor('#999999') + +heatmap(axes[0], xy_xe, xy_ye, xy_vals, 'x [cm]', 'y [cm]', 'Overhead map — xy') +axes[0].set_aspect('equal') +heatmap(axes[1], zr_xe_z, zr_ye_z, zr_zoom, 'z [cm]', 'r [cm]', 'Overhead map — zr (|z|<500cm, r<200cm)') + +fig.suptitle( + 'Per-track overhead spatial map (track start position weighted by overhead)\n' + f'{RFILE}', + fontsize=10, y=1.01 +) +plt.tight_layout() +plt.savefig(OUTFILE, dpi=150, bbox_inches='tight', facecolor='white') +print(f'saved {OUTFILE}') diff --git a/scripts/plot_timing_profile.py b/scripts/plot_timing_profile.py new file mode 100755 index 0000000..a492913 --- /dev/null +++ b/scripts/plot_timing_profile.py @@ -0,0 +1,460 @@ +#!/usr/bin/env python3 +""" +Analyze performance profile histograms to identify hot regions. +Extracts summed timing values for regions in R and Z from the ZR plot. +Outputs: annotated ZR plot, region×PDG heatmap, text summary. + +Usage: plot_timing_profile.py [output_prefix] +""" + +import sys +import ROOT +import numpy as np +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt + +def analyze_hot_regions(input_file, output_prefix=None): + """ + Analyze the m_zr histogram to find hot regions and create annotated plots + + Args: + input_file: Path to the histos.root file + output_prefix: Prefix for output files (default: input filename without .root) + """ + # Set ROOT to batch mode + ROOT.gROOT.SetBatch(True) + ROOT.gStyle.SetOptStat(0) + ROOT.gStyle.SetPalette(ROOT.kRainBow) + + # Open the file + f = ROOT.TFile.Open(input_file) + if not f or f.IsZombie(): + print(f"Error: Cannot open file {input_file}") + return 1 + + # Get histograms + m_zr = f.Get("m_zr") + + if not m_zr: + print("Error: Could not find histogram m_zr in file") + f.Close() + return 1 + + if output_prefix is None: + output_prefix = input_file.replace(".root", "") + + print(f"Analyzing hot regions from: {input_file}") + print(f"=" * 80) + print() + + # Get histogram properties + nbins_z = m_zr.GetNbinsX() + nbins_r = m_zr.GetNbinsY() + z_min = m_zr.GetXaxis().GetXmin() + z_max = m_zr.GetXaxis().GetXmax() + r_min = m_zr.GetYaxis().GetXmin() + r_max = m_zr.GetYaxis().GetXmax() + + print(f"Histogram dimensions:") + print(f" Z bins: {nbins_z}, range: [{z_min/1000:.2f}, {z_max/1000:.2f}] m") + print(f" R bins: {nbins_r}, range: [{r_min/1000:.2f}, {r_max/1000:.2f}] m") + print() + + # Extract all bin values + total_time = 0 + bin_values = [] + + for iz in range(1, nbins_z + 1): + for ir in range(1, nbins_r + 1): + value = m_zr.GetBinContent(iz, ir) + if value > 0: + z_center = m_zr.GetXaxis().GetBinCenter(iz) + r_center = m_zr.GetYaxis().GetBinCenter(ir) + bin_values.append({ + 'z': z_center, + 'r': r_center, + 'time': value + }) + total_time += value + + print(f"Total time: {total_time:.2e} ns = {total_time/1e9:.3f} s") + print(f"Non-zero bins: {len(bin_values)} / {nbins_z * nbins_r}") + print() + + if len(bin_values) == 0: + print("No data in histogram!") + f.Close() + return 1 + + # Define detector regions based on actual geometry + # Format: (name, z_min, z_max, r_min, r_max) + detector_regions = [ + ("Forward EM+Hadron Cal", 3346.25, 4971.75, 0, 2626), + ("dRICH", 1944.75, 3250, 0, 1800), + ("DIRC Bar (straight)", -2734, 1900, 750, 782.50), + ("DIRC Bar (triangle)", -3150, -2734, 728.75, 1007.50), + ("EEEMCal", -1945, -1740, 0, 540), + ("Barrel Imaging and SciFi Cal",-2700, 1900, 826.77, 1217.61), + ("Barrel HCal", -3196.25, 3196.25, 1838.5, 2700.3), + ("Tracker / Beampipe", -1740, 1945, 0, 728.75), + ("Far Forward (Other)", 5500, 34000, 0, 1500), + ("Far Forward (Zero Degree Calorimeter)", 34000, 38000, 0, 1500), + ] + + # Group by region + region_stats = {} + + for name, z_lo, z_hi, r_lo, r_hi in detector_regions: + region_stats[name] = { + 'z_range': (z_lo, z_hi), + 'r_range': (r_lo, r_hi), + 'time': 0, + 'n_bins': 0 + } + + # Accumulate times by region + for bin_data in bin_values: + z = bin_data['z'] + r = bin_data['r'] + t = bin_data['time'] + + for name, z_lo, z_hi, r_lo, r_hi in detector_regions: + if z_lo <= z < z_hi and r_lo <= r < r_hi: + region_stats[name]['time'] += t + region_stats[name]['n_bins'] += 1 + break + + # Combine DIRC Bar straight and triangle sections for reporting + dirc_combined_time = 0 + dirc_combined_bins = 0 + dirc_regions = [] + + if "DIRC Bar (straight)" in region_stats: + straight = region_stats["DIRC Bar (straight)"] + dirc_combined_time += straight['time'] + dirc_combined_bins += straight['n_bins'] + dirc_regions.append(("DIRC Bar (straight)", straight)) + + if "DIRC Bar (triangle)" in region_stats: + triangle = region_stats["DIRC Bar (triangle)"] + dirc_combined_time += triangle['time'] + dirc_combined_bins += triangle['n_bins'] + dirc_regions.append(("DIRC Bar (triangle)", triangle)) + + # Create combined entry for sorting/display + combined_stats = {} + for name, stats in region_stats.items(): + if name not in ["DIRC Bar (straight)", "DIRC Bar (triangle)"]: + combined_stats[name] = stats + + # Add combined DIRC entry + if dirc_regions: + combined_stats["DIRC"] = { + 'time': dirc_combined_time, + 'n_bins': dirc_combined_bins, + 'sub_regions': dirc_regions # Keep track of sub-regions for drawing boxes + } + + # Sort regions by time + sorted_regions = sorted(combined_stats.items(), + key=lambda x: x[1]['time'], + reverse=True) + + # Print top regions + print("Top regions by total time:") + print("-" * 80) + print(f"{'Region':<35} {'Time (ms)':<12} {'% Total':<10} {'Bins':<8}") + print("-" * 80) + + for region_name, stats in sorted_regions: + if stats['time'] > 0: + time_ms = stats['time'] / 1e6 + percent = 100 * stats['time'] / total_time + print(f"{region_name:<35} {time_ms:>10.2f} {percent:>7.2f}% {stats['n_bins']:>6}") + + print("-" * 80) + print() + + # Create annotated ZR plot using uproot + matplotlib + import uproot + import matplotlib.colors as mcolors + from matplotlib.patches import Rectangle + + with uproot.open(input_file) as uf: + h = uf["m_zr"] + counts, z_edges, r_edges = h.to_numpy() + + fig_zr, ax_zr = plt.subplots(figsize=(14, 7)) + counts_plot = np.where(counts > 0, counts, np.nan) + im = ax_zr.pcolormesh(z_edges, r_edges, counts_plot.T, + norm=mcolors.LogNorm(vmin=np.nanmin(counts_plot[counts_plot > 0]), + vmax=np.nanmax(counts_plot)), + cmap='rainbow', shading='flat') + cbar = fig_zr.colorbar(im, ax=ax_zr, pad=0.02) + cbar.set_label('Time [ns]', fontsize=18) + cbar.ax.tick_params(labelsize=16) + + ax_zr.set_xlabel('Z [mm]', fontsize=18) + ax_zr.set_ylabel('R [mm]', fontsize=18) + ax_zr.tick_params(axis='x', labelsize=16, rotation=45) + ax_zr.tick_params(axis='y', labelsize=16) + # Auto-zoom to Z range containing 99.9% of activity + z_centers_arr = (z_edges[:-1] + z_edges[1:]) / 2 + z_content = counts.sum(axis=1) + cumsum_z = np.cumsum(z_content) + total_z = cumsum_z[-1] + z_lo_idx = np.searchsorted(cumsum_z, 0.0005 * total_z) + z_hi_idx = np.searchsorted(cumsum_z, 0.9995 * total_z) + padding = (z_centers_arr[z_hi_idx] - z_centers_arr[z_lo_idx]) * 0.03 + ax_zr.set_xlim(z_centers_arr[z_lo_idx] - padding, z_centers_arr[z_hi_idx] + padding) + ax_zr.set_ylim(r_edges[0], r_edges[-1]) + + # Draw thin outline boxes for all regions + for name, z_lo, z_hi, r_lo, r_hi in detector_regions: + rect = Rectangle((z_lo, r_lo), z_hi - z_lo, r_hi - r_lo, + linewidth=1, edgecolor='black', facecolor='none') + ax_zr.add_patch(rect) + + # Draw thick ranked boxes with white number labels + top_n = len([r for r in sorted_regions if r[1]['time'] > 0]) + for idx, (region_name, stats) in enumerate(sorted_regions[:top_n]): + if stats['time'] <= 0: + continue + if region_name == "DIRC" and 'sub_regions' in stats: + all_z, all_r = [], [] + for sub_name, sub_stats in stats['sub_regions']: + z_lo, z_hi = sub_stats['z_range'] + r_lo, r_hi = sub_stats['r_range'] + rect = Rectangle((z_lo, r_lo), z_hi - z_lo, r_hi - r_lo, + linewidth=3, edgecolor='black', facecolor='none') + ax_zr.add_patch(rect) + all_z += [z_lo, z_hi] + all_r += [r_lo, r_hi] + z_center = min(all_z) + r_center = (min(all_r) + max(all_r)) / 2.0 + ha = 'left' + else: + z_lo, z_hi = stats['z_range'] + r_lo, r_hi = stats['r_range'] + rect = Rectangle((z_lo, r_lo), z_hi - z_lo, r_hi - r_lo, + linewidth=3, edgecolor='black', facecolor='none') + ax_zr.add_patch(rect) + z_center = (z_lo + z_hi) / 2.0 + r_center = (r_lo + r_hi) / 2.0 + ha = 'center' + + ax_zr.text(z_center, r_center, str(idx + 1), + ha=ha, va='center', fontsize=22, fontweight='bold', + color='white', + bbox=dict(boxstyle='round,pad=0.15', facecolor='black', + edgecolor='none', alpha=0.6)) + + plt.tight_layout() + zr_png = f"{output_prefix}_zr_annotated.png" + zr_pdf = f"{output_prefix}_zr_annotated.pdf" + fig_zr.savefig(zr_png, dpi=150, bbox_inches='tight') + fig_zr.savefig(zr_pdf, bbox_inches='tight') + plt.close(fig_zr) + print(f"Saved: {zr_png}") + print(f"Saved: {zr_pdf}") + + # Write text summary + # Compute per-PDG region sums + pdg_labels = {11: "e-", -11: "e+", 22: "gamma", -22: "optical photon"} + pdg_region_stats = {} + for pdg, label in pdg_labels.items(): + hname = f"m_zr_pdg{pdg}" + h = f.Get(hname) + if not h: + continue + stats_pdg = {name: {'time': 0, 'n_bins': 0} for name, *_ in detector_regions} + pdg_histo_total = 0 + for iz in range(1, h.GetNbinsX() + 1): + for ir in range(1, h.GetNbinsY() + 1): + value = h.GetBinContent(iz, ir) + if value <= 0: + continue + pdg_histo_total += value + z = h.GetXaxis().GetBinCenter(iz) + r = h.GetYaxis().GetBinCenter(ir) + matched = False + for name, z_lo, z_hi, r_lo, r_hi in detector_regions: + if z_lo <= z < z_hi and r_lo <= r < r_hi: + stats_pdg[name]['time'] += value + stats_pdg[name]['n_bins'] += 1 + matched = True + break + # Combine DIRC sub-regions + dirc_time = stats_pdg.get("DIRC Bar (straight)", {}).get('time', 0) + \ + stats_pdg.get("DIRC Bar (triangle)", {}).get('time', 0) + dirc_bins = stats_pdg.get("DIRC Bar (straight)", {}).get('n_bins', 0) + \ + stats_pdg.get("DIRC Bar (triangle)", {}).get('n_bins', 0) + combined_pdg = {k: v for k, v in stats_pdg.items() + if k not in ("DIRC Bar (straight)", "DIRC Bar (triangle)")} + combined_pdg["DIRC"] = {'time': dirc_time, 'n_bins': dirc_bins} + pdg_assigned = sum(s['time'] for s in combined_pdg.values()) + combined_pdg["other regions"] = {'time': max(0, pdg_histo_total - pdg_assigned), 'n_bins': 0} + pdg_total = pdg_histo_total + pdg_region_stats[pdg] = {'label': label, 'regions': combined_pdg, 'total': pdg_total} + + # Build unified table: rows=regions, cols=total + per-PDG + # Collect all region names in sorted order + all_region_names = [name for name, stats in sorted_regions if stats['time'] > 0] + # Compute "other regions" time = total - sum of all defined regions + assigned_time = sum(stats['time'] for _, stats in sorted_regions if stats['time'] > 0) + other_regions_time = max(0, total_time - assigned_time) + all_region_names_with_other = all_region_names + ["other regions"] + present_pdgs = [pdg for pdg in pdg_labels if pdg in pdg_region_stats] + + txt_file = f"{output_prefix}_profile.txt" + with open(txt_file, 'w') as tf: + tf.write(f"Performance Profile Summary\n") + tf.write(f"Input: {input_file}\n") + tf.write(f"Total time: {total_time:.2e} ns ({total_time/1e6:.1f} ms)\n") + tf.write(f"\n") + + # Header: region | total % | per-PDG % + pdg_col_labels = [pdg_labels[p] for p in present_pdgs] + hdr_region = f"{'Region':<35}" + hdr_total = f"{'Total ms':>12} {'%tot':>6}" + hdr_pdgs = "".join(f" {lbl+' ms':>12} {'%tot':>6}" for lbl in pdg_col_labels) + tf.write(hdr_region + hdr_total + hdr_pdgs + "\n") + tf.write("-" * (35 + 20 + 20 * len(present_pdgs)) + "\n") + + for region_name in all_region_names_with_other: + if region_name == "other regions": + region_time = other_regions_time + else: + region_time = combined_stats[region_name]['time'] if region_name in combined_stats else 0 + total_ms = region_time / 1e6 + total_pct = 100 * region_time / total_time + + row = f"{region_name:<35}{total_ms:>12.1f} {total_pct:>5.1f}%" + + for pdg in present_pdgs: + pdata = pdg_region_stats[pdg] + t = pdata['regions'].get(region_name, {}).get('time', 0) + ms = t / 1e6 + pct = 100 * t / total_time + row += f" {ms:>12.1f} {pct:>5.1f}%" + + tf.write(row + "\n") + + tf.write(f"\n") + tf.write(f"Region geometry:\n") + for name, z_lo, z_hi, r_lo, r_hi in detector_regions: + tf.write(f" {name}: Z=[{z_lo:.0f},{z_hi:.0f}] mm, R=[{r_lo:.0f},{r_hi:.0f}] mm\n") + + print(f"Saved: {txt_file}") + print() + + # Heatmap: rows=particles+other, cols=regions+other regions, values=% of total sim time + if present_pdgs: + pdg_names = {11: 'e-', -11: 'e+', 22: 'gamma', -22: 'opt. photon', 'other': 'other'} + pdg_row_keys = ['other'] + present_pdgs + + far_forward_regions = {"Far Forward (Other)", "Far Forward (Zero Degree Calorimeter)"} + central_cols = [r for r in all_region_names_with_other if r not in far_forward_regions] + farfwd_cols = [r for r in all_region_names_with_other if r in far_forward_regions] + + def make_heatmap(col_labels, title, outfile): + data = np.zeros((len(pdg_row_keys), len(col_labels))) + group_time_ns = 0 + for ri, region_name in enumerate(col_labels): + if region_name == "other regions": + region_total = other_regions_time + else: + region_total = combined_stats[region_name]['time'] if region_name in combined_stats else 0 + group_time_ns += region_total + accounted = 0 + for pi, pdg in enumerate(present_pdgs): + t = pdg_region_stats[pdg]['regions'].get(region_name, {}).get('time', 0) + data[pi + 1, ri] = t + accounted += t + data[0, ri] = max(0, region_total - accounted) + + group_ms = group_time_ns / 1e6 + group_pct = 100 * group_time_ns / total_time + subtitle = f'{group_ms:,.0f} ms ({group_pct:.1f}% of total {total_time/1e6:,.0f} ms)' + # normalize to % of this zone's total + if group_time_ns > 0: + data = 100 * data / group_time_ns + + row_labels = [pdg_names[p] for p in pdg_row_keys] + fig, ax = plt.subplots(figsize=(max(10, 1.5 * len(col_labels)), max(4, 1.2 * len(row_labels)))) + im = ax.imshow(data, aspect='auto', cmap='YlOrRd') + ax.set_xticks(np.arange(len(col_labels))) + ax.set_yticks(np.arange(len(row_labels))) + ax.set_xticklabels(col_labels, rotation=25, ha='right', fontsize=14) + ax.set_yticklabels(row_labels, fontsize=14) + for pi in range(len(row_labels)): + for ri in range(len(col_labels)): + v = data[pi, ri] + if v > 0.05: + textcolor = 'white' if v > data.max() * 0.6 else 'black' + ax.text(ri, pi, f'{v:.1f}%', ha='center', va='center', + fontsize=12, color=textcolor, fontweight='bold') + cbar = fig.colorbar(im, ax=ax, fraction=0.03, pad=0.02) + cbar.set_label('% of zone simulation time', fontsize=13) + cbar.ax.tick_params(labelsize=12) + ax.set_title(f'{title}\n{subtitle}', fontsize=14, fontweight='bold') + plt.tight_layout() + fig.savefig(outfile, dpi=150, bbox_inches='tight') + plt.close(fig) + print(f"Saved: {outfile}") + + central_time_ns = sum( + (other_regions_time if r == "other regions" else combined_stats.get(r, {}).get('time', 0)) + for r in central_cols + ) + farfwd_time_ns = sum( + (other_regions_time if r == "other regions" else combined_stats.get(r, {}).get('time', 0)) + for r in farfwd_cols + ) + other_time_ns = max(0, total_time - central_time_ns - farfwd_time_ns) + + make_heatmap( + central_cols, + '% of Stepping Time in Central Detector', + f"{output_prefix}_heatmap_central.png" + ) + if farfwd_cols: + make_heatmap( + farfwd_cols, + '% of Stepping Time in Far Forward Detectors', + f"{output_prefix}_heatmap_farfwd.png" + ) + + # central/farfwd totals include "other regions" — strip it out for clarity + central_boxes_ns = sum( + combined_stats[r]['time'] for r in central_cols + if r != "other regions" and r in combined_stats + ) + farfwd_boxes_ns = sum( + combined_stats[r]['time'] for r in farfwd_cols + if r != "other regions" and r in combined_stats + ) + other_ns = max(0, total_time - central_boxes_ns - farfwd_boxes_ns) + + print() + print("=" * 60) + print(f"Combined Central: {central_boxes_ns/1e6:>10,.1f} ms ({100*central_boxes_ns/total_time:.1f}%)") + print(f"Combined Far Forward: {farfwd_boxes_ns/1e6:>10,.1f} ms ({100*farfwd_boxes_ns/total_time:.1f}%)") + print(f"Other (outside boxes): {other_ns/1e6:>10,.1f} ms ({100*other_ns/total_time:.1f}%)") + print(f"Total Stepping Time: {total_time/1e6:>10,.1f} ms") + print("=" * 60) + print() + + f.Close() + return 0 + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: plot_timing_profile.py [output_prefix]") + sys.exit(1) + + input_file = sys.argv[1] + output_prefix = sys.argv[2] if len(sys.argv) > 2 else None + + sys.exit(analyze_hot_regions(input_file, output_prefix)) diff --git a/src/dd4pod/python/npsim.py b/src/dd4pod/python/npsim.py index 2c35e91..865891f 100755 --- a/src/dd4pod/python/npsim.py +++ b/src/dd4pod/python/npsim.py @@ -9,10 +9,12 @@ """ from __future__ import absolute_import, unicode_literals import logging +import os import sys from DDSim.DD4hepSimulation import DD4hepSimulation +from g4units import mm if __name__ == "__main__": logging.basicConfig(format='%(name)-16s %(levelname)s %(message)s', level=logging.INFO, stream=sys.stdout) @@ -25,6 +27,12 @@ # https://github.com/AIDASoft/DD4hep/pull/1376 RUNNER.parseOptions() + # Allow overriding the global range cut via RANGE_CUT_MM env var (value in mm) + _range_cut_mm = os.environ.get('RANGE_CUT_MM', '') + if _range_cut_mm: + RUNNER.physics.rangecut = float(_range_cut_mm) + logger.info('RANGE_CUT_MM: setting physics.rangecut = %s mm', _range_cut_mm) + # Ensure that Cerenkov and optical physics are always loaded def setupCerenkov(kernel): from DDG4 import PhysicsList @@ -69,8 +77,37 @@ def setupCerenkov(kernel): RUNNER.action.mapActions['PFRICH'] = 'Geant4OpticalTrackerAction' RUNNER.action.mapActions['DIRC'] = 'Geant4OpticalTrackerAction' - # Use the optical photon efficiency stacking action for hpDIRC + # Use the optical photon efficiency stacking action + nm = 1e-6 * mm # dd4hep length unit + drich_lambda_values = [l * nm for l in [ # [nm] + 315, 325, 340, 350, 370, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 1000 + ]] + drich_efficiency = [e/100. for e in [ # [%] + 0.00, 4.00, 10.0, 20.0, 30.0, 35.0, 40.0, 38.0, 35.0, 27.0, 20.0, 15.0, 12.0, 8.00, 6.00, 4.00, 0.00 + ]] + pfrich_lambda_values = [l * nm for l in [ # [nm] + 315, 325, 340, 350, 370, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 1000 + ]] + pfrich_efficiency = [e/100. for e in [ # [%] + 0.00, 4.00, 10.0, 20.0, 30.0, 35.0, 40.0, 38.0, 35.0, 27.0, 20.0, 15.0, 12.0, 8.00, 6.00, 4.00, 0.00 + ]] RUNNER.action.stack = [ + { + "name": "OpticalPhotonEfficiencyStackingAction", + "parameter": { + "LambdaValues": drich_lambda_values, + "LogicalVolume": "DRICH_(gas|aerogel)", + "Efficiency": drich_efficiency, + } + }, + { + "name": "OpticalPhotonEfficiencyStackingAction", + "parameter": { + "LambdaValues": pfrich_lambda_values, + "LogicalVolume": "PFRICH(_GasVol|-aerogel-[0-9]+-[0-9]+)", + "Efficiency": pfrich_efficiency, + } + }, { "name": "OpticalPhotonEfficiencyStackingAction", "parameter": { @@ -100,6 +137,18 @@ def setupCerenkov(kernel): } ] + RUNNER.action.event = { + "name": "PerformanceProfileEventAction" + } + + RUNNER.action.step = { + "name": "PerformanceProfileSteppingAction" + } + + RUNNER.action.track = { + "name": "PerformanceProfileTrackingAction" + } + try: sys.exit(RUNNER.run()) except NameError as e: diff --git a/src/plugins/CMakeLists.txt b/src/plugins/CMakeLists.txt index 6216894..9419c0e 100644 --- a/src/plugins/CMakeLists.txt +++ b/src/plugins/CMakeLists.txt @@ -5,8 +5,11 @@ dd4hep_add_plugin(NPDetPlugins src/EICInteractionVertexBoost.cxx src/EICInteractionVertexSmear.cxx src/OpticalPhotonEfficiencyStackingAction.cxx + src/PerformanceProfileSteppingAction.cxx + src/PerformanceProfileTrackingAction.cxx + src/PerformanceProfileEventAction.cxx INCLUDES $ - USES DD4hep::DDCore DD4hep::DDG4 + USES DD4hep::DDCore DD4hep::DDG4 ROOT::Core ) install(TARGETS NPDetPlugins diff --git a/src/plugins/include/npdet/OpticalPhotonEfficiencyStackingAction.h b/src/plugins/include/npdet/OpticalPhotonEfficiencyStackingAction.h index e902f4a..4a57f6d 100644 --- a/src/plugins/include/npdet/OpticalPhotonEfficiencyStackingAction.h +++ b/src/plugins/include/npdet/OpticalPhotonEfficiencyStackingAction.h @@ -17,6 +17,12 @@ #include "DDG4/Geant4StackingAction.h" #include "G4OpticalPhoton.hh" #include "G4Track.hh" +#include +#include +#include +#include +#include +#include /// Namespace for the AIDA detector description toolkit namespace dd4hep { @@ -31,20 +37,15 @@ namespace dd4hep { : Geant4StackingAction(c, n) { declareProperty("LambdaMin", m_lambda_min); declareProperty("LambdaMax", m_lambda_max); + declareProperty("LambdaValues", m_lambda_values); declareProperty("Efficiency", m_efficiency); declareProperty("LogicalVolume", m_logical_volume); }; /// Default destructor virtual ~OpticalPhotonEfficiencyStackingAction() { - printout(DEBUG, name(), "Suppressed %d of %d photons in lv %s", - m_killed_photons, m_total_photons, m_logical_volume.c_str()); - printout(DEBUG, name(), "lambda range: [%f,%f] nm", - m_lambda_min / CLHEP::nm, m_lambda_max / CLHEP::nm); - std::ostringstream oss_efficiency; - std::copy(m_efficiency.begin(), m_efficiency.end(), - std::ostream_iterator(oss_efficiency, " ")); - std::string str_efficiency = oss_efficiency.str(); - printout(DEBUG, name(), "efficiency: %s", str_efficiency.c_str()); + double pct = m_total_photons > 0 ? 100.0 * m_killed_photons / m_total_photons : 0.0; + printout(WARNING, name(), "Suppressed %zu of %zu photons (%.1f%%) in lv %s", + m_killed_photons, m_total_photons, pct, m_logical_volume.c_str()); }; /// New-stage callback virtual void newStage(G4StackManager*) override { }; @@ -52,6 +53,7 @@ namespace dd4hep { virtual void prepare(G4StackManager*) override { }; /// Return TrackClassification with enum G4ClassificationOfNewTrack or NoTrackClassification virtual TrackClassification classifyNewTrack(G4StackManager*, const G4Track* aTrack) override { + std::call_once(m_init_once, [this]() { this->initialize(); }); // Only apply to optical photons if (aTrack->GetDefinition() == G4OpticalPhoton::OpticalPhotonDefinition()) { auto* pv = aTrack->GetVolume(); @@ -59,40 +61,18 @@ namespace dd4hep { auto* lv = pv->GetLogicalVolume(); printout(VERBOSE, name(), "photon in pv %s lv %s", pv->GetName().c_str(), lv->GetName().c_str()); - // Only apply to specified logical volume - if (lv->GetName() == m_logical_volume) { + // Only apply to specified logical volume regex + if (std::regex_match(lv->GetName().c_str(), m_logical_volume_regex)) { double mom = aTrack->GetMomentum().mag(); double lambda = CLHEP::hbarc * CLHEP::twopi / mom; printout(VERBOSE, name(), "with mom = %f eV, lambda = %f nm", mom / CLHEP::eV, lambda / CLHEP::nm); m_total_photons++; - if (m_lambda_min < lambda && lambda < m_lambda_max) { - double efficiency{0.}; - if (m_efficiency.size() == 0) { - // No efficiency specified, assume zero - efficiency = 0.; - // which means kill - ++m_killed_photons; - return TrackClassification(fKill); - - } else if (m_efficiency.size() == 1) { - // Single constant value over lambda range - efficiency = m_efficiency.front(); - - } else { - // Linear interpolation on lambda grid - double lambda_step = (m_lambda_max - m_lambda_min) / (m_efficiency.size() - 1); - double div = (lambda - m_lambda_min) / lambda_step; - auto i = std::llround(std::floor(div)); - double t = div - i; - double a = m_efficiency[i]; - double b = m_efficiency[i+1]; - efficiency = a + t * (b - a); - printout(VERBOSE, name(), "a = %f, b = %f, t = %f", a, b, t); - printout(VERBOSE, name(), "efficiency %f", efficiency); - } - + auto lambda_min = !m_interp_lambda_values.empty() ? m_interp_lambda_values.front() : m_lambda_min; + auto lambda_max = !m_interp_lambda_values.empty() ? m_interp_lambda_values.back() : m_lambda_max; + if (lambda_min < lambda && lambda < lambda_max) { + double efficiency = interpolate(lambda); // Edge cases if (efficiency == 0.0) { ++m_killed_photons; @@ -110,7 +90,10 @@ namespace dd4hep { return TrackClassification(fKill); } } else { - printout(VERBOSE, name(), "outside lambda range [%f,%f] nm", m_lambda_min / CLHEP::nm, m_lambda_max / CLHEP::nm); + // Outside the QE-specified wavelength range: efficiency is 0 + printout(VERBOSE, name(), "outside lambda range [%f,%f] nm", lambda_min / CLHEP::nm, lambda_max / CLHEP::nm); + ++m_killed_photons; + return TrackClassification(fKill); } } else { printout(VERBOSE, name(), "not in volume %s", m_logical_volume.c_str()); @@ -119,9 +102,58 @@ namespace dd4hep { return TrackClassification(); }; private: + void initialize() { + try { + m_logical_volume_regex = std::regex(m_logical_volume); + } catch (const std::regex_error&) { + throw std::runtime_error("Invalid LogicalVolume regex"); + } + if (m_efficiency.size() > 1) { + if (m_lambda_values.size() == m_efficiency.size()) { + m_interp_lambda_values = m_lambda_values; + } else { + auto lambda_min = m_lambda_min; + auto lambda_max = m_lambda_max; + m_interp_lambda_values.reserve(m_efficiency.size()); + auto lambda_step = (lambda_max - lambda_min) / (m_efficiency.size() - 1); + for (std::size_t i = 0; i < m_efficiency.size(); ++i) { + m_interp_lambda_values.push_back(lambda_min + i * lambda_step); + } + } + } else if (m_lambda_values.size() > 1) { + m_interp_lambda_values = m_lambda_values; + } + } + inline double interpolate(double lambda) const { + if (m_efficiency.size() == 1) { + return m_efficiency.front(); + } + if (m_efficiency.size() < 2 || m_interp_lambda_values.size() < 2) { + return 0.0; + } + auto upper = std::upper_bound(m_interp_lambda_values.begin(), m_interp_lambda_values.end(), lambda); + auto i = std::distance(m_interp_lambda_values.begin(), upper) - 1; + double a_lambda = m_interp_lambda_values[i]; + double b_lambda = m_interp_lambda_values[i+1]; + double t = (lambda - a_lambda) / (b_lambda - a_lambda); + double a = m_efficiency[i]; + double b = m_efficiency[i+1]; + printout(VERBOSE, name(), "a = %f, b = %f, t = %f", a, b, t); +#if defined(__cpp_lib_interpolate) && __cpp_lib_interpolate >= 201902L + double efficiency = std::lerp(a, b, t); +#else + double efficiency = a + t * (b - a); +#endif + printout(VERBOSE, name(), "efficiency %f", efficiency); + return efficiency; + } double m_lambda_min{0.}, m_lambda_max{0.}; + std::vector m_lambda_values; + std::vector m_interp_lambda_values; std::vector m_efficiency; std::string m_logical_volume; + std::regex m_logical_volume_regex{}; + std::once_flag m_init_once{}; std::size_t m_total_photons{0}, m_killed_photons{0}; }; } // End namespace sim diff --git a/src/plugins/include/npdet/PerformanceProfileSteppingAction.h b/src/plugins/include/npdet/PerformanceProfileSteppingAction.h new file mode 100644 index 0000000..1f23571 --- /dev/null +++ b/src/plugins/include/npdet/PerformanceProfileSteppingAction.h @@ -0,0 +1,197 @@ +//========================================================================== +// AIDA Detector description implementation +//-------------------------------------------------------------------------- +// Copyright (C) Organisation europeenne pour la Recherche nucleaire (CERN) +// All rights reserved. +// +// For the licensing terms see $DD4hepINSTALL/LICENSE. +// For the list of contributors see $DD4hepINSTALL/doc/CREDITS. +// +// Author : M.Frank +// +//========================================================================== +#ifndef PERFORMANCEPROFILESTEPPINGACTION_H +#define PERFORMANCEPROFILESTEPPINGACTION_H + +#include "DDG4/Geant4SteppingAction.h" +#include "G4Step.hh" +#include "TFile.h" +#include "TH2D.h" +#include "npdet/PerformanceProfileTrackingAction.h" + +#include +#include +#include +#include +#include + +/// Namespace for the AIDA detector description toolkit +namespace dd4hep { + + /// Namespace for the Geant4 based simulation part of the AIDA detector description toolkit + namespace sim { + + using namespace std::chrono_literals; + + class PerformanceProfileSteppingAction : public Geant4SteppingAction { + public: + /// Standard constructor with initializing arguments + PerformanceProfileSteppingAction(Geant4Context* c, const std::string& n) : Geant4SteppingAction(c, n) {}; + /// Default destructor + virtual ~PerformanceProfileSteppingAction() { + printLongTracks(); + std::chrono::milliseconds total_duration = std::chrono::milliseconds::zero(); + for (auto& [pdg, ns] : m_pdg_duration) { + total_duration += std::chrono::duration_cast(ns); + } + // Print per-PDG summary: stepping | tracking | remainder (tracking - stepping) + auto& shared = performanceProfileSharedData(); + auto& tracking_dur = shared.tracking_duration; + auto& track_count = shared.track_count; + auto& begin_oh_dur = shared.begin_overhead_duration; + auto& end_oh_dur = shared.end_overhead_duration; + printout(WARNING, name(), "%-10s %15s %15s %15s %12s %12s %10s", + "PDG", "stepping_ms", "tracking_ms", "remainder_ms", "begin_oh_ms", "end_oh_ms", "n_tracks"); + for (auto& [pdg, step_ns] : m_pdg_duration) { + auto step_ms = std::chrono::duration_cast(step_ns); + auto tit = tracking_dur.find(pdg); + auto track_ms = std::chrono::duration_cast( + tit != tracking_dur.end() ? tit->second : std::chrono::nanoseconds::zero()); + auto remainder_ms = track_ms - step_ms; + auto bit = begin_oh_dur.find(pdg); + auto begin_ms = std::chrono::duration_cast( + bit != begin_oh_dur.end() ? bit->second : std::chrono::nanoseconds::zero()); + auto eit = end_oh_dur.find(pdg); + auto end_ms = std::chrono::duration_cast( + eit != end_oh_dur.end() ? eit->second : std::chrono::nanoseconds::zero()); + auto n_tracks = track_count.count(pdg) ? track_count.at(pdg) : 0; + printout(WARNING, name(), "%-10d %15ld %15ld %15ld %12ld %12ld %10ld", + pdg, step_ms.count(), track_ms.count(), remainder_ms.count(), + begin_ms.count(), end_ms.count(), n_tracks); + } + printout(WARNING, name(), "total duration: %ld ms", total_duration.count()); + std::time_t t = std::time(nullptr); + char ts[32]; + std::tm tm_buf{}; + if (localtime_r(&t, &tm_buf) != nullptr) { + std::strftime(ts, sizeof(ts), "%Y%m%d_%H%M%S", &tm_buf); + } else { + std::snprintf(ts, sizeof(ts), "%s", "19700101_000000"); + } + auto suffix = std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count(); + std::string fname = std::string("histos_") + ts + "_" + std::to_string(suffix) + ".root"; + TFile f(fname.c_str(), "recreate"); + m_xy.Write(); + m_zr.Write(); + for (auto& [pdg, hxy] : m_xy_pdg) { + hxy.Write(); + } + for (auto& [pdg, hzr] : m_zr_pdg) { + hzr.Write(); + } + f.Close(); + }; + /// User stepping callback + void operator()(const G4Step* step, G4SteppingManager*) override { + auto now = std::chrono::steady_clock::now(); + std::chrono::nanoseconds step_duration = std::chrono::duration_cast(now - m_previous_timepoint); + + // Get step info + auto* track = step->GetTrack(); + auto* particle = track->GetParticleDefinition(); + auto pdg = particle->GetPDGEncoding(); + auto track_id = track->GetTrackID(); + auto* prestep [[maybe_unused]] = step->GetPreStepPoint(); + auto prestep_position = prestep->GetPosition(); + auto* poststep [[maybe_unused]] = step->GetPostStepPoint(); + auto poststep_position = poststep->GetPosition(); + auto poststep_energy = poststep->GetTotalEnergy(); + auto firststep [[maybe_unused]] = step->IsFirstStepInVolume(); + auto laststep [[maybe_unused]] = step->IsLastStepInVolume(); + + if (track_id == m_previous_track_id) { + // Current track + m_duration[track_id] += step_duration; + m_pdg_duration[pdg] += step_duration; + auto& shared = performanceProfileSharedData(); + shared.stepping_duration_per_track[track_id] += step_duration; + shared.last_step_time[track_id] = now; + m_poststep_position[track_id] = poststep_position; + m_poststep_energy[track_id] = poststep_energy; + m_xy.Fill(poststep_position.x(), poststep_position.y(), step_duration.count()); + m_zr.Fill(poststep_position.z(), std::hypot(poststep_position.x(), poststep_position.y()), step_duration.count()); + // Per-PDG histos: create on first encounter + auto hname_xy = "m_xy_pdg" + std::to_string(pdg); + auto hname_zr = "m_zr_pdg" + std::to_string(pdg); + if (m_xy_pdg.find(pdg) == m_xy_pdg.end()) { + m_xy_pdg.emplace(std::piecewise_construct, std::forward_as_tuple(pdg), + std::forward_as_tuple(hname_xy.c_str(), hname_xy.c_str(), + 100, -3.*CLHEP::m, +3.*CLHEP::m, + 100, -3.*CLHEP::m, +3.*CLHEP::m)); + m_zr_pdg.emplace(std::piecewise_construct, std::forward_as_tuple(pdg), + std::forward_as_tuple(hname_zr.c_str(), hname_zr.c_str(), + 1000, -50.*CLHEP::m, +50.*CLHEP::m, + 100, 0., +3.*CLHEP::m)); + } + m_xy_pdg.at(pdg).Fill(poststep_position.x(), poststep_position.y(), step_duration.count()); + m_zr_pdg.at(pdg).Fill(poststep_position.z(), std::hypot(poststep_position.x(), poststep_position.y()), step_duration.count()); + } else { + // New track — flush if event action signalled a new event + if (performanceProfileSharedData().new_event_flag) { + printLongTracks(); + m_duration.clear(); + m_pdg.clear(); + m_prestep_position.clear(); + m_poststep_position.clear(); + m_poststep_energy.clear(); + performanceProfileSharedData().first_step_time.clear(); + performanceProfileSharedData().last_step_time.clear(); + performanceProfileSharedData().new_event_flag = false; + printout(VERBOSE, name(), "new event"); + } + m_previous_track_id = track_id; + m_pdg[track_id] = pdg; + m_duration[track_id] = std::chrono::nanoseconds::zero(); + m_prestep_position[track_id] = prestep_position; + // Record first step entry time for begin-overhead calculation + performanceProfileSharedData().first_step_time[track_id] = now; + performanceProfileSharedData().last_step_time[track_id] = now; + } + + m_previous_timepoint = now; + }; + + private: + void printLongTracks() { + for (auto& [track_id, duration] : m_duration) { + if (std::chrono::abs(duration) <= std::chrono::nanoseconds(10ms)) { + continue; + } + auto p0 = m_prestep_position[track_id] / CLHEP::mm; + auto p1 = m_poststep_position[track_id] / CLHEP::mm; + auto E1 = m_pdg[track_id] == -22 ? m_poststep_energy[track_id] / CLHEP::eV + : m_poststep_energy[track_id] / CLHEP::MeV; + printout(INFO, name(), "track %d (pdg=%d): %ld ms (%f %f %f mm -> %f %f %f mm, %f (M?)eV)", track_id, + m_pdg[track_id], std::chrono::duration_cast(duration).count(), p0.x(), + p0.y(), p0.z(), p1.x(), p1.y(), p1.z(), E1); + } + } + + std::unordered_map m_duration; + std::unordered_map m_pdg; + std::unordered_map m_prestep_position; + std::unordered_map m_poststep_position; + std::unordered_map m_poststep_energy; + G4int m_previous_track_id{0}; + std::chrono::time_point m_previous_timepoint; + TH2D m_xy{"m_xy", "xy", 100, -3.*CLHEP::m, +3.*CLHEP::m, 100, -3.*CLHEP::m, +3.*CLHEP::m}; + TH2D m_zr{"m_zr", "zr", 1000, -50.*CLHEP::m, +50.*CLHEP::m, 100, 0., +3.*CLHEP::m}; + std::unordered_map m_xy_pdg; + std::unordered_map m_zr_pdg; + std::unordered_map m_pdg_duration; + }; + } // End namespace sim +} // End namespace dd4hep + +#endif // PERFORMANCEPROFILESTEPPINGACTION_H diff --git a/src/plugins/include/npdet/PerformanceProfileTrackingAction.h b/src/plugins/include/npdet/PerformanceProfileTrackingAction.h new file mode 100644 index 0000000..dc9cb52 --- /dev/null +++ b/src/plugins/include/npdet/PerformanceProfileTrackingAction.h @@ -0,0 +1,216 @@ +//========================================================================== +// AIDA Detector description implementation +//-------------------------------------------------------------------------- +// Copyright (C) Organisation europeenne pour la Recherche nucleaire (CERN) +// All rights reserved. +// +// For the licensing terms see $DD4hepINSTALL/LICENSE. +// For the list of contributors see $DD4hepINSTALL/doc/CREDITS. +// +// Author : M.Frank +// +//========================================================================== +#ifndef PERFORMANCEPROFILETRACKINGACTION_H +#define PERFORMANCEPROFILETRACKINGACTION_H + +#include "DDG4/Geant4EventAction.h" +#include "DDG4/Geant4TrackingAction.h" +#include "G4Event.hh" +#include "G4ThreeVector.hh" +#include "G4Track.hh" +#include "CLHEP/Units/SystemOfUnits.h" +#include "TFile.h" +#include "TH1F.h" +#include "TH2D.h" + +#include +#include +#include +#include + +/// Namespace for the AIDA detector description toolkit +namespace dd4hep { + + /// Namespace for the Geant4 based simulation part of the AIDA detector description toolkit + namespace sim { + + using namespace std::chrono_literals; + + /// Shared state between tracking and stepping actions (per-PDG wall-clock accumulators). + /// Populated by PerformanceProfileTrackingAction, read by PerformanceProfileSteppingAction. + struct PerformanceProfileSharedData { + using time_point = std::chrono::time_point; + /// per-PDG total wall-clock time from begin(track) to end(track) + std::unordered_map tracking_duration; + /// per-PDG number of tracks seen by tracking action + std::unordered_map track_count; + /// per-track stepping duration, written by stepping action, read+cleared by tracking action at end(track) + std::unordered_map stepping_duration_per_track; + /// time of first UserSteppingAction call for each track (written by stepping action) + std::unordered_map first_step_time; + /// time of last UserSteppingAction call for each track (written by stepping action, updated each step) + std::unordered_map last_step_time; + /// per-PDG accumulated begin overhead (PreUserTrackingAction → first step) + std::unordered_map begin_overhead_duration; + /// per-PDG accumulated end overhead (last step → PostUserTrackingAction) + std::unordered_map end_overhead_duration; + /// set true by event action at begin of each event; cleared by stepping action + bool new_event_flag{false}; + }; + + /// Global singleton accessor — single-threaded use only. + inline PerformanceProfileSharedData& performanceProfileSharedData() { + static PerformanceProfileSharedData instance; + return instance; + } + + class PerformanceProfileTrackingAction : public Geant4TrackingAction { + public: + PerformanceProfileTrackingAction(Geant4Context* c, const std::string& n) + : Geant4TrackingAction(c, n) {} + virtual ~PerformanceProfileTrackingAction() { + std::time_t t = std::time(nullptr); + char ts[32]; + std::tm tm_buf{}; + if (localtime_r(&t, &tm_buf) != nullptr) { + std::strftime(ts, sizeof(ts), "%Y%m%d_%H%M%S", &tm_buf); + } else { + std::snprintf(ts, sizeof(ts), "unknown_time"); + } + auto suffix = std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()).count(); + std::string fname = std::string("histos_overhead_") + ts + "_" + std::to_string(suffix) + ".root"; + TFile f(fname.c_str(), "recreate"); + m_overhead_xy.Write(); + m_overhead_zr.Write(); + m_overhead_dist.Write(); + m_begin_overhead_dist.Write(); + m_end_overhead_dist.Write(); + for (auto& [pdg, h] : m_overhead_dist_pdg) h.Write(); + for (auto& [pdg, h] : m_begin_overhead_dist_pdg) h.Write(); + for (auto& [pdg, h] : m_end_overhead_dist_pdg) h.Write(); + f.Close(); + } + + /// Pre-track callback: record wall-clock start and track start position + void begin(const G4Track* track) override { + auto track_id = track->GetTrackID(); + m_begin_time[track_id] = std::chrono::steady_clock::now(); + m_pdg[track_id] = track->GetParticleDefinition()->GetPDGEncoding(); + m_begin_pos[track_id] = track->GetPosition(); + performanceProfileSharedData().track_count[m_pdg[track_id]]++; + } + + /// Post-track callback: accumulate wall-clock duration per PDG; fill overhead spatial histos + void end(const G4Track* track) override { + auto track_id = track->GetTrackID(); + auto it = m_begin_time.find(track_id); + if (it == m_begin_time.end()) return; + auto end_time = std::chrono::steady_clock::now(); + auto track_duration = std::chrono::duration_cast( + end_time - it->second); + auto pdg = m_pdg[track_id]; + performanceProfileSharedData().tracking_duration[pdg] += track_duration; + + // Compute per-track overhead = tracking − stepping; fill spatial histo at track start position + auto& shared = performanceProfileSharedData(); + auto& step_map = shared.stepping_duration_per_track; + auto sit = step_map.find(track_id); + auto step_ns = sit != step_map.end() ? sit->second : std::chrono::nanoseconds::zero(); + if (sit != step_map.end()) step_map.erase(sit); + auto overhead_ns = track_duration > step_ns ? track_duration - step_ns + : std::chrono::nanoseconds::zero(); + auto pos = m_begin_pos[track_id]; + m_overhead_xy.Fill(pos.x(), pos.y(), overhead_ns.count()); + m_overhead_zr.Fill(pos.z(), std::hypot(pos.x(), pos.y()), overhead_ns.count()); + double overhead_us = overhead_ns.count() / 1000.0; + m_overhead_dist.Fill(overhead_us); + + // Split overhead into pre-first-step (begin) and post-last-step (end) components + auto fit = shared.first_step_time.find(track_id); + auto lit = shared.last_step_time.find(track_id); + if (fit != shared.first_step_time.end()) { + auto begin_overhead_ns = std::chrono::duration_cast( + fit->second - it->second); + double begin_us = begin_overhead_ns.count() / 1000.0; + if (begin_us >= 0.) { + m_begin_overhead_dist.Fill(begin_us); + auto bhname = "m_begin_overhead_dist_pdg" + std::to_string(pdg); + m_begin_overhead_dist_pdg.try_emplace(pdg, bhname.c_str(), + (bhname + ";overhead [#mus];tracks").c_str(), + 2000, 0., 200.); + m_begin_overhead_dist_pdg.at(pdg).Fill(begin_us); + shared.begin_overhead_duration[pdg] += begin_overhead_ns; + } + shared.first_step_time.erase(fit); + } + if (lit != shared.last_step_time.end()) { + auto end_overhead_ns = std::chrono::duration_cast( + end_time - lit->second); + double end_us = end_overhead_ns.count() / 1000.0; + if (end_us >= 0.) { + m_end_overhead_dist.Fill(end_us); + auto ehname = "m_end_overhead_dist_pdg" + std::to_string(pdg); + m_end_overhead_dist_pdg.try_emplace(pdg, ehname.c_str(), + (ehname + ";overhead [#mus];tracks").c_str(), + 2000, 0., 200.); + m_end_overhead_dist_pdg.at(pdg).Fill(end_us); + shared.end_overhead_duration[pdg] += end_overhead_ns; + } + shared.last_step_time.erase(lit); + } + // Per-PDG overhead distribution: create on first encounter + auto hname = "m_overhead_dist_pdg" + std::to_string(pdg); + m_overhead_dist_pdg.try_emplace(pdg, hname.c_str(), + (hname + ";overhead [#mus];tracks").c_str(), + 2000, 0., 200.); + m_overhead_dist_pdg.at(pdg).Fill(overhead_us); + + m_begin_time.erase(it); + m_pdg.erase(track_id); + m_begin_pos.erase(track_id); + } + + private: + std::unordered_map> m_begin_time; + std::unordered_map m_pdg; + std::unordered_map m_begin_pos; + TH2D m_overhead_xy{"m_overhead_xy", "overhead_xy", + 100, -3.*CLHEP::m, +3.*CLHEP::m, + 100, -3.*CLHEP::m, +3.*CLHEP::m}; + TH2D m_overhead_zr{"m_overhead_zr", "overhead_zr", + 1000, -50.*CLHEP::m, +50.*CLHEP::m, + 100, 0., +3.*CLHEP::m}; + // Per-track overhead distribution: x = overhead in µs, 2000 bins from 0–200 µs + TH1F m_overhead_dist{"m_overhead_dist", "per-track overhead (#mus);overhead [#mus];tracks", + 2000, 0., 200.}; + // Pre-first-step overhead (begin): PreUserTrackingAction → first UserSteppingAction + TH1F m_begin_overhead_dist{"m_begin_overhead_dist", + "begin overhead (pre-first-step);overhead [#mus];tracks", + 2000, 0., 200.}; + // Post-last-step overhead (end): last UserSteppingAction → PostUserTrackingAction + TH1F m_end_overhead_dist{"m_end_overhead_dist", + "end overhead (post-last-step);overhead [#mus];tracks", + 2000, 0., 200.}; + std::unordered_map m_overhead_dist_pdg; + std::unordered_map m_begin_overhead_dist_pdg; + std::unordered_map m_end_overhead_dist_pdg; + }; + + class PerformanceProfileEventAction : public Geant4EventAction { + public: + PerformanceProfileEventAction(Geant4Context* c, const std::string& n) + : Geant4EventAction(c, n) {} + virtual ~PerformanceProfileEventAction() = default; + + /// Signal stepping action to flush and clear at start of new event + void begin(const G4Event*) override { + performanceProfileSharedData().new_event_flag = true; + } + void end(const G4Event*) override {} + }; + + } // End namespace sim +} // End namespace dd4hep + +#endif // PERFORMANCEPROFILETRACKINGACTION_H diff --git a/src/plugins/src/PerformanceProfileEventAction.cxx b/src/plugins/src/PerformanceProfileEventAction.cxx new file mode 100644 index 0000000..c5ddf59 --- /dev/null +++ b/src/plugins/src/PerformanceProfileEventAction.cxx @@ -0,0 +1,5 @@ +#include "DDG4/Factories.h" + +#include "npdet/PerformanceProfileTrackingAction.h" + +DECLARE_GEANT4ACTION(PerformanceProfileEventAction) diff --git a/src/plugins/src/PerformanceProfileSteppingAction.cxx b/src/plugins/src/PerformanceProfileSteppingAction.cxx new file mode 100644 index 0000000..243128f --- /dev/null +++ b/src/plugins/src/PerformanceProfileSteppingAction.cxx @@ -0,0 +1,5 @@ +#include "DDG4/Factories.h" + +#include "npdet/PerformanceProfileSteppingAction.h" + +DECLARE_GEANT4ACTION(PerformanceProfileSteppingAction) diff --git a/src/plugins/src/PerformanceProfileTrackingAction.cxx b/src/plugins/src/PerformanceProfileTrackingAction.cxx new file mode 100644 index 0000000..31ce92f --- /dev/null +++ b/src/plugins/src/PerformanceProfileTrackingAction.cxx @@ -0,0 +1,5 @@ +#include "DDG4/Factories.h" + +#include "npdet/PerformanceProfileTrackingAction.h" + +DECLARE_GEANT4ACTION(PerformanceProfileTrackingAction)