File indexing completed on 2026-05-15 07:41:49
0001
0002 """
0003 Benchmark: quantify GPU speedup for optical photon simulation.
0004
0005 Runs three configurations of the raindrop geometry (10 MeV e- in water):
0006
0007 cpu -- Optical photons generated AND tracked on CPU by Geant4
0008 baseline -- Optical photons generated but NOT tracked (SetStackPhotons=false)
0009 gpu -- Optical photons simulated on GPU via eic-opticks
0010
0011 Speedup = CPU_optical_time / GPU_simulate_time
0012 where CPU_optical_time = T(cpu) - T(baseline)
0013 and GPU_simulate_time = wall time of G4CXOpticks::simulate()
0014 (includes genstep upload, OptiX launch,
0015 cudaDeviceSynchronize, hit download)
0016
0017 Usage:
0018 python3 benchmark_gpu_speedup.py # all modes, 10 events
0019 python3 benchmark_gpu_speedup.py --events 20 # more events
0020 python3 benchmark_gpu_speedup.py --mode cpu # single mode
0021 python3 benchmark_gpu_speedup.py --mode gpu --events 5 # single mode
0022 """
0023 import argparse
0024 import json
0025 import os
0026 import re
0027 import subprocess
0028 import sys
0029 import time
0030
0031
0032 _SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
0033
0034
0035 def run_single_mode(mode, num_events, photon_threshold=0):
0036 """Run one benchmark mode inside the current process."""
0037 import cppyy
0038 import DDG4
0039 from g4units import MeV
0040
0041 cppyy.include("G4OpticalParameters.hh")
0042 from cppyy.gbl import G4OpticalParameters
0043
0044
0045 kernel = DDG4.Kernel()
0046 compact = os.path.join(_SCRIPT_DIR, "geometry", "raindrop_dd4hep.xml")
0047 if not os.path.exists(compact):
0048 print(f"ERROR: {compact} not found", file=sys.stderr)
0049 sys.exit(1)
0050 kernel.loadGeometry(str("file:" + compact))
0051
0052 geant4 = DDG4.Geant4(kernel)
0053 geant4.setupUI(typ="tcsh", vis=False, ui=False)
0054 geant4.setupGun(
0055 "Gun",
0056 particle="e-",
0057 energy=10 * MeV,
0058 position=(0, 0, 0),
0059 isotrop=False,
0060 direction=(1.0, 0.0, 0.0),
0061 multiplicity=1,
0062 )
0063
0064
0065 geant4.setupPhysics("QGSP_BERT")
0066 ph = DDG4.PhysicsList(kernel, "Geant4PhysicsList/OpticalPhys")
0067 ph.addPhysicsConstructor(str("G4OpticalPhysics"))
0068 kernel.physicsList().adopt(ph)
0069
0070
0071 if mode == "cpu":
0072
0073 seq, act = geant4.setupDetector(
0074 "Raindrop", "Geant4OpticalTrackerAction"
0075 )
0076 filt = DDG4.Filter(
0077 kernel, "ParticleSelectFilter/OpticalPhotonSelector"
0078 )
0079 filt.particle = "opticalphoton"
0080 seq.adopt(filt)
0081
0082 elif mode == "baseline":
0083
0084 geant4.setupTracker("Raindrop")
0085
0086 elif mode == "gpu":
0087
0088
0089 seq, act = geant4.setupTracker("Raindrop")
0090 filt = DDG4.Filter(kernel, "EnergyDepositMinimumCut/Block")
0091 filt.Cut = 1e12
0092 seq.adopt(filt)
0093
0094
0095 stepping = DDG4.SteppingAction(
0096 kernel, "OpticsSteppingAction/OpticsStep1"
0097 )
0098 stepping.Verbose = 0
0099 kernel.steppingAction().adopt(stepping)
0100
0101 run_action = DDG4.RunAction(kernel, "OpticsRun/OpticsRun1")
0102 run_action.SaveGeometry = False
0103 kernel.runAction().adopt(run_action)
0104
0105 evt_action = DDG4.EventAction(kernel, "OpticsEvent/OpticsEvt1")
0106 evt_action.Verbose = 1
0107 if photon_threshold > 0:
0108 evt_action.PhotonThreshold = photon_threshold
0109 kernel.eventAction().adopt(evt_action)
0110
0111
0112 kernel.NumEvents = num_events
0113 kernel.configure()
0114
0115
0116
0117
0118 if mode in ("baseline", "gpu"):
0119 G4OpticalParameters.Instance().SetCerenkovStackPhotons(False)
0120 G4OpticalParameters.Instance().SetScintStackPhotons(False)
0121
0122 kernel.initialize()
0123
0124
0125
0126 if mode == "cpu":
0127 G4OpticalParameters.Instance().SetBoundaryInvokeSD(True)
0128
0129
0130 t0 = time.perf_counter()
0131 kernel.run()
0132 t1 = time.perf_counter()
0133 wall_s = t1 - t0
0134
0135 kernel.terminate()
0136
0137 result = {
0138 "mode": mode,
0139 "events": num_events,
0140 "wall_s": round(wall_s, 4),
0141 "per_event_ms": round(wall_s / num_events * 1000, 2),
0142 "photon_threshold": photon_threshold,
0143 }
0144 print(f"BENCHMARK_RESULT:{json.dumps(result)}", flush=True)
0145 return result
0146
0147
0148
0149
0150
0151
0152 def _run_subprocess(mode, num_events, photon_threshold=0):
0153 """Run a single mode in a child process, return (result_dict, raw_output)."""
0154 script = os.path.abspath(__file__)
0155 cmd = [sys.executable, script, "--mode", mode, "--events", str(num_events),
0156 "--photon-threshold", str(photon_threshold)]
0157 proc = subprocess.run(
0158 cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
0159 )
0160 output = proc.stdout
0161
0162 if proc.returncode != 0:
0163 print(f"ERROR: subprocess exited with code {proc.returncode}",
0164 file=sys.stderr)
0165 return None, output
0166
0167
0168 result = None
0169 for line in output.splitlines():
0170 if line.startswith("BENCHMARK_RESULT:"):
0171 result = json.loads(line[len("BENCHMARK_RESULT:"):])
0172 break
0173
0174 return result, output
0175
0176
0177 def _parse_gpu_times(output):
0178 """Extract per-event GPU simulate() times from C++ info output."""
0179 times = []
0180 for line in output.splitlines():
0181 m = re.search(r"OPTICKS_GPU_TIME event=(\d+) ms=([\d.]+)", line)
0182 if m:
0183 times.append((int(m.group(1)), float(m.group(2))))
0184 return times
0185
0186
0187 def run_all(num_events, photon_threshold=0):
0188 """Run all three modes and print speedup analysis."""
0189 results = {}
0190 gpu_event_times = []
0191
0192 for mode in ("baseline", "cpu", "gpu"):
0193 pt = photon_threshold if mode == "gpu" else 0
0194 label = {
0195 "baseline": "baseline (no photon tracking)",
0196 "cpu": "cpu (photons tracked on CPU)",
0197 "gpu": "gpu (photons on GPU via eic-opticks)",
0198 }[mode]
0199 extra = f", threshold={pt}" if pt > 0 else ""
0200 print(f"\n{'='*60}")
0201 print(f" {label} -- {num_events} events{extra}")
0202 print(f"{'='*60}")
0203
0204 result, output = _run_subprocess(mode, num_events, pt)
0205
0206 if result is None:
0207 print(f"ERROR: mode={mode} failed. Output:\n{output[-2000:]}")
0208 sys.exit(1)
0209
0210 results[mode] = result
0211
0212
0213 for line in output.splitlines():
0214 if line.startswith("BENCHMARK_RESULT:"):
0215 continue
0216
0217 if any(kw in line for kw in (
0218 "OPTICKS_GPU_TIME", "gensteps", "hits from GPU",
0219 "Detected photons",
0220 )):
0221 print(f" {line.strip()}")
0222
0223 print(f" Wall time: {result['wall_s']:.3f} s "
0224 f"({result['per_event_ms']:.1f} ms/event)")
0225
0226 if mode == "gpu":
0227 gpu_event_times = _parse_gpu_times(output)
0228
0229
0230
0231
0232 T_b = results["baseline"]["wall_s"]
0233 T_c = results["cpu"]["wall_s"]
0234 T_g = results["gpu"]["wall_s"]
0235 cpu_optical = T_c - T_b
0236 gpu_overhead = T_g - T_b
0237
0238 print(f"\n{'='*60}")
0239 print(f" RESULTS ({num_events} events, 10 MeV e- in raindrop)")
0240 print(f"{'='*60}")
0241 print(f"\n {'Mode':<12} {'Total (s)':<12} {'Per event (ms)'}")
0242 print(f" {'-'*40}")
0243 for m in ("baseline", "cpu", "gpu"):
0244 r = results[m]
0245 print(f" {m:<12} {r['wall_s']:<12.3f} {r['per_event_ms']:.1f}")
0246
0247 print(f"\n CPU optical photon tracking: {cpu_optical:.3f} s"
0248 f" ({cpu_optical / num_events * 1000:.1f} ms/event)")
0249 print(f" GPU total overhead: {gpu_overhead:.3f} s"
0250 f" ({gpu_overhead / num_events * 1000:.1f} ms/event)")
0251
0252 if gpu_event_times:
0253 all_ms = [t for _, t in gpu_event_times]
0254 total_ms = sum(all_ms)
0255 print(f"\n GPU simulate() per event:")
0256 for evt_id, ms in gpu_event_times:
0257 tag = " (includes OptiX warmup)" if evt_id == 0 else ""
0258 print(f" event {evt_id:>3d}: {ms:>8.1f} ms{tag}")
0259 print(f" {'total':>9s}: {total_ms:>8.1f} ms")
0260
0261 if len(all_ms) > 1:
0262 warm_ms = all_ms[1:]
0263 avg_warm = sum(warm_ms) / len(warm_ms)
0264 print(f" avg (excl. first): {avg_warm:>5.1f} ms")
0265
0266 if total_ms > 0:
0267 speedup = cpu_optical / (total_ms / 1000)
0268 print(f"\n >>> SPEEDUP (CPU optical / GPU simulate): {speedup:.1f}x <<<")
0269
0270 if len(all_ms) > 1 and avg_warm > 0:
0271 speedup_warm = (cpu_optical / num_events * 1000) / avg_warm
0272 print(f" >>> SPEEDUP (per-event, excl. warmup): {speedup_warm:.1f}x <<<")
0273
0274 if gpu_overhead > 0:
0275 speedup_total = cpu_optical / gpu_overhead
0276 print(f" >>> SPEEDUP (CPU optical / GPU overhead): {speedup_total:.1f}x <<<")
0277
0278 print(f"{'='*60}\n")
0279
0280
0281
0282 if __name__ == "__main__":
0283 parser = argparse.ArgumentParser(
0284 description="Benchmark GPU vs CPU optical photon speedup"
0285 )
0286 parser.add_argument(
0287 "--mode",
0288 choices=["cpu", "baseline", "gpu", "all"],
0289 default="all",
0290 help="cpu=photons on CPU, baseline=no photon tracking, "
0291 "gpu=photons on GPU, all=run all three",
0292 )
0293 parser.add_argument(
0294 "--events", type=int, default=10, help="number of events (default: 10)"
0295 )
0296 parser.add_argument(
0297 "--photon-threshold", type=int, default=0,
0298 help="GPU batch mode: accumulate photons across events and simulate "
0299 "when this count is reached (default: 0 = per-event)",
0300 )
0301 args = parser.parse_args()
0302
0303 if args.mode == "all":
0304 run_all(args.events, args.photon_threshold)
0305 else:
0306 run_single_mode(args.mode, args.events, args.photon_threshold)