Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-27 07:24:18

0001 # This file is part of the ACTS project.
0002 #
0003 # Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 #
0005 # This Source Code Form is subject to the terms of the Mozilla Public
0006 # License, v. 2.0. If a copy of the MPL was not distributed with this
0007 # file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008 
0009 # detray includes
0010 import plotting
0011 
0012 # python includes
0013 from collections import namedtuple
0014 import json
0015 import itertools
0016 import math
0017 import numpy as np
0018 import os
0019 import pandas as pd
0020 import sys
0021 
# Plot types for benchmarks: one slot per kind of plot, every slot is unset
# (None) until the corresponding plot has been created
benchmark_plots = namedtuple(
    "benchmark_plots",
    ["latency", "throughput", "weak_scaling", "strong_scaling"],
    defaults=[None, None, None, None],
)

# How to label plots
label_data = namedtuple("label_data", ["title", "label", "x_axis", "y_axis"])

# Define labels for google benchmark data collections
# (keys match the field names of 'benchmark_plots')
label_dict = {
    "latency": label_data(
        "Propagation Latency", "", "No. tracks", r"t $[\mathrm{ms}]$"
    ),
    "throughput": label_data(
        "Propagation Throughput", "", "No. tracks", r"Prop. rate $[\mathrm{MHz}]$"
    ),
    "weak_scaling": label_data(
        "Propagation Weak Scaling", "", "No. threads", "Efficiency"
    ),
    "strong_scaling": label_data(
        "Propagation Strong Scaling", "", "No. threads", "Speedup"
    ),
}

# Common options
# Legend location handed to 'plotting.legend_options' when a plot is created
ldg_loc = "upper left"
0050 
0051 
def read_benchmark_data(logging, input_path, benchmark_file):
    """Read google benchmark data from a json file.

    Args:
        logging: Logger-like object providing ``debug`` and ``error``.
        input_path: Directory that contains the benchmark file.
        benchmark_file: Name of the json file inside ``input_path``.

    Returns:
        A tuple ``(context, data)``: the "context" entry of the json document
        and a pandas data frame built from its "benchmarks" entry. If the file
        does not exist, an error is logged and ``(None, None)`` is returned.
    """
    file_path = os.path.join(input_path, benchmark_file)

    # A missing file has to be caught explicitly: 'open' raises instead of
    # falling through, so the error report must live in the handler
    try:
        with open(file_path, "r") as file:
            logging.debug(f"Reading file '{file_path}'")

            results = json.load(file)
    except FileNotFoundError:
        logging.error(f"Could not find file: {benchmark_file}")

        return None, None

    context = results["context"]
    data = pd.DataFrame(results["benchmarks"])

    return context, data
0071 
0072 
def add_track_multiplicity_column(df):
    """Add a column 'x' to the data frame that contains the number of tracks.

    The number is parsed from the 'run_name' column: every name is split at
    '_' and the third token from the end is converted to an integer. The data
    frame is modified in place and nothing is returned.
    """
    # Only the first benchmark case name is checked for the expected format
    first_case_name = str(df["run_name"][0])
    assert (
        "_TRACKS" in first_case_name
    ), "Benchmark case name not correctly formatted: (BM_PROPAGATION_<detector name>_<#tracks>_TRACKS)"

    def track_multiplicity(case_name):
        # The number of tracks sits at index -3 of the '_'-separated name
        tokens = case_name.split("_")
        return int(tokens[-3])

    # Add new column based on benchmark case name
    df["x"] = df["run_name"].apply(track_multiplicity)
0087 
0088 
def prepare_benchmark_data(logging, input_dir, file):
    """Read the benchmark data and prepare it for plotting.

    Args:
        logging: Logger-like object, forwarded to 'read_benchmark_data' and
            used to report a failed read.
        input_dir: Directory that contains the benchmark file.
        file: Name of the google benchmark json file.

    Returns:
        A tuple ``(context, data)`` where ``data`` is the benchmark data frame
        with an added track multiplicity column 'x', the 'real_time' and
        'cpu_time' columns converted to milliseconds and the
        'TracksPropagated' column divided by 10^6.

    Exits the interpreter with status 1 if the file could not be read.
    """
    # Convert benchmark timings to 'ms'. Google benchmark reports microseconds
    # as "us"; the former misspelling "um" is kept as an alias so that no
    # previously accepted input is rejected
    unit_conversion = {
        "ns": 10**-6,
        "us": 10**-3,
        "um": 10**-3,
        "ms": 1,
        "s": 10**3,
    }

    # Read the data part into a pandas frame
    context, data = read_benchmark_data(logging, input_dir, file)

    if context is None or data is None:
        logging.warning(f"Failed to read data in file: {file}")
        sys.exit(1)

    # Add the number of tracks per benchmark case as new column 'x'
    # A column called 'x' is expected by the 'plot_benchmark' method
    add_track_multiplicity_column(data)

    # Convert timings to 'ms': the unit of the first entry is applied to all
    to_milliseconds = unit_conversion[data["time_unit"][0]]

    data["real_time"] = data["real_time"] * to_milliseconds
    data["cpu_time"] = data["cpu_time"] * to_milliseconds

    # Convert from Hz to MHz
    data["TracksPropagated"] = data["TracksPropagated"] / 1000000

    return context, data
0119 
0120 
def filter_benchmark_data(df, data_type):
    """Filter the data frame for a specific type of data.

    Selects the rows whose 'aggregate_name' is "mean" (values and 'x' column)
    and the rows whose 'aggregate_name' is "stddev" (errors) by means of
    'plotting.filter_data'.

    Args:
        df: Benchmark data frame that provides the columns 'x',
            'aggregate_name' and ``data_type``.
        data_type: Name of the column to extract.

    Returns:
        A tuple ``(n_tracks, data, err)``: the 'x' values and the
        ``data_type`` values of the "mean" rows, followed by the result of
        filtering ``data_type`` for the "stddev" rows.
    """
    assert len(df["x"]) != 0, "Data frame has to provide column 'x'"
    assert len(df[data_type]) != 0, f"Data frame has to provide column '{data_type}'"

    # Row selectors on the google benchmark aggregate type
    def is_mean(data_frame):
        return data_frame["aggregate_name"] == "mean"

    def is_stddev(data_frame):
        return data_frame["aggregate_name"] == "stddev"

    # Central values together with the corresponding x values
    values, n_tracks = plotting.filter_data(
        data=df, filter=is_mean, variables=[data_type, "x"]
    )

    # Spread of the values
    errors = plotting.filter_data(data=df, filter=is_stddev, variables=[data_type])

    return n_tracks, values, errors
0140 
0141 
def plot_benchmark_case(
    plot_factory,
    x,
    y,
    label,
    y_error=None,
    plot_type="latency",
    title="",
    marker=".",
    plot=None,
    xaxis_format=None,
    yaxis_format=None,
    log_scale=10,
):
    """Plot one benchmark data series, e.g. latency or throughput.

    Creates a new plot if ``plot`` is None, otherwise the data is added to
    the given plot.

    Args:
        plot_factory: Object providing ``graph`` and ``add_graph``.
        x: Values for the x axis, also used as x tick positions of a new plot.
        y: Values for the y axis.
        label: Legend entry of the data series.
        y_error: Errors on ``y``. Defaults to an empty list.
        plot_type: Key into 'label_dict' that selects the axis labels of a
            new plot.
        title: Title of a new plot.
        marker: Marker style of the data series.
        plot: Existing plot to add the data to or None to create a new one.
        xaxis_format: Label format of the x axis of a new plot.
        yaxis_format: Label format of the y axis of a new plot.
        log_scale: Value passed as ``log_scale`` to both axes of a new plot.

    Returns:
        The result of ``plot_factory.graph`` for a new plot, otherwise the
        result of ``plot_factory.add_graph``.
    """
    # Fresh list per call: a list literal as default argument would be shared
    # between all calls and with the plot factory
    if y_error is None:
        y_error = []

    if plot is None:
        # Create new plot
        lgd_ops = plotting.legend_options(
            loc=ldg_loc, horiz_anchor=1.0, vert_anchor=1.02
        )

        labels = label_dict[plot_type]
        x_axis_opts = plotting.axis_options(
            label=labels.x_axis,
            log_scale=log_scale,
            tick_positions=x,
            label_format=xaxis_format,
        )
        y_axis_opts = plotting.axis_options(
            label=labels.y_axis, log_scale=log_scale, label_format=yaxis_format
        )

        # Plot the data series against the given x values
        plot_data = plot_factory.graph(
            x=x,
            y=y,
            y_errors=y_error,
            x_axis=x_axis_opts,
            y_axis=y_axis_opts,
            title=title,
            label=label,
            lgd_ops=lgd_ops,
            marker=marker,
            figsize=(18, 8),
        )
    else:
        # Add new data to existing plot
        plot_data = plot_factory.add_graph(
            plot=plot,
            x=x,
            y=y,
            y_errors=y_error,
            label=label,
            marker=marker,
            color=None,
        )

    return plot_data
0205 
0206 
def plot_benchmark_data(
    logging,
    input_dir,
    det_name,
    file_list,
    label_list,
    title,
    plot_series_name,
    plot_factory,
    out_format,
):
    """Plot the data of all benchmark files given in 'file_list'.

    The first file creates one latency and one throughput plot, the data of
    every further file is added to these two plots. Both plots are written to
    disk afterwards.

    Args:
        logging: Logger-like object, forwarded to 'prepare_benchmark_data'.
        input_dir: Directory that contains the benchmark files.
        det_name: Detector name, used as prefix of the output file names.
        file_list: Names of the benchmark files to compare.
        label_list: Legend entries, one per file in ``file_list``.
        title: Title of the plots.
        plot_series_name: Name of the plot series, part of the output file
            names.
        plot_factory: Object used to create and write the plots.
        out_format: Output format passed to ``plot_factory.write_plot``.
    """
    # Cycle through marker styles per plot
    markers = itertools.cycle(["o", "x", "*", "v", "s", "^", "<", ">"])

    # Save the different plots per hardware backend
    plots = benchmark_plots()

    # Go through all benchmark data files in the list and make a comparison plot
    for idx, file in enumerate(file_list):
        # Get the data for the next benchmark case
        _, frame = prepare_benchmark_data(logging, input_dir, file)
        marker = next(markers)

        n_tracks, latency, latency_sigma = filter_benchmark_data(frame, "real_time")
        _, throughput, throughput_sigma = filter_benchmark_data(
            frame, "TracksPropagated"
        )

        # Data series per plot type: latency is always handled first
        series = {
            "latency": (latency, latency_sigma),
            "throughput": (throughput, throughput_sigma),
        }

        # Arguments that are identical for both plot types
        shared = dict(
            plot_factory=plot_factory,
            label=label_list[idx],
            x=n_tracks,
            marker=marker,
        )

        if idx == 0:
            # Initialize plots: plot the data against the number of tracks
            created = {}
            for kind, (values, sigma) in series.items():
                created[kind] = plot_benchmark_case(
                    plot_type=kind, y=values, y_error=sigma, title=title, **shared
                )

            plots = benchmark_plots(**created)
            continue

        # Add new data to plots
        for kind, (values, sigma) in series.items():
            plot_benchmark_case(
                plot_type=kind,
                y=values,
                y_error=sigma,
                plot=getattr(plots, kind),
                **shared,
            )

    # Write to disk
    latency_name = f"{det_name}_{plot_series_name}_latency"
    plot_factory.write_plot(plots.latency, latency_name, out_format)

    throughput_name = f"{det_name}_{plot_series_name}_throughput"
    plot_factory.write_plot(plots.throughput, throughput_name, out_format)
0297 
0298 
def plot_scaling_data(
    logging,
    input_dir,
    det_name,
    file_list,
    label_list,
    title,
    plot_factory,
    out_format,
    n_threads,
    n_cores,
):
    """Plot weak and strong scaling data.

    For every benchmark file the 'real_time' data is split: the first
    ``len(n_threads)`` entries are taken as weak scaling data, the remaining
    entries as strong scaling data. Each part is divided into its own first
    entry to obtain the weak scaling efficiency and the strong scaling
    speedup. Both plots additionally get an "ideal scaling" graph and a
    vertical line at ``n_cores`` before they are written to disk.

    Args:
        logging: Logger-like object, forwarded to 'prepare_benchmark_data'
            and used to warn about an empty file list.
        input_dir: Directory that contains the benchmark files.
        det_name: Detector name, used as prefix of the output file names.
        file_list: Names of the benchmark files to compare.
        label_list: Legend entries, one per file in ``file_list``.
        title: Title of the plots.
        plot_factory: Object used to create, extend and write the plots.
        out_format: Output format passed to ``plot_factory.write_plot``.
        n_threads: Thread counts, used as x values of the plots.
        n_cores: x position of the "no. cores" vertical line.
    """
    # Without input there are no plots to decorate or to write: the code
    # after the loop needs the plots and the efficiency of at least one file
    file_list = list(file_list)
    if not file_list:
        logging.warning("No benchmark files given: Skipping scaling plots")
        return

    # Gaussian error propagation x/y_i
    def ratio_error(x, y, err_x, err_y):
        return np.sqrt(
            np.power(err_x / y, 2) + np.power((x * err_y) / np.power(y, 2), 2)
        )

    # Cycle through marker styles per plot
    marker_styles = ["o", "x", "*", "v", "s", "^", "<", ">"]
    marker_style_cycle = itertools.cycle(marker_styles)

    # Save the different plots per algebra plugin
    plots = benchmark_plots()

    # Number of leading entries that belong to the weak scaling data
    n_weak = len(n_threads)

    # Go through all benchmark data files in the list and make a comparison plot
    for i, file in enumerate(file_list):
        # Get the data for the next benchmark case
        _, df = prepare_benchmark_data(logging, input_dir, file)
        marker = next(marker_style_cycle)

        # Filter the relevant data
        _, latency, latency_sigma = filter_benchmark_data(df, "real_time")

        # Split the data set
        weak_sc_latency = latency[:n_weak]
        strong_sc_latency = latency[n_weak:]

        # Calculate speedups and efficiencies relative to the first entry
        weak_sc_efficiency = weak_sc_latency[0] / weak_sc_latency
        strong_sc_speedup = strong_sc_latency[0] / strong_sc_latency

        weak_sc_stddev = latency_sigma[:n_weak]
        strong_sc_stddev = latency_sigma[n_weak:]

        # Propagate the latency errors to the ratios
        weak_sc_stddev = ratio_error(
            weak_sc_latency[0], weak_sc_latency, weak_sc_stddev[0], weak_sc_stddev
        )
        strong_sc_stddev = ratio_error(
            strong_sc_latency[0],
            strong_sc_latency,
            strong_sc_stddev[0],
            strong_sc_stddev,
        )

        # Initialize plots
        if i == 0:
            # Plot the data against the number of threads
            weak_sc_plot = plot_benchmark_case(
                plot_factory=plot_factory,
                plot_type="weak_scaling",
                label=label_list[i],
                x=n_threads,
                y=weak_sc_efficiency,
                y_error=weak_sc_stddev,
                marker=marker,
                title=title,
                log_scale=2,
                xaxis_format="{x:3.0f}",
                yaxis_format="{x:3.2f}",
            )

            strong_sc_plot = plot_benchmark_case(
                plot_factory=plot_factory,
                plot_type="strong_scaling",
                label=label_list[i],
                x=n_threads,
                y=strong_sc_speedup,
                y_error=strong_sc_stddev,
                marker=marker,
                title=title,
                log_scale=2,
                xaxis_format="{x:3.0f}",
                yaxis_format="{x:3.0f}",
            )

            plots = benchmark_plots(
                weak_scaling=weak_sc_plot, strong_scaling=strong_sc_plot
            )

        # Add new data to plots
        else:
            plot_benchmark_case(
                plot_factory=plot_factory,
                plot_type="weak_scaling",
                label=label_list[i],
                x=n_threads,
                y=weak_sc_efficiency,
                y_error=weak_sc_stddev,
                marker=marker,
                plot=plots.weak_scaling,
            )

            plot_benchmark_case(
                plot_factory=plot_factory,
                plot_type="strong_scaling",
                label=label_list[i],
                x=n_threads,
                y=strong_sc_speedup,
                y_error=strong_sc_stddev,
                marker=marker,
                plot=plots.strong_scaling,
            )

    # Ideal weak scaling
    plot_factory.add_graph(
        plot=plots.weak_scaling,
        x=n_threads,
        y=[1] * len(n_threads),
        marker="",
        color="r",
        label="ideal scaling",
    )

    # The y value is derived from the efficiency of the last file in the list
    plot_factory.vertical_line(
        plot_data=plots.weak_scaling,
        x=n_cores,
        y=2 * min(weak_sc_efficiency),
        color="black",
        label="no. cores",
    )

    # Ideal strong scaling
    plot_factory.add_graph(
        plot=plots.strong_scaling,
        x=n_threads,
        y=n_threads,
        marker="",
        color="r",
        label="ideal scaling",
    )

    plot_factory.vertical_line(
        plot_data=plots.strong_scaling, x=n_cores, color="black", label="no. cores"
    )

    # Write to disk
    plot_factory.write_plot(plots.weak_scaling, f"{det_name}_weak_scaling", out_format)

    plot_factory.write_plot(
        plots.strong_scaling, f"{det_name}_strong_scaling", out_format
    )