File indexing completed on 2026-05-27 07:24:18
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 import plotting
0011
0012
0013 from collections import namedtuple
0014 import json
0015 import itertools
0016 import math
0017 import numpy as np
0018 import os
0019 import pandas as pd
0020 import sys
0021
0022
# Bundle of the plot handles that belong to one benchmark series. Every field
# defaults to None, so a bundle can be created before any plot exists.
benchmark_plots = namedtuple(
    "benchmark_plots",
    ["latency", "throughput", "weak_scaling", "strong_scaling"],
    defaults=(None,) * 4,
)
0028
0029
# Static text of one plot type: title, label and the captions of both axes
label_data = namedtuple("label_data", "title label x_axis y_axis")


# Plot texts, keyed by the plot type name
label_dict = {
    "latency": label_data(
        "Propagation Latency", "", "No. tracks", r"t $[\mathrm{ms}]$"
    ),
    "throughput": label_data(
        "Propagation Throughput", "", "No. tracks", r"Prop. rate $[\mathrm{MHz}]$"
    ),
    "weak_scaling": label_data(
        "Propagation Weak Scaling", "", "No. threads", "Efficiency"
    ),
    "strong_scaling": label_data(
        "Propagation Strong Scaling", "", "No. threads", "Speedup"
    ),
}


# Location keyword of the plot legend
ldg_loc = "upper left"
0050
0051
def read_benchmark_data(logging, input_path, benchmark_file):
    """Read google benchmark data from a JSON file.

    Args:
        logging: logger-like object; 'debug' and 'error' are called on it.
        input_path: directory that contains the benchmark file.
        benchmark_file: name of the JSON file inside 'input_path'.

    Returns:
        Tuple '(context, data)': the "context" entry of the JSON document
        and its "benchmarks" entry converted to a pandas data frame. If the
        file does not exist, an error is logged and '(None, None)' is
        returned.
    """
    file_path = os.path.join(input_path, benchmark_file)

    try:
        with open(file_path, "r") as file:
            logging.debug(f"Reading file '{file_path}'")

            results = json.load(file)
    except FileNotFoundError:
        # 'open' raises for a missing file, so the failure has to be
        # reported from here for callers to receive the (None, None) result
        logging.error(f"Could not find file: {benchmark_file}")

        return None, None

    context = results["context"]
    data = pd.DataFrame(results["benchmarks"])

    return context, data
0071
0072
def add_track_multiplicity_column(df):
    """Add a column 'x' to the data frame that contains the number of tracks.

    The number is parsed from the third to last '_'-separated token of every
    entry in the 'run_name' column. The data frame is modified in place and
    nothing is returned.

    Raises:
        AssertionError: if the first run name does not contain '_TRACKS'.
    """
    assert "_TRACKS" in str(df["run_name"][0]), (
        "Benchmark case name not correctly formatted: "
        "(BM_PROPAGATION_<detector name>_<#tracks>_TRACKS)"
    )

    def track_multiplicity(run_name):
        tokens = run_name.split("_")
        return int(tokens[-3])

    df["x"] = df["run_name"].apply(track_multiplicity)
0087
0088
def prepare_benchmark_data(logging, input_dir, file):
    """Read the benchmark data and prepare it for plotting.

    Adds the track multiplicity column 'x', converts the 'real_time' and
    'cpu_time' columns to milliseconds and divides the 'TracksPropagated'
    column by 10^6.

    Args:
        logging: logger-like object; 'warning' is called on it here and it
            is passed on to 'read_benchmark_data'.
        input_dir: directory that contains the benchmark file.
        file: name of the benchmark JSON file.

    Returns:
        Tuple '(context, data)' of the benchmark context and the prepared
        pandas data frame.

    Raises:
        SystemExit: if the benchmark file could not be read.
        KeyError: if the time unit of the benchmark is not known.
    """
    # Factors that convert a timing in the given unit to 'ms'. Google
    # benchmark reports microseconds as "us"; "um" is only kept so that
    # previously accepted input keeps working.
    unit_conversion = {
        "ns": 10**-6,
        "us": 10**-3,
        "um": 10**-3,
        "ms": 1,
        "s": 10**3,
    }

    context, data = read_benchmark_data(logging, input_dir, file)

    if context is None or data is None:
        logging.warning(f"Failed to read data in file: {file}")
        sys.exit(1)

    # The plotting code of this file reads the track multiplicity from 'x'
    add_track_multiplicity_column(data)

    # The time unit of the first entry is applied to the entire frame
    bench_time_unit = data["time_unit"][0]
    to_milliseconds = unit_conversion[bench_time_unit]

    data["real_time"] = data["real_time"] * to_milliseconds
    data["cpu_time"] = data["cpu_time"] * to_milliseconds

    # Scale by 10^-6 (presumably Hz -> MHz, matching the throughput axis
    # caption; confirm against the benchmark counter definition)
    data["TracksPropagated"] = data["TracksPropagated"] / 1000000

    return context, data
0119
0120
def filter_benchmark_data(df, data_type):
    """Filter the data frame for a specific type of data.

    Selects the rows whose 'aggregate_name' is "mean" as data points and
    the rows whose 'aggregate_name' is "stddev" as their errors.

    Args:
        df: data frame with the columns 'x', 'aggregate_name' and
            'data_type'.
        data_type: name of the column to extract.

    Returns:
        Tuple '(n_tracks, data, err)' as obtained from
        'plotting.filter_data': the 'x' values and 'data_type' values of the
        "mean" rows, followed by the result of filtering 'data_type' for the
        "stddev" rows.

    Raises:
        AssertionError: if column 'x' or column 'data_type' is empty.
    """
    assert len(df["x"]) != 0, "Data frame has to provide column 'x'"
    assert len(df[data_type]) != 0, f"Data frame has to provide column '{data_type}'"

    def is_mean(data_frame):
        return data_frame["aggregate_name"] == "mean"

    def is_stddev(data_frame):
        return data_frame["aggregate_name"] == "stddev"

    # Data points and the track multiplicities they belong to
    data, n_tracks = plotting.filter_data(
        data=df, filter=is_mean, variables=[data_type, "x"]
    )

    # Errors of the data points
    err = plotting.filter_data(data=df, filter=is_stddev, variables=[data_type])

    return n_tracks, data, err
0140
0141
def plot_benchmark_case(
    plot_factory,
    x,
    y,
    label,
    y_error=None,
    plot_type="latency",
    title="",
    marker=".",
    plot=None,
    xaxis_format=None,
    yaxis_format=None,
    log_scale=10,
):
    """Plot one benchmark data series (e.g. latency or throughput).

    If no plot is given, a new one is created with 'plot_factory.graph'.
    Otherwise the series is added to the given plot with
    'plot_factory.add_graph'; 'plot_type', 'title', 'xaxis_format',
    'yaxis_format' and 'log_scale' are not used in that case.

    Args:
        plot_factory: object that provides 'graph' and 'add_graph'.
        x: x values of the series, also used as x axis tick positions of a
            new plot.
        y: y values of the series.
        label: label of the series.
        y_error: errors on the y values; None (default) means an empty list.
        plot_type: key into 'label_dict' that selects the axis captions.
        title: title of a new plot.
        marker: marker style of the series.
        plot: existing plot to add the series to, or None to create one.
        xaxis_format: label format of the x axis of a new plot.
        yaxis_format: label format of the y axis of a new plot.
        log_scale: log scale setting handed to both axes of a new plot.

    Returns:
        The return value of 'plot_factory.graph' or 'plot_factory.add_graph'.
    """
    # Use a fresh list per call: a mutable default would be shared between
    # all calls and with whatever the plot factory does to it
    if y_error is None:
        y_error = []

    if plot is not None:
        # Add the series to the existing plot
        return plot_factory.add_graph(
            plot=plot,
            x=x,
            y=y,
            y_errors=y_error,
            label=label,
            marker=marker,
            color=None,
        )

    lgd_ops = plotting.legend_options(
        loc=ldg_loc, horiz_anchor=1.0, vert_anchor=1.02
    )

    labels = label_dict[plot_type]
    x_axis_opts = plotting.axis_options(
        label=labels.x_axis,
        log_scale=log_scale,
        tick_positions=x,
        label_format=xaxis_format,
    )
    y_axis_opts = plotting.axis_options(
        label=labels.y_axis, log_scale=log_scale, label_format=yaxis_format
    )

    # Create a new plot for the series
    return plot_factory.graph(
        x=x,
        y=y,
        y_errors=y_error,
        x_axis=x_axis_opts,
        y_axis=y_axis_opts,
        title=title,
        label=label,
        lgd_ops=lgd_ops,
        marker=marker,
        figsize=(18, 8),
    )
0205
0206
def plot_benchmark_data(
    logging,
    input_dir,
    det_name,
    file_list,
    label_list,
    title,
    plot_series_name,
    plot_factory,
    out_format,
):
    """Plot the data of all benchmark files given in 'file_list'.

    The first file creates the latency and the throughput plot, every
    further file is added to these plots. Both plots are written with
    'plot_factory.write_plot' once all files have been processed.

    Args:
        logging: logger-like object passed on to the data preparation.
        input_dir: directory that contains the benchmark files.
        det_name: detector name, used as prefix of the output file names.
        file_list: names of the benchmark files to plot.
        label_list: labels of the data series, one per entry of 'file_list'.
        title: title of the plots.
        plot_series_name: name of the plot series, part of the output names.
        plot_factory: object that creates, extends and writes the plots.
        out_format: output format handed to 'plot_factory.write_plot'.
    """
    # Give every data series its own marker, starting over when exhausted
    marker_cycle = itertools.cycle(["o", "x", "*", "v", "s", "^", "<", ">"])

    plots = benchmark_plots()

    for index, file in enumerate(file_list):
        _, df = prepare_benchmark_data(logging, input_dir, file)
        marker = next(marker_cycle)

        n_tracks, latency, latency_sigma = filter_benchmark_data(df, "real_time")
        _, throughput, throughput_sigma = filter_benchmark_data(df, "TracksPropagated")

        # Arguments that are the same for both plots of this file
        shared_args = {
            "plot_factory": plot_factory,
            "label": label_list[index],
            "x": n_tracks,
            "marker": marker,
        }
        latency_args = dict(
            shared_args, plot_type="latency", y=latency, y_error=latency_sigma
        )
        throughput_args = dict(
            shared_args,
            plot_type="throughput",
            y=throughput,
            y_error=throughput_sigma,
        )

        if index == 0:
            # The first file creates the plots
            plots = benchmark_plots(
                latency=plot_benchmark_case(title=title, **latency_args),
                throughput=plot_benchmark_case(title=title, **throughput_args),
            )
        else:
            # All other files are added to the existing plots
            plot_benchmark_case(plot=plots.latency, **latency_args)
            plot_benchmark_case(plot=plots.throughput, **throughput_args)

    # Write the finished plots
    output_prefix = f"{det_name}_{plot_series_name}"
    plot_factory.write_plot(plots.latency, f"{output_prefix}_latency", out_format)
    plot_factory.write_plot(
        plots.throughput, f"{output_prefix}_throughput", out_format
    )
0297
0298
def plot_scaling_data(
    logging,
    input_dir,
    det_name,
    file_list,
    label_list,
    title,
    plot_factory,
    out_format,
    n_threads,
    n_cores,
):
    """Plot weak and strong scaling data.

    The latency values of every file are split at 'len(n_threads)': the
    leading entries are used for the weak scaling plot, the remaining ones
    for the strong scaling plot. Both are plotted as the ratio of the first
    latency to each latency (weak scaling efficiency, strong scaling
    speedup). After all files have been processed, an "ideal scaling" graph
    and a vertical line at 'n_cores' are added to both plots and the plots
    are written with 'plot_factory.write_plot'.

    Args:
        logging: logger-like object passed on to the data preparation.
        input_dir: directory that contains the benchmark files.
        det_name: detector name, used as prefix of the output file names.
        file_list: names of the benchmark files to plot.
        label_list: labels of the data series, one per entry of 'file_list'.
        title: title of the plots.
        plot_factory: object that creates, extends and writes the plots.
        out_format: output format handed to 'plot_factory.write_plot'.
        n_threads: thread counts, used as x values of both plots.
        n_cores: x position of the vertical "no. cores" line.
    """

    def ratio_error(x, y, err_x, err_y):
        # First-order error propagation for the ratio x / y
        numerator_term = np.power(err_x / y, 2)
        denominator_term = np.power((x * err_y) / np.power(y, 2), 2)
        return np.sqrt(numerator_term + denominator_term)

    # Give every data series its own marker, starting over when exhausted
    marker_cycle = itertools.cycle(["o", "x", "*", "v", "s", "^", "<", ">"])

    plots = benchmark_plots()

    for index, file in enumerate(file_list):
        _, df = prepare_benchmark_data(logging, input_dir, file)
        marker = next(marker_cycle)

        _, latency, latency_sigma = filter_benchmark_data(df, "real_time")

        # Leading entries: weak scaling, remaining entries: strong scaling
        split = len(n_threads)
        weak_latency, strong_latency = latency[:split], latency[split:]

        # Ratio of the first measurement to every measurement
        weak_efficiency = weak_latency[0] / weak_latency
        strong_speedup = strong_latency[0] / strong_latency

        weak_sigma, strong_sigma = latency_sigma[:split], latency_sigma[split:]

        # Errors on the ratios
        weak_error = ratio_error(
            weak_latency[0], weak_latency, weak_sigma[0], weak_sigma
        )
        strong_error = ratio_error(
            strong_latency[0], strong_latency, strong_sigma[0], strong_sigma
        )

        # Arguments that are the same for both plots of this file
        shared_args = {
            "plot_factory": plot_factory,
            "label": label_list[index],
            "x": n_threads,
            "marker": marker,
        }
        weak_args = dict(
            shared_args,
            plot_type="weak_scaling",
            y=weak_efficiency,
            y_error=weak_error,
        )
        strong_args = dict(
            shared_args,
            plot_type="strong_scaling",
            y=strong_speedup,
            y_error=strong_error,
        )

        if index == 0:
            # The first file creates the plots
            new_plot_args = {
                "title": title,
                "log_scale": 2,
                "xaxis_format": "{x:3.0f}",
            }
            plots = benchmark_plots(
                weak_scaling=plot_benchmark_case(
                    yaxis_format="{x:3.2f}", **new_plot_args, **weak_args
                ),
                strong_scaling=plot_benchmark_case(
                    yaxis_format="{x:3.0f}", **new_plot_args, **strong_args
                ),
            )
        else:
            # All other files are added to the existing plots
            plot_benchmark_case(plot=plots.weak_scaling, **weak_args)
            plot_benchmark_case(plot=plots.strong_scaling, **strong_args)

    # Weak scaling: constant reference at 1 and the number of cores
    plot_factory.add_graph(
        plot=plots.weak_scaling,
        x=n_threads,
        y=[1] * len(n_threads),
        marker="",
        color="r",
        label="ideal scaling",
    )

    # The 'y' value is derived from the efficiency of the last file
    plot_factory.vertical_line(
        plot_data=plots.weak_scaling,
        x=n_cores,
        y=2 * min(weak_efficiency),
        color="black",
        label="no. cores",
    )

    # Strong scaling: reference y = x and the number of cores
    plot_factory.add_graph(
        plot=plots.strong_scaling,
        x=n_threads,
        y=n_threads,
        marker="",
        color="r",
        label="ideal scaling",
    )

    plot_factory.vertical_line(
        plot_data=plots.strong_scaling, x=n_cores, color="black", label="no. cores"
    )

    # Write the finished plots
    plot_factory.write_plot(plots.weak_scaling, f"{det_name}_weak_scaling", out_format)
    plot_factory.write_plot(
        plots.strong_scaling, f"{det_name}_strong_scaling", out_format
    )
0452 )