import glob

import pandas as pd

from ambiguity_solver_network import prepareDataSet


def readDataSet(CKS_files: list[str]) -> list[pd.DataFrame]:
    """Read the dataset from the different files, remove the pure duplicate tracks, and prepare each event.

    @param[in] CKS_files: list of paths to the track CSV files (usually one file per event)
    @return: list of DataFrames, one per event, each ordered by truth particle ID
    """
    data = []
    for f in CKS_files:
        datafile = pd.read_csv(f)
        datafile = prepareDataSet(datafile)
        data.append(datafile)
    return data


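# Collect the per-event track CSV files: the raw CKF tracks, the tracks kept by
# the greedy ambiguity solver, and the tracks kept by the ML-based solver.
# The [0-9][0-9] pattern matches event numbers 00-99.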
CKF_files_track = sorted(
    glob.glob("odd_output/event0000000[0-9][0-9]-tracks_ckf.csv")
)
CKF_files_resolved = sorted(
    glob.glob("odd_output/event0000000[0-9][0-9]-tracks_ambi.csv")
)
ML_files_resolved = sorted(
    glob.glob("odd_output/event0000000[0-9][0-9]-tracks_ambiML.csv")
)

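# Read and prepare the per-event DataFrames. The same CKF tracks are loaded
# twice, once for the greedy comparison and once for the ML comparison.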
data_track = readDataSet(CKF_files_track)
data_ML_track = readDataSet(CKF_files_track)
data_resolved = readDataSet(CKF_files_resolved)
data_ML_resolved = readDataSet(ML_files_resolved)


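# Counters accumulated over all events: truth categories for the raw CKF
# tracks, then reconstruction counts for the greedy and ML solvers.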
nb_part = 0
nb_track = 0
nb_fake = 0
nb_duplicate = 0

nb_good_match = 0
nb_reco_part = 0
nb_reco_fake = 0
nb_reco_duplicate = 0
nb_reco_track = 0

nb_good_match_ML = 0
nb_reco_part_ML = 0
nb_reco_fake_ML = 0
nb_reco_duplicate_ML = 0
nb_reco_track_ML = 0


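# For each event, count the good/duplicate/fake CKF tracks, then check which
# good tracks survive the greedy solver by matching on all track parameters.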
for trackEvent, resolvedEvent in zip(data_track, data_resolved):
    nb_part += trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"].shape[0]
    nb_track += trackEvent.shape[0]
    nb_fake += trackEvent.loc[trackEvent["good/duplicate/fake"] == "fake"].shape[0]
    nb_duplicate += trackEvent.loc[
        trackEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]

    # Match the good CKF tracks against the resolved tracks; the "exists"
    # indicator marks whether a good track is also present in the resolved set.
    merged = pd.merge(
        trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"],
        resolvedEvent,
        on=[
            "particleId",
            "nStates",
            "nMeasurements",
            "nOutliers",
            "nHoles",
            "ndf",
            "chi2/ndf",
            "good/duplicate/fake",
        ],
        how="left",
        indicator="exists",
    )

    merged["exists"] = merged["exists"] == "both"
    # Note: this file is overwritten on every iteration, so only the last
    # event's merge result is kept on disk.
    merged.to_csv(path_or_buf="merged.csv")

    nb_good_match += merged.loc[merged["exists"]].shape[0]
    nb_reco_fake += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "fake"
    ].shape[0]
    nb_reco_duplicate += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]
    nb_reco_part += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] != "fake"
    ].index.nunique()
    nb_reco_track += resolvedEvent.shape[0]


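# Same comparison for the ML-based solver, accumulated in the *_ML counters.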
for trackEvent, resolvedEvent in zip(data_ML_track, data_ML_resolved):
    merged_ML = pd.merge(
        trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"],
        resolvedEvent,
        on=[
            "particleId",
            "nStates",
            "nMeasurements",
            "nOutliers",
            "nHoles",
            "ndf",
            "chi2/ndf",
            "good/duplicate/fake",
        ],
        how="left",
        indicator="exists",
    )

    merged_ML["exists"] = merged_ML["exists"] == "both"
    # Note: overwritten on every iteration, like merged.csv above.
    merged_ML.to_csv(path_or_buf="merged_ML.csv")

    nb_good_match_ML += merged_ML.loc[merged_ML["exists"]].shape[0]
    nb_reco_fake_ML += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "fake"
    ].shape[0]
    nb_reco_duplicate_ML += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]
    nb_reco_part_ML += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] != "fake"
    ].index.nunique()
    nb_reco_track_ML += resolvedEvent.shape[0]

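# Print the efficiency, duplicate-rate, and fake-rate summaries.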
print("=== Initial efficiencies ===")
print("nb particles: ", nb_part)
print("nb tracks: ", nb_track)
print("Duplicate rate: ", 100 * nb_duplicate / nb_track, " %")
print("Fake rate: ", 100 * nb_fake / nb_track, " %")

print("=== Computed efficiencies (greedy) ===")
print("nb particles: ", nb_part)
print("nb good matches: ", nb_good_match)
print("nb particles reco: ", nb_reco_part)
print("nb tracks reco: ", nb_reco_track)
print("Efficiency (good track): ", 100 * nb_good_match / nb_part, " %")
print("Efficiency (particle reco): ", 100 * nb_reco_part / nb_part, " %")
print("Duplicate rate: ", 100 * nb_reco_duplicate / nb_reco_track, " %")
print("Fake rate: ", 100 * nb_reco_fake / nb_reco_track, " %")

print("=== Computed efficiencies (ML) ===")
print("nb particles: ", nb_part)
print("nb good matches: ", nb_good_match_ML)
print("nb particles reco: ", nb_reco_part_ML)
print("nb tracks reco: ", nb_reco_track_ML)
print("Efficiency (good track): ", 100 * nb_good_match_ML / nb_part, " %")
print("Efficiency (particle reco): ", 100 * nb_reco_part_ML / nb_part, " %")
print("Duplicate rate: ", 100 * nb_reco_duplicate_ML / nb_reco_track_ML, " %")
print("Fake rate: ", 100 * nb_reco_fake_ML / nb_reco_track_ML, " %")