EIC code displayed by LXR

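This script compares three track collections produced for each event: the raw candidates from the combinatorial Kalman filter (CKF, tracks_ckf), the tracks kept by the greedy ambiguity resolver (tracks_ambi), and the tracks kept by the ML-based resolver (tracks_ambiML). For each resolver it prints the matching efficiency, the particle reconstruction efficiency, and the duplicate and fake rates.
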
import glob

import pandas as pd
import numpy as np

from ambiguity_solver_network import prepareDataSet


def readDataSet(CKS_files: list[str]) -> list[pd.DataFrame]:
    """Read the dataset from the different files, remove the pure duplicate tracks, and combine the datasets.

    @param[in] CKS_files: list of CSV files containing the tracks (usually one file per event)
    @return: list of DataFrames (one per event, in file order), each ordered by truth particle ID
    """
    data = []
    for f in CKS_files:
        datafile = pd.read_csv(f)
        datafile = prepareDataSet(datafile)
        # Combine dataset
        data.append(datafile)
    return data


# ==================================================================

# CSV files to be compared, do not forget to sort them
CKF_files_track = sorted(
    glob.glob("odd_output/event0000000[0-9][0-9]-tracks_ckf.csv")
)
CKF_files_resolved = sorted(
    glob.glob("odd_output/event0000000[0-9][0-9]-tracks_ambi.csv")
)
ML_files_resolved = sorted(
    glob.glob("odd_output/event0000000[0-9][0-9]-tracks_ambiML.csv")
)

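# The CKF track files are read twice so that the greedy and the ML
# comparisons below each loop over an independent copy of the same sample.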
data_track = readDataSet(CKF_files_track)
data_ML_track = readDataSet(CKF_files_track)
data_resolved = readDataSet(CKF_files_resolved)
data_ML_resolved = readDataSet(ML_files_resolved)

# Compute the algorithm performances
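# Counters: nb_* describe the raw CKF sample, nb_reco_* the greedy-resolved
# sample, and nb_reco_*_ML the ML-resolved sample.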
nb_part = 0
nb_track = 0
nb_fake = 0
nb_duplicate = 0

nb_good_match = 0
nb_reco_part = 0
nb_reco_fake = 0
nb_reco_duplicate = 0
nb_reco_track = 0

nb_good_match_ML = 0
nb_reco_part_ML = 0
nb_reco_fake_ML = 0
nb_reco_duplicate_ML = 0
nb_reco_track_ML = 0

# Compute the different efficiencies
for trackEvent, resolvedEvent in zip(data_track, data_resolved):
    nb_part += trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"].shape[0]
    nb_track += trackEvent.shape[0]
    nb_fake += trackEvent.loc[trackEvent["good/duplicate/fake"] == "fake"].shape[0]
    nb_duplicate += trackEvent.loc[
        trackEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]

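    # A "good" CKF track counts as matched if an identical row (same particle
    # ID and same fit-quality columns) survives in the resolved collection:
    # the left merge below flags those rows with exists == "both".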
    # Merge the two DataFrames and add an indicator column
    merged = pd.merge(
        trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"],
        resolvedEvent,
        on=[
            "particleId",
            "nStates",
            "nMeasurements",
            "nOutliers",
            "nHoles",
            "ndf",
            "chi2/ndf",
            "good/duplicate/fake",
        ],
        how="left",
        indicator="exists",
    )
    # Add a column showing whether each row of the first DataFrame exists in the second
    merged["exists"] = np.where(merged.exists == "both", True, False)
    merged.to_csv(path_or_buf="merged.csv")
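    # NB: merged.csv is rewritten on every event, so only the last event's
    # merge is kept on disk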

    nb_good_match += merged.loc[merged["exists"]].shape[0]
    nb_reco_fake += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "fake"
    ].shape[0]
    nb_reco_duplicate += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]
    nb_reco_part += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] != "fake"
    ].index.nunique()
    nb_reco_track += resolvedEvent.shape[0]

# Compute the different efficiencies for ML
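# (same matching procedure as for the greedy solver, applied to the ML-resolved tracks)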
for trackEvent, resolvedEvent in zip(data_ML_track, data_ML_resolved):
    # Merge the two DataFrames and add an indicator column
    merged_ML = pd.merge(
        trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"],
        resolvedEvent,
        on=[
            "particleId",
            "nStates",
            "nMeasurements",
            "nOutliers",
            "nHoles",
            "ndf",
            "chi2/ndf",
            "good/duplicate/fake",
        ],
        how="left",
        indicator="exists",
    )

    # Add a column showing whether each row of the first DataFrame exists in the second
    merged_ML["exists"] = np.where(merged_ML.exists == "both", True, False)
    merged_ML.to_csv(path_or_buf="merged_ML.csv")
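    # Same caveat as merged.csv: only the last event's merge is kept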

    nb_good_match_ML += merged_ML.loc[merged_ML["exists"]].shape[0]
    nb_reco_fake_ML += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "fake"
    ].shape[0]
    nb_reco_duplicate_ML += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]
    nb_reco_part_ML += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] != "fake"
    ].index.nunique()
    nb_reco_track_ML += resolvedEvent.shape[0]

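# Rates are quoted relative to the number of tracks in each collection;
# efficiencies relative to the number of truth particles (nb_part).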
print("===Initial efficiencies===")
print("nb particles: ", nb_part)
print("nb tracks: ", nb_track)
print("Duplicate rate: ", 100 * nb_duplicate / nb_track, " %")
print("Fake rate: ", 100 * nb_fake / nb_track, " %")

print("===Computed efficiencies Greedy===")
print("nb particles: ", nb_part)
print("nb good match: ", nb_good_match)
print("nb particle reco: ", nb_reco_part)
print("nb track reco: ", nb_reco_track)
print("Efficiency (good track): ", 100 * nb_good_match / nb_part, " %")
print("Efficiency (particle reco): ", 100 * nb_reco_part / nb_part, " %")
print("Duplicate rate: ", 100 * nb_reco_duplicate / nb_reco_track, " %")
print("Fake rate: ", 100 * nb_reco_fake / nb_reco_track, " %")

print("===Computed efficiencies ML===")
print("nb particles: ", nb_part)
print("nb good match: ", nb_good_match_ML)
print("nb particle reco: ", nb_reco_part_ML)
print("nb track reco: ", nb_reco_track_ML)
print("Efficiency (good track): ", 100 * nb_good_match_ML / nb_part, " %")
print("Efficiency (particle reco): ", 100 * nb_reco_part_ML / nb_part, " %")
print("Duplicate rate: ", 100 * nb_reco_duplicate_ML / nb_reco_track_ML, " %")
print("Fake rate: ", 100 * nb_reco_fake_ML / nb_reco_track_ML, " %")