File indexing completed on 2025-01-18 09:12:07
0001 import glob
0002
0003 import pandas as pd
0004
0005
def matchGood(seed_files: list[str], ckf_files: list[str]) -> None:
    """Relabel seeds from the CKF track labels and write two CSV files per event.

    Files are paired positionally (``seed_files[i]`` with ``ckf_files[i]``); if
    the two lists differ in length, the surplus entries are silently ignored.

    For each pair the ``good/duplicate/fake`` column of the seed file is
    rewritten, in this order:

    1. every seed labelled ``"good"`` in the seed file becomes ``"duplicate"``;
    2. every seed whose ``seed_id`` belongs to a track labelled ``"good"`` in
       the track file becomes ``"good"``;
    3. every seed of a particle that has no good seed at all becomes ``"fake"``.

    Two files are then written, named after the seed file with its extension
    replaced by a suffix, both sorted and indexed by ``seed_id``:

    * ``<seed file>_matched.csv``: all seeds with the updated labels;
    * ``<seed file>_cleaned.csv``: only the seeds of particles that have at
      least one good seed (header only when the event has no good seed).

    @param[in] seed_files: List of files containing seeds data (1 file per events usually)
    @param[in] ckf_files: List of files containing tracks data (1 file per events usually)
    """
    import os  # local import: only needed to derive the output file names

    label = "good/duplicate/fake"

    for f_ckf, f_seed in zip(ckf_files, seed_files):
        print("reading file: ", f_ckf, f_seed)

        # Seed ids that led to a track labelled "good".
        data_track = pd.read_csv(f_ckf)
        good_seed_ids = data_track.loc[data_track[label] == "good", "seed_id"]

        data_seed = pd.read_csv(f_seed)
        is_good = data_seed["seed_id"].isin(good_seed_ids)

        # Demote the seed-level "good" labels first, then promote the seeds
        # that are backed by a good track; the order matters.
        data_seed.loc[data_seed[label] == "good", label] = "duplicate"
        data_seed.loc[is_good, label] = "good"

        # A particle is kept when at least one of its seeds is good. Rows
        # with a missing particleId are neither relabelled nor kept, which
        # is what an equality-based per-particle loop would do as well.
        particle = data_seed["particleId"]
        known = particle.notna()
        has_good = known & particle.isin(particle[is_good & known])
        data_seed.loc[known & ~has_good, label] = "fake"

        # Selecting with a mask keeps the column layout even when nothing is
        # selected, so an event without any good seed still sorts and saves.
        cleaned_data = data_seed.loc[has_good]

        stem = os.path.splitext(f_seed)[0]

        # Save the matched dataset for future use.
        matched_data = data_seed.sort_values("seed_id").set_index("seed_id")
        matched_data.to_csv(stem + "_matched.csv")

        # Save the cleaned dataset for future use.
        cleaned_data = cleaned_data.sort_values("seed_id").set_index("seed_id")
        cleaned_data.to_csv(stem + "_cleaned.csv")
0061
0062
0063
0064
# Collect the per-event seed and CKF track CSV files found under "odd_output".
# Both lists are sorted because matchGood pairs them by position; this
# presumably lines up the seed and track file of the same event (confirm the
# two file sets always cover the same events).
seed_files = glob.glob("odd_output" + "/event*-seed.csv")
seed_files.sort()
ckf_files = glob.glob("odd_output" + "/event*-tracks_ckf.csv")
ckf_files.sort()

# Relabel the seeds of every event and write the matched / cleaned CSV files.
matchGood(seed_files, ckf_files)