File indexing completed on 2025-01-18 09:12:07
0001 import pandas as pd
0002
0003 import torch.nn as nn
0004 import torch.nn.functional as F
0005 import torch.utils
0006
0007 import ast
0008
0009
def prepareDataSet(data: pd.DataFrame) -> pd.DataFrame:
    """Format the dataset that has been read from the CSV file.

    The rows are sorted by "particleId", that column becomes the index, and
    every "Hits_ID" entry is parsed with ast.literal_eval so that its textual
    form (presumably a list of hit ids such as "[1, 2, 3]") becomes the
    corresponding Python object. Entries that already are a list or a tuple
    are kept as they are, so the function can be applied to a frame that has
    already been formatted.

    @param[in] data: input DataFrame containing 1 event
    @return: Formatted DataFrame
    """
    # sort_values and set_index both return a new DataFrame, so the column
    # assignment below does not write into the caller's object.
    data = data.sort_values("particleId").set_index("particleId")

    # Anything that is not already a list/tuple still goes through
    # literal_eval, which raises on values it cannot parse.
    data["Hits_ID"] = [
        hits if isinstance(hits, (list, tuple)) else ast.literal_eval(hits)
        for hits in data["Hits_ID"].values
    ]

    return data
0027
0028
class DuplicateClassifier(nn.Module):
    """MLP model used to separate good seeds from duplicate seeds.

    Returns one sigmoid score per seed; the higher score corresponds to the
    good seed.
    """

    def __init__(self, input_dim, n_layers):
        """Build five hidden linear layers and a one-unit output layer.

        @param[in] input_dim: number of input features per seed
        @param[in] n_layers: indexable holding the widths of the five hidden
                             layers; only entries 0 to 4 are read, and an
                             IndexError is raised if fewer are provided
        """
        super().__init__()
        # The attribute names below are the keys of the state_dict, so they
        # are kept explicit rather than generated.
        self.linear1 = nn.Linear(input_dim, n_layers[0])
        self.linear2 = nn.Linear(n_layers[0], n_layers[1])
        self.linear3 = nn.Linear(n_layers[1], n_layers[2])
        self.linear4 = nn.Linear(n_layers[2], n_layers[3])
        self.linear5 = nn.Linear(n_layers[3], n_layers[4])
        self.output = nn.Linear(n_layers[4], 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, z):
        """Apply the hidden layers with ReLU, then the sigmoid output layer.

        @param[in] z: input tensor whose last dimension is input_dim
        @return: tensor of scores with a last dimension of size 1
        """
        hidden_layers = (
            self.linear1,
            self.linear2,
            self.linear3,
            self.linear4,
            self.linear5,
        )
        for layer in hidden_layers:
            z = F.relu(layer(z))
        return self.sigmoid(self.output(z))
0050
0051
class Normalise(nn.Module):
    """Normalisation of the input before the MLP model.

    Computes (z - mean) / std with float32 copies of the supplied statistics.
    """

    def __init__(self, mean, std):
        """Store the normalisation constants.

        @param[in] mean: value(s) subtracted from the input, converted with
                         torch.tensor(..., dtype=torch.float32)
        @param[in] std: value(s) the shifted input is divided by, converted
                        the same way
        """
        super().__init__()
        # Registered as buffers so that Module.to() / .cuda() / .double()
        # move and cast them together with the rest of the model.
        # persistent=False keeps them out of the state_dict, so checkpoints
        # written when these were plain attributes still load unchanged.
        self.register_buffer(
            "mean", torch.tensor(mean, dtype=torch.float32), persistent=False
        )
        self.register_buffer(
            "std", torch.tensor(std, dtype=torch.float32), persistent=False
        )

    def forward(self, z):
        """Return the input shifted by the mean and scaled by the std.

        @param[in] z: input tensor broadcastable against mean and std
        @return: normalised tensor
        """
        return (z - self.mean) / self.std
0063 return z