Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-06-29 07:06:05

0001 
0002 #include "JEventProcessorPODIO.h"
0003 
0004 #include <JANA/JApplication.h>
0005 #include <JANA/JLogger.h>
0006 #include <JANA/Services/JParameterManager.h>
0007 #include <JANA/Utils/JTypeInfo.h>
0008 #include <edm4eic/EDM4eicVersion.h>
0009 #include <fmt/core.h>
0010 #include <podio/CollectionBase.h>
0011 #include <podio/Frame.h>
0012 #include <podio/ROOTWriter.h>
0013 #include <spdlog/common.h>
0014 #include <chrono>
0015 #include <exception>
0016 #include <thread>
0017 
0018 #include "services/log/Log_service.h"
0019 
0020 
0021 JEventProcessorPODIO::JEventProcessorPODIO() {
0022     SetTypeName(NAME_OF_THIS); // Provide JANA with this class's name
0023 
0024     japp->SetDefaultParameter(
0025             "podio:output_file",
0026             m_output_file,
0027             "Name of EDM4hep/podio output file to write to. Setting this will cause the output file to be created and written to."
0028     );
0029 
0030     // Allow user to set PODIO:OUTPUT_FILE to "1" to specify using the default name.
0031     if( m_output_file == "1" ){
0032         auto param = japp->GetJParameterManager()->FindParameter("podio:output_file" );
0033         if(param) {
0034             param->SetValue( param->GetDefault() );
0035             m_output_file = param->GetDefault();
0036         }
0037     }
0038 
0039     // Get the output directory path for creating a second copy of the output file at the end of processing.
0040     // (this is duplicating similar functionality in Juggler/Gaudi so assume it is useful).
0041     japp->SetDefaultParameter(
0042             "podio:output_file_copy_dir",
0043             m_output_file_copy_dir,
0044             "Directory name to make an additional copy of the output file to. Copy will be done at end of processing. Default is empty string which means do not make a copy. No check is made on path existing."
0045     );
0046 
0047     // Get the list of output collections to include/exclude
0048     std::vector<std::string> output_collections={
0049             // Header and other metadata
0050             "EventHeader",
0051 
0052             // Truth record
0053             "MCParticles",
0054             "MCBeamElectrons",
0055             "MCBeamProtons",
0056             "MCScatteredElectrons",
0057             "MCScatteredProtons",
0058             "MCParticlesHeadOnFrameNoBeamFX",
0059 
0060             // All tracking hits combined
0061             "CentralTrackingRecHits",
0062             "CentralTrackSeedingResults",
0063             "CentralTrackerMeasurements",
0064 
0065             // Si tracker hits
0066             "SiBarrelTrackerRecHits",
0067             "SiBarrelVertexRecHits",
0068             "SiEndcapTrackerRecHits",
0069 
0070             "SiBarrelRawHits",
0071             "SiBarrelVertexRawHits",
0072             "SiEndcapTrackerRawHits",
0073 
0074             "SiBarrelHits",
0075             "VertexBarrelHits",
0076             "TrackerEndcapHits",
0077 
0078             "SiBarrelHitAssociations",
0079             "SiBarrelVertexHitAssociations",
0080             "SiEndcapHitAssociations",
0081 
0082             // TOF
0083             "TOFBarrelRecHit",
0084             "TOFEndcapRecHits",
0085 
0086             "TOFBarrelRawHit",
0087             "TOFEndcapRawHits",
0088 
0089             "TOFBarrelHits",
0090             "TOFEndcapHits",
0091 
0092             "TOFBarrelHitAssociations",
0093             "TOFEndcapHitAssociations",
0094 
0095             "CombinedTOFParticleIDs",
0096             "CombinedTOFSeededParticleIDs",
0097 
0098             // DRICH
0099             "DRICHRawHits",
0100             "DRICHRawHitsAssociations",
0101             "DRICHAerogelTracks",
0102             "DRICHGasTracks",
0103             "DRICHAerogelIrtCherenkovParticleID",
0104             "DRICHGasIrtCherenkovParticleID",
0105             "DRICHParticleIDs",
0106             "DRICHSeededParticleIDs",
0107 
0108             // PFRICH
0109             "RICHEndcapNRawHits",
0110             "RICHEndcapNRawHitsAssociations",
0111             "RICHEndcapNParticleIDs",
0112             "RICHEndcapNSeededParticleIDs",
0113 
0114             // MPGD
0115             "MPGDBarrelRecHits",
0116             "OuterMPGDBarrelRecHits",
0117             "BackwardMPGDEndcapRecHits",
0118             "ForwardMPGDEndcapRecHits",
0119 
0120             "MPGDBarrelRawHits",
0121             "OuterMPGDBarrelRawHits",
0122             "BackwardMPGDEndcapRawHits",
0123             "ForwardMPGDEndcapRawHits",
0124 
0125             "MPGDBarrelHits",
0126             "OuterMPGDBarrelHits",
0127             "BackwardMPGDEndcapHits",
0128             "ForwardMPGDEndcapHits",
0129 
0130             "MPGDBarrelHitAssociations",
0131             "OuterMPGDBarrelHitAssociations",
0132             "BackwardMPGDEndcapAssociations",
0133             "ForwardMPGDHitAssociations",
0134 
0135             // LOWQ2 hits
0136             "TaggerTrackerRawHits",
0137             "TaggerTrackerHitAssociations",
0138             "TaggerTrackerM1L0ClusterPositions",
0139             "TaggerTrackerM1L1ClusterPositions",
0140             "TaggerTrackerM1L2ClusterPositions",
0141             "TaggerTrackerM1L3ClusterPositions",
0142             "TaggerTrackerM2L0ClusterPositions",
0143             "TaggerTrackerM2L1ClusterPositions",
0144             "TaggerTrackerM2L2ClusterPositions",
0145             "TaggerTrackerM2L3ClusterPositions",
0146             "TaggerTrackerM1Tracks",
0147             "TaggerTrackerM2Tracks",
0148             "TaggerTrackerProjectedTracks",
0149             "TaggerTrackerTracks",
0150             "TaggerTrackerTrajectories",
0151             "TaggerTrackerTrackParameters",
0152 
0153             // Forward & Far forward hits
0154             "B0TrackerRecHits",
0155             "B0TrackerRawHits",
0156             "B0TrackerHits",
0157             "B0TrackerHitAssociations",
0158 
0159             "ForwardRomanPotRecHits",
0160             "ForwardOffMTrackerRecHits",
0161 
0162             "ForwardRomanPotRecParticles",
0163             "ForwardOffMRecParticles",
0164 
0165             "ForwardRomanPotHitAssociations",
0166             "ForwardOffMTrackerHitAssociations",
0167 
0168             // Reconstructed data
0169             "GeneratedParticles",
0170             "GeneratedBreitFrameParticles",
0171             "ReconstructedParticles",
0172             "ReconstructedParticleAssociations",
0173             "ReconstructedChargedParticles",
0174             "ReconstructedChargedParticleAssociations",
0175             "ReconstructedChargedRealPIDParticles",
0176             "ReconstructedSeededChargedParticles",
0177             "ReconstructedSeededChargedParticleAssociations",
0178             "MCScatteredElectronAssociations", // Remove if/when used internally
0179             "MCNonScatteredElectronAssociations", // Remove if/when used internally
0180             "ReconstructedChargedParticleIDs",
0181             "ReconstructedBreitFrameParticles",
0182             "CentralTrackSegments",
0183             "CentralTrackVertices",
0184             "CentralCKFTrajectories",
0185             "CentralCKFTracks",
0186             "CentralCKFTrackParameters",
0187             "CentralCKFSeededTrajectories",
0188             "CentralCKFSeededTracks",
0189             "CentralCKFSeededTrackParameters",
0190             //tracking properties - true seeding
0191             "CentralCKFTrajectoriesUnfiltered",
0192             "CentralCKFTracksUnfiltered",
0193             "CentralCKFTrackParametersUnfiltered",
0194              //tracking properties - realistic seeding
0195             "CentralCKFSeededTrajectoriesUnfiltered",
0196             "CentralCKFSeededTracksUnfiltered",
0197             "CentralCKFSeededTrackParametersUnfiltered",
0198             "InclusiveKinematicsDA",
0199             "InclusiveKinematicsJB",
0200             "InclusiveKinematicsSigma",
0201             "InclusiveKinematicseSigma",
0202             "InclusiveKinematicsElectron",
0203             "InclusiveKinematicsTruth",
0204             "GeneratedJets",
0205             "GeneratedChargedJets",
0206             "GeneratedCentauroJets",
0207             "ReconstructedJets",
0208             "ReconstructedChargedJets",
0209             "ReconstructedCentauroJets",
0210             "ReconstructedElectrons",
0211             "ScatteredElectronsTruth",
0212             "ScatteredElectronsEMinusPz",
0213 #if EDM4EIC_VERSION_MAJOR >= 6
0214             "HadronicFinalState",
0215 #endif
0216 
0217             // Track projections
0218             "CalorimeterTrackProjections",
0219 
0220             // Ecal stuff
0221             "EcalEndcapNRawHits",
0222             "EcalEndcapNRecHits",
0223             "EcalEndcapNTruthClusters",
0224             "EcalEndcapNTruthClusterAssociations",
0225             "EcalEndcapNClusters",
0226             "EcalEndcapNClusterAssociations",
0227             "EcalEndcapPRawHits",
0228             "EcalEndcapPRecHits",
0229             "EcalEndcapPTruthClusters",
0230             "EcalEndcapPTruthClusterAssociations",
0231             "EcalEndcapPClusters",
0232             "EcalEndcapPClusterAssociations",
0233             "EcalEndcapPInsertRawHits",
0234             "EcalEndcapPInsertRecHits",
0235             "EcalEndcapPInsertTruthClusters",
0236             "EcalEndcapPInsertTruthClusterAssociations",
0237             "EcalEndcapPInsertClusters",
0238             "EcalEndcapPInsertClusterAssociations",
0239             "EcalBarrelClusters",
0240             "EcalBarrelClusterAssociations",
0241             "EcalBarrelTruthClusters",
0242             "EcalBarrelTruthClusterAssociations",
0243             "EcalBarrelImagingRawHits",
0244             "EcalBarrelImagingRecHits",
0245             "EcalBarrelImagingClusters",
0246             "EcalBarrelImagingClusterAssociations",
0247             "EcalBarrelScFiRawHits",
0248             "EcalBarrelScFiRecHits",
0249             "EcalBarrelScFiClusters",
0250             "EcalBarrelScFiClusterAssociations",
0251             "EcalLumiSpecRawHits",
0252             "EcalLumiSpecRecHits",
0253             "EcalLumiSpecTruthClusters",
0254             "EcalLumiSpecTruthClusterAssociations",
0255             "EcalLumiSpecClusters",
0256             "EcalLumiSpecClusterAssociations",
0257             "HcalEndcapNRawHits",
0258             "HcalEndcapNRecHits",
0259             "HcalEndcapNMergedHits",
0260             "HcalEndcapNClusters",
0261             "HcalEndcapNClusterAssociations",
0262             "HcalEndcapPInsertRawHits",
0263             "HcalEndcapPInsertRecHits",
0264             "HcalEndcapPInsertMergedHits",
0265             "HcalEndcapPInsertClusters",
0266             "HcalEndcapPInsertClusterAssociations",
0267             "LFHCALRawHits",
0268             "LFHCALRecHits",
0269             "LFHCALClusters",
0270             "LFHCALClusterAssociations",
0271             "HcalBarrelRawHits",
0272             "HcalBarrelRecHits",
0273             "HcalBarrelClusters",
0274             "HcalBarrelClusterAssociations",
0275             "B0ECalRawHits",
0276             "B0ECalRecHits",
0277             "B0ECalClusters",
0278             "B0ECalClusterAssociations",
0279             "HcalEndcapNTruthClusters",
0280             "HcalEndcapNTruthClusterAssociations",
0281             "HcalBarrelTruthClusters",
0282             "HcalBarrelTruthClusterAssociations",
0283             "B0ECalRecHits",
0284             "B0ECalClusters",
0285             "B0ECalClusterAssociations",
0286 
0287             //ZDC Ecal
0288             "EcalFarForwardZDCRawHits",
0289             "EcalFarForwardZDCRecHits",
0290             "EcalFarForwardZDCClusters",
0291             "EcalFarForwardZDCClusterAssociations",
0292             "EcalFarForwardZDCTruthClusters",
0293             "EcalFarForwardZDCTruthClusterAssociations",
0294 
0295             //ZDC HCal
0296             "HcalFarForwardZDCRawHits",
0297             "HcalFarForwardZDCRecHits",
0298             "HcalFarForwardZDCSubcellHits",
0299             "HcalFarForwardZDCClusters",
0300             "HcalFarForwardZDCClusterAssociations",
0301             "HcalFarForwardZDCClustersBaseline",
0302             "HcalFarForwardZDCClusterAssociationsBaseline",
0303             "HcalFarForwardZDCTruthClusters",
0304             "HcalFarForwardZDCTruthClusterAssociations",
0305             "ReconstructedFarForwardZDCNeutrons",
0306 
0307             // DIRC
0308             "DIRCRawHits",
0309             "DIRCPID",
0310             "DIRCParticleIDs",
0311             "DIRCSeededParticleIDs",
0312     };
0313     std::vector<std::string> output_exclude_collections;  // need to get as vector, then convert to set
0314     std::string output_include_collections = "DEPRECATED";
0315     japp->SetDefaultParameter(
0316             "podio:output_include_collections",
0317             output_include_collections,
0318             "DEPRECATED. Use podio:output_collections instead."
0319     );
0320     if (output_include_collections != "DEPRECATED") {
0321       output_collections.clear();
0322       JParameterManager::Parse(output_include_collections, output_collections);
0323       m_output_include_collections_set = true;
0324     }
0325     japp->SetDefaultParameter(
0326             "podio:output_collections",
0327             output_collections,
0328             "Comma separated list of collection names to write out. If not set, all collections will be written (including ones from input file). Don't set this and use PODIO:OUTPUT_EXCLUDE_COLLECTIONS to write everything except a selection."
0329     );
0330     japp->SetDefaultParameter(
0331             "podio:output_exclude_collections",
0332             output_exclude_collections,
0333             "Comma separated list of collection names to not write out."
0334     );
0335     japp->SetDefaultParameter(
0336             "podio:print_collections",
0337             m_collections_to_print,
0338             "Comma separated list of collection names to print to screen, e.g. for debugging."
0339     );
0340 
0341     m_output_collections = std::set<std::string>(output_collections.begin(),
0342                                                  output_collections.end());
0343     m_output_exclude_collections = std::set<std::string>(output_exclude_collections.begin(),
0344                                                          output_exclude_collections.end());
0345 
0346 }
0347 
0348 
0349 void JEventProcessorPODIO::Init() {
0350 
0351     auto *app = GetApplication();
0352     m_log = app->GetService<Log_service>()->logger("JEventProcessorPODIO");
0353     m_log->set_level(spdlog::level::debug);
0354     m_writer = std::make_unique<podio::ROOTFrameWriter>(m_output_file);
0355     // TODO: NWB: Verify that output file is writable NOW, rather than after event processing completes.
0356     //       I definitely don't trust PODIO to do this for me.
0357 
0358     if (m_output_include_collections_set) {
0359       m_log->error("The podio:output_include_collections was provided, but is deprecated. Use podio:output_collections instead.");
0360       // Adding a delay to ensure users notice the deprecation warning.
0361       using namespace std::chrono_literals;
0362       std::this_thread::sleep_for(10s);
0363     }
0364 
0365 }
0366 
0367 
0368 void JEventProcessorPODIO::FindCollectionsToWrite(const std::shared_ptr<const JEvent>& event) {
0369 
0370     // Set up the set of collections_to_write.
0371     std::vector<std::string> all_collections = event->GetAllCollectionNames();
0372 
0373     if (m_output_collections.empty()) {
0374         // User has not specified an include list, so we include _all_ PODIO collections present in the first event.
0375         for (const std::string& col : all_collections) {
0376             if (m_output_exclude_collections.find(col) == m_output_exclude_collections.end()) {
0377                 m_collections_to_write.push_back(col);
0378                 m_log->info("Persisting collection '{}'", col);
0379             }
0380         }
0381     }
0382     else {
0383         m_log->debug("Persisting podio types from includes list");
0384         m_user_included_collections = true;
0385 
0386         // We match up the include list with what is actually present in the event
0387         std::set<std::string> all_collections_set = std::set<std::string>(all_collections.begin(), all_collections.end());
0388 
0389         for (const auto& col : m_output_collections) {
0390             if (m_output_exclude_collections.find(col) == m_output_exclude_collections.end()) {
0391                 // Included and not excluded
0392                 if (all_collections_set.find(col) == all_collections_set.end()) {
0393                     // Included, but not a valid PODIO type
0394                     m_log->warn("Explicitly included collection '{}' not present in factory set, omitting.", col);
0395                 }
0396                 else {
0397                     // Included, not excluded, and a valid PODIO type
0398                     m_collections_to_write.push_back(col);
0399                     m_log->info("Persisting collection '{}'", col);
0400                 }
0401             }
0402         }
0403     }
0404 
0405 }
0406 
0407 void JEventProcessorPODIO::Process(const std::shared_ptr<const JEvent> &event) {
0408 
0409     std::lock_guard<std::mutex> lock(m_mutex);
0410     if (m_is_first_event) {
0411         FindCollectionsToWrite(event);
0412     }
0413 
0414     // Trigger all collections once to fix the collection IDs
0415     // TODO: WDC: This should not be necessary, but while we await collection IDs
0416     //            that are determined by hash, we have to ensure they are reproducible
0417     //            even if the collections are filled in unpredictable order (or not at
0418     //            all). See also below, at "TODO: NWB:".
0419     for (const auto& coll_name : m_collections_to_write) {
0420         try {
0421             [[maybe_unused]]
0422             const auto* coll_ptr = event->GetCollectionBase(coll_name);
0423         }
0424         catch(std::exception &e) {
0425             // chomp
0426         }
0427     }
0428 
0429     // Print the contents of some collections, just for debugging purposes
0430     // Do this before writing just in case writing crashes
0431     if (!m_collections_to_print.empty()) {
0432         LOG << "========================================" << LOG_END;
0433         LOG << "JEventProcessorPODIO: Event " << event->GetEventNumber() << LOG_END;
0434     }
0435     for (const auto& coll_name : m_collections_to_print) {
0436         LOG << "------------------------------" << LOG_END;
0437         LOG << coll_name << LOG_END;
0438         try {
0439             const auto* coll_ptr = event->GetCollectionBase(coll_name);
0440             if (coll_ptr == nullptr) {
0441                 LOG << "missing" << LOG_END;
0442             } else {
0443                 coll_ptr->print();
0444             }
0445         }
0446         catch(std::exception &e) {
0447             LOG << "missing" << LOG_END;
0448         }
0449     }
0450 
0451     m_log->trace("==================================");
0452     m_log->trace("Event #{}", event->GetEventNumber());
0453 
0454 
0455     // Make sure that all factories get called that need to be written into the frame.
0456     // We need to do this for _all_ factories unless we've constrained it by using includes/excludes.
0457     // Note that all collections need to be present in the first event, as podio::RootFrameWriter constrains us to write one event at a time, so there
0458     // is no way to add a new branch after the first event.
0459 
0460     // If we get an exception below while trying to add a factory for any
0461     // reason then mark that factory as bad and don't try running it again.
0462     // This is motivated by trying to write EcalBarrelSciGlass objects for
0463     // data simulated using the imaging calorimeter. In that case, it will
0464     // always throw an exception, but DD4hep also prints its own error message.
0465     // Thus, to prevent that error message every event, we must avoid calling
0466     // it.
0467 
0468     // Activate factories.
0469     // TODO: NWB: For now we run every factory every time, swallowing exceptions if necessary.
0470     //            We do this so that we always have the same collections created in the same order.
0471     //            This means that the collection IDs are stable so the writer doesn't segfault.
0472     //            The better fix is to maintain a map of collection IDs, or just wait for PODIO to fix the bug.
0473     std::vector<std::string> successful_collections;
0474     static std::set<std::string> failed_collections;
0475     for (const std::string& coll : m_collections_to_write) {
0476         try {
0477             m_log->trace("Ensuring factory for collection '{}' has been called.", coll);
0478             const auto* coll_ptr = event->GetCollectionBase(coll);
0479             if (coll_ptr == nullptr) {
0480                 // If a collection is missing from the frame, the podio root writer will segfault.
0481                 // To avoid this, we treat this as a failing collection and omit from this point onwards.
0482                 // However, this code path is expected to be unreachable because any missing collection will be
0483                 // replaced with an empty collection in JFactoryPodioTFixed::Create.
0484                 if (failed_collections.count(coll) == 0) {
0485                     m_log->error("Omitting PODIO collection '{}' because it is null", coll);
0486                     failed_collections.insert(coll);
0487                 }
0488             }
0489             else {
0490                 m_log->trace("Including PODIO collection '{}'", coll);
0491                 successful_collections.push_back(coll);
0492             }
0493         }
0494         catch(std::exception &e) {
0495             // Limit printing warning to just once per factory
0496             if (failed_collections.count(coll) == 0) {
0497                 m_log->error("Omitting PODIO collection '{}' due to exception: {}.", coll, e.what());
0498                 failed_collections.insert(coll);
0499             }
0500         }
0501     }
0502     m_collections_to_write = successful_collections;
0503 
0504     // Frame will contain data from all Podio factories that have been triggered,
0505     // including by the `event->GetCollectionBase(coll);` above.
0506     // Note that collections MUST be present in frame. If a collection is null, the writer will segfault.
0507     const auto* frame = event->GetSingle<podio::Frame>();
0508 
0509     // TODO: NWB: We need to actively stabilize podio collections. Until then, keep this around in case
0510     //            the writer starts segfaulting, so we can quickly see whether the problem is unstable collection IDs.
0511     /*
0512     m_log->info("Event {}: Writing {} collections", event->GetEventNumber(), m_collections_to_write.size());
0513     for (const std::string& collname : m_collections_to_write) {
0514         m_log->info("Writing collection '{}' with id {}", collname, frame->get(collname)->getID());
0515     }
0516     */
0517     m_writer->writeFrame(*frame, "events", m_collections_to_write);
0518     m_is_first_event = false;
0519 
0520 }
0521 
0522 void JEventProcessorPODIO::Finish() {
0523     if (m_output_include_collections_set) {
0524       m_log->error("The podio:output_include_collections was provided, but is deprecated. Use podio:output_collections instead.");
0525       // Adding a delay to ensure users notice the deprecation warning.
0526       using namespace std::chrono_literals;
0527       std::this_thread::sleep_for(10s);
0528     }
0529 
0530     m_writer->finish();
0531 }