Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:14:17

0001 // Author: Vincenzo Eduardo Padulano CERN 09/2024
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #ifndef ROOT_RDF_RFilterWithMissingValues
0012 #define ROOT_RDF_RFilterWithMissingValues
0013 
0014 #include "ROOT/RDF/ColumnReaderUtils.hxx"
0015 #include "ROOT/RDF/RColumnReaderBase.hxx"
0016 #include "ROOT/RDF/RCutFlowReport.hxx"
0017 #include "ROOT/RDF/Utils.hxx"
0018 #include "ROOT/RDF/RFilterBase.hxx"
0019 #include "ROOT/RDF/RLoopManager.hxx"
0020 #include "ROOT/RDF/RTreeColumnReader.hxx"
0021 #include "ROOT/TypeTraits.hxx"
0022 #include "RtypesCore.h"
0023 
0024 #include <algorithm>
0025 #include <cassert>
0026 #include <memory>
0027 #include <string>
0028 #include <unordered_map>
0029 #include <utility> // std::index_sequence
0030 #include <vector>
0031 
0032 // fwd decls for RFilterWithMissingValues
0033 namespace ROOT::Internal::RDF::GraphDrawing {
0034 std::shared_ptr<GraphNode> CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr,
0035                                             std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap);
0036 
0037 std::shared_ptr<GraphNode> AddDefinesToGraph(std::shared_ptr<GraphNode> node,
0038                                              const ROOT::Internal::RDF::RColumnRegister &colRegister,
0039                                              const std::vector<std::string> &prevNodeDefines,
0040                                              std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap);
0041 } // namespace ROOT::Internal::RDF::GraphDrawing
0042 
0043 namespace ROOT::Detail::RDF {
0044 
0045 namespace RDFGraphDrawing = ROOT::Internal::RDF::GraphDrawing;
0046 class RJittedFilter;
0047 
0048 /**
0049  * \brief implementation of FilterAvailable and FilterMissing operations
0050  *
0051  * The filter evaluates if the entry is missing a value for the input column.
0052  * Depending on which function was called by the user, the entry with the
0053  * missing value:
0054  * - will be discarded in case the user called FilterAvailable
0055  * - will be kept in case the user called FilterMissing
0056  */
0057 template <typename PrevNodeRaw>
0058 class R__CLING_PTRCHECK(off) RFilterWithMissingValues final : public RFilterBase {
0059 
0060    // If the PrevNode is a RJittedFilter, treat it as a more generic RFilterBase: when dealing with systematic
0061    // variations we'll have a RJittedFilter node for the nominal case but other "universes" will use concrete filters,
0062    // so we normalize the "previous node type" to the base type RFilterBase.
0063    using PrevNode_t = std::conditional_t<std::is_same<PrevNodeRaw, RJittedFilter>::value, RFilterBase, PrevNodeRaw>;
0064    const std::shared_ptr<PrevNode_t> fPrevNodePtr;
0065 
0066    // One column reader per slot
0067    std::vector<RColumnReaderBase *> fValues;
0068 
0069    // Whether the entry should be kept in case of missing value for the input column
0070    bool fDiscardEntryWithMissingValue;
0071 
0072 public:
0073    RFilterWithMissingValues(bool discardEntry, std::shared_ptr<PrevNode_t> pd,
0074                             const RDFInternal::RColumnRegister &colRegister, const ColumnNames_t &columns,
0075                             std::string_view filterName = "", const std::string &variationName = "nominal")
0076       : RFilterBase(pd->GetLoopManagerUnchecked(), filterName, pd->GetLoopManagerUnchecked()->GetNSlots(), colRegister,
0077                     columns, pd->GetVariations(), variationName),
0078         fPrevNodePtr(std::move(pd)),
0079         fValues(fPrevNodePtr->GetLoopManagerUnchecked()->GetNSlots()),
0080         fDiscardEntryWithMissingValue(discardEntry)
0081    {
0082       fLoopManager->Register(this);
0083       // We suppress errors that TTreeReader prints regarding the missing branch
0084       fLoopManager->InsertSuppressErrorsForMissingBranch(fColumnNames[0]);
0085    }
0086 
0087    RFilterWithMissingValues(const RFilterWithMissingValues &) = delete;
0088    RFilterWithMissingValues &operator=(const RFilterWithMissingValues &) = delete;
0089    RFilterWithMissingValues(RFilterWithMissingValues &&) = delete;
0090    RFilterWithMissingValues &operator=(RFilterWithMissingValues &&) = delete;
0091    ~RFilterWithMissingValues() final
0092    {
0093       // must Deregister objects from the RLoopManager here, before the fPrevNodePtr data member is destroyed:
0094       // otherwise if fPrevNodePtr is the RLoopManager, it will be destroyed before the calls to Deregister happen.
0095       fLoopManager->Deregister(this);
0096       fLoopManager->EraseSuppressErrorsForMissingBranch(fColumnNames[0]);
0097    }
0098 
0099    bool CheckFilters(unsigned int slot, Long64_t entry) final
0100    {
0101       constexpr static auto cacheLineStepLong64_t = RDFInternal::CacheLineStep<Long64_t>();
0102       constexpr static auto cacheLineStepint = RDFInternal::CacheLineStep<int>();
0103       constexpr static auto cacheLineStepULong64_t = RDFInternal::CacheLineStep<ULong64_t>();
0104 
0105       if (entry != fLastCheckedEntry[slot * cacheLineStepLong64_t]) {
0106          if (!fPrevNodePtr->CheckFilters(slot, entry)) {
0107             // a filter upstream returned false, cache the result
0108             fLastResult[slot * cacheLineStepint] = false;
0109          } else {
0110             // evaluate this filter, cache the result
0111             const bool valueIsMissing = fValues[slot]->template TryGet<void>(entry) == nullptr;
0112             if (fDiscardEntryWithMissingValue) {
0113                valueIsMissing ? ++fRejected[slot * cacheLineStepULong64_t] : ++fAccepted[slot * cacheLineStepULong64_t];
0114                fLastResult[slot * cacheLineStepint] = !valueIsMissing;
0115             } else {
0116                valueIsMissing ? ++fAccepted[slot * cacheLineStepULong64_t] : ++fRejected[slot * cacheLineStepULong64_t];
0117                fLastResult[slot * cacheLineStepint] = valueIsMissing;
0118             }
0119          }
0120          fLastCheckedEntry[slot * cacheLineStepLong64_t] = entry;
0121       }
0122       return fLastResult[slot * cacheLineStepint];
0123    }
0124 
0125    void InitSlot(TTreeReader *r, unsigned int slot) final
0126    {
0127       fValues[slot] =
0128          RDFInternal::GetColumnReader(slot, fColRegister.GetReaderUnchecked(slot, fColumnNames[0], fVariation),
0129                                       *fLoopManager, r, fColumnNames[0], typeid(void));
0130       fLastCheckedEntry[slot * RDFInternal::CacheLineStep<Long64_t>()] = -1;
0131    }
0132 
0133    // recursive chain of `Report`s
0134    void Report(ROOT::RDF::RCutFlowReport &rep) const final { PartialReport(rep); }
0135 
0136    void PartialReport(ROOT::RDF::RCutFlowReport &rep) const final
0137    {
0138       fPrevNodePtr->PartialReport(rep);
0139       FillReport(rep);
0140    }
0141 
0142    void StopProcessing() final
0143    {
0144       ++fNStopsReceived;
0145       if (fNStopsReceived == fNChildren)
0146          fPrevNodePtr->StopProcessing();
0147    }
0148 
0149    void IncrChildrenCount() final
0150    {
0151       ++fNChildren;
0152       // propagate "children activation" upstream. named filters do the propagation via `TriggerChildrenCount`.
0153       if (fNChildren == 1 && fName.empty())
0154          fPrevNodePtr->IncrChildrenCount();
0155    }
0156 
0157    void TriggerChildrenCount() final
0158    {
0159       assert(!fName.empty()); // this method is to only be called on named filters
0160       fPrevNodePtr->IncrChildrenCount();
0161    }
0162 
0163    void AddFilterName(std::vector<std::string> &filters) final
0164    {
0165       fPrevNodePtr->AddFilterName(filters);
0166       auto name = (HasName() ? fName : fDiscardEntryWithMissingValue ? "FilterAvailable" : "FilterMissing");
0167       filters.push_back(name);
0168    }
0169 
0170    /// Clean-up operations to be performed at the end of a task.
0171    void FinalizeSlot(unsigned int slot) final { fValues[slot] = nullptr; }
0172 
0173    std::shared_ptr<RDFGraphDrawing::GraphNode>
0174    GetGraph(std::unordered_map<void *, std::shared_ptr<RDFGraphDrawing::GraphNode>> &visitedMap) final
0175    {
0176       // Recursively call for the previous node.
0177       auto prevNode = fPrevNodePtr->GetGraph(visitedMap);
0178       const auto &prevColumns = prevNode->GetDefinedColumns();
0179 
0180       auto thisNode = RDFGraphDrawing::CreateFilterNode(this, visitedMap);
0181 
0182       /* If the returned node is not new, there is no need to perform any other operation.
0183        * This is a likely scenario when building the entire graph in which branches share
0184        * some nodes. */
0185       if (!thisNode->IsNew()) {
0186          return thisNode;
0187       }
0188 
0189       auto upmostNode = AddDefinesToGraph(thisNode, fColRegister, prevColumns, visitedMap);
0190 
0191       // Keep track of the columns defined up to this point.
0192       thisNode->AddDefinedColumns(fColRegister.GenerateColumnNames());
0193 
0194       upmostNode->SetPrevNode(prevNode);
0195       return thisNode;
0196    }
0197 
0198    /// Return a clone of this Filter that works with values in the variationName "universe".
0199    std::shared_ptr<RNodeBase> GetVariedFilter(const std::string &variationName) final
0200    {
0201       // Only the nominal filter should be asked to produce varied filters
0202       assert(fVariation == "nominal");
0203       // nobody should ask for a varied filter for the nominal variation: they can just
0204       // use the nominal filter!
0205       assert(variationName != "nominal");
0206       // nobody should ask for a varied filter for a variation on which this filter does not depend:
0207       // they can just use the nominal filter.
0208       assert(RDFInternal::IsStrInVec(variationName, fVariations));
0209 
0210       auto it = fVariedFilters.find(variationName);
0211       if (it != fVariedFilters.end())
0212          return it->second;
0213 
0214       auto prevNode = fPrevNodePtr;
0215       if (static_cast<RNodeBase *>(fPrevNodePtr.get()) != static_cast<RNodeBase *>(fLoopManager) &&
0216           RDFInternal::IsStrInVec(variationName, prevNode->GetVariations()))
0217          prevNode = std::static_pointer_cast<PrevNode_t>(prevNode->GetVariedFilter(variationName));
0218 
0219       // the varied filters get a copy of the callable object.
0220       // TODO document this
0221       auto variedFilter = std::unique_ptr<RFilterBase>(new RFilterWithMissingValues<PrevNode_t>(
0222          fDiscardEntryWithMissingValue, std::move(prevNode), fColRegister, fColumnNames, fName, variationName));
0223       auto e = fVariedFilters.insert({variationName, std::move(variedFilter)});
0224       return e.first->second;
0225    }
0226 };
0227 
0228 } // namespace ROOT::Detail::RDF
0229 
0230 #endif // ROOT_RDF_RFilterWithMissingValues