Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:12:11

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008 
0009 // This ROOT script compares two ROOT files in an order-insensitive way. Its
0010 // intended use is to compare the output of a single-threaded and multi-threaded
0011 // programs in order to check that results are perfectly reproducible.
0012 //
0013 // As a current limitation, which may be lifted in the future, the script does
0014 // all of its processing in RAM, which means that the input dataset must fit in
0015 // RAM. So do not try to run this on terabytes of data. You don't need that much
0016 // data to check that your multithreaded program runs well anyhow.
0017 //
0018 // Another limitation is that the comparison relies on perfect output
0019 // reproducibility, which is a very costly guarantee to achieve in a
0020 // multi-threaded environment. If you want to compare "slightly different"
0021 // outputs, this script will not work as currently written. I cannot think of a
0022 // way in which imperfect reproducibility could be checked in a manner which
0023 // doesn't depend on the details of the data being compared.
0024 
0025 #include <cstring>
0026 #include <map>
0027 #include <string>
0028 #include <utility>
0029 #include <vector>
0030 
0031 #include "TBranch.h"
0032 #include "TFile.h"
0033 #include "TKey.h"
0034 #include "TList.h"
0035 #include "TObject.h"
0036 #include "TTree.h"
0037 #include "TTreeReader.h"
0038 #include "compareRootFiles.hpp"
0039 
0040 // Minimal mechanism for assertion checking and comparison
0041 #define ASSERT(pred, msg)          \
0042   if (!(pred)) {                   \
0043     std::cout << msg << std::endl; \
0044     return 1;                      \
0045   }
0046 
0047 #define ASSERT_EQUAL(v1, v2, msg) \
0048   ASSERT((v1) == (v2), msg << "(" << (v1) << " vs " << (v2) << ") ")
0049 
0050 #define ASSERT_STR_EQUAL(s1, s2, msg) \
0051   ASSERT(strcmp((s1), (s2)) == 0, msg << " (" << (s1) << " vs " << (s2) << ") ")
0052 
0053 // This script returns 0 if the files have identical contents except for event
0054 // ordering, and a nonzero result if the contents differ or an error occurred.
0055 //
0056 // If the optional dump_data_on_failure flag is set, it will also dump the
0057 // mismatching event data to stdout on failure for manual inspection.
0058 //
0059 // If the optional skip_unsupported_branches flag is set, the script will ignore
0060 // unsupported branch types in the input file instead of aborting.
0061 //
0062 
0063 int compareRootFiles(std::string file1, std::string file2,
0064                      bool dump_data_on_failure = false,
0065                      bool skip_unsupported_branches = false) {
0066   std::cout << "Comparing ROOT files " << file1 << " and " << file2
0067             << std::endl;
0068 
0069   std::cout << "* Opening the files..." << std::endl;
0070   HomogeneousPair<TFile> files{file1.c_str(), file2.c_str()};
0071   if (files.first.IsZombie()) {
0072     std::cout << "  - Could not open file " << file1 << "!" << std::endl;
0073     return 2;
0074   } else if (files.second.IsZombie()) {
0075     std::cout << "  - Could not open file " << file2 << "!" << std::endl;
0076     return 2;
0077   }
0078 
0079   std::cout << "* Extracting file keys..." << std::endl;
0080   HomogeneousPair<std::vector<TKey*>> fileKeys;
0081   {
0082     // This is how we would extract keys from one file
0083     const auto loadKeys = [](const TFile& file, std::vector<TKey*>& target) {
0084       const int keyCount = file.GetNkeys();
0085       target.reserve(keyCount);
0086       TIter keyIter{file.GetListOfKeys()};
0087       for (int i = 0; i < keyCount; ++i) {
0088         target.emplace_back(dynamic_cast<TKey*>(keyIter()));
0089       }
0090     };
0091 
0092     // Do it for each of our files
0093     loadKeys(files.first, fileKeys.first);
0094     loadKeys(files.second, fileKeys.second);
0095   }
0096 
0097   std::cout << "* Selecting the latest key cycle..." << std::endl;
0098   std::vector<HomogeneousPair<TKey*>> keyPairs;
0099   {
0100     // For each file and for each key name, we want to know what is the latest
0101     // key cycle, and who is the associated key object
0102     using KeyMetadata = std::pair<short, TKey*>;
0103     using FileMetadata = std::map<std::string, KeyMetadata>;
0104     HomogeneousPair<FileMetadata> metadata;
0105 
0106     // This is how we compute this metadata for a single file
0107     const auto findLatestCycle = [](const std::vector<TKey*>& keys,
0108                                     FileMetadata& target) {
0109       // Iterate through the file's keys
0110       for (const auto key : keys) {
0111         // Extract information about the active key
0112         const std::string keyName{key->GetName()};
0113         const short newCycle{key->GetCycle()};
0114 
0115         // Do we already know of a key with the same name?
0116         auto latestCycleIter = target.find(keyName);
0117         if (latestCycleIter != target.end()) {
0118           // If so, keep the key with the most recent cycle number
0119           auto& latestCycleMetadata = latestCycleIter->second;
0120           if (newCycle > latestCycleMetadata.first) {
0121             latestCycleMetadata = {newCycle, key};
0122           }
0123         } else {
0124           // If not, this is obviously the most recent key we've seen so
0125           // far
0126           target.emplace(keyName, KeyMetadata{newCycle, key});
0127         }
0128       }
0129     };
0130 
0131     // We'll compute this information for both of our files...
0132     std::cout << "  - Finding the latest cycle for each file..." << std::endl;
0133     findLatestCycle(fileKeys.first, metadata.first);
0134     findLatestCycle(fileKeys.second, metadata.second);
0135 
0136     // ...and then we'll group the latest keys by name, detect keys which only
0137     // exist in a single file along the way, and report that as an error
0138     std::cout << "  - Grouping per-file latest keys..." << std::endl;
0139     {
0140       // Make sure that both files have the same amount of keys once duplicate
0141       // versions are removed
0142       const auto f1KeyCount = metadata.first.size();
0143       const auto f2KeyCount = metadata.second.size();
0144       ASSERT_EQUAL(f1KeyCount, f2KeyCount,
0145                    "    o Number of keys does not match");
0146       keyPairs.reserve(f1KeyCount);
0147 
0148       // Iterate through the keys, in the same order (as guaranteed by std::map)
0149       for (auto f1MetadataIter = metadata.first.cbegin(),
0150                 f2MetadataIter = metadata.second.cbegin();
0151            f1MetadataIter != metadata.first.cend();
0152            ++f1MetadataIter, ++f2MetadataIter) {
0153         // Do the keys have the same name?
0154         const auto& f1KeyName = f1MetadataIter->first;
0155         const auto& f2KeyName = f2MetadataIter->first;
0156         ASSERT_EQUAL(f1KeyName, f2KeyName, "    o Key names do not match");
0157 
0158         // If so, extract the associated key pair
0159         keyPairs.emplace_back(f1MetadataIter->second.second,
0160                               f2MetadataIter->second.second);
0161       }
0162     }
0163   }
0164 
0165   std::cout << "* Comparing key metadata..." << std::endl;
0166   for (const auto& keyPair : keyPairs) {
0167     const auto& key1 = keyPair.first;
0168     const auto& key2 = keyPair.second;
0169 
0170     ASSERT_STR_EQUAL(key1->GetClassName(), key2->GetClassName(),
0171                      "  - Class name does not match!");
0172     ASSERT_STR_EQUAL(key1->GetTitle(), key2->GetTitle(),
0173                      "  - Title does not match!");
0174     ASSERT_EQUAL(key1->GetVersion(), key2->GetVersion(),
0175                  "  - Key version does not match!");
0176   }
0177 
0178   // NOTE: The current version of this script only supports some file contents.
0179   //       It may be extended later if the need for other data formats arise.
0180   std::cout << "* Extracting TTrees..." << std::endl;
0181   std::vector<HomogeneousPair<TTree*>> treePairs;
0182   std::vector<HomogeneousPair<TVectorT<float>*>> vectorPairs;
0183   std::vector<HomogeneousPair<TEfficiency*>> efficiencyPairs;
0184   std::vector<HomogeneousPair<TProfile*>> profilePairs;
0185   std::vector<HomogeneousPair<TH2F*>> th2fPairs;
0186 
0187   for (const auto& keyPair : keyPairs) {
0188     TObject* obj1 = keyPair.first->ReadObj();
0189     TObject* obj2 = keyPair.second->ReadObj();
0190 
0191     ASSERT_STR_EQUAL(obj1->ClassName(), obj2->ClassName(),
0192                      "  - Object type does not match!");
0193 
0194     // Check if the object is a TTree
0195     bool isTTree = strcmp(obj1->ClassName(), "TTree") == 0;
0196 
0197     if (isTTree) {
0198       TTree* tree1 = dynamic_cast<TTree*>(obj1);
0199       TTree* tree2 = dynamic_cast<TTree*>(obj2);
0200       if (tree1 && tree2) {
0201         treePairs.emplace_back(tree1, tree2);
0202       }
0203       continue;  // Skip the rest of the loop
0204     }
0205 
0206     bool isTVector = strcmp(obj1->ClassName(), "TVectorT<float>") == 0;
0207 
0208     if (isTVector) {
0209       TVectorT<float>* vector1 = dynamic_cast<TVectorT<float>*>(obj1);
0210       TVectorT<float>* vector2 = dynamic_cast<TVectorT<float>*>(obj2);
0211       if (vector1 && vector2) {
0212         vectorPairs.emplace_back(vector1, vector2);
0213       }
0214       continue;  // Skip the rest of the loop
0215     }
0216 
0217     bool isTEfficiency = strcmp(obj1->ClassName(), "TEfficiency") == 0;
0218 
0219     if (isTEfficiency) {
0220       TEfficiency* efficiency1 = dynamic_cast<TEfficiency*>(obj1);
0221       TEfficiency* efficiency2 = dynamic_cast<TEfficiency*>(obj2);
0222       if (efficiency1 && efficiency2) {
0223         efficiencyPairs.emplace_back(efficiency1, efficiency2);
0224       }
0225       continue;  // Skip the rest of the loop
0226     }
0227 
0228     bool isTProfile = strcmp(obj1->ClassName(), "TProfile") == 0;
0229 
0230     if (isTProfile) {
0231       TProfile* profile1 = dynamic_cast<TProfile*>(obj1);
0232       TProfile* profile2 = dynamic_cast<TProfile*>(obj2);
0233       if (profile1 && profile2) {
0234         profilePairs.emplace_back(profile1, profile2);
0235       }
0236       continue;  // Skip the rest of the loop
0237     }
0238 
0239     bool isTH2F = strcmp(obj1->ClassName(), "TH2F") == 0;
0240 
0241     if (isTH2F) {
0242       TH2F* th2f1 = dynamic_cast<TH2F*>(obj1);
0243       TH2F* th2f2 = dynamic_cast<TH2F*>(obj2);
0244       if (th2f1 && th2f2) {
0245         th2fPairs.emplace_back(th2f1, th2f2);
0246       }
0247       continue;  // Skip the rest of the loop
0248     }
0249 
0250     ASSERT(false, "  - Input " << obj1->ClassName() << " is not supported!");
0251   }
0252 
0253   std::cout << "* Comparing the trees..." << std::endl;
0254   for (const auto& treePair : treePairs) {
0255     const auto& tree1 = treePair.first;
0256     const auto& tree2 = treePair.second;
0257 
0258     std::cout << "  - Comparing tree " << tree1->GetName() << "..."
0259               << std::endl;
0260 
0261     std::cout << "    o Comparing tree-wide metadata..." << std::endl;
0262     const std::size_t t1EntryCount = tree1->GetEntries();
0263     {
0264       const std::size_t t2EntryCount = tree2->GetEntries();
0265       ASSERT_EQUAL(t1EntryCount, t2EntryCount,
0266                    "      ~ Number of entries does not match!");
0267     }
0268 
0269     if (t1EntryCount == 0) {
0270       std::cout << "    o Skipping empty tree!" << std::endl;
0271       continue;
0272     }
0273 
0274     std::cout << "    o Preparing for tree readout..." << std::endl;
0275     TTreeReader t1Reader(tree1);
0276     TTreeReader t2Reader(tree2);
0277     BranchComparisonHarness::TreeMetadata treeMetadata{t1Reader, t2Reader,
0278                                                        t1EntryCount};
0279 
0280     std::cout << "    o Comparing branch metadata..." << std::endl;
0281     std::vector<HomogeneousPair<TBranch*>> branchPairs;
0282     {
0283       // Check number of branches and allocate branch storage
0284       const int t1BranchCount = tree1->GetNbranches();
0285       const int t2BranchCount = tree2->GetNbranches();
0286       ASSERT_EQUAL(t1BranchCount, t2BranchCount,
0287                    "      ~ Number of branches does not match!");
0288       branchPairs.reserve(t1BranchCount);
0289 
0290       // Extract branches using TTree::GetListOfBranches()
0291       TIter t1BranchIter{tree1->GetListOfBranches()};
0292       TIter t2BranchIter{tree2->GetListOfBranches()};
0293       for (int i = 0; i < t1BranchCount; ++i) {
0294         branchPairs.emplace_back(dynamic_cast<TBranch*>(t1BranchIter()),
0295                                  dynamic_cast<TBranch*>(t2BranchIter()));
0296       }
0297     }
0298 
0299     std::cout << "    o Setting up branch-specific processing..." << std::endl;
0300     std::vector<BranchComparisonHarness> branchComparisonHarnesses;
0301     branchComparisonHarnesses.reserve(branchPairs.size());
0302     for (const auto& branchPair : branchPairs) {
0303       const auto& branch1 = branchPair.first;
0304       const auto& branch2 = branchPair.second;
0305 
0306       std::cout << "      ~ Checking branch metadata..." << std::endl;
0307       std::string b1ClassName, b1BranchName;
0308       EDataType b1DataType;
0309       {
0310         std::string b2ClassName, b2BranchName;
0311         EDataType b2DataType;
0312         TClass* unused;
0313 
0314         b1ClassName = branch1->GetClassName();
0315         b2ClassName = branch2->GetClassName();
0316         ASSERT_EQUAL(b1ClassName, b2ClassName,
0317                      "        + Class name does not match!");
0318         branch1->GetExpectedType(unused, b1DataType);
0319         branch2->GetExpectedType(unused, b2DataType);
0320         ASSERT_EQUAL(b1DataType, b2DataType,
0321                      "        + Raw data type does not match!");
0322         const int b1LeafCount = branch1->GetNleaves();
0323         const int b2LeafCount = branch2->GetNleaves();
0324         ASSERT_EQUAL(b1LeafCount, b2LeafCount,
0325                      "        + Number of leaves does not match!");
0326         ASSERT_EQUAL(
0327             b1LeafCount, 1,
0328             "        + Branches with several leaves are not supported!");
0329         b1BranchName = branch1->GetName();
0330         b2BranchName = branch2->GetName();
0331         ASSERT_EQUAL(b1BranchName, b2BranchName,
0332                      "        + Branch name does not match!");
0333       }
0334 
0335       std::cout << "      ~ Building comparison harness for branch "
0336                 << b1BranchName << "..." << std::endl;
0337       try {
0338         auto branchHarness = BranchComparisonHarness::create(
0339             treeMetadata, b1BranchName, b1DataType, b1ClassName);
0340         branchComparisonHarnesses.emplace_back(std::move(branchHarness));
0341       } catch (BranchComparisonHarness::UnsupportedBranchType) {
0342         // When encountering an unsupported branch type, we can either skip
0343         // the branch or abort depending on configuration
0344         std::cout << "        + Unsupported branch type! "
0345                   << "(eDataType: " << b1DataType << ", ClassName: \""
0346                   << b1ClassName << "\")" << std::endl;
0347         if (skip_unsupported_branches) {
0348           continue;
0349         } else {
0350           return 3;
0351         }
0352       }
0353     }
0354 
0355     std::cout << "    o Reading event data..." << std::endl;
0356     for (std::size_t i = 0; i < t1EntryCount; ++i) {
0357       // Move to the next TTree entry (= next event)
0358       t1Reader.Next();
0359       t2Reader.Next();
0360 
0361       // Load the data associated with each branch
0362       for (auto& branchHarness : branchComparisonHarnesses) {
0363         branchHarness.loadCurrentEvent();
0364       }
0365     }
0366 
0367     std::cout << "    o Sorting the first tree..." << std::endl;
0368     {
0369       std::cout << "      ~ Defining event comparison operator..." << std::endl;
0370       IndexComparator t1CompareEvents = [&branchComparisonHarnesses](
0371                                             std::size_t i,
0372                                             std::size_t j) -> Ordering {
0373         for (auto& branchHarness : branchComparisonHarnesses) {
0374           const auto order = branchHarness.sortHarness.first.first(i, j);
0375           if (order != Ordering::EQUAL) {
0376             return order;
0377           }
0378         }
0379         return Ordering::EQUAL;
0380       };
0381 
0382       std::cout << "      ~ Defining event swapping operator..." << std::endl;
0383       IndexSwapper t1SwapEvents = [&branchComparisonHarnesses](std::size_t i,
0384                                                                std::size_t j) {
0385         for (auto& branchHarness : branchComparisonHarnesses) {
0386           branchHarness.sortHarness.first.second(i, j);
0387         }
0388       };
0389 
0390       std::cout << "      ~ Running quicksort on the tree..." << std::endl;
0391       quickSort(0, t1EntryCount - 1, t1CompareEvents, t1SwapEvents);
0392     }
0393 
0394     std::cout << "    o Sorting the second tree..." << std::endl;
0395     {
0396       std::cout << "      ~ Defining event comparison operator..." << std::endl;
0397       IndexComparator t2CompareEvents = [&branchComparisonHarnesses](
0398                                             std::size_t i,
0399                                             std::size_t j) -> Ordering {
0400         for (auto& branchHarness : branchComparisonHarnesses) {
0401           const auto order = branchHarness.sortHarness.second.first(i, j);
0402           if (order != Ordering::EQUAL) {
0403             return order;
0404           }
0405         }
0406         return Ordering::EQUAL;
0407       };
0408 
0409       std::cout << "      ~ Defining event swapping operator..." << std::endl;
0410       IndexSwapper t2SwapEvents = [&branchComparisonHarnesses](std::size_t i,
0411                                                                std::size_t j) {
0412         for (auto& branchHarness : branchComparisonHarnesses) {
0413           branchHarness.sortHarness.second.second(i, j);
0414         }
0415       };
0416 
0417       std::cout << "      ~ Running quicksort on the tree..." << std::endl;
0418       quickSort(0, t1EntryCount - 1, t2CompareEvents, t2SwapEvents);
0419     }
0420 
0421     std::cout << "    o Checking that both trees are now equal..." << std::endl;
0422     for (auto& branchHarness : branchComparisonHarnesses) {
0423       std::cout << "      ~ Comparing branch " << branchHarness.branchName
0424                 << "..." << std::endl;
0425       if (!branchHarness.eventDataEqual()) {
0426         std::cout << "        + Branch contents do not match!" << std::endl;
0427         if (dump_data_on_failure) {
0428           std::cout << "        + Dumping branch contents:" << std::endl;
0429           branchHarness.dumpEventData();
0430         }
0431         return 4;
0432       }
0433     }
0434   }
0435 
0436   std::cout << "* Comparing the vectors..." << std::endl;
0437   for (const auto& vectorPair : vectorPairs) {
0438     const auto& vector1 = vectorPair.first;
0439     const auto& vector2 = vectorPair.second;
0440 
0441     std::cout << "  - Comparing vector " << vector1->GetName() << "..."
0442               << std::endl;
0443 
0444     std::cout << "    o Comparing vector-wide metadata..." << std::endl;
0445     const std::size_t v1Size = vector1->GetNoElements();
0446     {
0447       const std::size_t v2Size = vector2->GetNoElements();
0448       ASSERT_EQUAL(v1Size, v2Size,
0449                    "      ~ Number of elements does not match!");
0450     }
0451 
0452     if (v1Size == 0) {
0453       std::cout << "    o Skipping empty vector!" << std::endl;
0454       continue;
0455     }
0456 
0457     std::cout << "    o Comparing vector data..." << std::endl;
0458     for (std::size_t i = 0; i < v1Size; ++i) {
0459       ASSERT_EQUAL(vector1->operator[](i), vector2->operator[](i),
0460                    "      ~ Vector elements do not match!");
0461     }
0462   }
0463 
0464   std::cout << "* Comparing the efficiencies..." << std::endl;
0465   for (const auto& efficiencyPair : efficiencyPairs) {
0466     const auto& efficiency1 = efficiencyPair.first;
0467     const auto& efficiency2 = efficiencyPair.second;
0468 
0469     std::cout << "  - Comparing efficiency " << efficiency1->GetName() << "..."
0470               << std::endl;
0471 
0472     std::cout << "    o Comparing efficiency-wide metadata..." << std::endl;
0473     const std::size_t e1Size = efficiency1->GetTotalHistogram()->GetEntries();
0474     {
0475       const std::size_t e2Size = efficiency2->GetTotalHistogram()->GetEntries();
0476       ASSERT_EQUAL(e1Size, e2Size, "      ~ Number of entries does not match!");
0477     }
0478 
0479     if (e1Size == 0) {
0480       std::cout << "    o Skipping empty efficiency!" << std::endl;
0481       continue;
0482     }
0483 
0484     std::cout << "    o Comparing efficiency data..." << std::endl;
0485     for (std::size_t i = 0; i < e1Size; ++i) {
0486       ASSERT_EQUAL(efficiency1->GetEfficiency(i), efficiency2->GetEfficiency(i),
0487                    "      ~ Efficiency elements do not match!");
0488     }
0489   }
0490 
0491   std::cout << "* Comparing the profiles..." << std::endl;
0492   for (const auto& profilePair : profilePairs) {
0493     const auto& profile1 = profilePair.first;
0494     const auto& profile2 = profilePair.second;
0495 
0496     std::cout << "  - Comparing profile " << profile1->GetName() << "..."
0497               << std::endl;
0498 
0499     std::cout << "    o Comparing profile-wide metadata..." << std::endl;
0500     const std::size_t p1Size = profile1->GetEntries();
0501     {
0502       const std::size_t p2Size = profile2->GetEntries();
0503       ASSERT_EQUAL(p1Size, p2Size, "      ~ Number of entries does not match!");
0504     }
0505 
0506     if (p1Size == 0) {
0507       std::cout << "    o Skipping empty profile!" << std::endl;
0508       continue;
0509     }
0510 
0511     std::cout << "    o Comparing profile data..." << std::endl;
0512     for (std::size_t i = 0; i < p1Size; ++i) {
0513       ASSERT_EQUAL(profile1->GetBinContent(i), profile2->GetBinContent(i),
0514                    "      ~ Profile elements do not match!");
0515     }
0516   }
0517 
0518   std::cout << "* Comparing the TH2Fs..." << std::endl;
0519   for (const auto& th2fPair : th2fPairs) {
0520     const auto& th2f1 = th2fPair.first;
0521     const auto& th2f2 = th2fPair.second;
0522 
0523     std::cout << "  - Comparing TH2F " << th2f1->GetName() << "..."
0524               << std::endl;
0525 
0526     std::cout << "    o Comparing TH2F-wide metadata..." << std::endl;
0527     const std::size_t th2f1Size = th2f1->GetEntries();
0528     {
0529       const std::size_t th2f2Size = th2f2->GetEntries();
0530       ASSERT_EQUAL(th2f1Size, th2f2Size,
0531                    "      ~ Number of entries does not match!");
0532     }
0533 
0534     if (th2f1Size == 0) {
0535       std::cout << "    o Skipping empty TH2F!" << std::endl;
0536       continue;
0537     }
0538 
0539     std::cout << "    o Comparing TH2F data..." << std::endl;
0540     for (std::size_t i = 0; i < th2f1Size; ++i) {
0541       ASSERT_EQUAL(th2f1->GetBinContent(i), th2f2->GetBinContent(i),
0542                    "      ~ TH2F elements do not match!");
0543     }
0544   }
0545 
0546   std::cout << "* Input files are equal, event order aside!" << std::endl;
0547   return 0;
0548 }