Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /epic/src/FileLoaderHelper.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // SPDX-License-Identifier: LGPL-3.0-or-later
0002 // Copyright (C) 2022 Wouter Deconinck
0003 
0004 #pragma once
0005 
#include <DD4hep/DetFactoryHelper.h>
#include <DD4hep/Factories.h>
#include <DD4hep/Primitives.h>
#include <DD4hep/Printout.h>

#include <fmt/core.h>

#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <regex>
#include <string>
#include <system_error>
#include <vector>
0018 
0019 namespace fs = std::filesystem;
0020 
0021 using dd4hep::ERROR, dd4hep::WARNING, dd4hep::VERBOSE, dd4hep::INFO;
0022 using dd4hep::printout;
0023 
// Command templates used to fetch remote resources. Both are fmt format strings
// where {0} is the source URL and {1} is the local output path.
//
// They are `inline constexpr` (C++17 inline variables) rather than `static`:
// this header also defines an inline function that refers to them, and with
// internal linkage every translation unit would refer to a different object.
namespace FileLoaderHelper {
// Download over HTTP(S)/FTP/...: follow redirects, fail on HTTP errors, retry 5 times.
inline constexpr const char* kCurlCommand = "curl --retry 5 --location --fail {0} --output {1}";
// Copy from an XRootD server, retrying 5 times.
inline constexpr const char* kXrootdCommand = "xrdcp --retry 5 {0} {1}";
} // namespace FileLoaderHelper
0029 
0030 // Function to download files
0031 inline void EnsureFileFromURLExists(std::string url, std::string file, std::string cache_str = "") {
0032   // parse cache for environment variables
0033   auto pos = std::string::npos;
0034   while ((pos = cache_str.find('$')) != std::string::npos) {
0035     auto after = cache_str.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
0036                                              "abcdefghijklmnopqrstuvwxyz"
0037                                              "0123456789"
0038                                              "_",
0039                                              pos + 1);
0040     if (after == std::string::npos)
0041       after = cache_str.size(); // cache ends on env var
0042     const std::string env_name(cache_str.substr(pos + 1, after - pos - 1));
0043     auto env_ptr = std::getenv(env_name.c_str());
0044     const std::string env_value(env_ptr != nullptr ? env_ptr : "");
0045     cache_str.erase(pos, after - pos);
0046     cache_str.insert(pos, env_value);
0047     printout(INFO, "FileLoader", "$" + env_name + " -> " + env_value);
0048   }
0049 
0050   // tokenize cache on regex
0051   std::regex cache_sep(":");
0052   std::sregex_token_iterator cache_iter(cache_str.begin(), cache_str.end(), cache_sep, -1);
0053   std::sregex_token_iterator cache_end;
0054   std::vector<std::string> cache_vec(cache_iter, cache_end);
0055 
0056   // create file path
0057   fs::path file_path(file);
0058 
0059   // create hash from url, hex of unsigned long long
0060   std::string hash =
0061       fmt::format("{:016x}", dd4hep::detail::hash64(url)); // TODO: Use c++20 std::fmt
0062 
0063   // create file parent path, if not exists
0064   fs::path parent_path = file_path.parent_path();
0065   if (!fs::exists(parent_path)) {
0066     if (fs::create_directories(parent_path) == false) {
0067       printout(ERROR, "FileLoader", "parent path " + parent_path.string() + " cannot be created");
0068       printout(ERROR, "FileLoader", "hint: try running 'mkdir -p " + parent_path.string() + "'");
0069       std::_Exit(EXIT_FAILURE);
0070     }
0071   }
0072 
0073   // if file exists and is symlink to correct hash
0074   fs::path hash_path(parent_path / hash);
0075   if (fs::exists(file_path) && fs::exists(hash_path) && fs::equivalent(file_path, hash_path)) {
0076     printout(INFO, "FileLoader", "link " + file + " -> hash " + hash + " already exists");
0077     return;
0078   }
0079 
0080   if (fs::exists(fs::symlink_status(hash_path))) {
0081     printout(INFO, "FileLoader", "removing symlink \"" + hash_path.string() + "\"");
0082     remove(hash_path);
0083   }
0084 
0085   // if hash does not exist, we try to retrieve file from cache
0086   if (!fs::exists(hash_path)) {
0087     // recursive loop into cache directories
0088     bool success = false;
0089     for (auto cache : cache_vec) {
0090       fs::path cache_path(cache);
0091       printout(INFO, "FileLoader", "cache " + cache_path.string());
0092       if (fs::exists(cache_path)) {
0093         auto check_path = [&](const fs::path& cache_dir_path) {
0094           printout(VERBOSE, "FileLoader", "checking " + cache_dir_path.string());
0095           fs::path cache_hash_path = cache_dir_path / hash;
0096           if (fs::exists(cache_hash_path)) {
0097             // symlink hash to cache/.../hash
0098             printout(VERBOSE, "FileLoader",
0099                      "file " + file + " with hash " + hash + " found in " +
0100                          cache_hash_path.string());
0101             fs::path link_target;
0102             if (cache_hash_path.is_absolute()) {
0103               link_target = cache_hash_path;
0104             } else {
0105               link_target = fs::proximate(cache_hash_path, parent_path);
0106             }
0107             try {
0108               fs::create_symlink(link_target, hash_path);
0109               success = true;
0110             } catch (const fs::filesystem_error&) {
0111               printout(ERROR, "FileLoader",
0112                        "unable to link from " + hash_path.string() + " to " + link_target.string());
0113               std::_Exit(EXIT_FAILURE);
0114             }
0115             return true;
0116           }
0117           return false;
0118         };
0119         if (!check_path(cache_path)) {
0120           for (auto const& dir_entry : fs::recursive_directory_iterator(cache_path)) {
0121             if (!dir_entry.is_directory())
0122               continue;
0123             if (check_path(dir_entry.path())) {
0124               break;
0125             };
0126           }
0127         }
0128       }
0129       if (success)
0130         break;
0131     }
0132   }
0133 
0134   // if hash does not exist, we try to retrieve file from url
0135   if (!fs::exists(hash_path)) {
0136     std::string cmd;
0137 
0138     if (url.find("root://") == 0) {
0139       cmd = fmt::format(FileLoaderHelper::kXrootdCommand, url, hash_path.c_str());
0140     } else {
0141       cmd = fmt::format(FileLoaderHelper::kCurlCommand, url, hash_path.c_str());
0142     }
0143     printout(INFO, "FileLoader", "downloading " + file + " as hash " + hash + " with " + cmd);
0144 
0145     // run cmd
0146     auto ret = std::system(cmd.c_str());
0147     if (!fs::exists(hash_path)) {
0148       printout(ERROR, "FileLoader", "unable to run the download command " + cmd);
0149       printout(ERROR, "FileLoader", "the return value was ", ret);
0150       printout(ERROR, "FileLoader", "hint: check the command and try running manually");
0151       printout(ERROR, "FileLoader",
0152                "hint: allow insecure connections on some systems with the flag -k");
0153       std::_Exit(EXIT_FAILURE);
0154     }
0155   }
0156 
0157   // check if file is symlink
0158   if (fs::is_symlink(file_path)) {
0159     // file is symlink, i.e. valid symlink
0160     if (fs::exists(file_path)) {
0161       // file already exists
0162       fs::path symlink_target = fs::read_symlink(file_path);
0163       if (fs::exists(symlink_target) && fs::equivalent(hash_path, symlink_target)) {
0164         // link points to correct path
0165         return;
0166       } else {
0167         // link points to incorrect path -> remove symlink
0168         if (fs::remove(file_path) == false) {
0169           // failure mode: cannot remove incorrect symlink
0170           printout(ERROR, "FileLoader", "unable to remove symlink " + file_path.string());
0171           printout(ERROR, "FileLoader",
0172                    "we tried to create a symlink " + file_path.string() +
0173                        " to the actual resource, " +
0174                        "but a symlink already exists there and points to an incorrect location");
0175           printout(ERROR, "FileLoader",
0176                    "hint: this may be resolved by removing directory " + parent_path.string());
0177           printout(ERROR, "FileLoader",
0178                    "hint: or in that directory removing the file or link " + file_path.string());
0179           std::_Exit(EXIT_FAILURE);
0180         }
0181       }
0182     } else {
0183       // file does not exists, i.e. dead symllink -> remove symlink
0184       if (fs::remove(file_path) == false) {
0185         // failure mode; cannot remove dead symlink
0186         printout(ERROR, "FileLoader", "unable to remove symlink " + file_path.string());
0187         std::_Exit(EXIT_FAILURE);
0188       }
0189     }
0190   } else {
0191     if (fs::exists(file_path)) {
0192       // failure mode: file exists but not symlink, and we won't remove files
0193       printout(ERROR, "FileLoader",
0194                "file " + file_path.string() + " already exists but is not a symlink");
0195       printout(ERROR, "FileLoader",
0196                "we tried to create a symlink " + file_path.string() + " to the actual resource, " +
0197                    "but a file already exists there and we will not remove it automatically");
0198       printout(ERROR, "FileLoader", "hint: backup the file, remove it manually, and retry");
0199       std::_Exit(EXIT_FAILURE);
0200     }
0201   }
0202   // file_path now does not exist
0203 
0204   // symlink file_path to hash_path
0205   try {
0206     // use new path from hash so file link is local
0207     fs::create_symlink(fs::path(hash), file_path);
0208   } catch (const fs::filesystem_error&) {
0209     printout(ERROR, "FileLoader",
0210              "unable to link from " + file_path.string() + " to " + hash_path.string());
0211     printout(ERROR, "FileLoader", "check permissions and retry");
0212     std::_Exit(EXIT_FAILURE);
0213   }
0214 
0215   // final check of the file size
0216   if (fs::file_size(file_path) == 0) {
0217     printout(ERROR, "FileLoader",
0218              "zero file size of symlink from " + file_path.string() + " to (ultimately) " +
0219                  fs::canonical(file_path).string());
0220     printout(ERROR, "FileLoader",
0221              "hint: check whether the file " + fs::canonical(file_path).string() +
0222                  " has any content");
0223     printout(ERROR, "FileLoader", "hint: check whether the URL " + url + " has any content");
0224     std::_Exit(EXIT_FAILURE);
0225   }
0226 }