Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:58

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <chrono>
0021 #include <cstdint>
0022 #include <functional>
0023 #include <iosfwd>
0024 #include <memory>
0025 #include <string>
0026 #include <utility>
0027 #include <vector>
0028 
0029 #include "arrow/filesystem/type_fwd.h"
0030 #include "arrow/io/interfaces.h"
0031 #include "arrow/type_fwd.h"
0032 #include "arrow/util/compare.h"
0033 #include "arrow/util/macros.h"
0034 #include "arrow/util/type_fwd.h"
0035 #include "arrow/util/visibility.h"
0036 #include "arrow/util/windows_fixup.h"
0037 
0038 namespace arrow {
0039 namespace fs {
0040 
0041 using arrow::util::Uri;
0042 
0043 // A std::chrono::system_clock time point counted in nanoseconds since the
0044 // clock's epoch; the nanosecond count is 64 bits wide or more.
0045 using TimePoint =
0046     std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
0047 
0048 ARROW_EXPORT std::string ToString(FileType);
0049 
0050 ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType);
0051 
0052 static const int64_t kNoSize{-1};  // FileInfo size when none is available
0053 static const TimePoint kNoTime{TimePoint::duration{-1}};  // likewise for mtime
0054 
0055 /// \brief Metadata describing one filesystem entry
0056 struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {
0057   FileInfo() = default;
0058   FileInfo(const FileInfo&) = default;
0059   FileInfo& operator=(const FileInfo&) = default;
0060   FileInfo(FileInfo&&) = default;
0061   FileInfo& operator=(FileInfo&&) = default;
0062 
0063   explicit FileInfo(std::string path, FileType type = FileType::Unknown)
0064       : path_{std::move(path)}, type_{type} {}
0065 
0066   /// The type of the entry
0067   FileType type() const { return type_; }
0068   void set_type(FileType type) { type_ = type; }
0069 
0070   /// The entry's full path in the filesystem
0071   const std::string& path() const { return path_; }
0072   void set_path(std::string path) { path_ = std::move(path); }
0073 
0074   /// The base name, i.e. the component following the last directory separator
0075   std::string base_name() const;
0076 
0077   /// The directory name, i.e. the component preceding the file base name
0078   std::string dir_name() const;
0079 
0080   /// The size in bytes, when one is available
0081   ///
0082   /// A size is only guaranteed for regular files.
0083   int64_t size() const { return size_; }
0084   void set_size(int64_t size) { size_ = size; }
0085 
0086   /// The file extension, without the dot
0087   std::string extension() const;
0088 
0089   /// The last modification time, when one is available
0090   TimePoint mtime() const { return mtime_; }
0091   void set_mtime(TimePoint mtime) { mtime_ = mtime; }
0092 
0093   bool IsFile() const { return FileType::File == type_; }
0094   bool IsDirectory() const { return FileType::Directory == type_; }
0095 
0096   bool Equals(const FileInfo& other) const {
0097     const bool same_entry = type_ == other.type_ && path_ == other.path_;
0098     return same_entry && size_ == other.size_ && mtime_ == other.mtime_;
0099   }
0100 
0101   std::string ToString() const;
0102 
0103   /// Functor providing both a less-than ordering and a hash, each keyed on
0104   /// the path only, so that infos can be sorted, used as container keys,
0105   /// or otherwise handed to the STL.
0106   struct ByPath {
0107     bool operator()(const FileInfo& lhs, const FileInfo& rhs) const {
0108       return lhs.path().compare(rhs.path()) < 0;
0109     }
0110 
0111     size_t operator()(const FileInfo& info) const {
0112       return std::hash<std::string>()(info.path());
0113     }
0114   };
0115 
0116  protected:
0117   std::string path_;
0118   FileType type_{FileType::Unknown};
0119   int64_t size_{kNoSize};
0120   TimePoint mtime_{kNoTime};
0121 };
0122 
0123 ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const FileInfo&);
0124 
0125 /// \brief File selector for filesystem APIs
0126 struct ARROW_EXPORT FileSelector {
0127   /// The directory in which to select files.
0128   /// If the path exists but doesn't point to a directory, this should be an error.
0129   std::string base_dir;
0130   /// The behavior if `base_dir` isn't found in the filesystem.  If false,
0131   /// an error is returned.  If true, an empty selection is returned.
0132   bool allow_not_found;
0133   /// Whether to recurse into subdirectories.
0134   bool recursive;
0135   /// The maximum number of subdirectories to recurse into.
0136   int32_t max_recursion;
0137 
0138   FileSelector() : allow_not_found(false), recursive(false), max_recursion(INT32_MAX) {}
0139 };
0140 
0141 /// \brief A FileSystem paired with a path
0142 struct ARROW_EXPORT FileLocator {
0143   std::shared_ptr<FileSystem> filesystem;
0144   std::string path;
0145 };
0146 
0147 using FileInfoVector = std::vector<FileInfo>;  // a batch of FileInfo entries
0148 using FileInfoGenerator = std::function<Future<FileInfoVector>()>;  // async batch source
0149 
0150 }  // namespace fs
0151 
0152 template <>  // an empty FileInfoVector serves as the end-of-iteration marker
0153 struct IterationTraits<fs::FileInfoVector> {
0154   static fs::FileInfoVector End() { return fs::FileInfoVector(); }
0155   static bool IsEnd(const fs::FileInfoVector& val) { return val.size() == 0; }
0156 };
0157 
0158 namespace fs {
0159 
0160 /// \brief Abstract file system API
0161 class ARROW_EXPORT FileSystem
0162     /// \cond false
0163     : public std::enable_shared_from_this<FileSystem>
0164 /// \endcond
0165 {  // NOLINT
0166  public:
0167   virtual ~FileSystem();
0168 
0169   virtual std::string type_name() const = 0;
0170 
0171   /// EXPERIMENTAL: The IOContext associated with this filesystem.
0172   const io::IOContext& io_context() const { return io_context_; }
0173 
0174   /// Normalize path for the given filesystem
0175   ///
0176   /// The default implementation of this method is a no-op, but subclasses
0177   /// may allow normalizing irregular path forms (such as Windows local paths).
0178   virtual Result<std::string> NormalizePath(std::string path);
0179 
0180   /// \brief Ensure a URI (or path) is compatible with the given filesystem and return the
0181   ///        path
0182   ///
0183   /// \param uri_string A URI representing a resource in the given filesystem.
0184   ///
0185   /// This method will check to ensure the given filesystem is compatible with the
0186   /// URI. This can be useful when the user provides both a URI and a filesystem or
0187   /// when a user provides multiple URIs that should be compatible with the same
0188   /// filesystem.
0189   ///
0190   /// uri_string can be an absolute path instead of a URI.  In that case it will ensure
0191   /// the filesystem (if supplied) is the local filesystem (or some custom filesystem that
0192   /// is capable of reading local paths) and will normalize the path's file separators.
0193   ///
0194   /// Note, this method only checks to ensure the URI scheme is valid.  It will not detect
0195   /// inconsistencies like a mismatching region or endpoint override.
0196   ///
0197   /// \return The path inside the filesystem that is indicated by the URI.
0198   virtual Result<std::string> PathFromUri(const std::string& uri_string) const;
0199 
0200   /// \brief Make a URI from which FileSystemFromUri produces an equivalent filesystem
0201   /// \param path The path component to use in the resulting URI
0202   /// \return A URI string, or an error if an equivalent URI cannot be produced
0203   virtual Result<std::string> MakeUri(std::string path) const;
0204 
0205   virtual bool Equals(const FileSystem& other) const = 0;
0206 
0207   virtual bool Equals(const std::shared_ptr<FileSystem>& other) const {
0208     return other != nullptr && Equals(*other);  // a null pointer never compares equal
0209   }
0210 
0211   /// Get info for the given target.
0212   ///
0213   /// Any symlink is automatically dereferenced, recursively.
0214   /// A nonexistent or unreachable file returns an Ok status and
0215   /// has a FileType of value NotFound.  An error status indicates
0216   /// a truly exceptional condition (low-level I/O error, etc.).
0217   virtual Result<FileInfo> GetFileInfo(const std::string& path) = 0;
0218   /// Same, for many targets at once.
0219   virtual Result<FileInfoVector> GetFileInfo(const std::vector<std::string>& paths);
0220   /// Same, according to a selector.
0221   ///
0222   /// The selector's base directory will not be part of the results, even if
0223   /// it exists.
0224   /// If it doesn't exist, see `FileSelector::allow_not_found`.
0225   virtual Result<FileInfoVector> GetFileInfo(const FileSelector& select) = 0;
0226 
0227   /// Async version of GetFileInfo
0228   virtual Future<FileInfoVector> GetFileInfoAsync(const std::vector<std::string>& paths);
0229 
0230   /// Streaming async version of GetFileInfo
0231   ///
0232   /// The returned generator is not async-reentrant, i.e. you need to wait for
0233   /// the returned future to complete before calling the generator again.
0234   virtual FileInfoGenerator GetFileInfoGenerator(const FileSelector& select);
0235 
0236   /// Create a directory and subdirectories.
0237   ///
0238   /// This function succeeds if the directory already exists.
0239   virtual Status CreateDir(const std::string& path, bool recursive) = 0;
0240   Status CreateDir(const std::string& path) { return CreateDir(path, true); }
0241 
0242   /// Delete a directory and its contents, recursively.
0243   virtual Status DeleteDir(const std::string& path) = 0;
0244 
0245   /// Delete a directory's contents, recursively.
0246   ///
0247   /// Like DeleteDir, but doesn't delete the directory itself.
0248   /// Passing an empty path ("" or "/") is disallowed, see DeleteRootDirContents.
0249   virtual Status DeleteDirContents(const std::string& path, bool missing_dir_ok) = 0;
0250   Status DeleteDirContents(const std::string& path) {
0251     return DeleteDirContents(path, false);
0252   }
0253 
0254   /// Async version of DeleteDirContents.
0255   virtual Future<> DeleteDirContentsAsync(const std::string& path, bool missing_dir_ok);
0256 
0257   /// Async version of DeleteDirContents.
0258   /// This overload is documented as allowing missing directories.
0259   /// NOTE(review): the sync one-argument overload passes false instead; confirm.
0260   Future<> DeleteDirContentsAsync(const std::string& path);
0261 
0262   /// EXPERIMENTAL: Delete the root directory's contents, recursively.
0263   ///
0264   /// Implementations may decide to raise an error if this operation is
0265   /// too dangerous.
0266   // NOTE: may decide to remove this if it's deemed not useful
0267   virtual Status DeleteRootDirContents() = 0;
0268 
0269   /// Delete a file.
0270   virtual Status DeleteFile(const std::string& path) = 0;
0271   /// Delete many files.
0272   ///
0273   /// The default implementation issues individual delete operations in sequence.
0274   virtual Status DeleteFiles(const std::vector<std::string>& paths);
0275 
0276   /// Move / rename a file or directory.
0277   ///
0278   /// If the destination exists:
0279   /// - if it is a non-empty directory, an error is returned
0280   /// - otherwise, if it has the same type as the source, it is replaced
0281   /// - otherwise, behavior is unspecified (implementation-dependent).
0282   virtual Status Move(const std::string& src, const std::string& dest) = 0;
0283 
0284   /// Copy a file.
0285   ///
0286   /// If the destination exists and is a directory, an error is returned.
0287   /// Otherwise, it is replaced.
0288   virtual Status CopyFile(const std::string& src, const std::string& dest) = 0;
0289 
0290   /// Open an input stream for sequential reading.
0291   virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(
0292       const std::string& path) = 0;
0293 
0294   /// Open an input stream for sequential reading.
0295   ///
0296   /// This overload assumes the given FileInfo validly represents the file's
0297   /// characteristics, and may optimize access depending on them (for example
0298   /// avoid querying the file size or its existence).
0299   virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info);
0300 
0301   /// Open an input file for random access reading.
0302   virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0303       const std::string& path) = 0;
0304 
0305   /// Open an input file for random access reading.
0306   ///
0307   /// This overload assumes the given FileInfo validly represents the file's
0308   /// characteristics, and may optimize access depending on them (for example
0309   /// avoid querying the file size or its existence).
0310   virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0311       const FileInfo& info);
0312 
0313   /// Async version of OpenInputStream
0314   virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0315       const std::string& path);
0316 
0317   /// Async version of OpenInputStream
0318   virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0319       const FileInfo& info);
0320 
0321   /// Async version of OpenInputFile
0322   virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0323       const std::string& path);
0324 
0325   /// Async version of OpenInputFile
0326   virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0327       const FileInfo& info);
0328 
0329   /// Open an output stream for sequential writing.
0330   ///
0331   /// If the target already exists, existing data is truncated.
0332   virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
0333       const std::string& path,
0334       const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
0335   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path);
0336 
0337   /// Open an output stream for appending.
0338   ///
0339   /// If the target doesn't exist, a new empty file is created.
0340   ///
0341   /// Note: some filesystem implementations do not support efficient appending
0342   /// to an existing file, in which case this method will return NotImplemented.
0343   /// Consider writing to multiple files (using e.g. the dataset layer) instead.
0344   virtual Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
0345       const std::string& path,
0346       const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
0347   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(const std::string& path);
0348 
0349  protected:
0350   explicit FileSystem(io::IOContext io_context = io::default_io_context())
0351       : io_context_(std::move(io_context)) {}
0352 
0353   io::IOContext io_context_;
0354   // Whether metadata operations (such as GetFileInfo or OpenInputStream)
0355   // are cheap enough that the default async variants don't bother with
0356   // a thread pool.
0357   bool default_async_is_sync_ = true;
0358 };
0359 
0360 struct FileSystemFactory {
0361   std::function<Result<std::shared_ptr<FileSystem>>(
0362       const Uri& uri, const io::IOContext& io_context, std::string* out_path)>
0363       function;
0364   std::string_view file;  // source file of the registrar's definition
0365   int line = 0;           // source line; zero unless set, never indeterminate
0366 
0367   bool operator==(const FileSystemFactory& other) const {
0368     // In the case where libarrow is linked statically both to the executable and to a
0369     // dynamically loaded filesystem implementation library, the library contains a
0370     // duplicate definition of the registry and duplicate definitions of any
0371     // FileSystemRegistrars which are statically linked to libarrow. When retrieving
0372     // factories from the filesystem implementation library, we use the file and line
0373     // of the registrar's definition to determine equivalence of the duplicate factories.
0374     return file == other.file && line == other.line;
0375   }
0376 };
0377 
0378 /// \brief A FileSystem implementation that delegates to another
0379 /// implementation after prepending a fixed base path.
0380 ///
0381 /// This is useful to expose a logical view of a subtree of a filesystem,
0382 /// for example a directory in a LocalFileSystem.
0383 /// This works on abstract paths, i.e. paths using forward slashes and
0384 /// a single root "/".  Windows paths are not guaranteed to work.
0385 /// This makes no security guarantee.  For example, symlinks may make it possible
0386 /// to "escape" the subtree and access other parts of the underlying filesystem.
0387 class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
0388  public:
0389   // This constructor may abort if base_path is invalid.
0390   explicit SubTreeFileSystem(const std::string& base_path,
0391                              std::shared_ptr<FileSystem> base_fs);
0392   ~SubTreeFileSystem() override;
0393 
0394   std::string type_name() const override { return "subtree"; }
0395   std::string base_path() const { return base_path_; }
0396   std::shared_ptr<FileSystem> base_fs() const { return base_fs_; }
0397 
0398   Result<std::string> NormalizePath(std::string path) override;
0399   Result<std::string> PathFromUri(const std::string& uri_string) const override;
0400 
0401   bool Equals(const FileSystem& other) const override;
0402 
0403   /// \cond FALSE
0404   using FileSystem::CreateDir;
0405   using FileSystem::DeleteDirContents;
0406   using FileSystem::GetFileInfo;
0407   using FileSystem::OpenAppendStream;
0408   using FileSystem::OpenOutputStream;
0409   /// \endcond
0410 
0411   Result<FileInfo> GetFileInfo(const std::string& path) override;
0412   Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
0413 
0414   FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
0415 
0416   Status CreateDir(const std::string& path, bool recursive) override;
0417 
0418   Status DeleteDir(const std::string& path) override;
0419   Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;
0420   Status DeleteRootDirContents() override;
0421 
0422   Status DeleteFile(const std::string& path) override;
0423 
0424   Status Move(const std::string& src, const std::string& dest) override;
0425 
0426   Status CopyFile(const std::string& src, const std::string& dest) override;
0427 
0428   Result<std::shared_ptr<io::InputStream>> OpenInputStream(
0429       const std::string& path) override;
0430   Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
0431   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0432       const std::string& path) override;
0433   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0434       const FileInfo& info) override;
0435 
0436   Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0437       const std::string& path) override;
0438   Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0439       const FileInfo& info) override;
0440   Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0441       const std::string& path) override;
0442   Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0443       const FileInfo& info) override;
0444 
0445   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
0446       const std::string& path,
0447       const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0448   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
0449       const std::string& path,
0450       const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0451 
0452  protected:
0453   SubTreeFileSystem() = default;
0454 
0455   const std::string base_path_;
0456   std::shared_ptr<FileSystem> base_fs_;
0457 
0458   Result<std::string> PrependBase(const std::string& s) const;
0459   Result<std::string> PrependBaseNonEmpty(const std::string& s) const;
0460   Result<std::string> StripBase(const std::string& s) const;
0461   Status FixInfo(FileInfo* info) const;
0462 
0463   static Result<std::string> NormalizeBasePath(
0464       std::string base_path, const std::shared_ptr<FileSystem>& base_fs);
0465 };
0466 
0467 /// \brief A FileSystem implementation that delegates to another
0468 /// implementation (the `base_fs`) but inserts latencies at various points.
0469 class ARROW_EXPORT SlowFileSystem : public FileSystem {
0470  public:
0471   SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
0472                  std::shared_ptr<io::LatencyGenerator> latencies);
0473   SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency);
0474   SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency,
0475                  int32_t seed);
0476 
0477   std::string type_name() const override { return "slow"; }
0478   bool Equals(const FileSystem& other) const override;
0479   Result<std::string> PathFromUri(const std::string& uri_string) const override;
0480 
0481   /// \cond FALSE
0482   using FileSystem::CreateDir;
0483   using FileSystem::DeleteDirContents;
0484   using FileSystem::GetFileInfo;
0485   using FileSystem::OpenAppendStream;
0486   using FileSystem::OpenOutputStream;
0487   /// \endcond
0488 
0489   Result<FileInfo> GetFileInfo(const std::string& path) override;
0490   Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
0491 
0492   Status CreateDir(const std::string& path, bool recursive) override;
0493 
0494   Status DeleteDir(const std::string& path) override;
0495   Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;
0496   Status DeleteRootDirContents() override;
0497 
0498   Status DeleteFile(const std::string& path) override;
0499 
0500   Status Move(const std::string& src, const std::string& dest) override;
0501 
0502   Status CopyFile(const std::string& src, const std::string& dest) override;
0503 
0504   Result<std::shared_ptr<io::InputStream>> OpenInputStream(
0505       const std::string& path) override;
0506   Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
0507   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0508       const std::string& path) override;
0509   Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0510       const FileInfo& info) override;
0511   Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
0512       const std::string& path,
0513       const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0514   Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
0515       const std::string& path,
0516       const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0517 
0518  protected:
0519   std::shared_ptr<FileSystem> base_fs_;  // presumably the delegate filesystem; confirm
0520   std::shared_ptr<io::LatencyGenerator> latencies_;
0521 };
0522 
0523 /// \brief Ensure all registered filesystem implementations are finalized.
0524 ///
0525 /// Individual finalizers may wait for concurrent calls to finish so as to avoid
0526 /// race conditions. After this function has been called, all filesystem APIs
0527 /// will fail with an error.
0528 ///
0529 /// The user is responsible for synchronization of calls to this function.
0530 void EnsureFinalized();
0531 
0532 /// \defgroup filesystem-factories Functions for creating FileSystem instances
0533 ///
0534 /// @{
0535 
0536 /// \brief Create a new FileSystem by URI
0537 ///
0538 /// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
0539 /// "gs" and "gcs".
0540 ///
0541 /// Support for other schemes can be added using RegisterFileSystemFactory.
0542 ///
0543 /// \param[in] uri a URI-based path, ex: file:///some/local/path
0544 /// \param[out] out_path (optional) Path inside the filesystem.
0545 /// \return out_fs FileSystem instance.
0546 ARROW_EXPORT
0547 Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
0548                                                       std::string* out_path = NULLPTR);
0549 
0550 /// \brief Create a new FileSystem by URI with a custom IO context
0551 ///
0552 /// Recognized schemes are "file", "mock", "hdfs", "viewfs", "s3",
0553 /// "gs" and "gcs".
0554 ///
0555 /// Support for other schemes can be added using RegisterFileSystemFactory.
0556 ///
0557 /// \param[in] uri a URI-based path, ex: file:///some/local/path
0558 /// \param[in] io_context an IOContext which will be associated with the filesystem
0559 /// \param[out] out_path (optional) Path inside the filesystem.
0560 /// \return out_fs FileSystem instance.
0561 ARROW_EXPORT
0562 Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
0563                                                       const io::IOContext& io_context,
0564                                                       std::string* out_path = NULLPTR);
0565 
0566 /// \brief Create a new FileSystem by URI
0567 ///
0568 /// Support for other schemes can be added using RegisterFileSystemFactory.
0569 ///
0570 /// Same as FileSystemFromUri, but in addition also recognize non-URIs
0571 /// and treat them as local filesystem paths.  Only absolute local filesystem
0572 /// paths are allowed.
0573 ARROW_EXPORT
0574 Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
0575     const std::string& uri, std::string* out_path = NULLPTR);
0576 
0577 /// \brief Create a new FileSystem by URI with a custom IO context
0578 ///
0579 /// Support for other schemes can be added using RegisterFileSystemFactory.
0580 ///
0581 /// Same as FileSystemFromUri, but in addition also recognize non-URIs
0582 /// and treat them as local filesystem paths.  Only absolute local filesystem
0583 /// paths are allowed.
0584 ARROW_EXPORT
0585 Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
0586     const std::string& uri, const io::IOContext& io_context,
0587     std::string* out_path = NULLPTR);
0588 
0589 /// @}
0590 
0591 /// \defgroup filesystem-factory-registration Helpers for FileSystem registration
0592 ///
0593 /// @{
0594 
0595 /// \brief Register a FileSystem factory
0596 ///
0597 /// Support for custom URI schemes can be added by registering a factory
0598 /// for the corresponding FileSystem.
0599 ///
0600 /// \param[in] scheme a Uri scheme which the factory will handle.
0601 ///            If a factory has already been registered for a scheme,
0602 ///            the new factory will be ignored.
0603 /// \param[in] factory a function which can produce a FileSystem for Uris which match
0604 ///            scheme.
0605 /// \param[in] finalizer a function which must be called to finalize the factory before
0606 ///            the process exits, or nullptr if no finalization is necessary.
0607 /// \return raises KeyError if a name collision occurs.
0608 ARROW_EXPORT Status RegisterFileSystemFactory(std::string scheme,
0609                                               FileSystemFactory factory,
0610                                               std::function<void()> finalizer = {});
0611 
0612 /// \brief Register FileSystem factories from a shared library
0613 ///
0614 /// FileSystem implementations may be housed in separate shared libraries and only
0615 /// registered when the shared library is explicitly loaded. FileSystemRegistrar is
0616 /// provided to simplify definition of such libraries: each instance at namespace scope
0617 /// in the library will register a factory for a scheme. Any library which uses
0618 /// FileSystemRegistrars and which must be dynamically loaded should be loaded using
0619 /// LoadFileSystemFactories(), which will additionally merge registries if necessary
0620 /// (static linkage to arrow can produce isolated registries).
0621 ARROW_EXPORT Status LoadFileSystemFactories(const char* libpath);
0622 
0623 struct ARROW_EXPORT FileSystemRegistrar {
0624   /// \brief Register a FileSystem factory at load time
0625   ///
0626   /// Support for custom URI schemes can be added by registering a factory for the
0627   /// corresponding FileSystem. An instance of this helper can be defined at namespace
0628   /// scope to cause the factory to be registered at load time.
0629   ///
0630   /// Global constructors will finish execution before main() starts if the registrar is
0631   /// linked into the same binary as main(), or before dlopen()/LoadLibrary() returns if
0632   /// the library in which the registrar is defined is dynamically loaded.
0633   ///
0634   /// \code
0635   ///     FileSystemRegistrar kSlowFileSystemModule{
0636   ///       "slowfile",
0637   ///       [](const Uri& uri, const io::IOContext& io_context, std::string* out_path)
0638   ///           ->Result<std::shared_ptr<FileSystem>> {
0639   ///         auto local_uri = "file" + uri.ToString().substr(uri.scheme().size());
0640   ///         ARROW_ASSIGN_OR_RAISE(auto base_fs,
0641   ///             FileSystemFromUri(local_uri, io_context, out_path));
0642   ///         double average_latency = 1;
0643   ///         int32_t seed = 0xDEADBEEF;
0644   ///         ARROW_ASSIGN_OR_RAISE(auto params, uri.query_items());
0645   ///         for (const auto& [key, value] : params) {
0646   ///           if (key == "average_latency") {
0647   ///             average_latency = std::stod(value);
0648   ///           }
0649   ///           if (key == "seed") {
0650   ///             seed = std::stoi(value, nullptr, /*base=*/16);
0651   ///           }
0652   ///         }
0653   ///         return std::make_shared<SlowFileSystem>(base_fs, average_latency, seed);
0654   ///     }};
0655   /// \endcode
0656   ///
0657   /// \param[in] scheme a Uri scheme which the factory will handle.
0658   ///            If a factory has already been registered for a scheme, the
0659   ///            new factory will be ignored.
0660   /// \param[in] factory a function which can produce a FileSystem for Uris which match
0661   ///            scheme.
0662   /// \param[in] finalizer a function which must be called to finalize the factory before
0663   ///            the process exits, or nullptr if no finalization is necessary.
0664   FileSystemRegistrar(std::string scheme, FileSystemFactory factory,
0665                       std::function<void()> finalizer = {});
0666 };
0667 // Makes a FileSystemRegistrar, tagging the factory with __FILE__ and __LINE__.
0668 #define ARROW_REGISTER_FILESYSTEM(scheme, factory_function, finalizer)            \
0669   ::arrow::fs::FileSystemRegistrar {                                              \
0670     scheme, ::arrow::fs::FileSystemFactory{factory_function, __FILE__, __LINE__}, \
0671         finalizer                                                                 \
0672   }
0673 
0674 /// @}
0675 
0676 namespace internal {
0677 ARROW_EXPORT void* GetFileSystemRegistry();
0678 }  // namespace internal
0679 
0680 /// \brief Copy files, including from one FileSystem to another
0681 ///
0682 /// If a source and destination are resident in the same FileSystem, FileSystem::CopyFile
0683 /// will be used, otherwise the file will be opened as a stream in both FileSystems and
0684 /// chunks copied from the source to the destination. No directories will be created.
0685 ARROW_EXPORT
0686 Status CopyFiles(const std::vector<FileLocator>& sources,
0687                  const std::vector<FileLocator>& destinations,
0688                  const io::IOContext& io_context = io::default_io_context(),
0689                  int64_t chunk_size = 1024 * 1024, bool use_threads = true);
0690 
0691 /// \brief Copy selected files, including from one FileSystem to another
0692 ///
0693 /// Directories will be created under the destination base directory as needed.
0694 ARROW_EXPORT
0695 Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
0696                  const FileSelector& source_sel,
0697                  const std::shared_ptr<FileSystem>& destination_fs,
0698                  const std::string& destination_base_dir,
0699                  const io::IOContext& io_context = io::default_io_context(),
0700                  int64_t chunk_size = 1024 * 1024, bool use_threads = true);
0701 
0702 struct FileSystemGlobalOptions {
0703   /// Path to a single PEM file holding all TLS CA certificates
0704   ///
0705   /// If empty, the underlying TLS library's defaults will be used.
0706   std::string tls_ca_file_path;
0707 
0708   /// Path to a directory holding TLS CA certificates in individual PEM files
0709   /// named according to the OpenSSL "hashed" format.
0710   ///
0711   /// If empty, the underlying TLS library's defaults will be used.
0712   std::string tls_ca_dir_path;
0713 };
0714 
0715 /// EXPERIMENTAL: optional global initialization routine
0716 ///
0717 /// This is for environments (such as manylinux) where the path
0718 /// to TLS CA certificates needs to be configured at runtime.
0719 ARROW_EXPORT
0720 Status Initialize(const FileSystemGlobalOptions& options);
0721 
0722 }  // namespace fs
0723 }  // namespace arrow