File indexing completed on 2025-08-28 08:26:58
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <chrono>
0021 #include <cstdint>
0022 #include <functional>
0023 #include <iosfwd>
0024 #include <memory>
0025 #include <string>
0026 #include <utility>
0027 #include <vector>
0028
0029 #include "arrow/filesystem/type_fwd.h"
0030 #include "arrow/io/interfaces.h"
0031 #include "arrow/type_fwd.h"
0032 #include "arrow/util/compare.h"
0033 #include "arrow/util/macros.h"
0034 #include "arrow/util/type_fwd.h"
0035 #include "arrow/util/visibility.h"
0036 #include "arrow/util/windows_fixup.h"
0037
0038 namespace arrow {
0039 namespace fs {
0040
0041 using arrow::util::Uri;  // Uri appears in FileSystemFactory's callable signature
0042
0043 /// \brief Time point type returned by FileInfo::mtime().
0044 /// It is a std::chrono::system_clock time point counted in nanoseconds.
0045 using TimePoint =
0046 std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
0047 /// \brief String conversion for FileType; only declared in this header.
0048 ARROW_EXPORT std::string ToString(FileType);
0049 /// \brief Stream insertion for FileType; only declared in this header.
0050 ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType);
0051 /// Sentinels used as the default values of FileInfo::size_ and FileInfo::mtime_.
0052 static const int64_t kNoSize = -1;
0053 static const TimePoint kNoTime = TimePoint(TimePoint::duration(-1));
0054
0055 /// \brief Value type holding a path together with its type, size and modification time.
0056 struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {
0057 FileInfo() = default;
0058 FileInfo(FileInfo&&) = default;
0059 FileInfo& operator=(FileInfo&&) = default;
0060 FileInfo(const FileInfo&) = default;
0061 FileInfo& operator=(const FileInfo&) = default;
0062 /// Build from a path and an optional type; size_ and mtime_ keep their defaults.
0063 explicit FileInfo(std::string path, FileType type = FileType::Unknown)
0064 : path_(std::move(path)), type_(type) {}
0065
0066 /// The stored file type (FileType::Unknown unless set).
0067 FileType type() const { return type_; }
0068 void set_type(FileType type) { type_ = type; }
0069
0070 /// The stored path string.
0071 const std::string& path() const { return path_; }
0072 void set_path(std::string path) { path_ = std::move(path); }
0073
0074 /// Declared only. NOTE(review): presumably the last component of path() - confirm.
0075 std::string base_name() const;
0076
0077 /// Declared only. NOTE(review): presumably the parent part of path() - confirm.
0078 std::string dir_name() const;
0079
0080
0081
0082 /// The stored size (kNoSize unless set). NOTE(review): presumably in bytes - confirm.
0083 int64_t size() const { return size_; }
0084 void set_size(int64_t size) { size_ = size; }
0085
0086 /// Declared only. NOTE(review): presumably the file extension of the path - confirm.
0087 std::string extension() const;
0088
0089 /// The stored modification time (kNoTime unless set).
0090 TimePoint mtime() const { return mtime_; }
0091 void set_mtime(TimePoint mtime) { mtime_ = mtime; }
0092
0093 bool IsFile() const { return type_ == FileType::File; }
0094 bool IsDirectory() const { return type_ == FileType::Directory; }
0095 /// Field-wise equality over type, path, size and mtime.
0096 bool Equals(const FileInfo& other) const {
0097 return type() == other.type() && path() == other.path() && size() == other.size() &&
0098 mtime() == other.mtime();
0099 }
0100
0101 std::string ToString() const;
0102
0103
0104 /// Function object keyed on path() only: the two-argument call is a less-than
0105 /// ordering on paths, the one-argument call hashes the path.
0106 struct ByPath {
0107 bool operator()(const FileInfo& l, const FileInfo& r) const {
0108 return l.path() < r.path();
0109 }
0110
0111 size_t operator()(const FileInfo& i) const {
0112 return std::hash<std::string>{}(i.path());
0113 }
0114 };
0115
0116 protected:
0117 std::string path_;
0118 FileType type_ = FileType::Unknown;
0119 int64_t size_ = kNoSize;
0120 TimePoint mtime_ = kNoTime;
0121 };
0122
0123 ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const FileInfo&);
0124
0125 /// \brief Options struct taken by the FileSelector overloads of FileSystem and CopyFiles.
0126 struct ARROW_EXPORT FileSelector {
0127
0128 /// NOTE(review): presumably the directory whose entries are selected - confirm.
0129 std::string base_dir;
0130
0131 /// Defaults to false. NOTE(review): presumably tolerates a missing base_dir - confirm.
0132 bool allow_not_found;
0133 /// Defaults to false. NOTE(review): presumably enables descending into subdirectories.
0134 bool recursive;
0135 /// Defaults to INT32_MAX. NOTE(review): presumably a depth limit used with `recursive`.
0136 int32_t max_recursion;
0137 /// Sets both flags to false and max_recursion to INT32_MAX; base_dir starts empty.
0138 FileSelector() : allow_not_found(false), recursive(false), max_recursion(INT32_MAX) {}
0139 };
0140
0141 /// \brief A shared FileSystem handle paired with a path string; taken by CopyFiles.
0142 struct ARROW_EXPORT FileLocator {
0143 std::shared_ptr<FileSystem> filesystem;
0144 std::string path;
0145 };
0146 /// Convenience aliases: a vector of FileInfo, and a callable yielding a Future of one.
0147 using FileInfoVector = std::vector<FileInfo>;
0148 using FileInfoGenerator = std::function<Future<FileInfoVector>()>;
0149
0150 }
0151 /// \brief IterationTraits specialization: an empty FileInfoVector is the end marker.
0152 template <>
0153 struct IterationTraits<fs::FileInfoVector> {
0154 static fs::FileInfoVector End() { return {}; }
0155 static bool IsEnd(const fs::FileInfoVector& val) { return val.empty(); }
0156 };
0157
0158 namespace fs {
0159
0160 /// \brief Abstract filesystem interface; subclasses implement the pure virtual methods.
0161 class ARROW_EXPORT FileSystem
0162 // Allows obtaining a std::shared_ptr to this object through shared_from_this().
0163 : public std::enable_shared_from_this<FileSystem>
0164 // Method notes below state only what the declarations show; semantics live elsewhere.
0165 {
0166 public:
0167 virtual ~FileSystem();
0168 /// Name identifying the concrete implementation, supplied by each subclass.
0169 virtual std::string type_name() const = 0;
0170
0171 /// The IOContext that was passed to the constructor.
0172 const io::IOContext& io_context() const { return io_context_; }
0173
0174
0175
0176 /// Overridable, with a default implementation defined elsewhere.
0177 /// NOTE(review): presumably validates or canonicalizes `path` - confirm.
0178 virtual Result<std::string> NormalizePath(std::string path);
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196 /// NOTE(review): presumably extracts this filesystem's path from a URI string and
0197 /// errors for URIs it does not handle - confirm against the implementation.
0198 virtual Result<std::string> PathFromUri(const std::string& uri_string) const;
0199
0200
0201
0202 /// NOTE(review): presumably the reverse of PathFromUri - confirm.
0203 virtual Result<std::string> MakeUri(std::string path) const;
0204 /// Equality with another filesystem, as defined by the implementation.
0205 virtual bool Equals(const FileSystem& other) const = 0;
0206 /// Pointer convenience overload; `other` is dereferenced unchecked and must not be null.
0207 virtual bool Equals(const std::shared_ptr<FileSystem>& other) const {
0208 return Equals(*other);
0209 }
0210
0211
0212
0213
0214
0215
0216 /// FileInfo for one path. NOTE(review): result for a nonexistent path is not shown here.
0217 virtual Result<FileInfo> GetFileInfo(const std::string& path) = 0;
0218 /// FileInfo for several paths; not pure, so a default implementation exists elsewhere.
0219 virtual Result<FileInfoVector> GetFileInfo(const std::vector<std::string>& paths);
0220
0221
0222
0223
0224 /// FileInfo for the entries described by a FileSelector.
0225 virtual Result<FileInfoVector> GetFileInfo(const FileSelector& select) = 0;
0226
0227 /// Future-returning counterpart of the multi-path GetFileInfo.
0228 virtual Future<FileInfoVector> GetFileInfoAsync(const std::vector<std::string>& paths);
0229
0230
0231
0232
0233 /// Generator counterpart of the selector-based GetFileInfo (see FileInfoGenerator).
0234 virtual FileInfoGenerator GetFileInfoGenerator(const FileSelector& select);
0235
0236
0237
0238 /// Create a directory; the one-argument overload forwards with recursive = true.
0239 virtual Status CreateDir(const std::string& path, bool recursive) = 0;
0240 Status CreateDir(const std::string& path) { return CreateDir(path, true); }
0241
0242 /// Delete a directory. NOTE(review): presumably including its contents - confirm.
0243 virtual Status DeleteDir(const std::string& path) = 0;
0244
0245
0246
0247
0248 /// Delete a directory's contents; the one-argument overload passes missing_dir_ok = false.
0249 virtual Status DeleteDirContents(const std::string& path, bool missing_dir_ok) = 0;
0250 Status DeleteDirContents(const std::string& path) {
0251 return DeleteDirContents(path, false);
0252 }
0253
0254 /// Future-returning counterpart of DeleteDirContents.
0255 virtual Future<> DeleteDirContentsAsync(const std::string& path, bool missing_dir_ok);
0256
0257
0258
0259 /// Declared only. NOTE(review): presumably forwards with missing_dir_ok = false - confirm.
0260 Future<> DeleteDirContentsAsync(const std::string& path);
0261
0262
0263
0264
0265
0266 /// NOTE(review): presumably DeleteDirContents applied to the root directory - confirm.
0267 virtual Status DeleteRootDirContents() = 0;
0268
0269 /// Delete the file at `path`.
0270 virtual Status DeleteFile(const std::string& path) = 0;
0271
0272
0273 /// Delete several files; not pure, so a default implementation exists elsewhere.
0274 virtual Status DeleteFiles(const std::vector<std::string>& paths);
0275
0276
0277
0278
0279
0280
0281 /// Move `src` to `dest`. NOTE(review): overwrite semantics are not shown here.
0282 virtual Status Move(const std::string& src, const std::string& dest) = 0;
0283
0284
0285
0286
0287 /// Copy `src` to `dest`. NOTE(review): overwrite semantics are not shown here.
0288 virtual Status CopyFile(const std::string& src, const std::string& dest) = 0;
0289
0290 /// Open an io::InputStream for `path`.
0291 virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(
0292 const std::string& path) = 0;
0293
0294
0295
0296
0297
0298 /// Same, taking a FileInfo; not pure, so a default implementation exists elsewhere.
0299 virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info);
0300
0301 /// Open an io::RandomAccessFile for `path`.
0302 virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0303 const std::string& path) = 0;
0304
0305
0306
0307
0308
0309 /// Same, taking a FileInfo; not pure, so a default implementation exists elsewhere.
0310 virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0311 const FileInfo& info);
0312
0313 /// Future-returning counterparts of the four input-opening methods above.
0314 virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0315 const std::string& path);
0316
0317
0318 virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0319 const FileInfo& info);
0320
0321
0322 virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0323 const std::string& path);
0324
0325
0326 virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0327 const FileInfo& info);
0328
0329
0330 /// Open an io::OutputStream for `path` with metadata; the one-argument overload is
0331 /// declared only. NOTE(review): behaviour for an existing file is not shown here.
0332 virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
0333 const std::string& path,
0334 const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
0335 Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path);
0336
0337
0338
0339
0340
0341
0342 /// Open an io::OutputStream for appending to `path`, with metadata; the one-argument
0343 /// overload is declared only.
0344 virtual Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
0345 const std::string& path,
0346 const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
0347 Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(const std::string& path);
0348 /// Only subclasses can construct; the IOContext defaults to io::default_io_context().
0349 protected:
0350 explicit FileSystem(io::IOContext io_context = io::default_io_context())
0351 : io_context_(std::move(io_context)) {}
0352
0353 io::IOContext io_context_;
0354
0355 /// NOTE(review): presumably tells the default *Async methods that they simply wrap
0356 /// the synchronous call - confirm against the implementation file.
0357 bool default_async_is_sync_ = true;
0358 };
0359 /// \brief A filesystem-creating callable plus the source location that identifies it.
0360 struct FileSystemFactory {
0361 std::function<Result<std::shared_ptr<FileSystem>>(
0362 const Uri& uri, const io::IOContext& io_context, std::string* out_path)>
0363 function;
0364 std::string_view file;  // NOTE(review): <string_view> is not included directly here
0365 int line;
0366
0367 bool operator==(const FileSystemFactory& other) const {
0368 // Only the recorded file and line are compared; `function` is left out because
0369 // std::function provides no equality comparison between two targets.
0370
0371
0372
0373
0374 return file == other.file && line == other.line;
0375 }
0376 };
0377
0378
0379
0380
0381
0382
0383
0384
0385 /// \brief FileSystem subclass built from a base path and a base filesystem.
0386 /// NOTE(review): presumably forwards to base_fs_ with base_path_ prepended - confirm.
0387 class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
0388 public:
0389
0390 explicit SubTreeFileSystem(const std::string& base_path,
0391 std::shared_ptr<FileSystem> base_fs);
0392 ~SubTreeFileSystem() override;
0393
0394 std::string type_name() const override { return "subtree"; }
0395 std::string base_path() const { return base_path_; }  // returns a copy
0396 std::shared_ptr<FileSystem> base_fs() const { return base_fs_; }
0397
0398 Result<std::string> NormalizePath(std::string path) override;
0399 Result<std::string> PathFromUri(const std::string& uri_string) const override;
0400
0401 bool Equals(const FileSystem& other) const override;
0402
0403 /// Re-expose the base-class overloads that the overrides below would otherwise hide.
0404 using FileSystem::CreateDir;
0405 using FileSystem::DeleteDirContents;
0406 using FileSystem::GetFileInfo;
0407 using FileSystem::OpenAppendStream;
0408 using FileSystem::OpenOutputStream;
0409
0410 /// Overrides of the FileSystem interface (definitions are not in this header).
0411 Result<FileInfo> GetFileInfo(const std::string& path) override;
0412 Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
0413
0414 FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
0415
0416 Status CreateDir(const std::string& path, bool recursive) override;
0417
0418 Status DeleteDir(const std::string& path) override;
0419 Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;
0420 Status DeleteRootDirContents() override;
0421
0422 Status DeleteFile(const std::string& path) override;
0423
0424 Status Move(const std::string& src, const std::string& dest) override;
0425
0426 Status CopyFile(const std::string& src, const std::string& dest) override;
0427
0428 Result<std::shared_ptr<io::InputStream>> OpenInputStream(
0429 const std::string& path) override;
0430 Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
0431 Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0432 const std::string& path) override;
0433 Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0434 const FileInfo& info) override;
0435
0436 Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0437 const std::string& path) override;
0438 Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
0439 const FileInfo& info) override;
0440 Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0441 const std::string& path) override;
0442 Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
0443 const FileInfo& info) override;
0444
0445 Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
0446 const std::string& path,
0447 const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0448 Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
0449 const std::string& path,
0450 const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0451
0452 protected:
0453 SubTreeFileSystem() = default;
0454 // base_path_ is const: it can only be set in a constructor's initializer list.
0455 const std::string base_path_;
0456 std::shared_ptr<FileSystem> base_fs_;
0457 // NOTE(review): path helpers; presumably translate between subtree and base paths.
0458 Result<std::string> PrependBase(const std::string& s) const;
0459 Result<std::string> PrependBaseNonEmpty(const std::string& s) const;
0460 Result<std::string> StripBase(const std::string& s) const;
0461 Status FixInfo(FileInfo* info) const;
0462
0463 static Result<std::string> NormalizeBasePath(
0464 std::string base_path, const std::shared_ptr<FileSystem>& base_fs);
0465 };
0466
0467 /// \brief FileSystem subclass holding a base filesystem and an io::LatencyGenerator.
0468 /// NOTE(review): presumably delays each call before forwarding to base_fs_ - confirm.
0469 class ARROW_EXPORT SlowFileSystem : public FileSystem {
0470 public:
0471 SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
0472 std::shared_ptr<io::LatencyGenerator> latencies);
0473 SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency);
0474 SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency,
0475 int32_t seed);  // NOTE(review): unit of average_latency is not shown here
0476
0477 std::string type_name() const override { return "slow"; }
0478 bool Equals(const FileSystem& other) const override;
0479 Result<std::string> PathFromUri(const std::string& uri_string) const override;
0480
0481 /// Re-expose the base-class overloads that the overrides below would otherwise hide.
0482 using FileSystem::CreateDir;
0483 using FileSystem::DeleteDirContents;
0484 using FileSystem::GetFileInfo;
0485 using FileSystem::OpenAppendStream;
0486 using FileSystem::OpenOutputStream;
0487
0488 /// Overrides of the FileSystem interface (definitions are not in this header).
0489 Result<FileInfo> GetFileInfo(const std::string& path) override;
0490 Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
0491
0492 Status CreateDir(const std::string& path, bool recursive) override;
0493
0494 Status DeleteDir(const std::string& path) override;
0495 Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;
0496 Status DeleteRootDirContents() override;
0497
0498 Status DeleteFile(const std::string& path) override;
0499
0500 Status Move(const std::string& src, const std::string& dest) override;
0501
0502 Status CopyFile(const std::string& src, const std::string& dest) override;
0503
0504 Result<std::shared_ptr<io::InputStream>> OpenInputStream(
0505 const std::string& path) override;
0506 Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
0507 Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0508 const std::string& path) override;
0509 Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
0510 const FileInfo& info) override;
0511 Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
0512 const std::string& path,
0513 const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0514 Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
0515 const std::string& path,
0516 const std::shared_ptr<const KeyValueMetadata>& metadata) override;
0517
0518 protected:
0519 std::shared_ptr<FileSystem> base_fs_;
0520 std::shared_ptr<io::LatencyGenerator> latencies_;
0521 };
0522
0523
0524
0525
0526
0527
0528 /// Declared without ARROW_EXPORT, unlike the other free functions in this header.
0529 /// NOTE(review): presumably runs the finalizers of registered factories - confirm.
0530 void EnsureFinalized();
0531
0532
0533
0534
0535
0536
0537
0538
0539
0540
0541
0542
0543
0544 /// \brief Obtain a FileSystem from a URI string. `out_path` is optional (NULLPTR default).
0545 /// NOTE(review): presumably receives the path part of the URI when non-null - confirm.
0546 ARROW_EXPORT
0547 Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
0548 std::string* out_path = NULLPTR);
0549
0550
0551
0552
0553
0554
0555
0556
0557
0558
0559 /// \brief Overload of FileSystemFromUri taking an explicit io::IOContext.
0560 /// `out_path` is optional (NULLPTR default).
0561 ARROW_EXPORT
0562 Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
0563 const io::IOContext& io_context,
0564 std::string* out_path = NULLPTR);
0565
0566
0567
0568
0569
0570
0571 /// \brief Obtain a FileSystem from a string; `out_path` is optional (NULLPTR default).
0572 /// NOTE(review): presumably also accepts a plain local path, per the name - confirm.
0573 ARROW_EXPORT
0574 Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
0575 const std::string& uri, std::string* out_path = NULLPTR);
0576
0577
0578
0579
0580
0581
0582 /// \brief Overload of FileSystemFromUriOrPath taking an explicit io::IOContext.
0583 /// `out_path` is optional (NULLPTR default).
0584 ARROW_EXPORT
0585 Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
0586 const std::string& uri, const io::IOContext& io_context,
0587 std::string* out_path = NULLPTR);
0588
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606 /// \brief Register `factory` under `scheme`; `finalizer` is optional (empty by default).
0607 /// NOTE(review): handling of an already-registered scheme is not shown here.
0608 ARROW_EXPORT Status RegisterFileSystemFactory(std::string scheme,
0609 FileSystemFactory factory,
0610 std::function<void()> finalizer = {});
0611
0612
0613
0614
0615
0616
0617
0618
0619 /// \brief Takes a library path as a C string and returns a Status.
0620 /// NOTE(review): presumably loads a shared library that registers factories - confirm.
0621 ARROW_EXPORT Status LoadFileSystemFactories(const char* libpath);
0622 /// \brief Type whose only declared member is a registering constructor.
0623 struct ARROW_EXPORT FileSystemRegistrar {
0624
0625
0626
0627
0628
0629
0630
0631
0632
0633
0634
0635
0636
0637
0638
0639
0640
0641
0642
0643
0644
0645
0646
0647
0648
0649
0650
0651
0652
0653
0654
0655
0656
0657
0658
0659
0660
0661
0662 /// Takes the same arguments as RegisterFileSystemFactory.
0663 /// NOTE(review): presumably performs that registration on construction - confirm.
0664 FileSystemRegistrar(std::string scheme, FileSystemFactory factory,
0665 std::function<void()> finalizer = {});
0666 };
0667 /// Builds a FileSystemRegistrar whose factory records __FILE__ and __LINE__ of the use site.
0668 #define ARROW_REGISTER_FILESYSTEM(scheme, factory_function, finalizer) \
0669 ::arrow::fs::FileSystemRegistrar { \
0670 scheme, ::arrow::fs::FileSystemFactory{factory_function, __FILE__, __LINE__}, \
0671 finalizer \
0672 }
0673
0674
0675 /// Internal accessor returning an untyped pointer; the pointee type is not shown here.
0676 namespace internal {
0677 ARROW_EXPORT void* GetFileSystemRegistry();
0678 }
0679
0680
0681
0682
0683 /// \brief Copy files given two FileLocator lists. Defaults: 1024 * 1024 chunk size, threads on.
0684 /// NOTE(review): presumably sources[i] is copied to destinations[i] - confirm.
0685 ARROW_EXPORT
0686 Status CopyFiles(const std::vector<FileLocator>& sources,
0687 const std::vector<FileLocator>& destinations,
0688 const io::IOContext& io_context = io::default_io_context(),
0689 int64_t chunk_size = 1024 * 1024, bool use_threads = true);
0690
0691
0692 /// \brief Copy the files chosen by a selector on `source_fs` into a directory on
0693 /// `destination_fs`. Same defaults as the FileLocator overload.
0694 ARROW_EXPORT
0695 Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
0696 const FileSelector& source_sel,
0697 const std::shared_ptr<FileSystem>& destination_fs,
0698 const std::string& destination_base_dir,
0699 const io::IOContext& io_context = io::default_io_context(),
0700 int64_t chunk_size = 1024 * 1024, bool use_threads = true);
0701 /// \brief Options struct accepted by Initialize(); both members default to empty strings.
0702 struct FileSystemGlobalOptions {
0703
0704
0705 /// NOTE(review): presumably the path of a TLS CA certificate file - confirm.
0706 std::string tls_ca_file_path;
0707
0708
0709
0710
0711 /// NOTE(review): presumably the path of a directory of TLS CA certificates - confirm.
0712 std::string tls_ca_dir_path;
0713 };
0714
0715
0716
0717 /// \brief Takes a FileSystemGlobalOptions and returns a Status.
0718 /// NOTE(review): presumably applies the options process-wide - confirm.
0719 ARROW_EXPORT
0720 Status Initialize(const FileSystemGlobalOptions& options);
0721
0722 }
0723 }