File indexing completed on 2025-08-28 08:26:59
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <unordered_map>
0024 #include <vector>
0025
0026 #include "arrow/io/interfaces.h"
0027 #include "arrow/util/macros.h"
0028 #include "arrow/util/visibility.h"
0029
0030 namespace arrow {
0031 // Forward declarations of arrow types whose definitions are not visible in this header.
0032 class Buffer;
0033 class MemoryPool;
0034 class Status;
0035
0036 namespace io {
0037 // Forward declarations so HadoopFileSystem can name these classes before they are defined further down.
0038 class HdfsReadableFile;
0039 class HdfsOutputStream;
0040
0041 /// Wrapper struct for the file-vs-directory discriminator stored in FileStatistics::kind and HdfsPathInfo::kind.
0042 struct ObjectType {
0043 enum type { FILE, DIRECTORY };  // unscoped enum nested in a struct, so values are spelled ObjectType::FILE etc.
0044 };
0045
0046 /// Minimal description of a file system entry; passed by pointer to Stat(), presumably as its output.
0047 struct ARROW_EXPORT FileStatistics {
0048 /// Size of the entry; presumably in bytes -- units are not stated in this header (TODO confirm).
0049 int64_t size;
0050 ObjectType::type kind;  // FILE or DIRECTORY; like `size`, has no default initializer
0051 };
0052 /// Abstract file system interface: every operation is pure virtual and reports its outcome as a Status.
0053 class ARROW_EXPORT FileSystem {
0054 public:
0055 virtual ~FileSystem() = default;  // virtual so implementations can be destroyed through a FileSystem pointer
0056 /// Presumably creates a directory at `path`; exact semantics are left to implementations (not visible here).
0057 virtual Status MakeDirectory(const std::string& path) = 0;
0058 /// Presumably removes the directory at `path`; whether it must be empty is not stated in this header.
0059 virtual Status DeleteDirectory(const std::string& path) = 0;
0060 /// `listing` is an output parameter; presumably receives the entries found under `path` (TODO confirm format).
0061 virtual Status GetChildren(const std::string& path,
0062 std::vector<std::string>* listing) = 0;
0063 /// Presumably renames/moves `src` to `dst`; behavior when `dst` already exists is not stated in this header.
0064 virtual Status Rename(const std::string& src, const std::string& dst) = 0;
0065 /// `stat` is an output parameter; presumably filled with the size and kind of the entry at `path`.
0066 virtual Status Stat(const std::string& path, FileStatistics* stat) = 0;
0067 };
0068 /// HDFS-specific description of one path; used as the output type of GetPathInfo() and ListDirectory().
0069 struct HdfsPathInfo {
0070 ObjectType::type kind;  // FILE or DIRECTORY
0071 // Identity/ownership strings. NOTE(review): whether `name` is a full path or a base name is not shown here.
0072 std::string name;
0073 std::string owner;
0074 std::string group;
0075
0076 // Sizes; presumably in bytes -- units are not stated in this header (TODO confirm).
0077 int64_t size;
0078 int64_t block_size;
0079 // Timestamps; presumably UNIX seconds -- TODO confirm. Note that they are stored in 32 bits.
0080 int32_t last_modified_time;
0081 int32_t last_access_time;
0082 // Presumably the replication factor and permission bits as reported by HDFS -- TODO confirm encoding.
0083 int16_t replication;
0084 int16_t permissions;
0085 };
0086 /// Connection parameters passed (by pointer) to HadoopFileSystem::Connect().
0087 struct HdfsConnectionConfig {
0088 std::string host;  // host to connect to; accepted formats are not stated in this header
0089 int port;  // no default initializer: must be set explicitly before the config is used
0090 std::string user;
0091 std::string kerb_ticket;  // presumably a Kerberos ticket (cache) location -- TODO confirm
0092 std::unordered_map<std::string, std::string> extra_conf;  // additional string key/value settings
0093 };
0094 /// FileSystem implementation for HDFS. The constructor is private; instances are handed out by Connect().
0095 class ARROW_EXPORT HadoopFileSystem : public FileSystem {
0096 public:
0097 ~HadoopFileSystem() override;
0098
0099 /// Static factory; the only public way in this header to obtain an instance.
0100 ///
0101 /// \param config connection settings, passed by pointer
0102 /// \param fs output parameter; presumably set to the connected file system on success
0103 /// \return Status describing the outcome
0104 static Status Connect(const HdfsConnectionConfig* config,
0105 std::shared_ptr<HadoopFileSystem>* fs);
0106
0107
0108
0109 /// FileSystem override. Presumably creates the directory at `path`.
0110 /// NOTE(review): whether missing parent directories are created is not stated in this header.
0111 Status MakeDirectory(const std::string& path) override;
0112
0113
0114 /// Presumably deletes the file or directory at `path`.
0115 ///
0116 /// \param recursive defaults to false; presumably needed to remove a non-empty directory (TODO confirm)
0117 Status Delete(const std::string& path, bool recursive = false);
0118 /// FileSystem override. NOTE(review): its relation to Delete() (e.g. recursive or not) is not visible here.
0119 Status DeleteDirectory(const std::string& path) override;
0120
0121
0122 /// Presumably closes the connection established by Connect().
0123 /// NOTE(review): the state of files opened through this object afterwards is not documented here.
0124 Status Disconnect();
0125
0126
0127 /// Returns a plain bool, not a Status; a failed lookup is presumably indistinguishable from a missing path.
0128 bool Exists(const std::string& path);
0129
0130
0131
0132 /// `info` is an output parameter; presumably filled with the metadata of `path`.
0133 Status GetPathInfo(const std::string& path, HdfsPathInfo* info);
0134
0135
0136 /// `nbytes` is an output parameter; presumably receives the total capacity of the file system in bytes.
0137 Status GetCapacity(int64_t* nbytes);
0138
0139
0140 /// `nbytes` is an output parameter; presumably receives the number of bytes currently in use.
0141 Status GetUsed(int64_t* nbytes);
0142 /// FileSystem override; `listing` is an output parameter of strings (contrast with ListDirectory()).
0143 Status GetChildren(const std::string& path, std::vector<std::string>* listing) override;
0144
0145
0146
0147 /// `listing` is an output parameter of HdfsPathInfo records; presumably one per entry under `path`.
0148 /// NOTE(review): behavior when `path` is a file rather than a directory is not visible here.
0149 Status ListDirectory(const std::string& path, std::vector<HdfsPathInfo>* listing);
0150
0151
0152
0153
0154
0155 /// `out` is an output parameter; presumably receives the current working directory of this connection.
0156 Status GetWorkingDirectory(std::string* out);
0157
0158
0159
0160
0161 /// Presumably changes the owner and/or group of `path`.
0162 /// NOTE(review): `owner`/`group` are raw C strings; whether nullptr means "leave unchanged" is not shown here.
0163 Status Chown(const std::string& path, const char* owner, const char* group);
0164
0165
0166
0167
0168 /// Presumably changes the permissions of `path`.
0169 /// NOTE(review): `mode` is a plain int; presumably POSIX-style permission bits -- TODO confirm.
0170 Status Chmod(const std::string& path, int mode);
0171
0172
0173 /// FileSystem override.
0174 Status Rename(const std::string& src, const std::string& dst) override;
0175 /// NOTE(review): Copy, Move and Rename share a signature; how their semantics differ is not visible here.
0176 Status Copy(const std::string& src, const std::string& dst);
0177
0178 Status Move(const std::string& src, const std::string& dst);
0179 /// FileSystem override; `stat` is an output parameter.
0180 Status Stat(const std::string& path, FileStatistics* stat) override;
0181
0182
0183
0184
0185 /// Four OpenReadable overloads: with or without an explicit `buffer_size`, and with or without an `io_context`.
0186 /// `file` is an output parameter in all of them. Overloads that omit an argument presumably fall back to a
0187 /// default for it (TODO confirm against the implementation).
0188 Status OpenReadable(const std::string& path, int32_t buffer_size,
0189 std::shared_ptr<HdfsReadableFile>* file);
0190
0191 Status OpenReadable(const std::string& path, int32_t buffer_size,
0192 const io::IOContext& io_context,
0193 std::shared_ptr<HdfsReadableFile>* file);
0194
0195 Status OpenReadable(const std::string& path, std::shared_ptr<HdfsReadableFile>* file);
0196
0197 Status OpenReadable(const std::string& path, const io::IOContext& io_context,
0198 std::shared_ptr<HdfsReadableFile>* file);
0199
0200
0201
0202 /// Two OpenWritable overloads; `file` is an output parameter in both. The short form presumably uses
0203 /// defaults for `buffer_size`, `replication` and `default_block_size` (TODO confirm).
0204 /// \param append presumably selects appending to an existing file instead of creating/overwriting it
0205 Status OpenWritable(const std::string& path, bool append, int32_t buffer_size,
0206 int16_t replication, int64_t default_block_size,
0207 std::shared_ptr<HdfsOutputStream>* file);
0208
0209 Status OpenWritable(const std::string& path, bool append,
0210 std::shared_ptr<HdfsOutputStream>* file);
0211
0212 private:
0213 friend class HdfsReadableFile;  // both file classes name the private nested HadoopFileSystemImpl
0214 friend class HdfsOutputStream;  // in their own friend declarations further down in this file
0215 // The implementation lives in a nested class that is only forward-declared in this header.
0216 class ARROW_NO_EXPORT HadoopFileSystemImpl;
0217 std::unique_ptr<HadoopFileSystemImpl> impl_;
0218 // Private constructor: the public way to get an instance is Connect().
0219 HadoopFileSystem();
0220 ARROW_DISALLOW_COPY_AND_ASSIGN(HadoopFileSystem);  // per the macro name, disables copying
0221 };
0222 /// RandomAccessFile implementation for HDFS; the constructor is private, with HadoopFileSystemImpl as a friend.
0223 class ARROW_EXPORT HdfsReadableFile : public RandomAccessFile {
0224 public:
0225 ~HdfsReadableFile() override;
0226
0227 Status Close() override;
0228
0229 bool closed() const override;
0230 // Read/ReadAt overloads: the `void* out` forms return an int64_t (presumably the number of bytes read),
0231 // the others return a shared Buffer. The ReadAt forms additionally take an explicit `position`.
0232 Result<int64_t> Read(int64_t nbytes, void* out) override;
0233 Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
0234 Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
0235 Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
0236
0237 Status Seek(int64_t position) override;
0238 Result<int64_t> Tell() const override;
0239 Result<int64_t> GetSize() override;  // note: not const, unlike Tell()
0240
0241 private:
0242 explicit HdfsReadableFile(const io::IOContext&);  // private: only this class and its friend can construct
0243 // The implementation lives in a nested class that is only forward-declared in this header.
0244 class ARROW_NO_EXPORT HdfsReadableFileImpl;
0245 std::unique_ptr<HdfsReadableFileImpl> impl_;
0246
0247 friend class HadoopFileSystem::HadoopFileSystemImpl;
0248
0249 ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsReadableFile);  // per the macro name, disables copying
0250 };
0252
0253
0254 /// OutputStream implementation for HDFS; the constructor is private, with HadoopFileSystemImpl as a friend.
0255 class ARROW_EXPORT HdfsOutputStream : public OutputStream {
0256 public:
0257 ~HdfsOutputStream() override;
0258
0259 Status Close() override;
0260
0261 bool closed() const override;
0262 // Bring the base-class Write overloads into scope; the Write declared below would otherwise hide them.
0263 using OutputStream::Write;
0264 Status Write(const void* buffer, int64_t nbytes) override;
0265
0266 Status Flush() override;
0267
0268 Result<int64_t> Tell() const override;
0269
0270 private:
0271 class ARROW_NO_EXPORT HdfsOutputStreamImpl;  // implementation class, only forward-declared in this header
0272 std::unique_ptr<HdfsOutputStreamImpl> impl_;
0273
0274 friend class HadoopFileSystem::HadoopFileSystemImpl;
0275 // Private constructor: only this class and its friend can construct instances.
0276 HdfsOutputStream();
0277
0278 ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsOutputStream);  // per the macro name, disables copying
0279 };
0280 /// Presumably reports via the returned Status whether libhdfs is available -- implementation not visible here.
0281 ARROW_EXPORT Status HaveLibHdfs();
0282
0283 }
0284 }