Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:59

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <unordered_map>
0024 #include <vector>
0025 
0026 #include "arrow/io/interfaces.h"
0027 #include "arrow/util/macros.h"
0028 #include "arrow/util/visibility.h"
0029 
0030 namespace arrow {
0031 
0032 class Buffer;
0033 class MemoryPool;
0034 class Status;
0035 
0036 namespace io {
0037 
0038 class HdfsReadableFile;
0039 class HdfsOutputStream;
0040 
0041 /// DEPRECATED.  Use the FileSystem API in arrow::fs instead.
     ///
     /// Scoping struct for the nested `type` enum: the enumerators are spelled
     /// ObjectType::FILE and ObjectType::DIRECTORY. The enum is the type of the
     /// `kind` member of FileStatistics and HdfsPathInfo in this header.
0042 struct ObjectType {
0043   enum type { FILE, DIRECTORY };
0044 };
0045 
0046 /// DEPRECATED.  Use the FileSystem API in arrow::fs instead.
     ///
     /// Out-parameter type of FileSystem::Stat().
     ///
     /// NOTE(review): neither member has a default initializer, so a
     /// default-constructed FileStatistics holds indeterminate values until assigned.
0047 struct ARROW_EXPORT FileStatistics {
0048   /// Size of file, -1 if finding length is unsupported
0049   int64_t size;
       /// Whether the entry is a file or a directory (see ObjectType)
0050   ObjectType::type kind;
0051 };
0052 
     /// Abstract filesystem interface. Every operation is pure virtual and reports
     /// its outcome through the returned Status; results are delivered through
     /// pointer out-parameters. HadoopFileSystem, declared later in this header,
     /// derives from it.
     ///
     /// NOTE(review): ObjectType and FileStatistics are marked DEPRECATED in favour
     /// of arrow::fs; presumably this interface shares that status — confirm.
0053 class ARROW_EXPORT FileSystem {
0054  public:
0055   virtual ~FileSystem() = default;
0056 
       /// @param path (in): directory to create
0057   virtual Status MakeDirectory(const std::string& path) = 0;
0058 
       /// @param path (in): directory to delete
0059   virtual Status DeleteDirectory(const std::string& path) = 0;
0060 
       /// @param path (in): path whose children are requested
       /// @param listing (out): one string per child; whether these are bare names or
       ///   full paths is not specified at this level
0061   virtual Status GetChildren(const std::string& path,
0062                              std::vector<std::string>* listing) = 0;
0063 
       /// @param src (in): existing path
       /// @param dst (in): new path
0064   virtual Status Rename(const std::string& src, const std::string& dst) = 0;
0065 
       /// @param path (in): path to inspect
       /// @param stat (out): size and kind of the path (see FileStatistics)
0066   virtual Status Stat(const std::string& path, FileStatistics* stat) = 0;
0067 };
0068 
     /// Metadata record for a single HDFS path. It is the out-parameter type of
     /// HadoopFileSystem::GetPathInfo() and the element type returned by
     /// HadoopFileSystem::ListDirectory().
     ///
     /// NOTE(review): no member has a default initializer, so the scalar fields of a
     /// default-constructed HdfsPathInfo are indeterminate until assigned.
0069 struct HdfsPathInfo {
       // File or directory (see ObjectType)
0070   ObjectType::type kind;
0071 
0072   std::string name;
0073   std::string owner;
0074   std::string group;
0075 
0076   // Sizes; units are not stated in this header (presumably bytes — TODO confirm)
0077   int64_t size;
0078   int64_t block_size;
0079 
       // Modification and access times in UNIX timestamps (seconds)
       // NOTE(review): stored as int32_t, so instants after 2038-01-19 cannot be
       // represented.
0080   int32_t last_modified_time;
0081   int32_t last_access_time;
0082 
0083   int16_t replication;
0084   int16_t permissions;
0085 };
0086 
     /// Connection parameters passed (by pointer) to HadoopFileSystem::Connect().
     ///
     /// NOTE(review): `port` has no default initializer; a default-constructed config
     /// leaves it indeterminate until the caller assigns it.
0087 struct HdfsConnectionConfig {
0088   std::string host;
0089   int port;
0090   std::string user;
       // presumably Kerberos ticket information (ticket cache path?) — TODO confirm
       // against the implementation
0091   std::string kerb_ticket;
       // Additional string key/value settings; how they are applied is not visible
       // in this header
0092   std::unordered_map<std::string, std::string> extra_conf;
0093 };
0094 
     /// HDFS client implementing the FileSystem interface.
     ///
     /// The constructor is private; the static Connect() function hands out
     /// instances as std::shared_ptr<HadoopFileSystem>. All state lives in the nested
     /// HadoopFileSystemImpl class, which is only forward-declared here and owned
     /// through `impl_`.
0095 class ARROW_EXPORT HadoopFileSystem : public FileSystem {
0096  public:
0097   ~HadoopFileSystem() override;
0098 
0099   // Connect to an HDFS cluster given a configuration
0100   //
0101   // @param config (in): configuration for connecting
0102   // @param fs (out): the created client
0103   // @returns Status
0104   static Status Connect(const HdfsConnectionConfig* config,
0105                         std::shared_ptr<HadoopFileSystem>* fs);
0106 
0107   // Create directory and all parents
0108   //
0109   // @param path (in): absolute HDFS path
0110   // @returns Status
0111   Status MakeDirectory(const std::string& path) override;
0112 
0113   // Delete file or directory
0114   // @param path absolute path to data
0115   // @param recursive if path is a directory, delete contents as well
0116   // @returns error status on failure
0117   Status Delete(const std::string& path, bool recursive = false);
0118 
       // FileSystem override.
       // NOTE(review): how this relates to Delete() (e.g. whether it is recursive)
       // is not visible in this header — confirm in the implementation.
0119   Status DeleteDirectory(const std::string& path) override;
0120 
0121   // Disconnect from cluster
0122   //
0123   // @returns Status
0124   Status Disconnect();
0125 
0126   // @param path (in): absolute HDFS path
0127   // @returns bool, true if the path exists, false if not (or on error)
0128   bool Exists(const std::string& path);
0129 
0130   // @param path (in): absolute HDFS path
0131   // @param info (out): metadata for the path (see HdfsPathInfo)
0132   // @returns Status
0133   Status GetPathInfo(const std::string& path, HdfsPathInfo* info);
0134 
0135   // @param nbytes (out): total capacity of the filesystem
0136   // @returns Status
0137   Status GetCapacity(int64_t* nbytes);
0138 
0139   // @param nbytes (out): total bytes used of the filesystem
0140   // @returns Status
0141   Status GetUsed(int64_t* nbytes);
0142 
       // FileSystem override; string-only counterpart of ListDirectory() below.
       // @param path (in): directory to list
       // @param listing (out): one string per child
0143   Status GetChildren(const std::string& path, std::vector<std::string>* listing) override;
0144 
0145   /// List directory contents
0146   ///
0147   /// If path is a relative path, returned values will be absolute paths or URIs
0148   /// starting from the current working directory.
       ///
       /// @param path directory to list
       /// @param listing (out) one HdfsPathInfo per entry
0149   Status ListDirectory(const std::string& path, std::vector<HdfsPathInfo>* listing);
0150 
0151   /// Return the filesystem's current working directory.
0152   ///
0153   /// The working directory is the base path for all relative paths given to
0154   /// other APIs.
0155   /// NOTE: this actually returns a URI.
0156   Status GetWorkingDirectory(std::string* out);
0157 
0158   /// Change the owner and/or group of a path
0159   ///
0160   /// @param path file path to change
0161   /// @param owner pass null for no change
0162   /// @param group pass null for no change
0163   Status Chown(const std::string& path, const char* owner, const char* group);
0164 
0165   /// Change path permissions
0166   ///
0167   /// \param path Absolute path in file system
0168   /// \param mode Mode bitset
0169   /// \return Status
0170   Status Chmod(const std::string& path, int mode);
0171 
0172   // Move file or directory from source path to destination path within the
0173   // current filesystem
0174   Status Rename(const std::string& src, const std::string& dst) override;
0175 
       // NOTE(review): Copy() and Move() are undocumented here; their semantics, and
       // how Move() differs from Rename(), must be confirmed in the implementation.
0176   Status Copy(const std::string& src, const std::string& dst);
0177 
0178   Status Move(const std::string& src, const std::string& dst);
0179 
       // FileSystem override.
       // @param path (in): path to inspect
       // @param stat (out): size and kind of the path
0180   Status Stat(const std::string& path, FileStatistics* stat) override;
0181 
0182   // TODO(wesm): SetWorkingDirectory (GetWorkingDirectory is already declared above)
0183 
0184   // Open an HDFS file in READ mode. Returns error
0185   // status if the file is not found.
0186   //
0187   // @param path complete file path
       // @param buffer_size presumably the read buffer size in bytes — TODO confirm
       // @param file (out): the opened file
0188   Status OpenReadable(const std::string& path, int32_t buffer_size,
0189                       std::shared_ptr<HdfsReadableFile>* file);
0190 
       // As above, with an explicit IOContext for the opened file.
0191   Status OpenReadable(const std::string& path, int32_t buffer_size,
0192                       const io::IOContext& io_context,
0193                       std::shared_ptr<HdfsReadableFile>* file);
0194 
       // Overload without buffer_size; the value used instead is chosen by the
       // implementation and is not visible in this header.
0195   Status OpenReadable(const std::string& path, std::shared_ptr<HdfsReadableFile>* file);
0196 
       // Overload without buffer_size, with an explicit IOContext.
0197   Status OpenReadable(const std::string& path, const io::IOContext& io_context,
0198                       std::shared_ptr<HdfsReadableFile>* file);
0199 
0200   // FileMode::WRITE options
0201   // @param path complete file path
       // @param append whether to append; behaviour when false is not stated here
0202   // @param buffer_size 0 by default
0203   // @param replication 0 by default
0204   // @param default_block_size 0 by default
       // @param file (out): the opened stream
       // NOTE(review): this signature declares no default arguments; "0 by default"
       // presumably describes what the two-argument overload below passes — confirm.
0205   Status OpenWritable(const std::string& path, bool append, int32_t buffer_size,
0206                       int16_t replication, int64_t default_block_size,
0207                       std::shared_ptr<HdfsOutputStream>* file);
0208 
0209   Status OpenWritable(const std::string& path, bool append,
0210                       std::shared_ptr<HdfsOutputStream>* file);
0211 
0212  private:
       // The file classes are granted access to this class's private members.
0213   friend class HdfsReadableFile;
0214   friend class HdfsOutputStream;
0215 
       // Implementation class: only forward-declared in this header and held by
       // unique_ptr, so its definition lives outside this header.
0216   class ARROW_NO_EXPORT HadoopFileSystemImpl;
0217   std::unique_ptr<HadoopFileSystemImpl> impl_;
0218 
       // Private: instances are created through Connect().
0219   HadoopFileSystem();
       // Macro from arrow/util/macros.h; per its name it disables copy construction
       // and copy assignment.
0220   ARROW_DISALLOW_COPY_AND_ASSIGN(HadoopFileSystem);
0221 };
0222 
     /// RandomAccessFile implementation for files opened on HDFS.
     ///
     /// The constructor is private and HadoopFileSystem::HadoopFileSystemImpl is a
     /// friend; HadoopFileSystem::OpenReadable() returns instances through its
     /// std::shared_ptr<HdfsReadableFile> out-parameter.
0223 class ARROW_EXPORT HdfsReadableFile : public RandomAccessFile {
0224  public:
0225   ~HdfsReadableFile() override;
0226 
0227   Status Close() override;
0228 
0229   bool closed() const override;
0230 
0231   // NOTE: If you wish to read a particular range of a file in a multithreaded
0232   // context, you may prefer to use ReadAt to avoid locking issues
       //
       // The `void* out` overloads return a byte count (Result<int64_t>); the other
       // overloads return the data as a Buffer.
0233   Result<int64_t> Read(int64_t nbytes, void* out) override;
0234   Result<std::shared_ptr<Buffer>> Read(int64_t nbytes) override;
0235   Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override;
0236   Result<std::shared_ptr<Buffer>> ReadAt(int64_t position, int64_t nbytes) override;
0237 
0238   Status Seek(int64_t position) override;
0239   Result<int64_t> Tell() const override;
0240   Result<int64_t> GetSize() override;
0241 
0242  private:
       // Private: construction is reserved for the friend declared below.
0243   explicit HdfsReadableFile(const io::IOContext&);
0244 
       // Implementation class: only forward-declared here and owned through impl_.
0245   class ARROW_NO_EXPORT HdfsReadableFileImpl;
0246   std::unique_ptr<HdfsReadableFileImpl> impl_;
0247 
0248   friend class HadoopFileSystem::HadoopFileSystemImpl;
0249 
0250   ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsReadableFile);
0251 };
0252 
0253 // Named "OutputStream" rather than "WritableFile" because it does not support
0254 // seeking, which the WritableFile interface would require.
     //
     // The constructor is private and HadoopFileSystem::HadoopFileSystemImpl is a
     // friend; HadoopFileSystem::OpenWritable() returns instances through its
     // std::shared_ptr<HdfsOutputStream> out-parameter.
0255 class ARROW_EXPORT HdfsOutputStream : public OutputStream {
0256  public:
0257   ~HdfsOutputStream() override;
0258 
0259   Status Close() override;
0260 
0261   bool closed() const override;
0262 
       // Keep the base-class Write overloads visible; the Write declared below
       // would otherwise hide them.
0263   using OutputStream::Write;
0264   Status Write(const void* buffer, int64_t nbytes) override;
0265 
0266   Status Flush() override;
0267 
0268   Result<int64_t> Tell() const override;
0269 
0270  private:
       // Implementation class: only forward-declared here and owned through impl_.
0271   class ARROW_NO_EXPORT HdfsOutputStreamImpl;
0272   std::unique_ptr<HdfsOutputStreamImpl> impl_;
0273 
0274   friend class HadoopFileSystem::HadoopFileSystemImpl;
0275 
       // Private: construction is reserved for the friend declared above.
0276   HdfsOutputStream();
0277 
0278   ARROW_DISALLOW_COPY_AND_ASSIGN(HdfsOutputStream);
0279 };
0280 
     // Exported free function taking no arguments and returning a Status.
     // NOTE(review): presumably reports whether the libhdfs library is available
     // (OK status) or not (error status) — confirm against the implementation.
0281 ARROW_EXPORT Status HaveLibHdfs();
0282 
0283 }  // namespace io
0284 }  // namespace arrow