include/arrow/memory_pool.h

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017
0018 #pragma once
0019
0020 #include <atomic>
0021 #include <cstdint>
0022 #include <functional>
0023 #include <memory>
0024 #include <string>
0025
0026 #include "arrow/result.h"
0027 #include "arrow/status.h"
0028 #include "arrow/type_fwd.h"
0029 #include "arrow/util/visibility.h"
0030
0031 namespace arrow {
0032
0033 namespace internal {
0034
0035 ///////////////////////////////////////////////////////////////////////
0036 // Helper tracking memory statistics
0037
/// \brief Thread-safe usage counters for a memory pool
///
/// The object is aligned on 64 bytes so that all of its atomic counters sit
/// on the same cache line.
class alignas(64) MemoryPoolStats {
 public:
  /// \brief Peak number of outstanding bytes recorded so far
  int64_t max_memory() const {
    return max_memory_.load(std::memory_order_acquire);
  }

  /// \brief Bytes recorded as allocated and not yet recorded as freed
  int64_t bytes_allocated() const {
    return bytes_allocated_.load(std::memory_order_acquire);
  }

  /// \brief Running sum of every size passed to DidAllocateBytes()
  int64_t total_bytes_allocated() const {
    return total_allocated_bytes_.load(std::memory_order_acquire);
  }

  /// \brief Number of times DidAllocateBytes() has run
  int64_t num_allocations() const {
    return num_allocs_.load(std::memory_order_acquire);
  }

  /// \brief Record that `size` bytes were just allocated
  ///
  /// Adds `size` to the outstanding and cumulative byte counters, increments
  /// the allocation count, and raises the recorded peak when the new
  /// outstanding total exceeds it.
  inline void DidAllocateBytes(int64_t size) {
    // Read the peak first. It never decreases, so a relaxed (possibly stale)
    // value is good enough ahead of the read-modify-write operations below.
    int64_t peak = max_memory_.load(std::memory_order_relaxed);
    const int64_t previous =
        bytes_allocated_.fetch_add(size, std::memory_order_acq_rel);

    // Update the counters nothing below depends on before touching the peak.
    // That gives `peak` and `previous` time to land in registers and keeps
    // the atomic updates adjacent in the instruction stream.
    total_allocated_bytes_.fetch_add(size, std::memory_order_acq_rel);
    num_allocs_.fetch_add(1, std::memory_order_acq_rel);

    const int64_t outstanding = previous + size;
    while (peak < outstanding) {
      // A failed exchange reloads `peak` with the stored value. If another
      // thread already published a peak at least as large as ours, the loop
      // condition ends the loop without writing.
      if (max_memory_.compare_exchange_weak(/*expected=*/peak,
                                            /*desired=*/outstanding,
                                            std::memory_order_acq_rel)) {
        break;
      }
    }
  }

  /// \brief Record that an allocation changed size from old_size to new_size
  ///
  /// Growth is recorded as an allocation of the difference. Any other case is
  /// recorded as a release of the difference.
  ///
  /// NOTE(review): a shrinking or same-size reallocation therefore leaves
  /// num_allocations() unchanged -- confirm that this is intended.
  inline void DidReallocateBytes(int64_t old_size, int64_t new_size) {
    if (new_size <= old_size) {
      DidFreeBytes(old_size - new_size);
      return;
    }
    DidAllocateBytes(new_size - old_size);
  }

  /// \brief Record that `size` bytes were released
  ///
  /// Only the outstanding byte counter changes.
  inline void DidFreeBytes(int64_t size) {
    bytes_allocated_.fetch_sub(size, std::memory_order_acq_rel);
  }

 private:
  // The counters are accessed with acquire/release ordering
  // (https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering),
  // apart from the relaxed read of max_memory_ at the top of DidAllocateBytes().
  //
  // bytes_allocated_ is the only counter that can go down. max_memory_,
  // total_allocated_bytes_ and num_allocs_ are monotonically increasing,
  // which can allow some optimizations.
  std::atomic<int64_t> max_memory_{0};
  std::atomic<int64_t> bytes_allocated_{0};
  std::atomic<int64_t> total_allocated_bytes_{0};
  std::atomic<int64_t> num_allocs_{0};
};
0101
0102 }  // namespace internal
0103
0104 /// Base class for memory allocation on the CPU.
0105 ///
0106 /// Besides tracking the number of allocated bytes, the allocator also should
0107 /// take care of the required 64-byte alignment.
0108 class ARROW_EXPORT MemoryPool {
0109  public:
0110   virtual ~MemoryPool() = default;
0111
0112   /// \brief EXPERIMENTAL. Create a new instance of the default MemoryPool
0113   static std::unique_ptr<MemoryPool> CreateDefault();
0114
0115   /// Allocate a new memory region of at least size bytes.
0116   ///
0117   /// The allocated region shall be 64-byte aligned.
0118   Status Allocate(int64_t size, uint8_t** out) {
0119     return Allocate(size, kDefaultBufferAlignment, out);
0120   }
0121
0122   /// Allocate a new memory region of at least size bytes aligned to alignment.
0123   virtual Status Allocate(int64_t size, int64_t alignment, uint8_t** out) = 0;
0124
0125   /// Resize an already allocated memory section.
0126   ///
0127   /// As by default most default allocators on a platform don't support aligned
0128   /// reallocation, this function can involve a copy of the underlying data.
0129   virtual Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment,
0130                             uint8_t** ptr) = 0;
0131   Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) {
0132     return Reallocate(old_size, new_size, kDefaultBufferAlignment, ptr);
0133   }
0134
0135   /// Free an allocated region.
0136   ///
0137   /// @param buffer Pointer to the start of the allocated memory region
0138   /// @param size Allocated size located at buffer. An allocator implementation
0139   ///   may use this for tracking the amount of allocated bytes as well as for
0140   ///   faster deallocation if supported by its backend.
0141   /// @param alignment The alignment of the allocation. Defaults to 64 bytes.
0142   virtual void Free(uint8_t* buffer, int64_t size, int64_t alignment) = 0;
0143   void Free(uint8_t* buffer, int64_t size) {
0144     Free(buffer, size, kDefaultBufferAlignment);
0145   }
0146
0147   /// Return unused memory to the OS
0148   ///
0149   /// Only applies to allocators that hold onto unused memory.  This will be
0150   /// best effort, a memory pool may not implement this feature or may be
0151   /// unable to fulfill the request due to fragmentation.
0152   virtual void ReleaseUnused() {}
0153
0154   /// The number of bytes that were allocated and not yet free'd through
0155   /// this allocator.
0156   virtual int64_t bytes_allocated() const = 0;
0157
0158   /// Return peak memory allocation in this memory pool
0159   ///
0160   /// \return Maximum bytes allocated. If not known (or not implemented),
0161   /// returns -1
0162   virtual int64_t max_memory() const;
0163
0164   /// The number of bytes that were allocated.
0165   virtual int64_t total_bytes_allocated() const = 0;
0166
0167   /// The number of allocations or reallocations that were requested.
0168   virtual int64_t num_allocations() const = 0;
0169
0170   /// The name of the backend used by this MemoryPool (e.g. "system" or "jemalloc").
0171   virtual std::string backend_name() const = 0;
0172
0173  protected:
0174   MemoryPool() = default;
0175 };
0176
0177 class ARROW_EXPORT LoggingMemoryPool : public MemoryPool {
0178  public:
0179   explicit LoggingMemoryPool(MemoryPool* pool);
0180   ~LoggingMemoryPool() override = default;
0181
0182   using MemoryPool::Allocate;
0183   using MemoryPool::Free;
0184   using MemoryPool::Reallocate;
0185
0186   Status Allocate(int64_t size, int64_t alignment, uint8_t** out) override;
0187   Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment,
0188                     uint8_t** ptr) override;
0189   void Free(uint8_t* buffer, int64_t size, int64_t alignment) override;
0190
0191   int64_t bytes_allocated() const override;
0192
0193   int64_t max_memory() const override;
0194
0195   int64_t total_bytes_allocated() const override;
0196
0197   int64_t num_allocations() const override;
0198
0199   std::string backend_name() const override;
0200
0201  private:
0202   MemoryPool* pool_;
0203 };
0204
0205 /// Derived class for memory allocation.
0206 ///
0207 /// Tracks the number of bytes and maximum memory allocated through its direct
0208 /// calls. Actual allocation is delegated to MemoryPool class.
0209 class ARROW_EXPORT ProxyMemoryPool : public MemoryPool {
0210  public:
0211   explicit ProxyMemoryPool(MemoryPool* pool);
0212   ~ProxyMemoryPool() override;
0213
0214   using MemoryPool::Allocate;
0215   using MemoryPool::Free;
0216   using MemoryPool::Reallocate;
0217
0218   Status Allocate(int64_t size, int64_t alignment, uint8_t** out) override;
0219   Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment,
0220                     uint8_t** ptr) override;
0221   void Free(uint8_t* buffer, int64_t size, int64_t alignment) override;
0222
0223   int64_t bytes_allocated() const override;
0224
0225   int64_t max_memory() const override;
0226
0227   int64_t total_bytes_allocated() const override;
0228
0229   int64_t num_allocations() const override;
0230
0231   std::string backend_name() const override;
0232
0233  private:
0234   class ProxyMemoryPoolImpl;
0235   std::unique_ptr<ProxyMemoryPoolImpl> impl_;
0236 };
0237
0238 /// \brief Return a process-wide memory pool based on the system allocator.
0239 ARROW_EXPORT MemoryPool* system_memory_pool();
0240
0241 /// \brief Return a process-wide memory pool based on jemalloc.
0242 ///
0243 /// May return NotImplemented if jemalloc is not available.
0244 ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out);
0245
0246 /// \brief Set jemalloc memory page purging behavior for future-created arenas
0247 /// to the indicated number of milliseconds. See dirty_decay_ms and
0248 /// muzzy_decay_ms options in jemalloc for a description of what these do. The
0249 /// default is configured to 1000 (1 second) which releases memory more
0250 /// aggressively to the operating system than the jemalloc default of 10
0251 /// seconds. If you set the value to 0, dirty / muzzy pages will be released
0252 /// immediately rather than with a time decay, but this may reduce application
0253 /// performance.
0254 ARROW_EXPORT
0255 Status jemalloc_set_decay_ms(int ms);
0256
0257 /// \brief Get basic statistics from jemalloc's mallctl.
0258 /// See the MALLCTL NAMESPACE section in jemalloc project documentation for
0259 /// available stats.
0260 ARROW_EXPORT
0261 Result<int64_t> jemalloc_get_stat(const char* name);
0262
0263 /// \brief Reset the counter for peak bytes allocated in the calling thread to zero.
0264 /// This affects subsequent calls to thread.peak.read, but not the values returned by
0265 /// thread.allocated or thread.deallocated.
0266 ARROW_EXPORT
0267 Status jemalloc_peak_reset();
0268
0269 /// \brief Print summary statistics in human-readable form to stderr.
0270 /// See malloc_stats_print documentation in jemalloc project documentation for
0271 /// available opt flags.
0272 ARROW_EXPORT
0273 Status jemalloc_stats_print(const char* opts = "");
0274
0275 /// \brief Print summary statistics in human-readable form using a callback
0276 /// See malloc_stats_print documentation in jemalloc project documentation for
0277 /// available opt flags.
0278 ARROW_EXPORT
0279 Status jemalloc_stats_print(std::function<void(const char*)> write_cb,
0280                             const char* opts = "");
0281
0282 /// \brief Get summary statistics in human-readable form.
0283 /// See malloc_stats_print documentation in jemalloc project documentation for
0284 /// available opt flags.
0285 ARROW_EXPORT
0286 Result<std::string> jemalloc_stats_string(const char* opts = "");
0287
0288 /// \brief Return a process-wide memory pool based on mimalloc.
0289 ///
0290 /// May return NotImplemented if mimalloc is not available.
0291 ARROW_EXPORT Status mimalloc_memory_pool(MemoryPool** out);
0292
0293 /// \brief Return the names of the backends supported by this Arrow build.
0294 ARROW_EXPORT std::vector<std::string> SupportedMemoryBackendNames();
0295
0296 }  // namespace arrow