![]() |
|
|||
File indexing completed on 2025-08-28 08:27:01
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 #pragma once 0019 0020 #include <algorithm> 0021 0022 #include "arrow/memory_pool.h" 0023 #include "arrow/type_fwd.h" 0024 #include "arrow/util/bit_util.h" 0025 0026 namespace arrow { 0027 namespace internal { 0028 0029 struct BitmapWordAlignParams { 0030 int64_t leading_bits; 0031 int64_t trailing_bits; 0032 int64_t trailing_bit_offset; 0033 const uint8_t* aligned_start; 0034 int64_t aligned_bits; 0035 int64_t aligned_words; 0036 }; 0037 0038 // Compute parameters for accessing a bitmap using aligned word instructions. 0039 // The returned parameters describe: 0040 // - a leading area of size `leading_bits` before the aligned words 0041 // - a word-aligned area of size `aligned_bits` 0042 // - a trailing area of size `trailing_bits` after the aligned words 0043 template <uint64_t ALIGN_IN_BYTES> 0044 inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t bit_offset, 0045 int64_t length) { 0046 static_assert(bit_util::IsPowerOf2(ALIGN_IN_BYTES), 0047 "ALIGN_IN_BYTES should be a positive power of two"); 0048 constexpr uint64_t ALIGN_IN_BITS = ALIGN_IN_BYTES * 8; 0049 0050 BitmapWordAlignParams p; 0051 0052 // Compute a "bit address" that we can align up to ALIGN_IN_BITS. 0053 // We don't care about losing the upper bits since we are only interested in the 0054 // difference between both addresses. 0055 const uint64_t bit_addr = 0056 reinterpret_cast<size_t>(data) * 8 + static_cast<uint64_t>(bit_offset); 0057 const uint64_t aligned_bit_addr = bit_util::RoundUpToPowerOf2(bit_addr, ALIGN_IN_BITS); 0058 0059 p.leading_bits = std::min<int64_t>(length, aligned_bit_addr - bit_addr); 0060 p.aligned_words = (length - p.leading_bits) / ALIGN_IN_BITS; 0061 p.aligned_bits = p.aligned_words * ALIGN_IN_BITS; 0062 p.trailing_bits = length - p.leading_bits - p.aligned_bits; 0063 p.trailing_bit_offset = bit_offset + p.leading_bits + p.aligned_bits; 0064 0065 p.aligned_start = data + (bit_offset + p.leading_bits) / 8; 0066 return p; 0067 } 0068 } // namespace internal 0069 0070 namespace util { 0071 0072 // Functions to check if the provided Arrow object is aligned by the specified alignment 0073 0074 /// \brief Special alignment value to use data type-specific alignment 0075 /// 0076 /// If this is passed as the `alignment` in one of the CheckAlignment or EnsureAlignment 0077 /// functions, then the function will ensure each buffer is suitably aligned 0078 /// for the data type of the array. For example, given an int32 buffer the values 0079 /// buffer's address must be a multiple of 4. Given a large_string buffer the offsets 0080 /// buffer's address must be a multiple of 8. 0081 constexpr int64_t kValueAlignment = -3; 0082 0083 /// \brief Calculate if the buffer's address is a multiple of `alignment` 0084 /// 0085 /// If `alignment` is less than or equal to 0 then this method will always return true 0086 /// \param buffer the buffer to check 0087 /// \param alignment the alignment (in bytes) to check for 0088 ARROW_EXPORT bool CheckAlignment(const Buffer& buffer, int64_t alignment); 0089 /// \brief Calculate if all buffers in the array data are aligned 0090 /// 0091 /// This will also check the buffers in the dictionary and any children 0092 /// \param array the array data to check 0093 /// \param alignment the alignment (in bytes) to check for 0094 ARROW_EXPORT bool CheckAlignment(const ArrayData& array, int64_t alignment); 0095 /// \brief Calculate if all buffers in the array are aligned 0096 /// 0097 /// This will also check the buffers in the dictionary and any children 0098 /// \param array the array to check 0099 /// \param alignment the alignment (in bytes) to check for 0100 ARROW_EXPORT bool CheckAlignment(const Array& array, int64_t alignment); 0101 0102 // Following functions require an additional boolean vector which stores the 0103 // alignment check bits of the constituent objects. 0104 // For example, needs_alignment vector for a ChunkedArray will contain the 0105 // check bits of the constituent Arrays. 0106 // The boolean vector check was introduced to minimize the repetitive checks 0107 // of the constituent objects during the EnsureAlignment function where certain 0108 // objects can be ignored for further checking if we already know that they are 0109 // completely aligned. 0110 0111 /// \brief Calculate which (if any) chunks in a chunked array are unaligned 0112 /// \param array the array to check 0113 /// \param alignment the alignment (in bytes) to check for 0114 /// \param needs_alignment an output vector that will store the results of the check 0115 /// it must be set to a valid vector. Extra elements will be added to the end 0116 /// of the vector for each chunk that is checked. `true` will be stored if 0117 /// the chunk is unaligned. 0118 /// \param offset the index of the chunk to start checking 0119 /// \return true if all chunks (starting at `offset`) are aligned, false otherwise 0120 ARROW_EXPORT bool CheckAlignment(const ChunkedArray& array, int64_t alignment, 0121 std::vector<bool>* needs_alignment, int offset = 0); 0122 0123 /// \brief calculate which (if any) columns in a record batch are unaligned 0124 /// \param batch the batch to check 0125 /// \param alignment the alignment (in bytes) to check for 0126 /// \param needs_alignment an output vector that will store the results of the 0127 /// check. It must be set to a valid vector. Extra elements will be added 0128 /// to the end of the vector for each column that is checked. `true` will be 0129 /// stored if the column is unaligned. 0130 ARROW_EXPORT bool CheckAlignment(const RecordBatch& batch, int64_t alignment, 0131 std::vector<bool>* needs_alignment); 0132 0133 /// \brief calculate which (if any) columns in a table are unaligned 0134 /// \param table the table to check 0135 /// \param alignment the alignment (in bytes) to check for 0136 /// \param needs_alignment an output vector that will store the results of the 0137 /// check. It must be set to a valid vector. Extra elements will be added 0138 /// to the end of the vector for each column that is checked. `true` will be 0139 /// stored if the column is unaligned. 0140 ARROW_EXPORT bool CheckAlignment(const Table& table, int64_t alignment, 0141 std::vector<bool>* needs_alignment); 0142 0143 /// \brief return a buffer that has the given alignment and the same data as the input 0144 /// buffer 0145 /// 0146 /// If the input buffer is already aligned then this method will return the input buffer 0147 /// If the input buffer is not already aligned then this method will allocate a new 0148 /// buffer. The alignment of the new buffer will have at least 0149 /// max(kDefaultBufferAlignment, alignment) bytes of alignment. 0150 /// 0151 /// \param buffer the buffer to check 0152 /// \param alignment the alignment (in bytes) to check for 0153 /// \param memory_pool a memory pool that will be used to allocate a new buffer if the 0154 /// input buffer is not sufficiently aligned 0155 ARROW_EXPORT Result<std::shared_ptr<Buffer>> EnsureAlignment( 0156 std::shared_ptr<Buffer> buffer, int64_t alignment, MemoryPool* memory_pool); 0157 0158 /// \brief return an array data where all buffers are aligned by the given alignment 0159 /// 0160 /// If any input buffer is already aligned then this method will reuse that same input 0161 /// buffer. 0162 /// 0163 /// \param array_data the array data to check 0164 /// \param alignment the alignment (in bytes) to check for 0165 /// \param memory_pool a memory pool that will be used to allocate new buffers if any 0166 /// input buffer is not sufficiently aligned 0167 ARROW_EXPORT Result<std::shared_ptr<ArrayData>> EnsureAlignment( 0168 std::shared_ptr<ArrayData> array_data, int64_t alignment, MemoryPool* memory_pool); 0169 0170 /// \brief return an array where all buffers are aligned by the given alignment 0171 /// 0172 /// If any input buffer is already aligned then this method will reuse that same input 0173 /// buffer. 0174 /// 0175 /// \param array the array to check 0176 /// \param alignment the alignment (in bytes) to check for 0177 /// \param memory_pool a memory pool that will be used to allocate new buffers if any 0178 /// input buffer is not sufficiently aligned 0179 ARROW_EXPORT Result<std::shared_ptr<Array>> EnsureAlignment(std::shared_ptr<Array> array, 0180 int64_t alignment, 0181 MemoryPool* memory_pool); 0182 0183 /// \brief return a chunked array where all buffers are aligned by the given alignment 0184 /// 0185 /// If any input buffer is already aligned then this method will reuse that same input 0186 /// buffer. 0187 /// 0188 /// \param array the chunked array to check 0189 /// \param alignment the alignment (in bytes) to check for 0190 /// \param memory_pool a memory pool that will be used to allocate new buffers if any 0191 /// input buffer is not sufficiently aligned 0192 ARROW_EXPORT Result<std::shared_ptr<ChunkedArray>> EnsureAlignment( 0193 std::shared_ptr<ChunkedArray> array, int64_t alignment, MemoryPool* memory_pool); 0194 0195 /// \brief return a record batch where all buffers are aligned by the given alignment 0196 /// 0197 /// If any input buffer is already aligned then this method will reuse that same input 0198 /// buffer. 0199 /// 0200 /// \param batch the batch to check 0201 /// \param alignment the alignment (in bytes) to check for 0202 /// \param memory_pool a memory pool that will be used to allocate new buffers if any 0203 /// input buffer is not sufficiently aligned 0204 ARROW_EXPORT Result<std::shared_ptr<RecordBatch>> EnsureAlignment( 0205 std::shared_ptr<RecordBatch> batch, int64_t alignment, MemoryPool* memory_pool); 0206 0207 /// \brief return a table where all buffers are aligned by the given alignment 0208 /// 0209 /// If any input buffer is already aligned then this method will reuse that same input 0210 /// buffer. 0211 /// 0212 /// \param table the table to check 0213 /// \param alignment the alignment (in bytes) to check for 0214 /// \param memory_pool a memory pool that will be used to allocate new buffers if any 0215 /// input buffer is not sufficiently aligned 0216 ARROW_EXPORT Result<std::shared_ptr<Table>> EnsureAlignment(std::shared_ptr<Table> table, 0217 int64_t alignment, 0218 MemoryPool* memory_pool); 0219 0220 } // namespace util 0221 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |