Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:01

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <algorithm>
0021 
0022 #include "arrow/memory_pool.h"
0023 #include "arrow/type_fwd.h"
0024 #include "arrow/util/bit_util.h"
0025 
0026 namespace arrow {
0027 namespace internal {
0028 
// Result of BitmapWordAlign(): describes how a bitmap range is split into a
// leading area, a run of aligned words and a trailing area.
struct BitmapWordAlignParams {
  // Number of bits that precede the first aligned word
  // (never more than the length of the range).
  int64_t leading_bits;
  // Number of bits left over after the leading area and the aligned words.
  int64_t trailing_bits;
  // Bit offset at which the trailing area starts; it is computed as
  // bit_offset + leading_bits + aligned_bits.
  int64_t trailing_bit_offset;
  // Address of the byte holding the first bit after the leading area,
  // i.e. data + (bit_offset + leading_bits) / 8.
  const uint8_t* aligned_start;
  // Number of bits covered by whole aligned words (aligned_words * word size in bits).
  int64_t aligned_bits;
  // Number of whole aligned words available after the leading area.
  int64_t aligned_words;
};
0037 
0038 // Compute parameters for accessing a bitmap using aligned word instructions.
0039 // The returned parameters describe:
0040 // - a leading area of size `leading_bits` before the aligned words
0041 // - a word-aligned area of size `aligned_bits`
0042 // - a trailing area of size `trailing_bits` after the aligned words
0043 template <uint64_t ALIGN_IN_BYTES>
0044 inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t bit_offset,
0045                                              int64_t length) {
0046   static_assert(bit_util::IsPowerOf2(ALIGN_IN_BYTES),
0047                 "ALIGN_IN_BYTES should be a positive power of two");
0048   constexpr uint64_t ALIGN_IN_BITS = ALIGN_IN_BYTES * 8;
0049 
0050   BitmapWordAlignParams p;
0051 
0052   // Compute a "bit address" that we can align up to ALIGN_IN_BITS.
0053   // We don't care about losing the upper bits since we are only interested in the
0054   // difference between both addresses.
0055   const uint64_t bit_addr =
0056       reinterpret_cast<size_t>(data) * 8 + static_cast<uint64_t>(bit_offset);
0057   const uint64_t aligned_bit_addr = bit_util::RoundUpToPowerOf2(bit_addr, ALIGN_IN_BITS);
0058 
0059   p.leading_bits = std::min<int64_t>(length, aligned_bit_addr - bit_addr);
0060   p.aligned_words = (length - p.leading_bits) / ALIGN_IN_BITS;
0061   p.aligned_bits = p.aligned_words * ALIGN_IN_BITS;
0062   p.trailing_bits = length - p.leading_bits - p.aligned_bits;
0063   p.trailing_bit_offset = bit_offset + p.leading_bits + p.aligned_bits;
0064 
0065   p.aligned_start = data + (bit_offset + p.leading_bits) / 8;
0066   return p;
0067 }
0068 }  // namespace internal
0069 
0070 namespace util {
0071 
0072 // Functions to check if the provided Arrow object is aligned by the specified alignment
0073 
/// \brief Sentinel alignment value requesting data type-specific alignment
///
/// When this value is passed as the `alignment` argument of one of the
/// CheckAlignment or EnsureAlignment functions, each buffer is required to be
/// suitably aligned for the data type of the array rather than to a fixed number
/// of bytes.  For example, the address of the values buffer of an int32 array must
/// be a multiple of 4, and the address of the offsets buffer of a large_string
/// array must be a multiple of 8.
constexpr int64_t kValueAlignment{-3};
0082 
0083 /// \brief Calculate if the buffer's address is a multiple of `alignment`
0084 ///
0085 /// If `alignment` is less than or equal to 0 then this method will always return true
0086 /// \param buffer the buffer to check
0087 /// \param alignment the alignment (in bytes) to check for
0088 ARROW_EXPORT bool CheckAlignment(const Buffer& buffer, int64_t alignment);
0089 /// \brief Calculate if all buffers in the array data are aligned
0090 ///
0091 /// This will also check the buffers in the dictionary and any children
0092 /// \param array the array data to check
0093 /// \param alignment the alignment (in bytes) to check for
0094 ARROW_EXPORT bool CheckAlignment(const ArrayData& array, int64_t alignment);
0095 /// \brief Calculate if all buffers in the array are aligned
0096 ///
0097 /// This will also check the buffers in the dictionary and any children
0098 /// \param array the array to check
0099 /// \param alignment the alignment (in bytes) to check for
0100 ARROW_EXPORT bool CheckAlignment(const Array& array, int64_t alignment);
0101 
0102 // Following functions require an additional boolean vector which stores the
0103 // alignment check bits of the constituent objects.
0104 // For example, needs_alignment vector for a ChunkedArray will contain the
0105 // check bits of the constituent Arrays.
0106 // The boolean vector check was introduced to minimize the repetitive checks
0107 // of the constituent objects during the EnsureAlignment function where certain
0108 // objects can be ignored for further checking if we already know that they are
0109 // completely aligned.
0110 
0111 /// \brief Calculate which (if any) chunks in a chunked array are unaligned
0112 /// \param array the array to check
0113 /// \param alignment the alignment (in bytes) to check for
0114 /// \param needs_alignment an output vector that will store the results of the check
0115 ///        it must be set to a valid vector.  Extra elements will be added to the end
0116 ///        of the vector for each chunk that is checked.  `true` will be stored if
0117 ///        the chunk is unaligned.
0118 /// \param offset the index of the chunk to start checking
0119 /// \return true if all chunks (starting at `offset`) are aligned, false otherwise
0120 ARROW_EXPORT bool CheckAlignment(const ChunkedArray& array, int64_t alignment,
0121                                  std::vector<bool>* needs_alignment, int offset = 0);
0122 
0123 /// \brief calculate which (if any) columns in a record batch are unaligned
0124 /// \param batch the batch to check
0125 /// \param alignment the alignment (in bytes) to check for
0126 /// \param needs_alignment an output vector that will store the results of the
0127 ///        check.  It must be set to a valid vector.  Extra elements will be added
0128 ///        to the end of the vector for each column that is checked.  `true` will be
0129 ///        stored if the column is unaligned.
0130 ARROW_EXPORT bool CheckAlignment(const RecordBatch& batch, int64_t alignment,
0131                                  std::vector<bool>* needs_alignment);
0132 
0133 /// \brief calculate which (if any) columns in a table are unaligned
0134 /// \param table the table to check
0135 /// \param alignment the alignment (in bytes) to check for
0136 /// \param needs_alignment an output vector that will store the results of the
0137 ///        check.  It must be set to a valid vector.  Extra elements will be added
0138 ///        to the end of the vector for each column that is checked.  `true` will be
0139 ///        stored if the column is unaligned.
0140 ARROW_EXPORT bool CheckAlignment(const Table& table, int64_t alignment,
0141                                  std::vector<bool>* needs_alignment);
0142 
0143 /// \brief return a buffer that has the given alignment and the same data as the input
0144 /// buffer
0145 ///
0146 /// If the input buffer is already aligned then this method will return the input buffer
0147 /// If the input buffer is not already aligned then this method will allocate a new
0148 /// buffer.  The alignment of the new buffer will have at least
0149 /// max(kDefaultBufferAlignment, alignment) bytes of alignment.
0150 ///
0151 /// \param buffer the buffer to check
0152 /// \param alignment the alignment (in bytes) to check for
0153 /// \param memory_pool a memory pool that will be used to allocate a new buffer if the
0154 ///        input buffer is not sufficiently aligned
0155 ARROW_EXPORT Result<std::shared_ptr<Buffer>> EnsureAlignment(
0156     std::shared_ptr<Buffer> buffer, int64_t alignment, MemoryPool* memory_pool);
0157 
0158 /// \brief return an array data where all buffers are aligned by the given alignment
0159 ///
0160 /// If any input buffer is already aligned then this method will reuse that same input
0161 /// buffer.
0162 ///
0163 /// \param array_data the array data to check
0164 /// \param alignment the alignment (in bytes) to check for
0165 /// \param memory_pool a memory pool that will be used to allocate new buffers if any
0166 ///        input buffer is not sufficiently aligned
0167 ARROW_EXPORT Result<std::shared_ptr<ArrayData>> EnsureAlignment(
0168     std::shared_ptr<ArrayData> array_data, int64_t alignment, MemoryPool* memory_pool);
0169 
0170 /// \brief return an array where all buffers are aligned by the given alignment
0171 ///
0172 /// If any input buffer is already aligned then this method will reuse that same input
0173 /// buffer.
0174 ///
0175 /// \param array the array to check
0176 /// \param alignment the alignment (in bytes) to check for
0177 /// \param memory_pool a memory pool that will be used to allocate new buffers if any
0178 ///        input buffer is not sufficiently aligned
0179 ARROW_EXPORT Result<std::shared_ptr<Array>> EnsureAlignment(std::shared_ptr<Array> array,
0180                                                             int64_t alignment,
0181                                                             MemoryPool* memory_pool);
0182 
0183 /// \brief return a chunked array where all buffers are aligned by the given alignment
0184 ///
0185 /// If any input buffer is already aligned then this method will reuse that same input
0186 /// buffer.
0187 ///
0188 /// \param array the chunked array to check
0189 /// \param alignment the alignment (in bytes) to check for
0190 /// \param memory_pool a memory pool that will be used to allocate new buffers if any
0191 ///        input buffer is not sufficiently aligned
0192 ARROW_EXPORT Result<std::shared_ptr<ChunkedArray>> EnsureAlignment(
0193     std::shared_ptr<ChunkedArray> array, int64_t alignment, MemoryPool* memory_pool);
0194 
0195 /// \brief return a record batch where all buffers are aligned by the given alignment
0196 ///
0197 /// If any input buffer is already aligned then this method will reuse that same input
0198 /// buffer.
0199 ///
0200 /// \param batch the batch to check
0201 /// \param alignment the alignment (in bytes) to check for
0202 /// \param memory_pool a memory pool that will be used to allocate new buffers if any
0203 ///        input buffer is not sufficiently aligned
0204 ARROW_EXPORT Result<std::shared_ptr<RecordBatch>> EnsureAlignment(
0205     std::shared_ptr<RecordBatch> batch, int64_t alignment, MemoryPool* memory_pool);
0206 
0207 /// \brief return a table where all buffers are aligned by the given alignment
0208 ///
0209 /// If any input buffer is already aligned then this method will reuse that same input
0210 /// buffer.
0211 ///
0212 /// \param table the table to check
0213 /// \param alignment the alignment (in bytes) to check for
0214 /// \param memory_pool a memory pool that will be used to allocate new buffers if any
0215 ///        input buffer is not sufficiently aligned
0216 ARROW_EXPORT Result<std::shared_ptr<Table>> EnsureAlignment(std::shared_ptr<Table> table,
0217                                                             int64_t alignment,
0218                                                             MemoryPool* memory_pool);
0219 
0220 }  // namespace util
0221 }  // namespace arrow