Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:10

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cassert>
0021 #include <cstdint>
0022 #include <cstring>
0023 
0024 #include "arrow/util/bit_run_reader.h"
0025 
0026 namespace arrow {
0027 namespace util {
0028 namespace internal {
0029 
0030 /// \brief Compress the buffer to spaced, excluding the null entries.
0031 ///
0032 /// \param[in] src the source buffer
0033 /// \param[in] num_values the size of source buffer
0034 /// \param[in] valid_bits bitmap data indicating position of valid slots
0035 /// \param[in] valid_bits_offset offset into valid_bits
0036 /// \param[out] output the output buffer spaced
0037 /// \return The size of spaced buffer.
0038 template <typename T>
0039 inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits,
0040                           int64_t valid_bits_offset, T* output) {
0041   int num_valid_values = 0;
0042 
0043   arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values);
0044   while (true) {
0045     const auto run = reader.NextRun();
0046     if (run.length == 0) {
0047       break;
0048     }
0049     std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T));
0050     num_valid_values += static_cast<int32_t>(run.length);
0051   }
0052 
0053   return num_valid_values;
0054 }
0055 
0056 /// \brief Relocate values in buffer into positions of non-null values as indicated by
0057 /// a validity bitmap.
0058 ///
0059 /// \param[in, out] buffer the in-place buffer
0060 /// \param[in] num_values total size of buffer including null slots
0061 /// \param[in] null_count number of null slots
0062 /// \param[in] valid_bits bitmap data indicating position of valid slots
0063 /// \param[in] valid_bits_offset offset into valid_bits
0064 /// \return The number of values expanded, including nulls.
0065 template <typename T>
0066 inline int SpacedExpand(T* buffer, int num_values, int null_count,
0067                         const uint8_t* valid_bits, int64_t valid_bits_offset) {
0068   // Point to end as we add the spacing from the back.
0069   int idx_decode = num_values - null_count;
0070 
0071   // Depending on the number of nulls, some of the value slots in buffer may
0072   // be uninitialized, and this will cause valgrind warnings / potentially UB
0073   std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T));
0074   if (idx_decode == 0) {
0075     // All nulls, nothing more to do
0076     return num_values;
0077   }
0078 
0079   arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset,
0080                                                  num_values);
0081   while (true) {
0082     const auto run = reader.NextRun();
0083     if (run.length == 0) {
0084       break;
0085     }
0086     idx_decode -= static_cast<int32_t>(run.length);
0087     assert(idx_decode >= 0);
0088     std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T));
0089   }
0090 
0091   // Otherwise caller gave an incorrect null_count
0092   assert(idx_decode == 0);
0093   return num_values;
0094 }
0095 
0096 }  // namespace internal
0097 }  // namespace util
0098 }  // namespace arrow