![]() |
|
|||
File indexing completed on 2025-08-28 08:27:10
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 #pragma once 0019 0020 #include <cassert> 0021 #include <cstdint> 0022 #include <cstring> 0023 0024 #include "arrow/util/bit_run_reader.h" 0025 0026 namespace arrow { 0027 namespace util { 0028 namespace internal { 0029 0030 /// \brief Compress the buffer to spaced, excluding the null entries. 0031 /// 0032 /// \param[in] src the source buffer 0033 /// \param[in] num_values the size of source buffer 0034 /// \param[in] valid_bits bitmap data indicating position of valid slots 0035 /// \param[in] valid_bits_offset offset into valid_bits 0036 /// \param[out] output the output buffer spaced 0037 /// \return The size of spaced buffer. 0038 template <typename T> 0039 inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits, 0040 int64_t valid_bits_offset, T* output) { 0041 int num_valid_values = 0; 0042 0043 arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values); 0044 while (true) { 0045 const auto run = reader.NextRun(); 0046 if (run.length == 0) { 0047 break; 0048 } 0049 std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T)); 0050 num_valid_values += static_cast<int32_t>(run.length); 0051 } 0052 0053 return num_valid_values; 0054 } 0055 0056 /// \brief Relocate values in buffer into positions of non-null values as indicated by 0057 /// a validity bitmap. 0058 /// 0059 /// \param[in, out] buffer the in-place buffer 0060 /// \param[in] num_values total size of buffer including null slots 0061 /// \param[in] null_count number of null slots 0062 /// \param[in] valid_bits bitmap data indicating position of valid slots 0063 /// \param[in] valid_bits_offset offset into valid_bits 0064 /// \return The number of values expanded, including nulls. 0065 template <typename T> 0066 inline int SpacedExpand(T* buffer, int num_values, int null_count, 0067 const uint8_t* valid_bits, int64_t valid_bits_offset) { 0068 // Point to end as we add the spacing from the back. 0069 int idx_decode = num_values - null_count; 0070 0071 // Depending on the number of nulls, some of the value slots in buffer may 0072 // be uninitialized, and this will cause valgrind warnings / potentially UB 0073 std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T)); 0074 if (idx_decode == 0) { 0075 // All nulls, nothing more to do 0076 return num_values; 0077 } 0078 0079 arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset, 0080 num_values); 0081 while (true) { 0082 const auto run = reader.NextRun(); 0083 if (run.length == 0) { 0084 break; 0085 } 0086 idx_decode -= static_cast<int32_t>(run.length); 0087 assert(idx_decode >= 0); 0088 std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T)); 0089 } 0090 0091 // Otherwise caller gave an incorrect null_count 0092 assert(idx_decode == 0); 0093 return num_values; 0094 } 0095 0096 } // namespace internal 0097 } // namespace util 0098 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |