Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-27 08:47:24

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <string_view>
0021 
0022 #include "arrow/array.h"
0023 #include "arrow/status.h"
0024 #include "arrow/type.h"
0025 #include "arrow/type_traits.h"
0026 #include "arrow/util/binary_view_util.h"
0027 #include "arrow/util/bit_block_counter.h"
0028 #include "arrow/util/bit_util.h"
0029 #include "arrow/util/checked_cast.h"
0030 #include "arrow/util/functional.h"
0031 
0032 namespace arrow {
0033 namespace internal {
0034 
// Primary template of the per-type inline visitor. It is intentionally empty: the
// VisitStatus/VisitVoid entry points are provided by the partial specializations
// that are selected through the `Enable` parameter.
template <class T, class Enable = void>
struct ArraySpanInlineVisitor {};
0037 
0038 // Numeric and primitive C-compatible types
0039 template <typename T>
0040 struct ArraySpanInlineVisitor<T, enable_if_has_c_type<T>> {
0041   using c_type = typename T::c_type;
0042 
0043   template <typename ValidFunc, typename NullFunc>
0044   static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0045                             NullFunc&& null_func) {
0046     if constexpr (std::is_same_v<T, BooleanType>) {
0047       int64_t offset = arr.offset;
0048       const uint8_t* data = arr.buffers[1].data;
0049       return VisitBitBlocks(
0050           arr.buffers[0].data, offset, arr.length,
0051           [&](int64_t i) { return valid_func(bit_util::GetBit(data, offset + i)); },
0052           std::forward<NullFunc>(null_func));
0053     } else {
0054       const c_type* data = arr.GetValues<c_type>(1);
0055       auto visit_valid = [&](int64_t i) { return valid_func(data[i]); };
0056       return VisitBitBlocks(arr.buffers[0].data, arr.offset, arr.length,
0057                             std::move(visit_valid), std::forward<NullFunc>(null_func));
0058     }
0059   }
0060 
0061   template <typename ValidFunc, typename NullFunc>
0062   static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0063                         NullFunc&& null_func) {
0064     if constexpr (std::is_same_v<T, BooleanType>) {
0065       int64_t offset = arr.offset;
0066       const uint8_t* data = arr.buffers[1].data;
0067       VisitBitBlocksVoid(
0068           arr.buffers[0].data, offset, arr.length,
0069           [&](int64_t i) { valid_func(bit_util::GetBit(data, offset + i)); },
0070           std::forward<NullFunc>(null_func));
0071     } else {
0072       const c_type* data = arr.GetValues<c_type>(1);
0073       auto visit_valid = [&](int64_t i) { valid_func(data[i]); };
0074       VisitBitBlocksVoid(arr.buffers[0].data, arr.offset, arr.length,
0075                          std::move(visit_valid), std::forward<NullFunc>(null_func));
0076     }
0077   }
0078 };
0079 
0080 // Binary, String...
0081 template <typename T>
0082 struct ArraySpanInlineVisitor<T, enable_if_base_binary<T>> {
0083   using c_type = std::string_view;
0084 
0085   template <typename ValidFunc, typename NullFunc>
0086   static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0087                             NullFunc&& null_func) {
0088     using offset_type = typename T::offset_type;
0089     constexpr char empty_value = 0;
0090 
0091     if (arr.length == 0) {
0092       return Status::OK();
0093     }
0094     const offset_type* offsets = arr.GetValues<offset_type>(1);
0095     const char* data;
0096     if (arr.buffers[2].data == NULLPTR) {
0097       data = &empty_value;
0098     } else {
0099       // Do not apply the array offset to the values array; the value_offsets
0100       // index the non-sliced values array.
0101       data = arr.GetValues<char>(2, /*absolute_offset=*/0);
0102     }
0103     offset_type cur_offset = *offsets++;
0104     return VisitBitBlocks(
0105         arr.buffers[0].data, arr.offset, arr.length,
0106         [&](int64_t i) {
0107           ARROW_UNUSED(i);
0108           auto value = std::string_view(data + cur_offset, *offsets - cur_offset);
0109           cur_offset = *offsets++;
0110           return valid_func(value);
0111         },
0112         [&]() {
0113           cur_offset = *offsets++;
0114           return null_func();
0115         });
0116   }
0117 
0118   template <typename ValidFunc, typename NullFunc>
0119   static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0120                         NullFunc&& null_func) {
0121     using offset_type = typename T::offset_type;
0122     constexpr uint8_t empty_value = 0;
0123 
0124     if (arr.length == 0) {
0125       return;
0126     }
0127     const offset_type* offsets = arr.GetValues<offset_type>(1);
0128     const uint8_t* data;
0129     if (arr.buffers[2].data == NULLPTR) {
0130       data = &empty_value;
0131     } else {
0132       // Do not apply the array offset to the values array; the value_offsets
0133       // index the non-sliced values array.
0134       data = arr.GetValues<uint8_t>(2, /*absolute_offset=*/0);
0135     }
0136 
0137     VisitBitBlocksVoid(
0138         arr.buffers[0].data, arr.offset, arr.length,
0139         [&](int64_t i) {
0140           auto value = std::string_view(reinterpret_cast<const char*>(data + offsets[i]),
0141                                         offsets[i + 1] - offsets[i]);
0142           valid_func(value);
0143         },
0144         std::forward<NullFunc>(null_func));
0145   }
0146 };
0147 
0148 // BinaryView, StringView...
0149 template <typename T>
0150 struct ArraySpanInlineVisitor<T, enable_if_binary_view_like<T>> {
0151   using c_type = std::string_view;
0152 
0153   template <typename ValidFunc, typename NullFunc>
0154   static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0155                             NullFunc&& null_func) {
0156     if (arr.length == 0) {
0157       return Status::OK();
0158     }
0159     auto* s = arr.GetValues<BinaryViewType::c_type>(1);
0160     auto* data_buffers = arr.GetVariadicBuffers().data();
0161     return VisitBitBlocks(
0162         arr.buffers[0].data, arr.offset, arr.length,
0163         [&](int64_t index) {
0164           return valid_func(util::FromBinaryView(s[index], data_buffers));
0165         },
0166         [&]() { return null_func(); });
0167   }
0168 
0169   template <typename ValidFunc, typename NullFunc>
0170   static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0171                         NullFunc&& null_func) {
0172     if (arr.length == 0) {
0173       return;
0174     }
0175     auto* s = arr.GetValues<BinaryViewType::c_type>(1);
0176     auto* data_buffers = arr.GetVariadicBuffers().data();
0177     VisitBitBlocksVoid(
0178         arr.buffers[0].data, arr.offset, arr.length,
0179         [&](int64_t index) { valid_func(util::FromBinaryView(s[index], data_buffers)); },
0180         std::forward<NullFunc>(null_func));
0181   }
0182 };
0183 
0184 // FixedSizeBinary, Decimal128
0185 template <typename T>
0186 struct ArraySpanInlineVisitor<T, enable_if_fixed_size_binary<T>> {
0187   using c_type = std::string_view;
0188 
0189   template <typename ValidFunc, typename NullFunc>
0190   static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0191                             NullFunc&& null_func) {
0192     const int32_t byte_width = arr.type->byte_width();
0193     const char* data = arr.GetValues<char>(1,
0194                                            /*absolute_offset=*/arr.offset * byte_width);
0195     return VisitBitBlocks(
0196         arr.buffers[0].data, arr.offset, arr.length,
0197         [&](int64_t i) {
0198           auto value = std::string_view(data, byte_width);
0199           data += byte_width;
0200           return valid_func(value);
0201         },
0202         [&]() {
0203           data += byte_width;
0204           return null_func();
0205         });
0206   }
0207 
0208   template <typename ValidFunc, typename NullFunc>
0209   static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0210                         NullFunc&& null_func) {
0211     const int32_t byte_width = arr.type->byte_width();
0212     const char* data = arr.GetValues<char>(1,
0213                                            /*absolute_offset=*/arr.offset * byte_width);
0214     VisitBitBlocksVoid(
0215         arr.buffers[0].data, arr.offset, arr.length,
0216         [&](int64_t i) {
0217           valid_func(std::string_view(data, byte_width));
0218           data += byte_width;
0219         },
0220         [&]() {
0221           data += byte_width;
0222           null_func();
0223         });
0224   }
0225 };
0226 
0227 }  // namespace internal
0228 
0229 template <typename T, typename ValidFunc, typename NullFunc>
0230 typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
0231 VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
0232   return internal::ArraySpanInlineVisitor<T>::VisitStatus(
0233       arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
0234 }
0235 
0236 template <typename T, typename ValidFunc, typename NullFunc>
0237 typename internal::call_traits::enable_if_return<ValidFunc, void>::type
0238 VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
0239   return internal::ArraySpanInlineVisitor<T>::VisitVoid(
0240       arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
0241 }
0242 
0243 // Visit an array's data values, in order, without overhead.
0244 //
0245 // The Visit method's `visitor` argument should be an object with two public methods:
0246 // - Status VisitNull()
0247 // - Status VisitValue(<scalar>)
0248 //
0249 // The scalar value's type depends on the array data type:
0250 // - the type's `c_type`, if any
0251 // - for boolean arrays, a `bool`
0252 // - for binary, string, large binary and string, binary and string view, and fixed-size
0253 //   binary arrays, a `std::string_view`
0254 
0255 template <typename T>
0256 struct ArraySpanVisitor {
0257   using InlineVisitorType = internal::ArraySpanInlineVisitor<T>;
0258   using c_type = typename InlineVisitorType::c_type;
0259 
0260   template <typename Visitor>
0261   static Status Visit(const ArraySpan& arr, Visitor* visitor) {
0262     return InlineVisitorType::VisitStatus(
0263         arr, [visitor](c_type v) { return visitor->VisitValue(v); },
0264         [visitor]() { return visitor->VisitNull(); });
0265   }
0266 };
0267 
0268 // Visit a null bitmap, in order, without overhead.
0269 //
0270 // The given `ValidFunc` should be a callable with either of these signatures:
0271 // - void()
0272 // - Status()
0273 //
0274 // The `NullFunc` should have the same return type as `ValidFunc`.
0275 
0276 template <typename ValidFunc, typename NullFunc>
0277 typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
0278 VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
0279                       int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
0280                       NullFunc&& null_func) {
0281   internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
0282                                                 valid_bits_offset, num_values);
0283   int64_t position = 0;
0284   int64_t offset_position = valid_bits_offset;
0285   while (position < num_values) {
0286     internal::BitBlockCount block = bit_counter.NextBlock();
0287     if (block.AllSet()) {
0288       for (int64_t i = 0; i < block.length; ++i) {
0289         ARROW_RETURN_NOT_OK(valid_func());
0290       }
0291     } else if (block.NoneSet()) {
0292       for (int64_t i = 0; i < block.length; ++i) {
0293         ARROW_RETURN_NOT_OK(null_func());
0294       }
0295     } else {
0296       for (int64_t i = 0; i < block.length; ++i) {
0297         ARROW_RETURN_NOT_OK(bit_util::GetBit(valid_bits, offset_position + i)
0298                                 ? valid_func()
0299                                 : null_func());
0300       }
0301     }
0302     position += block.length;
0303     offset_position += block.length;
0304   }
0305   return Status::OK();
0306 }
0307 
0308 template <typename ValidFunc, typename NullFunc>
0309 typename internal::call_traits::enable_if_return<ValidFunc, void>::type
0310 VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
0311                       int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
0312                       NullFunc&& null_func) {
0313   internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
0314                                                 valid_bits_offset, num_values);
0315   int64_t position = 0;
0316   int64_t offset_position = valid_bits_offset;
0317   while (position < num_values) {
0318     internal::BitBlockCount block = bit_counter.NextBlock();
0319     if (block.AllSet()) {
0320       for (int64_t i = 0; i < block.length; ++i) {
0321         valid_func();
0322       }
0323     } else if (block.NoneSet()) {
0324       for (int64_t i = 0; i < block.length; ++i) {
0325         null_func();
0326       }
0327     } else {
0328       for (int64_t i = 0; i < block.length; ++i) {
0329         bit_util::GetBit(valid_bits, offset_position + i) ? valid_func() : null_func();
0330       }
0331     }
0332     position += block.length;
0333     offset_position += block.length;
0334   }
0335 }
0336 
0337 }  // namespace arrow