File indexing completed on 2025-08-27 08:47:24
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <string_view>
0021
0022 #include "arrow/array.h"
0023 #include "arrow/status.h"
0024 #include "arrow/type.h"
0025 #include "arrow/type_traits.h"
0026 #include "arrow/util/binary_view_util.h"
0027 #include "arrow/util/bit_block_counter.h"
0028 #include "arrow/util/bit_util.h"
0029 #include "arrow/util/checked_cast.h"
0030 #include "arrow/util/functional.h"
0031
0032 namespace arrow {
0033 namespace internal {
0034
// Primary template of the per-type inline visitor. It declares no members;
// the partial specializations in this header, selected through the second
// (SFINAE) template parameter, provide VisitStatus / VisitVoid and `c_type`.
template <typename T, typename SfinaeGuard = void>
struct ArraySpanInlineVisitor {};
0037
0038
0039 template <typename T>
0040 struct ArraySpanInlineVisitor<T, enable_if_has_c_type<T>> {
0041 using c_type = typename T::c_type;
0042
0043 template <typename ValidFunc, typename NullFunc>
0044 static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0045 NullFunc&& null_func) {
0046 if constexpr (std::is_same_v<T, BooleanType>) {
0047 int64_t offset = arr.offset;
0048 const uint8_t* data = arr.buffers[1].data;
0049 return VisitBitBlocks(
0050 arr.buffers[0].data, offset, arr.length,
0051 [&](int64_t i) { return valid_func(bit_util::GetBit(data, offset + i)); },
0052 std::forward<NullFunc>(null_func));
0053 } else {
0054 const c_type* data = arr.GetValues<c_type>(1);
0055 auto visit_valid = [&](int64_t i) { return valid_func(data[i]); };
0056 return VisitBitBlocks(arr.buffers[0].data, arr.offset, arr.length,
0057 std::move(visit_valid), std::forward<NullFunc>(null_func));
0058 }
0059 }
0060
0061 template <typename ValidFunc, typename NullFunc>
0062 static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0063 NullFunc&& null_func) {
0064 if constexpr (std::is_same_v<T, BooleanType>) {
0065 int64_t offset = arr.offset;
0066 const uint8_t* data = arr.buffers[1].data;
0067 VisitBitBlocksVoid(
0068 arr.buffers[0].data, offset, arr.length,
0069 [&](int64_t i) { valid_func(bit_util::GetBit(data, offset + i)); },
0070 std::forward<NullFunc>(null_func));
0071 } else {
0072 const c_type* data = arr.GetValues<c_type>(1);
0073 auto visit_valid = [&](int64_t i) { valid_func(data[i]); };
0074 VisitBitBlocksVoid(arr.buffers[0].data, arr.offset, arr.length,
0075 std::move(visit_valid), std::forward<NullFunc>(null_func));
0076 }
0077 }
0078 };
0079
0080
0081 template <typename T>
0082 struct ArraySpanInlineVisitor<T, enable_if_base_binary<T>> {
0083 using c_type = std::string_view;
0084
0085 template <typename ValidFunc, typename NullFunc>
0086 static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0087 NullFunc&& null_func) {
0088 using offset_type = typename T::offset_type;
0089 constexpr char empty_value = 0;
0090
0091 if (arr.length == 0) {
0092 return Status::OK();
0093 }
0094 const offset_type* offsets = arr.GetValues<offset_type>(1);
0095 const char* data;
0096 if (arr.buffers[2].data == NULLPTR) {
0097 data = &empty_value;
0098 } else {
0099
0100
0101 data = arr.GetValues<char>(2, 0);
0102 }
0103 offset_type cur_offset = *offsets++;
0104 return VisitBitBlocks(
0105 arr.buffers[0].data, arr.offset, arr.length,
0106 [&](int64_t i) {
0107 ARROW_UNUSED(i);
0108 auto value = std::string_view(data + cur_offset, *offsets - cur_offset);
0109 cur_offset = *offsets++;
0110 return valid_func(value);
0111 },
0112 [&]() {
0113 cur_offset = *offsets++;
0114 return null_func();
0115 });
0116 }
0117
0118 template <typename ValidFunc, typename NullFunc>
0119 static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0120 NullFunc&& null_func) {
0121 using offset_type = typename T::offset_type;
0122 constexpr uint8_t empty_value = 0;
0123
0124 if (arr.length == 0) {
0125 return;
0126 }
0127 const offset_type* offsets = arr.GetValues<offset_type>(1);
0128 const uint8_t* data;
0129 if (arr.buffers[2].data == NULLPTR) {
0130 data = &empty_value;
0131 } else {
0132
0133
0134 data = arr.GetValues<uint8_t>(2, 0);
0135 }
0136
0137 VisitBitBlocksVoid(
0138 arr.buffers[0].data, arr.offset, arr.length,
0139 [&](int64_t i) {
0140 auto value = std::string_view(reinterpret_cast<const char*>(data + offsets[i]),
0141 offsets[i + 1] - offsets[i]);
0142 valid_func(value);
0143 },
0144 std::forward<NullFunc>(null_func));
0145 }
0146 };
0147
0148
0149 template <typename T>
0150 struct ArraySpanInlineVisitor<T, enable_if_binary_view_like<T>> {
0151 using c_type = std::string_view;
0152
0153 template <typename ValidFunc, typename NullFunc>
0154 static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0155 NullFunc&& null_func) {
0156 if (arr.length == 0) {
0157 return Status::OK();
0158 }
0159 auto* s = arr.GetValues<BinaryViewType::c_type>(1);
0160 auto* data_buffers = arr.GetVariadicBuffers().data();
0161 return VisitBitBlocks(
0162 arr.buffers[0].data, arr.offset, arr.length,
0163 [&](int64_t index) {
0164 return valid_func(util::FromBinaryView(s[index], data_buffers));
0165 },
0166 [&]() { return null_func(); });
0167 }
0168
0169 template <typename ValidFunc, typename NullFunc>
0170 static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0171 NullFunc&& null_func) {
0172 if (arr.length == 0) {
0173 return;
0174 }
0175 auto* s = arr.GetValues<BinaryViewType::c_type>(1);
0176 auto* data_buffers = arr.GetVariadicBuffers().data();
0177 VisitBitBlocksVoid(
0178 arr.buffers[0].data, arr.offset, arr.length,
0179 [&](int64_t index) { valid_func(util::FromBinaryView(s[index], data_buffers)); },
0180 std::forward<NullFunc>(null_func));
0181 }
0182 };
0183
0184
0185 template <typename T>
0186 struct ArraySpanInlineVisitor<T, enable_if_fixed_size_binary<T>> {
0187 using c_type = std::string_view;
0188
0189 template <typename ValidFunc, typename NullFunc>
0190 static Status VisitStatus(const ArraySpan& arr, ValidFunc&& valid_func,
0191 NullFunc&& null_func) {
0192 const int32_t byte_width = arr.type->byte_width();
0193 const char* data = arr.GetValues<char>(1,
0194 arr.offset * byte_width);
0195 return VisitBitBlocks(
0196 arr.buffers[0].data, arr.offset, arr.length,
0197 [&](int64_t i) {
0198 auto value = std::string_view(data, byte_width);
0199 data += byte_width;
0200 return valid_func(value);
0201 },
0202 [&]() {
0203 data += byte_width;
0204 return null_func();
0205 });
0206 }
0207
0208 template <typename ValidFunc, typename NullFunc>
0209 static void VisitVoid(const ArraySpan& arr, ValidFunc&& valid_func,
0210 NullFunc&& null_func) {
0211 const int32_t byte_width = arr.type->byte_width();
0212 const char* data = arr.GetValues<char>(1,
0213 arr.offset * byte_width);
0214 VisitBitBlocksVoid(
0215 arr.buffers[0].data, arr.offset, arr.length,
0216 [&](int64_t i) {
0217 valid_func(std::string_view(data, byte_width));
0218 data += byte_width;
0219 },
0220 [&]() {
0221 data += byte_width;
0222 null_func();
0223 });
0224 }
0225 };
0226
0227 }
0228
0229 template <typename T, typename ValidFunc, typename NullFunc>
0230 typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
0231 VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
0232 return internal::ArraySpanInlineVisitor<T>::VisitStatus(
0233 arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
0234 }
0235
0236 template <typename T, typename ValidFunc, typename NullFunc>
0237 typename internal::call_traits::enable_if_return<ValidFunc, void>::type
0238 VisitArraySpanInline(const ArraySpan& arr, ValidFunc&& valid_func, NullFunc&& null_func) {
0239 return internal::ArraySpanInlineVisitor<T>::VisitVoid(
0240 arr, std::forward<ValidFunc>(valid_func), std::forward<NullFunc>(null_func));
0241 }
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255 template <typename T>
0256 struct ArraySpanVisitor {
0257 using InlineVisitorType = internal::ArraySpanInlineVisitor<T>;
0258 using c_type = typename InlineVisitorType::c_type;
0259
0260 template <typename Visitor>
0261 static Status Visit(const ArraySpan& arr, Visitor* visitor) {
0262 return InlineVisitorType::VisitStatus(
0263 arr, [visitor](c_type v) { return visitor->VisitValue(v); },
0264 [visitor]() { return visitor->VisitNull(); });
0265 }
0266 };
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276 template <typename ValidFunc, typename NullFunc>
0277 typename internal::call_traits::enable_if_return<ValidFunc, Status>::type
0278 VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
0279 int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
0280 NullFunc&& null_func) {
0281 internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
0282 valid_bits_offset, num_values);
0283 int64_t position = 0;
0284 int64_t offset_position = valid_bits_offset;
0285 while (position < num_values) {
0286 internal::BitBlockCount block = bit_counter.NextBlock();
0287 if (block.AllSet()) {
0288 for (int64_t i = 0; i < block.length; ++i) {
0289 ARROW_RETURN_NOT_OK(valid_func());
0290 }
0291 } else if (block.NoneSet()) {
0292 for (int64_t i = 0; i < block.length; ++i) {
0293 ARROW_RETURN_NOT_OK(null_func());
0294 }
0295 } else {
0296 for (int64_t i = 0; i < block.length; ++i) {
0297 ARROW_RETURN_NOT_OK(bit_util::GetBit(valid_bits, offset_position + i)
0298 ? valid_func()
0299 : null_func());
0300 }
0301 }
0302 position += block.length;
0303 offset_position += block.length;
0304 }
0305 return Status::OK();
0306 }
0307
0308 template <typename ValidFunc, typename NullFunc>
0309 typename internal::call_traits::enable_if_return<ValidFunc, void>::type
0310 VisitNullBitmapInline(const uint8_t* valid_bits, int64_t valid_bits_offset,
0311 int64_t num_values, int64_t null_count, ValidFunc&& valid_func,
0312 NullFunc&& null_func) {
0313 internal::OptionalBitBlockCounter bit_counter(null_count == 0 ? NULLPTR : valid_bits,
0314 valid_bits_offset, num_values);
0315 int64_t position = 0;
0316 int64_t offset_position = valid_bits_offset;
0317 while (position < num_values) {
0318 internal::BitBlockCount block = bit_counter.NextBlock();
0319 if (block.AllSet()) {
0320 for (int64_t i = 0; i < block.length; ++i) {
0321 valid_func();
0322 }
0323 } else if (block.NoneSet()) {
0324 for (int64_t i = 0; i < block.length; ++i) {
0325 null_func();
0326 }
0327 } else {
0328 for (int64_t i = 0; i < block.length; ++i) {
0329 bit_util::GetBit(valid_bits, offset_position + i) ? valid_func() : null_func();
0330 }
0331 }
0332 position += block.length;
0333 offset_position += block.length;
0334 }
0335 }
0336
0337 }