// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <atomic>
#include <cstdint>
#include <optional>
#include <thread>
#include <unordered_map>
#include <vector>

#include "arrow/compute/expression.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/result.h"
#include "arrow/util/cpu_info.h"
#include "arrow/util/simd.h"

#if defined(__clang__) || defined(__GNUC__)
# define BYTESWAP(x) __builtin_bswap64(x)
# define ROTL(x, n) (((x) << (n)) | ((x) >> ((-n) & 31)))
# define ROTL64(x, n) (((x) << (n)) | ((x) >> ((-n) & 63)))
#elif defined(_MSC_VER)
# include <intrin.h>
# define BYTESWAP(x) _byteswap_uint64(x)
# define ROTL(x, n) _rotl((x), (n))
# define ROTL64(x, n) _rotl64((x), (n))
#endif
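
// A quick illustration of the rotation macros (comment-only, since the MSVC
// variants are not constant expressions): for a 32-bit unsigned input,
//
//   ROTL(0x80000001u, 1) == 0x00000003u  // the high bit wraps around to bit 0
//   ROTL64(0x8000000000000001ull, 1) == 0x0000000000000003ull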

namespace arrow {
namespace util {

// Some platforms typedef int64_t as long int instead of long long int,
// which breaks the _mm256_i64gather_epi64 and _mm256_mask_i64gather_epi64
// intrinsics, which need long long. We cast to the type below in these
// intrinsics to make the code compile in all cases.
using int64_for_gather_t = const long long int;
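
// A minimal sketch of the intended cast (assuming an AVX2 build; `values` and
// `idx` are hypothetical):
//
//   const int64_t* values = /* column data */;
//   __m256i idx = _mm256_setr_epi64x(0, 2, 4, 6);
//   __m256i gathered = _mm256_i64gather_epi64(
//       reinterpret_cast<int64_for_gather_t*>(values), idx, /*scale=*/8);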

// Processing is done in mini batches of rows, so that temporary buffers sized
// to one batch stay small; a mini batch is 1 << 10 = 1024 rows.
class MiniBatch {
 public:
  static constexpr int kLogMiniBatchLength = 10;
  static constexpr int kMiniBatchLength = 1 << kLogMiniBatchLength;
};
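
// A typical loop over mini batches (a hypothetical sketch; `num_rows` and
// `ProcessMiniBatch` are assumptions):
//
//   for (int64_t start = 0; start < num_rows;) {
//     int64_t batch_length =
//         std::min(num_rows - start,
//                  static_cast<int64_t>(MiniBatch::kMiniBatchLength));
//     ProcessMiniBatch(start, batch_length);
//     start += batch_length;
//   }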

namespace bit_util {

// Write out the indexes of all bits in `bits` equal to `bit_to_search` (0 or 1).
// Output indexes are 16-bit, which bounds the number of bits per call.
ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
                                  const int num_bits, const uint8_t* bits,
                                  int* num_indexes, uint16_t* indexes,
                                  int bit_offset = 0);
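
// For example, with Arrow's least-significant-bit-first bitmaps, searching the
// single byte 0b00100110 for set bits yields {1, 2, 5} (a sketch; hardware_flags
// would typically come from arrow::internal::CpuInfo):
//
//   uint8_t bits = 0x26;  // 0b00100110
//   uint16_t indexes[8];
//   int num_indexes = 0;
//   bits_to_indexes(/*bit_to_search=*/1, hardware_flags, /*num_bits=*/8, &bits,
//                   &num_indexes, indexes);
//   // num_indexes == 3 and indexes[0..2] == {1, 2, 5}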

// Keep only the input indexes whose corresponding bit equals `bit_to_search`.
// Input and output index arrays may point to the same data (in-place filtering).
ARROW_EXPORT void bits_filter_indexes(int bit_to_search, int64_t hardware_flags,
                                      const int num_bits, const uint8_t* bits,
                                      const uint16_t* input_indexes, int* num_indexes,
                                      uint16_t* indexes, int bit_offset = 0);

// Split the bit indexes into those whose bit equals 0 and those whose bit
// equals 1, written to separate output arrays (the number of 1-bit indexes is
// num_bits - *num_indexes_bit0).
ARROW_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits,
                                     const uint8_t* bits, int* num_indexes_bit0,
                                     uint16_t* indexes_bit0, uint16_t* indexes_bit1,
                                     int bit_offset = 0);

// Expand each bit into a full output byte: bit 1 becomes 0xFF, bit 0 becomes 0x00.
ARROW_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits,
                                const uint8_t* bits, uint8_t* bytes, int bit_offset = 0);

// Pack the highest bit of each input byte into the output bit vector.
ARROW_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits,
                                const uint8_t* bytes, uint8_t* bits, int bit_offset = 0);

// Returns true if all `num_bytes` bytes are zero.
ARROW_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes,
                                     uint32_t num_bytes);
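
// bits_to_bytes and bytes_to_bits invert each other on 0x00/0xFF bytes, so a
// round trip recovers the original bitmap (a sketch; `hardware_flags` is again
// an assumption):
//
//   uint8_t bits = 0x26, bytes[8], bits_out = 0;
//   bits_to_bytes(hardware_flags, /*num_bits=*/8, &bits, bytes);
//   // bytes == {0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00}
//   bytes_to_bits(hardware_flags, /*num_bits=*/8, bytes, &bits_out);
//   // bits_out == 0x26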

#if defined(ARROW_HAVE_RUNTIME_AVX2) && defined(ARROW_HAVE_RUNTIME_BMI2)

// AVX2 variants of the functions above, compiled only when runtime AVX2 and
// BMI2 support is available; the generic entry points select them based on
// `hardware_flags`.
namespace avx2 {
ARROW_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits,
                                           const uint8_t* bits,
                                           const uint16_t* input_indexes,
                                           int* num_indexes, uint16_t* indexes);
ARROW_EXPORT void bits_to_indexes_avx2(int bit_to_search, const int num_bits,
                                       const uint8_t* bits, int* num_indexes,
                                       uint16_t* indexes, uint16_t base_index = 0);
ARROW_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits,
                                     uint8_t* bytes);
ARROW_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes,
                                     uint8_t* bits);
ARROW_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes);
}  // namespace avx2

#endif

}  // namespace bit_util
}  // namespace util

namespace compute {

/// Modify an Expression with pre-order and post-order visitation.
/// `pre` will be invoked on each Expression. `pre` will visit Calls before their
/// arguments, `post_call` will visit Calls (and no other Expressions) after their
/// arguments.
template <typename PreVisit, typename PostVisitCall>
Result<Expression> ModifyExpression(Expression expr, const PreVisit& pre,
                                    const PostVisitCall& post_call) {
  ARROW_ASSIGN_OR_RAISE(expr, Result<Expression>(pre(std::move(expr))));

  auto call = expr.call();
  if (!call) return expr;

  bool at_least_one_modified = false;
  std::vector<Expression> modified_arguments;

  for (size_t i = 0; i < call->arguments.size(); ++i) {
    ARROW_ASSIGN_OR_RAISE(auto modified_argument,
                          ModifyExpression(call->arguments[i], pre, post_call));

    if (Identical(modified_argument, call->arguments[i])) {
      continue;
    }

    if (!at_least_one_modified) {
      // Copy the original arguments lazily, only once a modification occurs.
      modified_arguments = call->arguments;
      at_least_one_modified = true;
    }

    modified_arguments[i] = std::move(modified_argument);
  }

  if (at_least_one_modified) {
    // Reconstruct the call expression with the modified arguments.
    auto modified_call = *call;
    modified_call.arguments = std::move(modified_arguments);
    return post_call(Expression(std::move(modified_call)), &expr);
  }

  return post_call(std::move(expr), NULLPTR);
}
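
// A minimal usage sketch: a pass-through pre-visitor plus a post-call visitor
// that could rewrite calls (here both return their input unchanged; `expr` is a
// hypothetical Expression):
//
//   ARROW_ASSIGN_OR_RAISE(
//       Expression rewritten,
//       ModifyExpression(
//           expr,
//           /*pre=*/[](Expression e) { return e; },
//           /*post_call=*/[](Expression e, Expression*) -> Result<Expression> {
//             // Inspect e.call() and return a replacement to rewrite it.
//             return e;
//           }));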

/// Helper class to calculate the modified number of rows to process using SIMD.
///
/// Some array elements at the end will be skipped in order to avoid buffer
/// overrun when doing memory loads and stores using a larger word size than a
/// single array element.
class TailSkipForSIMD {
 public:
  static int64_t FixBitAccess(int num_bytes_accessed_together, int64_t num_rows,
                              int bit_offset) {
    int64_t num_bytes = bit_util::BytesForBits(num_rows + bit_offset);
    int64_t num_bytes_safe =
        std::max(static_cast<int64_t>(0LL), num_bytes - num_bytes_accessed_together + 1);
    int64_t num_rows_safe =
        std::max(static_cast<int64_t>(0LL), 8 * num_bytes_safe - bit_offset);
    return std::min(num_rows_safe, num_rows);
  }
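
  // Worked example: with num_rows = 100, bit_offset = 0, and 8-byte accesses,
  // BytesForBits(100) = 13 and num_bytes_safe = 13 - 8 + 1 = 6, so only the
  // first 8 * 6 = 48 rows can be touched with full 8-byte loads; the remaining
  // 52 rows need a scalar tail.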

  static int64_t FixBinaryAccess(int num_bytes_accessed_together, int64_t num_rows,
                                 int64_t length) {
    int64_t num_rows_to_skip = bit_util::CeilDiv(length, num_bytes_accessed_together);
    int64_t num_rows_safe =
        std::max(static_cast<int64_t>(0LL), num_rows - num_rows_to_skip);
    return num_rows_safe;
  }
  static int64_t FixVarBinaryAccess(int num_bytes_accessed_together, int64_t num_rows,
                                    const uint32_t* offsets) {
    // Do not process rows that could read past the end of the buffer using
    // N-byte loads/stores.
    int64_t num_rows_safe = num_rows;
    while (num_rows_safe > 0 &&
           offsets[num_rows_safe] + num_bytes_accessed_together > offsets[num_rows]) {
      --num_rows_safe;
    }
    return num_rows_safe;
  }
  static int FixSelection(int64_t num_rows_safe, int num_selected,
                          const uint16_t* selection) {
    // Assumes the selection vector is sorted in ascending order.
    int num_selected_safe = num_selected;
    while (num_selected_safe > 0 && selection[num_selected_safe - 1] >= num_rows_safe) {
      --num_selected_safe;
    }
    return num_selected_safe;
  }
};
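
// A combined usage sketch (hypothetical; `num_rows`, `selection`, and
// `num_selected` are assumptions, with `selection` sorted ascending): clamp the
// row count for 8-byte-wide bitmap accesses, then drop selection entries that
// fall past it.
//
//   int64_t num_rows_safe = TailSkipForSIMD::FixBitAccess(
//       /*num_bytes_accessed_together=*/8, num_rows, /*bit_offset=*/0);
//   int num_selected_safe =
//       TailSkipForSIMD::FixSelection(num_rows_safe, num_selected, selection);
//   // Rows in [0, num_rows_safe) may use SIMD paths; handle the rest with
//   // scalar code.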

}  // namespace compute
}  // namespace arrow