File indexing completed on 2025-08-28 08:27:08
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020 #pragma once
0021
0022 #include <array>
0023 #include <cassert>
0024 #include <chrono>
0025 #include <limits>
0026 #include <memory>
0027 #include <string>
0028 #include <string_view>
0029 #include <type_traits>
0030 #include <utility>
0031
0032 #include "arrow/status.h"
0033 #include "arrow/type.h"
0034 #include "arrow/type_traits.h"
0035 #include "arrow/util/double_conversion.h"
0036 #include "arrow/util/macros.h"
0037 #include "arrow/util/string.h"
0038 #include "arrow/util/time.h"
0039 #include "arrow/util/visibility.h"
0040 #include "arrow/vendored/datetime.h"
0041
0042 namespace arrow {
0043 namespace internal {
0044
0045
0046 template <typename ARROW_TYPE, typename Enable = void>
0047 class StringFormatter;
0048
0049 template <typename T>
0050 struct is_formattable {
0051 template <typename U, typename = typename StringFormatter<U>::value_type>
0052 static std::true_type Test(U*);
0053
0054 template <typename U>
0055 static std::false_type Test(...);
0056
0057 static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
0058 };
0059
0060 template <typename T, typename R = void>
0061 using enable_if_formattable = enable_if_t<is_formattable<T>::value, R>;
0062
/// \brief The type obtained by invoking an Appender with a std::string_view.
///
/// Every formatter's call operator returns whatever its appender returns.
template <typename Appender>
using Return = std::invoke_result_t<Appender, std::string_view>;
0065
0066
0067
0068
0069 template <>
0070 class StringFormatter<BooleanType> {
0071 public:
0072 explicit StringFormatter(const DataType* = NULLPTR) {}
0073
0074 using value_type = bool;
0075
0076 template <typename Appender>
0077 Return<Appender> operator()(bool value, Appender&& append) {
0078 if (value) {
0079 const char string[] = "true";
0080 return append(std::string_view(string));
0081 } else {
0082 const char string[] = "false";
0083 return append(std::string_view(string));
0084 }
0085 }
0086 };
0087
0088
0089
0090
0091 template <typename ARROW_TYPE>
0092 class DecimalToStringFormatterMixin {
0093 public:
0094 explicit DecimalToStringFormatterMixin(const DataType* type)
0095 : scale_(static_cast<const ARROW_TYPE*>(type)->scale()) {}
0096
0097 using value_type = typename TypeTraits<ARROW_TYPE>::CType;
0098
0099 template <typename Appender>
0100 Return<Appender> operator()(const value_type& value, Appender&& append) {
0101 return append(value.ToString(scale_));
0102 }
0103
0104 private:
0105 int32_t scale_;
0106 };
0107
0108 template <>
0109 class StringFormatter<Decimal32Type>
0110 : public DecimalToStringFormatterMixin<Decimal32Type> {
0111 using DecimalToStringFormatterMixin::DecimalToStringFormatterMixin;
0112 };
0113
0114 template <>
0115 class StringFormatter<Decimal64Type>
0116 : public DecimalToStringFormatterMixin<Decimal64Type> {
0117 using DecimalToStringFormatterMixin::DecimalToStringFormatterMixin;
0118 };
0119
0120 template <>
0121 class StringFormatter<Decimal128Type>
0122 : public DecimalToStringFormatterMixin<Decimal128Type> {
0123 using DecimalToStringFormatterMixin::DecimalToStringFormatterMixin;
0124 };
0125
0126 template <>
0127 class StringFormatter<Decimal256Type>
0128 : public DecimalToStringFormatterMixin<Decimal256Type> {
0129 using DecimalToStringFormatterMixin::DecimalToStringFormatterMixin;
0130 };
0131
0132
0133
0134
0135 namespace detail {
0136
0137
0138 ARROW_EXPORT extern const char digit_pairs[];
0139
0140
0141
0142
0143
/// Store `c` in the slot just before `*cursor` and move `*cursor` back onto it.
///
/// All formatting helpers in this file fill their buffer from the end toward
/// the beginning, so the cursor always designates the first written char.
inline void FormatOneChar(char c, char** cursor) {
  char*& position = *cursor;
  --position;
  *position = c;
}

/// Write the single decimal digit `value` (asserted to lie in [0, 9]) in front
/// of `*cursor`.
template <typename Int>
void FormatOneDigit(Int value, char** cursor) {
  assert(value >= 0 && value <= 9);
  const char digit = static_cast<char>('0' + value);
  FormatOneChar(digit, cursor);
}
0151
0152
0153
0154
0155
0156 template <typename Int>
0157 inline void FormatTwoDigits(Int value, char** cursor) {
0158 assert(value >= 0 && value <= 99);
0159 auto digit_pair = &digit_pairs[value * 2];
0160 FormatOneChar(digit_pair[1], cursor);
0161 FormatOneChar(digit_pair[0], cursor);
0162 }
0163
/// Write every decimal digit of the non-negative `value` in front of
/// `*cursor`, without padding.
template <typename Int>
void FormatAllDigits(Int value, char** cursor) {
  assert(value >= 0);
  // Peel off the two least significant digits at a time.
  for (; value >= 100; value /= 100) {
    FormatTwoDigits(value % 100, cursor);
  }

  // One or two digits remain.
  if (value < 10) {
    FormatOneDigit(value, cursor);
    return;
  }
  FormatTwoDigits(value, cursor);
}
0178
0179 template <typename Int>
0180 void FormatAllDigitsLeftPadded(Int value, size_t pad, char pad_char, char** cursor) {
0181 auto end = *cursor - pad;
0182 FormatAllDigits(value, cursor);
0183 while (*cursor > end) {
0184 FormatOneChar(pad_char, cursor);
0185 }
0186 }
0187
/// Return a view over the chars from `cursor` up to the end of `buffer`.
///
/// `cursor` is expected to point into `buffer` (or one past its end, which
/// yields an empty view).
template <size_t BUFFER_SIZE>
std::string_view ViewDigitBuffer(const std::array<char, BUFFER_SIZE>& buffer,
                                 char* cursor) {
  const char* const buffer_end = buffer.data() + BUFFER_SIZE;
  const auto length = static_cast<size_t>(buffer_end - cursor);
  return std::string_view(cursor, length);
}
0194
/// Return the magnitude of `value` as the unsigned counterpart of Int.
///
/// The negation is done in unsigned arithmetic, so the most negative value of
/// a signed type is handled without signed overflow.
template <typename Int, typename UInt = std::make_unsigned_t<Int>>
constexpr UInt Abs(Int value) {
  return value >= 0 ? static_cast<UInt>(value) : ~static_cast<UInt>(value) + 1;
}
0199
/// Return how many decimal digits are needed to print `value`.
///
/// Any value of 9 or less, including zero and negative numbers, counts as one
/// digit.
template <typename Int>
constexpr size_t Digits10(Int value) {
  size_t count = 1;
  while (value > 9) {
    value /= 10;
    ++count;
  }
  return count;
}
0204
0205 }
0206
0207 template <typename ARROW_TYPE>
0208 class IntToStringFormatterMixin {
0209 public:
0210 explicit IntToStringFormatterMixin(const DataType* = NULLPTR) {}
0211
0212 using value_type = typename ARROW_TYPE::c_type;
0213
0214 template <typename Appender>
0215 Return<Appender> operator()(value_type value, Appender&& append) {
0216 constexpr size_t buffer_size =
0217 detail::Digits10(std::numeric_limits<value_type>::max()) + 1;
0218
0219 std::array<char, buffer_size> buffer;
0220 char* cursor = buffer.data() + buffer_size;
0221 detail::FormatAllDigits(detail::Abs(value), &cursor);
0222 if (value < 0) {
0223 detail::FormatOneChar('-', &cursor);
0224 }
0225 return append(detail::ViewDigitBuffer(buffer, cursor));
0226 }
0227 };
0228
0229 template <>
0230 class StringFormatter<Int8Type> : public IntToStringFormatterMixin<Int8Type> {
0231 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0232 };
0233
0234 template <>
0235 class StringFormatter<Int16Type> : public IntToStringFormatterMixin<Int16Type> {
0236 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0237 };
0238
0239 template <>
0240 class StringFormatter<Int32Type> : public IntToStringFormatterMixin<Int32Type> {
0241 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0242 };
0243
0244 template <>
0245 class StringFormatter<Int64Type> : public IntToStringFormatterMixin<Int64Type> {
0246 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0247 };
0248
0249 template <>
0250 class StringFormatter<UInt8Type> : public IntToStringFormatterMixin<UInt8Type> {
0251 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0252 };
0253
0254 template <>
0255 class StringFormatter<UInt16Type> : public IntToStringFormatterMixin<UInt16Type> {
0256 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0257 };
0258
0259 template <>
0260 class StringFormatter<UInt32Type> : public IntToStringFormatterMixin<UInt32Type> {
0261 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0262 };
0263
0264 template <>
0265 class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type> {
0266 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0267 };
0268
0269
0270
0271
0272 class ARROW_EXPORT FloatToStringFormatter {
0273 public:
0274 FloatToStringFormatter();
0275 FloatToStringFormatter(int flags, const char* inf_symbol, const char* nan_symbol,
0276 char exp_character, int decimal_in_shortest_low,
0277 int decimal_in_shortest_high,
0278 int max_leading_padding_zeroes_in_precision_mode,
0279 int max_trailing_padding_zeroes_in_precision_mode);
0280 ~FloatToStringFormatter();
0281
0282
0283 int FormatFloat(float v, char* out_buffer, int out_size);
0284 int FormatFloat(double v, char* out_buffer, int out_size);
0285 int FormatFloat(uint16_t v, char* out_buffer, int out_size);
0286
0287 protected:
0288 struct Impl;
0289 std::unique_ptr<Impl> impl_;
0290 };
0291
0292 template <typename ARROW_TYPE>
0293 class FloatToStringFormatterMixin : public FloatToStringFormatter {
0294 public:
0295 using value_type = typename ARROW_TYPE::c_type;
0296
0297 static constexpr int buffer_size = 50;
0298
0299 explicit FloatToStringFormatterMixin(const DataType* = NULLPTR) {}
0300
0301 FloatToStringFormatterMixin(int flags, const char* inf_symbol, const char* nan_symbol,
0302 char exp_character, int decimal_in_shortest_low,
0303 int decimal_in_shortest_high,
0304 int max_leading_padding_zeroes_in_precision_mode,
0305 int max_trailing_padding_zeroes_in_precision_mode)
0306 : FloatToStringFormatter(flags, inf_symbol, nan_symbol, exp_character,
0307 decimal_in_shortest_low, decimal_in_shortest_high,
0308 max_leading_padding_zeroes_in_precision_mode,
0309 max_trailing_padding_zeroes_in_precision_mode) {}
0310
0311 template <typename Appender>
0312 Return<Appender> operator()(value_type value, Appender&& append) {
0313 char buffer[buffer_size];
0314 int size = FormatFloat(value, buffer, buffer_size);
0315 return append(std::string_view(buffer, size));
0316 }
0317 };
0318
0319 template <>
0320 class StringFormatter<HalfFloatType> : public FloatToStringFormatterMixin<HalfFloatType> {
0321 public:
0322 using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
0323 };
0324
0325 template <>
0326 class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
0327 public:
0328 using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
0329 };
0330
0331 template <>
0332 class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
0333 public:
0334 using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
0335 };
0336
0337
0338
0339
0340 namespace detail {
0341
0342 constexpr size_t BufferSizeYYYY_MM_DD() {
0343
0344 return 1 + detail::Digits10(99999) + 1 + detail::Digits10(12) + 1 +
0345 detail::Digits10(31);
0346 }
0347
0348 inline void FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd, char** cursor) {
0349 FormatTwoDigits(static_cast<unsigned>(ymd.day()), cursor);
0350 FormatOneChar('-', cursor);
0351 FormatTwoDigits(static_cast<unsigned>(ymd.month()), cursor);
0352 FormatOneChar('-', cursor);
0353 auto year = static_cast<int>(ymd.year());
0354 const auto is_neg_year = year < 0;
0355 year = std::abs(year);
0356 assert(year <= 99999);
0357 FormatTwoDigits(year % 100, cursor);
0358 year /= 100;
0359 FormatTwoDigits(year % 100, cursor);
0360 if (year >= 100) {
0361 FormatOneDigit(year / 100, cursor);
0362 }
0363 if (is_neg_year) {
0364 FormatOneChar('-', cursor);
0365 }
0366 }
0367
0368 template <typename Duration>
0369 constexpr size_t BufferSizeHH_MM_SS() {
0370
0371 return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 +
0372 detail::Digits10(Duration::period::den) - 1;
0373 }
0374
0375 template <typename Duration>
0376 void FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms, char** cursor) {
0377 constexpr size_t subsecond_digits = Digits10(Duration::period::den) - 1;
0378 if (subsecond_digits != 0) {
0379 FormatAllDigitsLeftPadded(hms.subseconds().count(), subsecond_digits, '0', cursor);
0380 FormatOneChar('.', cursor);
0381 }
0382 FormatTwoDigits(hms.seconds().count(), cursor);
0383 FormatOneChar(':', cursor);
0384 FormatTwoDigits(hms.minutes().count(), cursor);
0385 FormatOneChar(':', cursor);
0386 FormatTwoDigits(hms.hours().count(), cursor);
0387 }
0388
0389
0390
0391
0392
0393
0394
0395
0396
0397
0398 template <typename Unit>
0399 bool IsDateTimeInRange(Unit duration) {
0400 constexpr Unit kMinIncl =
0401 std::chrono::duration_cast<Unit>(arrow_vendored::date::days{-12687428});
0402 constexpr Unit kMaxExcl =
0403 std::chrono::duration_cast<Unit>(arrow_vendored::date::days{11248738});
0404 return duration >= kMinIncl && duration < kMaxExcl;
0405 }
0406
0407
0408
0409
0410 constexpr bool IsDateTimeInRange(std::chrono::nanoseconds duration) { return true; }
0411
/// Return whether `duration` is a valid time of day, i.e. lies in the
/// half-open range [0 s, 86400 s).
template <typename Unit>
bool IsTimeInRange(Unit duration) {
  constexpr Unit kMidnight = std::chrono::duration_cast<Unit>(std::chrono::seconds{0});
  constexpr Unit kNextMidnight =
      std::chrono::duration_cast<Unit>(std::chrono::seconds{86400});
  return !(duration < kMidnight) && duration < kNextMidnight;
}
0418
0419 template <typename RawValue, typename Appender>
0420 Return<Appender> FormatOutOfRange(RawValue&& raw_value, Appender&& append) {
0421
0422 std::string formatted = "<value out of range: " + ToChars(raw_value) + ">";
0423 return append(std::move(formatted));
0424 }
0425
0426 const auto kEpoch = arrow_vendored::date::sys_days{arrow_vendored::date::jan / 1 / 1970};
0427
0428 }
0429
0430 template <>
0431 class StringFormatter<DurationType> : public IntToStringFormatterMixin<DurationType> {
0432 using IntToStringFormatterMixin::IntToStringFormatterMixin;
0433 };
0434
0435 class DateToStringFormatterMixin {
0436 public:
0437 explicit DateToStringFormatterMixin(const DataType* = NULLPTR) {}
0438
0439 protected:
0440 template <typename Appender>
0441 Return<Appender> FormatDays(arrow_vendored::date::days since_epoch, Appender&& append) {
0442 arrow_vendored::date::sys_days timepoint_days{since_epoch};
0443
0444 constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD();
0445
0446 std::array<char, buffer_size> buffer;
0447 char* cursor = buffer.data() + buffer_size;
0448
0449 detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
0450 &cursor);
0451 return append(detail::ViewDigitBuffer(buffer, cursor));
0452 }
0453 };
0454
0455 template <>
0456 class StringFormatter<Date32Type> : public DateToStringFormatterMixin {
0457 public:
0458 using value_type = typename Date32Type::c_type;
0459
0460 using DateToStringFormatterMixin::DateToStringFormatterMixin;
0461
0462 template <typename Appender>
0463 Return<Appender> operator()(value_type value, Appender&& append) {
0464 const auto since_epoch = arrow_vendored::date::days{value};
0465 if (!ARROW_PREDICT_TRUE(detail::IsDateTimeInRange(since_epoch))) {
0466 return detail::FormatOutOfRange(value, append);
0467 }
0468 return FormatDays(since_epoch, std::forward<Appender>(append));
0469 }
0470 };
0471
0472 template <>
0473 class StringFormatter<Date64Type> : public DateToStringFormatterMixin {
0474 public:
0475 using value_type = typename Date64Type::c_type;
0476
0477 using DateToStringFormatterMixin::DateToStringFormatterMixin;
0478
0479 template <typename Appender>
0480 Return<Appender> operator()(value_type value, Appender&& append) {
0481 const auto since_epoch = std::chrono::milliseconds{value};
0482 if (!ARROW_PREDICT_TRUE(detail::IsDateTimeInRange(since_epoch))) {
0483 return detail::FormatOutOfRange(value, append);
0484 }
0485 return FormatDays(std::chrono::duration_cast<arrow_vendored::date::days>(since_epoch),
0486 std::forward<Appender>(append));
0487 }
0488 };
0489
0490 template <>
0491 class StringFormatter<TimestampType> {
0492 public:
0493 using value_type = int64_t;
0494
0495 explicit StringFormatter(const DataType* type)
0496 : unit_(checked_cast<const TimestampType&>(*type).unit()),
0497 timezone_(checked_cast<const TimestampType&>(*type).timezone()) {}
0498
0499 template <typename Duration, typename Appender>
0500 Return<Appender> operator()(Duration, value_type value, Appender&& append) {
0501 using arrow_vendored::date::days;
0502
0503 const Duration since_epoch{value};
0504 if (!ARROW_PREDICT_TRUE(detail::IsDateTimeInRange(since_epoch))) {
0505 return detail::FormatOutOfRange(value, append);
0506 }
0507
0508 const auto timepoint = detail::kEpoch + since_epoch;
0509
0510
0511
0512
0513 auto timepoint_days = std::chrono::time_point_cast<days>(timepoint);
0514 Duration since_midnight;
0515 if (timepoint_days <= timepoint) {
0516
0517 since_midnight = timepoint - timepoint_days;
0518 } else {
0519
0520 since_midnight = days(1) - (timepoint_days - timepoint);
0521 timepoint_days -= days(1);
0522 }
0523
0524
0525 constexpr size_t buffer_size =
0526 detail::BufferSizeYYYY_MM_DD() + 1 + detail::BufferSizeHH_MM_SS<Duration>() + 1;
0527
0528 std::array<char, buffer_size> buffer;
0529 char* cursor = buffer.data() + buffer_size;
0530
0531 if (timezone_.size() > 0) {
0532 detail::FormatOneChar('Z', &cursor);
0533 }
0534 detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
0535 detail::FormatOneChar(' ', &cursor);
0536 detail::FormatYYYY_MM_DD(timepoint_days, &cursor);
0537 return append(detail::ViewDigitBuffer(buffer, cursor));
0538 }
0539
0540 template <typename Appender>
0541 Return<Appender> operator()(value_type value, Appender&& append) {
0542 return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
0543 }
0544
0545 private:
0546 TimeUnit::type unit_;
0547 std::string timezone_;
0548 };
0549
0550 template <typename T>
0551 class StringFormatter<T, enable_if_time<T>> {
0552 public:
0553 using value_type = typename T::c_type;
0554
0555 explicit StringFormatter(const DataType* type)
0556 : unit_(checked_cast<const T&>(*type).unit()) {}
0557
0558 template <typename Duration, typename Appender>
0559 Return<Appender> operator()(Duration, value_type count, Appender&& append) {
0560 const Duration since_midnight{count};
0561 if (!ARROW_PREDICT_TRUE(detail::IsTimeInRange(since_midnight))) {
0562 return detail::FormatOutOfRange(count, append);
0563 }
0564
0565 constexpr size_t buffer_size = detail::BufferSizeHH_MM_SS<Duration>();
0566
0567 std::array<char, buffer_size> buffer;
0568 char* cursor = buffer.data() + buffer_size;
0569
0570 detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
0571 return append(detail::ViewDigitBuffer(buffer, cursor));
0572 }
0573
0574 template <typename Appender>
0575 Return<Appender> operator()(value_type value, Appender&& append) {
0576 return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
0577 }
0578
0579 private:
0580 TimeUnit::type unit_;
0581 };
0582
0583 template <>
0584 class StringFormatter<MonthIntervalType> {
0585 public:
0586 using value_type = MonthIntervalType::c_type;
0587
0588 explicit StringFormatter(const DataType*) {}
0589
0590 template <typename Appender>
0591 Return<Appender> operator()(value_type interval, Appender&& append) {
0592 constexpr size_t buffer_size =
0593 3 + 1 +
0594 detail::Digits10(std::numeric_limits<value_type>::max());
0595 std::array<char, buffer_size> buffer;
0596 char* cursor = buffer.data() + buffer_size;
0597
0598 detail::FormatOneChar('M', &cursor);
0599 detail::FormatAllDigits(detail::Abs(interval), &cursor);
0600 if (interval < 0) detail::FormatOneChar('-', &cursor);
0601
0602 return append(detail::ViewDigitBuffer(buffer, cursor));
0603 }
0604 };
0605
0606 template <>
0607 class StringFormatter<DayTimeIntervalType> {
0608 public:
0609 using value_type = DayTimeIntervalType::DayMilliseconds;
0610
0611 explicit StringFormatter(const DataType*) {}
0612
0613 template <typename Appender>
0614 Return<Appender> operator()(value_type interval, Appender&& append) {
0615 constexpr size_t buffer_size =
0616 3 + 2 +
0617 2 * detail::Digits10(std::numeric_limits<int32_t>::max());
0618 std::array<char, buffer_size> buffer;
0619 char* cursor = buffer.data() + buffer_size;
0620
0621 detail::FormatOneChar('s', &cursor);
0622 detail::FormatOneChar('m', &cursor);
0623 detail::FormatAllDigits(detail::Abs(interval.milliseconds), &cursor);
0624 if (interval.milliseconds < 0) detail::FormatOneChar('-', &cursor);
0625
0626 detail::FormatOneChar('d', &cursor);
0627 detail::FormatAllDigits(detail::Abs(interval.days), &cursor);
0628 if (interval.days < 0) detail::FormatOneChar('-', &cursor);
0629
0630 return append(detail::ViewDigitBuffer(buffer, cursor));
0631 }
0632 };
0633
0634 template <>
0635 class StringFormatter<MonthDayNanoIntervalType> {
0636 public:
0637 using value_type = MonthDayNanoIntervalType::MonthDayNanos;
0638
0639 explicit StringFormatter(const DataType*) {}
0640
0641 template <typename Appender>
0642 Return<Appender> operator()(value_type interval, Appender&& append) {
0643 constexpr size_t buffer_size =
0644 4 + 3 +
0645 2 * detail::Digits10(std::numeric_limits<int32_t>::max()) +
0646 detail::Digits10(std::numeric_limits<int64_t>::max());
0647 std::array<char, buffer_size> buffer;
0648 char* cursor = buffer.data() + buffer_size;
0649
0650 detail::FormatOneChar('s', &cursor);
0651 detail::FormatOneChar('n', &cursor);
0652 detail::FormatAllDigits(detail::Abs(interval.nanoseconds), &cursor);
0653 if (interval.nanoseconds < 0) detail::FormatOneChar('-', &cursor);
0654
0655 detail::FormatOneChar('d', &cursor);
0656 detail::FormatAllDigits(detail::Abs(interval.days), &cursor);
0657 if (interval.days < 0) detail::FormatOneChar('-', &cursor);
0658
0659 detail::FormatOneChar('M', &cursor);
0660 detail::FormatAllDigits(detail::Abs(interval.months), &cursor);
0661 if (interval.months < 0) detail::FormatOneChar('-', &cursor);
0662
0663 return append(detail::ViewDigitBuffer(buffer, cursor));
0664 }
0665 };
0666
0667 }
0668 }