File indexing completed on 2025-08-28 08:27:11
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020 #pragma once
0021
0022 #include <cassert>
0023 #include <chrono>
0024 #include <cstddef>
0025 #include <cstdint>
0026 #include <limits>
0027 #include <memory>
0028 #include <string>
0029 #include <type_traits>
0030
0031 #include "arrow/type.h"
0032 #include "arrow/type_traits.h"
0033 #include "arrow/util/checked_cast.h"
0034 #include "arrow/util/config.h"
0035 #include "arrow/util/macros.h"
0036 #include "arrow/util/time.h"
0037 #include "arrow/util/visibility.h"
0038 #include "arrow/vendored/datetime.h"
0039 #include "arrow/vendored/strptime.h"
0040
0041 namespace arrow {
0042
0043
/// \brief Abstract interface for objects that parse a character range into a
/// 64-bit timestamp value.
///
/// Concrete parsers are obtained through the static factory functions below.
class ARROW_EXPORT TimestampParser {
 public:
  virtual ~TimestampParser() = default;

  /// \brief Parse `length` characters starting at `s`.
  ///
  /// \param[in] s start of the character range
  /// \param[in] length number of characters in the range
  /// \param[in] out_unit time unit requested for the produced value
  /// \param[out] out receives the parsed value
  /// \param[out] out_zone_offset_present optional flag, may be NULLPTR
  ///   (NOTE(review): presumably reports whether the input carried a zone
  ///   offset -- confirm against the implementations)
  /// \return a bool (presumably true on success -- confirm)
  virtual bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
                          int64_t* out,
                          bool* out_zone_offset_present = NULLPTR) const = 0;

  /// \brief C string describing this parser
  /// (NOTE(review): exact values are defined by the implementations).
  virtual const char* kind() const = 0;

  /// \brief C string associated with this parser; not pure virtual, the
  /// definition lives outside this header
  /// (NOTE(review): presumably the strptime format, if any -- confirm).
  virtual const char* format() const;

  /// \brief Create a parser from a format string
  /// (NOTE(review): presumably strptime-style conversion specifiers).
  static std::shared_ptr<TimestampParser> MakeStrptime(std::string format);

  /// \brief Create a parser that takes no format argument
  /// (NOTE(review): presumably accepts ISO 8601 strings -- confirm).
  static std::shared_ptr<TimestampParser> MakeISO8601();
};
0063
0064 namespace internal {
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
/// \brief Primary template of the string-to-value converters (declaration only).
///
/// The specializations in this header expose a `value_type` alias and a
/// `Convert(type, s, length, out)` member returning bool.
template <typename ARROW_TYPE, typename Enable = void>
struct StringConverter;
0078
/// \brief Trait whose `value` is true when `StringConverter<T>::value_type`
/// names a type, and false otherwise.
template <typename T>
struct is_parseable {
  // Viable only if StringConverter<U>::value_type is well-formed
  template <typename U, typename = typename StringConverter<U>::value_type>
  static std::true_type Test(U*);

  // Fallback chosen when the overload above is removed by SFINAE
  template <typename U>
  static std::false_type Test(...);

  static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
};
0089
/// \brief SFINAE helper built from `enable_if_t` and `is_parseable<T>::value`;
/// R defaults to void.
template <typename T, typename R = void>
using enable_if_parseable = enable_if_t<is_parseable<T>::value, R>;
0092
0093 template <>
0094 struct StringConverter<BooleanType> {
0095 using value_type = bool;
0096
0097 bool Convert(const BooleanType&, const char* s, size_t length, value_type* out) {
0098 if (length == 1) {
0099
0100 if (s[0] == '0') {
0101 *out = false;
0102 return true;
0103 }
0104 if (s[0] == '1') {
0105 *out = true;
0106 return true;
0107 }
0108 return false;
0109 }
0110 if (length == 4) {
0111
0112 *out = true;
0113 return ((s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
0114 (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E'));
0115 }
0116 if (length == 5) {
0117
0118 *out = false;
0119 return ((s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
0120 (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
0121 (s[4] == 'e' || s[4] == 'E'));
0122 }
0123 return false;
0124 }
0125 };
0126
0127
0128
0129
0130
0131
// Floating-point parsing entry points; declared here, defined outside this header.
// Each takes a character range, the decimal point character and an output
// pointer, and returns a bool (used below as the success indicator).

/// \brief Overload writing a `float`.
ARROW_EXPORT
bool StringToFloat(const char* s, size_t length, char decimal_point, float* out);

/// \brief Overload writing a `double`.
ARROW_EXPORT
bool StringToFloat(const char* s, size_t length, char decimal_point, double* out);

/// \brief Overload writing a `uint16_t`; used by the HalfFloatType converter
/// (NOTE(review): presumably the half-precision bit pattern -- confirm).
ARROW_EXPORT
bool StringToFloat(const char* s, size_t length, char decimal_point, uint16_t* out);
0140
/// \brief Converter producing `float` values by delegating to StringToFloat.
template <>
struct StringConverter<FloatType> {
  using value_type = float;

  /// \param decimal_point character passed to StringToFloat; defaults to '.'
  explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {}

  /// \brief Parse `length` characters at `s` into `*out`.
  /// \return the result of StringToFloat (success is hinted as the likely case)
  bool Convert(const FloatType&, const char* s, size_t length, value_type* out) {
    return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out));
  }

 private:
  // Fixed at construction
  const char decimal_point;
};
0154
/// \brief Converter producing `double` values by delegating to StringToFloat.
template <>
struct StringConverter<DoubleType> {
  using value_type = double;

  /// \param decimal_point character passed to StringToFloat; defaults to '.'
  explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {}

  /// \brief Parse `length` characters at `s` into `*out`.
  /// \return the result of StringToFloat (success is hinted as the likely case)
  bool Convert(const DoubleType&, const char* s, size_t length, value_type* out) {
    return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out));
  }

 private:
  // Fixed at construction
  const char decimal_point;
};
0168
/// \brief Converter for HalfFloatType; values are carried as `uint16_t` and
/// produced by the `uint16_t` overload of StringToFloat.
template <>
struct StringConverter<HalfFloatType> {
  using value_type = uint16_t;

  /// \param decimal_point character passed to StringToFloat; defaults to '.'
  explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {}

  /// \brief Parse `length` characters at `s` into `*out`.
  /// \return the result of StringToFloat (success is hinted as the likely case)
  bool Convert(const HalfFloatType&, const char* s, size_t length, value_type* out) {
    return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out));
  }

 private:
  // Fixed at construction
  const char decimal_point;
};
0182
0183
0184
/// \brief Map a character to its distance from '0', truncated to uint8_t.
///
/// Decimal digits map to 0..9; every other character maps to a value above 9,
/// which callers use to detect invalid input.
inline uint8_t ParseDecimalDigit(char c) {
  const int distance_from_zero = c - '0';
  return static_cast<uint8_t>(distance_from_zero);
}
0186
// Consume one decimal digit, for digit positions that cannot overflow C_TYPE.
//
// Expects `s`, `length` and `result` in the enclosing scope and must be
// expanded inside a breakable statement in a function returning bool:
// - input exhausted: `break` out of the enclosing statement
// - non-digit character: `return false`
// - otherwise: result = result * 10 + digit
#define PARSE_UNSIGNED_ITERATION(C_TYPE)          \
  if (length > 0) {                               \
    uint8_t digit = ParseDecimalDigit(*s++);      \
    result = static_cast<C_TYPE>(result * 10U);   \
    length--;                                     \
    if (ARROW_PREDICT_FALSE(digit > 9U)) {        \
      /* Non-digit character */                   \
      return false;                               \
    }                                             \
    result = static_cast<C_TYPE>(result + digit); \
  } else {                                        \
    break;                                        \
  }
0200
// Consume the final permitted digit, the only position where C_TYPE can
// overflow.
//
// Same scope requirements as PARSE_UNSIGNED_ITERATION. Does nothing when the
// input is already exhausted; otherwise `return false` on overflow, on any
// character remaining after this digit, or on a non-digit character.
#define PARSE_UNSIGNED_ITERATION_LAST(C_TYPE)                                     \
  if (length > 0) {                                                               \
    if (ARROW_PREDICT_FALSE(result > std::numeric_limits<C_TYPE>::max() / 10U)) { \
      /* Multiplying by 10 would overflow */                                      \
      return false;                                                               \
    }                                                                             \
    uint8_t digit = ParseDecimalDigit(*s++);                                      \
    result = static_cast<C_TYPE>(result * 10U);                                   \
    C_TYPE new_result = static_cast<C_TYPE>(result + digit);                      \
    if (ARROW_PREDICT_FALSE(--length > 0)) {                                      \
      /* More characters than C_TYPE can represent */                             \
      return false;                                                               \
    }                                                                             \
    if (ARROW_PREDICT_FALSE(digit > 9U)) {                                        \
      /* Non-digit character */                                                   \
      return false;                                                               \
    }                                                                             \
    if (ARROW_PREDICT_FALSE(new_result < result)) {                               \
      /* Adding the digit wrapped around */                                       \
      return false;                                                               \
    }                                                                             \
    result = new_result;                                                          \
  }
0224
/// \brief Parse at most 3 decimal digits into a uint8_t.
///
/// Returns false on a non-digit character, on more than 3 characters, or on
/// overflow; `*out` is written only on success. An empty range yields 0 and
/// returns true.
inline bool ParseUnsigned(const char* s, size_t length, uint8_t* out) {
  uint8_t result = 0;

  // Unrolled: the macros `break` when the input ends and `return false` on error
  do {
    PARSE_UNSIGNED_ITERATION(uint8_t);
    PARSE_UNSIGNED_ITERATION(uint8_t);
    PARSE_UNSIGNED_ITERATION_LAST(uint8_t);
  } while (false);
  *out = result;
  return true;
}
0236
/// \brief Parse at most 5 decimal digits into a uint16_t.
///
/// Returns false on a non-digit character, on more than 5 characters, or on
/// overflow; `*out` is written only on success. An empty range yields 0 and
/// returns true.
inline bool ParseUnsigned(const char* s, size_t length, uint16_t* out) {
  uint16_t result = 0;
  // Unrolled: the macros `break` when the input ends and `return false` on error
  do {
    PARSE_UNSIGNED_ITERATION(uint16_t);
    PARSE_UNSIGNED_ITERATION(uint16_t);
    PARSE_UNSIGNED_ITERATION(uint16_t);
    PARSE_UNSIGNED_ITERATION(uint16_t);
    PARSE_UNSIGNED_ITERATION_LAST(uint16_t);
  } while (false);
  *out = result;
  return true;
}
0249
/// \brief Parse at most 10 decimal digits into a uint32_t.
///
/// Returns false on a non-digit character, on more than 10 characters, or on
/// overflow; `*out` is written only on success. An empty range yields 0 and
/// returns true.
inline bool ParseUnsigned(const char* s, size_t length, uint32_t* out) {
  uint32_t result = 0;
  // Unrolled: the macros `break` when the input ends and `return false` on error
  do {
    PARSE_UNSIGNED_ITERATION(uint32_t);
    PARSE_UNSIGNED_ITERATION(uint32_t);
    PARSE_UNSIGNED_ITERATION(uint32_t);
    PARSE_UNSIGNED_ITERATION(uint32_t);
    PARSE_UNSIGNED_ITERATION(uint32_t);

    PARSE_UNSIGNED_ITERATION(uint32_t);
    PARSE_UNSIGNED_ITERATION(uint32_t);
    PARSE_UNSIGNED_ITERATION(uint32_t);
    PARSE_UNSIGNED_ITERATION(uint32_t);

    // 10th digit: overflow-checked
    PARSE_UNSIGNED_ITERATION_LAST(uint32_t);
  } while (false);
  *out = result;
  return true;
}
0269
/// \brief Parse at most 20 decimal digits into a uint64_t.
///
/// Returns false on a non-digit character, on more than 20 characters, or on
/// overflow; `*out` is written only on success. An empty range yields 0 and
/// returns true.
inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) {
  uint64_t result = 0;
  // Unrolled: the macros `break` when the input ends and `return false` on error
  do {
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);

    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);

    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);

    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);
    PARSE_UNSIGNED_ITERATION(uint64_t);

    // 20th digit: overflow-checked
    PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
  } while (false);
  *out = result;
  return true;
}
0301
0302 #undef PARSE_UNSIGNED_ITERATION
0303 #undef PARSE_UNSIGNED_ITERATION_LAST
0304
0305 template <typename T>
0306 bool ParseHex(const char* s, size_t length, T* out) {
0307
0308 if (!ARROW_PREDICT_TRUE(sizeof(T) * 2 >= length && length > 0)) {
0309 return false;
0310 }
0311 T result = 0;
0312 for (size_t i = 0; i < length; i++) {
0313 result = static_cast<T>(result << 4);
0314 if (s[i] >= '0' && s[i] <= '9') {
0315 result = static_cast<T>(result | (s[i] - '0'));
0316 } else if (s[i] >= 'A' && s[i] <= 'F') {
0317 result = static_cast<T>(result | (s[i] - 'A' + 10));
0318 } else if (s[i] >= 'a' && s[i] <= 'f') {
0319 result = static_cast<T>(result | (s[i] - 'a' + 10));
0320 } else {
0321
0322 return false;
0323 }
0324 }
0325 *out = result;
0326 return true;
0327 }
0328
0329 template <class ARROW_TYPE>
0330 struct StringToUnsignedIntConverterMixin {
0331 using value_type = typename ARROW_TYPE::c_type;
0332
0333 bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
0334 if (ARROW_PREDICT_FALSE(length == 0)) {
0335 return false;
0336 }
0337
0338 if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) {
0339 length -= 2;
0340 s += 2;
0341
0342 return ARROW_PREDICT_TRUE(ParseHex(s, length, out));
0343 }
0344
0345 while (length > 0 && *s == '0') {
0346 length--;
0347 s++;
0348 }
0349 return ParseUnsigned(s, length, out);
0350 }
0351 };
0352
/// \brief UInt8Type converter; all behavior comes from
/// StringToUnsignedIntConverterMixin.
template <>
struct StringConverter<UInt8Type> : public StringToUnsignedIntConverterMixin<UInt8Type> {
  using StringToUnsignedIntConverterMixin<UInt8Type>::StringToUnsignedIntConverterMixin;
};
0357
/// \brief UInt16Type converter; all behavior comes from
/// StringToUnsignedIntConverterMixin.
template <>
struct StringConverter<UInt16Type>
    : public StringToUnsignedIntConverterMixin<UInt16Type> {
  using StringToUnsignedIntConverterMixin<UInt16Type>::StringToUnsignedIntConverterMixin;
};
0363
/// \brief UInt32Type converter; all behavior comes from
/// StringToUnsignedIntConverterMixin.
template <>
struct StringConverter<UInt32Type>
    : public StringToUnsignedIntConverterMixin<UInt32Type> {
  using StringToUnsignedIntConverterMixin<UInt32Type>::StringToUnsignedIntConverterMixin;
};
0369
/// \brief UInt64Type converter; all behavior comes from
/// StringToUnsignedIntConverterMixin.
template <>
struct StringConverter<UInt64Type>
    : public StringToUnsignedIntConverterMixin<UInt64Type> {
  using StringToUnsignedIntConverterMixin<UInt64Type>::StringToUnsignedIntConverterMixin;
};
0375
0376 template <class ARROW_TYPE>
0377 struct StringToSignedIntConverterMixin {
0378 using value_type = typename ARROW_TYPE::c_type;
0379 using unsigned_type = typename std::make_unsigned<value_type>::type;
0380
0381 bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
0382 static constexpr auto max_positive =
0383 static_cast<unsigned_type>(std::numeric_limits<value_type>::max());
0384
0385 static constexpr unsigned_type max_negative = max_positive + 1;
0386 bool negative = false;
0387 unsigned_type unsigned_value = 0;
0388
0389 if (ARROW_PREDICT_FALSE(length == 0)) {
0390 return false;
0391 }
0392
0393 if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) {
0394 length -= 2;
0395 s += 2;
0396
0397 if (!ARROW_PREDICT_TRUE(ParseHex(s, length, &unsigned_value))) {
0398 return false;
0399 }
0400 *out = static_cast<value_type>(unsigned_value);
0401 return true;
0402 }
0403
0404 if (*s == '-') {
0405 negative = true;
0406 s++;
0407 if (--length == 0) {
0408 return false;
0409 }
0410 }
0411
0412 while (length > 0 && *s == '0') {
0413 length--;
0414 s++;
0415 }
0416 if (!ARROW_PREDICT_TRUE(ParseUnsigned(s, length, &unsigned_value))) {
0417 return false;
0418 }
0419 if (negative) {
0420 if (ARROW_PREDICT_FALSE(unsigned_value > max_negative)) {
0421 return false;
0422 }
0423
0424
0425
0426 *out = static_cast<value_type>(~unsigned_value + 1);
0427 } else {
0428 if (ARROW_PREDICT_FALSE(unsigned_value > max_positive)) {
0429 return false;
0430 }
0431 *out = static_cast<value_type>(unsigned_value);
0432 }
0433 return true;
0434 }
0435 };
0436
/// \brief Int8Type converter; all behavior comes from
/// StringToSignedIntConverterMixin.
template <>
struct StringConverter<Int8Type> : public StringToSignedIntConverterMixin<Int8Type> {
  using StringToSignedIntConverterMixin<Int8Type>::StringToSignedIntConverterMixin;
};
0441
/// \brief Int16Type converter; all behavior comes from
/// StringToSignedIntConverterMixin.
template <>
struct StringConverter<Int16Type> : public StringToSignedIntConverterMixin<Int16Type> {
  using StringToSignedIntConverterMixin<Int16Type>::StringToSignedIntConverterMixin;
};
0446
/// \brief Int32Type converter; all behavior comes from
/// StringToSignedIntConverterMixin.
template <>
struct StringConverter<Int32Type> : public StringToSignedIntConverterMixin<Int32Type> {
  using StringToSignedIntConverterMixin<Int32Type>::StringToSignedIntConverterMixin;
};
0451
/// \brief Int64Type converter; all behavior comes from
/// StringToSignedIntConverterMixin.
template <>
struct StringConverter<Int64Type> : public StringToSignedIntConverterMixin<Int64Type> {
  using StringToSignedIntConverterMixin<Int64Type>::StringToSignedIntConverterMixin;
};
0456
0457 namespace detail {
0458
0459
0460
// Shorthand for the C type that stores timestamp values
using ts_type = TimestampType::c_type;
0462
0463 template <typename Duration>
0464 static inline bool ParseHH(const char* s, Duration* out) {
0465 uint8_t hours = 0;
0466 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
0467 return false;
0468 }
0469 if (ARROW_PREDICT_FALSE(hours >= 24)) {
0470 return false;
0471 }
0472 *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours));
0473 return true;
0474 }
0475
0476 template <typename Duration>
0477 static inline bool ParseHH_MM(const char* s, Duration* out) {
0478 uint8_t hours = 0;
0479 uint8_t minutes = 0;
0480 if (ARROW_PREDICT_FALSE(s[2] != ':')) {
0481 return false;
0482 }
0483 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
0484 return false;
0485 }
0486 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
0487 return false;
0488 }
0489 if (ARROW_PREDICT_FALSE(hours >= 24)) {
0490 return false;
0491 }
0492 if (ARROW_PREDICT_FALSE(minutes >= 60)) {
0493 return false;
0494 }
0495 *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
0496 std::chrono::minutes(minutes));
0497 return true;
0498 }
0499
0500 template <typename Duration>
0501 static inline bool ParseHHMM(const char* s, Duration* out) {
0502 uint8_t hours = 0;
0503 uint8_t minutes = 0;
0504 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
0505 return false;
0506 }
0507 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 2, 2, &minutes))) {
0508 return false;
0509 }
0510 if (ARROW_PREDICT_FALSE(hours >= 24)) {
0511 return false;
0512 }
0513 if (ARROW_PREDICT_FALSE(minutes >= 60)) {
0514 return false;
0515 }
0516 *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
0517 std::chrono::minutes(minutes));
0518 return true;
0519 }
0520
0521 template <typename Duration>
0522 static inline bool ParseHH_MM_SS(const char* s, Duration* out) {
0523 uint8_t hours = 0;
0524 uint8_t minutes = 0;
0525 uint8_t seconds = 0;
0526 if (ARROW_PREDICT_FALSE(s[2] != ':') || ARROW_PREDICT_FALSE(s[5] != ':')) {
0527 return false;
0528 }
0529 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
0530 return false;
0531 }
0532 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
0533 return false;
0534 }
0535 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 6, 2, &seconds))) {
0536 return false;
0537 }
0538 if (ARROW_PREDICT_FALSE(hours >= 24)) {
0539 return false;
0540 }
0541 if (ARROW_PREDICT_FALSE(minutes >= 60)) {
0542 return false;
0543 }
0544 if (ARROW_PREDICT_FALSE(seconds >= 60)) {
0545 return false;
0546 }
0547 *out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
0548 std::chrono::minutes(minutes) +
0549 std::chrono::seconds(seconds));
0550 return true;
0551 }
0552
0553 static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type unit,
0554 uint32_t* out) {
0555
0556
0557
0558
0559
0560 size_t omitted = 0;
0561 switch (unit) {
0562 case TimeUnit::MILLI:
0563 if (ARROW_PREDICT_FALSE(length > 3)) {
0564 return false;
0565 }
0566 if (length < 3) {
0567 omitted = 3 - length;
0568 }
0569 break;
0570 case TimeUnit::MICRO:
0571 if (ARROW_PREDICT_FALSE(length > 6)) {
0572 return false;
0573 }
0574 if (length < 6) {
0575 omitted = 6 - length;
0576 }
0577 break;
0578 case TimeUnit::NANO:
0579 if (ARROW_PREDICT_FALSE(length > 9)) {
0580 return false;
0581 }
0582 if (length < 9) {
0583 omitted = 9 - length;
0584 }
0585 break;
0586 default:
0587 return false;
0588 }
0589
0590 if (ARROW_PREDICT_TRUE(omitted == 0)) {
0591 return ParseUnsigned(s, length, out);
0592 } else {
0593 uint32_t subseconds = 0;
0594 bool success = ParseUnsigned(s, length, &subseconds);
0595 if (ARROW_PREDICT_TRUE(success)) {
0596 switch (omitted) {
0597 case 1:
0598 *out = subseconds * 10;
0599 break;
0600 case 2:
0601 *out = subseconds * 100;
0602 break;
0603 case 3:
0604 *out = subseconds * 1000;
0605 break;
0606 case 4:
0607 *out = subseconds * 10000;
0608 break;
0609 case 5:
0610 *out = subseconds * 100000;
0611 break;
0612 case 6:
0613 *out = subseconds * 1000000;
0614 break;
0615 case 7:
0616 *out = subseconds * 10000000;
0617 break;
0618 case 8:
0619 *out = subseconds * 100000000;
0620 break;
0621 default:
0622
0623 break;
0624 }
0625 return true;
0626 } else {
0627 return false;
0628 }
0629 }
0630 }
0631
0632 }
0633
0634 template <typename Duration>
0635 static inline bool ParseYYYY_MM_DD(const char* s, Duration* since_epoch) {
0636 uint16_t year = 0;
0637 uint8_t month = 0;
0638 uint8_t day = 0;
0639 if (ARROW_PREDICT_FALSE(s[4] != '-') || ARROW_PREDICT_FALSE(s[7] != '-')) {
0640 return false;
0641 }
0642 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 4, &year))) {
0643 return false;
0644 }
0645 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 5, 2, &month))) {
0646 return false;
0647 }
0648 if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 8, 2, &day))) {
0649 return false;
0650 }
0651 arrow_vendored::date::year_month_day ymd{arrow_vendored::date::year{year},
0652 arrow_vendored::date::month{month},
0653 arrow_vendored::date::day{day}};
0654 if (ARROW_PREDICT_FALSE(!ymd.ok())) return false;
0655
0656 *since_epoch = std::chrono::duration_cast<Duration>(
0657 arrow_vendored::date::sys_days{ymd}.time_since_epoch());
0658 return true;
0659 }
0660
0661 static inline bool ParseTimestampISO8601(const char* s, size_t length,
0662 TimeUnit::type unit, TimestampType::c_type* out,
0663 bool* out_zone_offset_present = NULLPTR) {
0664 using seconds_type = std::chrono::duration<TimestampType::c_type>;
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674
0675
0676
0677
0678
0679
0680
0681
0682
0683
0684
0685
0686
0687
0688
0689 if (ARROW_PREDICT_FALSE(length < 10)) return false;
0690
0691 seconds_type seconds_since_epoch;
0692 if (ARROW_PREDICT_FALSE(!ParseYYYY_MM_DD(s, &seconds_since_epoch))) {
0693 return false;
0694 }
0695
0696 if (length == 10) {
0697 *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
0698 return true;
0699 }
0700
0701 if (ARROW_PREDICT_FALSE(s[10] != ' ') && ARROW_PREDICT_FALSE(s[10] != 'T')) {
0702 return false;
0703 }
0704
0705 if (out_zone_offset_present) {
0706 *out_zone_offset_present = false;
0707 }
0708
0709 seconds_type zone_offset(0);
0710 if (s[length - 1] == 'Z') {
0711 --length;
0712 if (out_zone_offset_present) *out_zone_offset_present = true;
0713 } else if (s[length - 3] == '+' || s[length - 3] == '-') {
0714
0715 length -= 3;
0716 if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + length + 1, &zone_offset))) {
0717 return false;
0718 }
0719 if (s[length] == '+') zone_offset *= -1;
0720 if (out_zone_offset_present) *out_zone_offset_present = true;
0721 } else if (s[length - 5] == '+' || s[length - 5] == '-') {
0722
0723 length -= 5;
0724 if (ARROW_PREDICT_FALSE(!detail::ParseHHMM(s + length + 1, &zone_offset))) {
0725 return false;
0726 }
0727 if (s[length] == '+') zone_offset *= -1;
0728 if (out_zone_offset_present) *out_zone_offset_present = true;
0729 } else if ((s[length - 6] == '+' || s[length - 6] == '-') && (s[length - 3] == ':')) {
0730
0731 length -= 6;
0732 if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + length + 1, &zone_offset))) {
0733 return false;
0734 }
0735 if (s[length] == '+') zone_offset *= -1;
0736 if (out_zone_offset_present) *out_zone_offset_present = true;
0737 }
0738
0739 seconds_type seconds_since_midnight;
0740 switch (length) {
0741 case 13:
0742 if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + 11, &seconds_since_midnight))) {
0743 return false;
0744 }
0745 break;
0746 case 16:
0747 if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + 11, &seconds_since_midnight))) {
0748 return false;
0749 }
0750 break;
0751 case 19:
0752 case 21:
0753 case 22:
0754 case 23:
0755 case 24:
0756 case 25:
0757 case 26:
0758 case 27:
0759 case 28:
0760 case 29:
0761 if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s + 11, &seconds_since_midnight))) {
0762 return false;
0763 }
0764 break;
0765 default:
0766 return false;
0767 }
0768
0769 seconds_since_epoch += seconds_since_midnight;
0770 seconds_since_epoch += zone_offset;
0771
0772 if (length <= 19) {
0773 *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
0774 return true;
0775 }
0776
0777 if (ARROW_PREDICT_FALSE(s[19] != '.')) {
0778 return false;
0779 }
0780
0781 uint32_t subseconds = 0;
0782 if (ARROW_PREDICT_FALSE(
0783 !detail::ParseSubSeconds(s + 20, length - 20, unit, &subseconds))) {
0784 return false;
0785 }
0786
0787 *out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()) + subseconds;
0788 return true;
0789 }
0790
// Compile-time flag: false on Windows and on musl builds, true elsewhere.
// NOTE(review): presumably tells whether the platform strptime understands
// zone offset specifiers -- confirm against the users of this constant.
#if defined(_WIN32) || defined(ARROW_WITH_MUSL)
static constexpr bool kStrptimeSupportsZone = false;
#else
static constexpr bool kStrptimeSupportsZone = true;
#endif
0796
0797
0798 static inline bool ParseTimestampStrptime(const char* buf, size_t length,
0799 const char* format, bool ignore_time_in_day,
0800 bool allow_trailing_chars, TimeUnit::type unit,
0801 int64_t* out) {
0802
0803
0804 std::string clean_copy(buf, length);
0805 struct tm result;
0806 memset(&result, 0, sizeof(struct tm));
0807 #ifdef _WIN32
0808 char* ret = arrow_strptime(clean_copy.c_str(), format, &result);
0809 #else
0810 char* ret = strptime(clean_copy.c_str(), format, &result);
0811 #endif
0812 if (ret == NULLPTR) {
0813 return false;
0814 }
0815 if (!allow_trailing_chars && static_cast<size_t>(ret - clean_copy.c_str()) != length) {
0816 return false;
0817 }
0818
0819 arrow_vendored::date::sys_seconds secs =
0820 arrow_vendored::date::sys_days(arrow_vendored::date::year(result.tm_year + 1900) /
0821 (result.tm_mon + 1) / std::max(result.tm_mday, 1));
0822 if (!ignore_time_in_day) {
0823 secs += (std::chrono::hours(result.tm_hour) + std::chrono::minutes(result.tm_min) +
0824 std::chrono::seconds(result.tm_sec));
0825 #ifndef _WIN32
0826 secs -= std::chrono::seconds(result.tm_gmtoff);
0827 #endif
0828 }
0829 *out = util::CastSecondsToUnit(unit, secs.time_since_epoch().count());
0830 return true;
0831 }
0832
/// \brief Converter producing int64_t timestamp values through
/// ParseTimestampISO8601, using the unit of the supplied type.
template <>
struct StringConverter<TimestampType> {
  using value_type = int64_t;

  /// \brief Parse `length` characters at `s` into `*out`.
  /// \return the result of ParseTimestampISO8601 (the zone-offset flag is not
  /// requested)
  bool Convert(const TimestampType& type, const char* s, size_t length, value_type* out) {
    return ParseTimestampISO8601(s, length, type.unit(), out);
  }
};
0841
/// \brief DurationType converter; durations are parsed as plain signed
/// integers by StringToSignedIntConverterMixin.
template <>
struct StringConverter<DurationType>
    : public StringToSignedIntConverterMixin<DurationType> {
  using StringToSignedIntConverterMixin<DurationType>::StringToSignedIntConverterMixin;
};
0847
0848 template <typename DATE_TYPE>
0849 struct StringConverter<DATE_TYPE, enable_if_date<DATE_TYPE>> {
0850 using value_type = typename DATE_TYPE::c_type;
0851
0852 using duration_type =
0853 typename std::conditional<std::is_same<DATE_TYPE, Date32Type>::value,
0854 arrow_vendored::date::days,
0855 std::chrono::milliseconds>::type;
0856
0857 bool Convert(const DATE_TYPE& type, const char* s, size_t length, value_type* out) {
0858 if (ARROW_PREDICT_FALSE(length != 10)) {
0859 return false;
0860 }
0861
0862 duration_type since_epoch;
0863 if (ARROW_PREDICT_FALSE(!ParseYYYY_MM_DD(s, &since_epoch))) {
0864 return false;
0865 }
0866
0867 *out = static_cast<value_type>(since_epoch.count());
0868 return true;
0869 }
0870 };
0871
0872 template <typename TIME_TYPE>
0873 struct StringConverter<TIME_TYPE, enable_if_time<TIME_TYPE>> {
0874 using value_type = typename TIME_TYPE::c_type;
0875
0876
0877
0878
0879
0880
0881
0882
0883
0884
0885
0886
0887
0888
0889 bool Convert(const TIME_TYPE& type, const char* s, size_t length, value_type* out) {
0890 const auto unit = type.unit();
0891 std::chrono::seconds since_midnight;
0892
0893 if (length == 5) {
0894 if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s, &since_midnight))) {
0895 return false;
0896 }
0897 *out =
0898 static_cast<value_type>(util::CastSecondsToUnit(unit, since_midnight.count()));
0899 return true;
0900 }
0901
0902 if (ARROW_PREDICT_FALSE(length < 8)) {
0903 return false;
0904 }
0905 if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s, &since_midnight))) {
0906 return false;
0907 }
0908
0909 *out = static_cast<value_type>(util::CastSecondsToUnit(unit, since_midnight.count()));
0910
0911 if (length == 8) {
0912 return true;
0913 }
0914
0915 if (ARROW_PREDICT_FALSE(s[8] != '.')) {
0916 return false;
0917 }
0918
0919 uint32_t subseconds_count = 0;
0920 if (ARROW_PREDICT_FALSE(
0921 !detail::ParseSubSeconds(s + 9, length - 9, unit, &subseconds_count))) {
0922 return false;
0923 }
0924
0925 *out += subseconds_count;
0926 return true;
0927 }
0928 };
0929
0930
/// \brief Parse a value of Arrow type T using a default-constructed
/// `StringConverter<T>`.
///
/// \param[in] type type instance forwarded to the converter
/// \param[in] s start of the character range
/// \param[in] length number of characters in the range
/// \param[out] out receives the parsed value
/// \return the result of `StringConverter<T>::Convert`
template <typename T>
bool ParseValue(const T& type, const char* s, size_t length,
                typename StringConverter<T>::value_type* out) {
  return StringConverter<T>{}.Convert(type, s, length, out);
}
0936
/// \brief Overload that takes no type instance; enabled through
/// `enable_if_parameter_free<T, bool>`.
///
/// A function-local static default-constructed T is passed to the converter.
///
/// \param[in] s start of the character range
/// \param[in] length number of characters in the range
/// \param[out] out receives the parsed value
/// \return the result of `StringConverter<T>::Convert`
template <typename T>
enable_if_parameter_free<T, bool> ParseValue(
    const char* s, size_t length, typename StringConverter<T>::value_type* out) {
  static T type;
  return StringConverter<T>{}.Convert(type, s, length, out);
}
0943
0944 }
0945 }