Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:00

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <memory>
0021 #include <string>
0022 
0023 #include "arrow/json/options.h"
0024 #include "arrow/status.h"
0025 #include "arrow/util/key_value_metadata.h"
0026 #include "arrow/util/macros.h"
0027 #include "arrow/util/visibility.h"
0028 
0029 namespace arrow {
0030 
0031 class Array;
0032 class Buffer;
0033 class MemoryPool;
0034 class KeyValueMetadata;
0035 class ResizableBuffer;
0036 
0037 namespace json {
0038 
0039 struct Kind {
0040   enum type : uint8_t {
0041     kNull,
0042     kBoolean,
0043     kNumber,
0044     kString,
0045     kArray,
0046     kObject,
0047     kNumberOrString
0048   };
0049 
0050   static const std::string& Name(Kind::type);
0051 
0052   static const std::shared_ptr<const KeyValueMetadata>& Tag(Kind::type);
0053 
0054   static Kind::type FromTag(const std::shared_ptr<const KeyValueMetadata>& tag);
0055 
0056   static Status ForType(const DataType& type, Kind::type* kind);
0057 };
0058 
0059 /// \class BlockParser
0060 /// \brief A reusable block-based parser for JSON data
0061 ///
0062 /// The parser takes a block of newline delimited JSON data and extracts Arrays
0063 /// of unconverted strings which can be fed to a Converter to obtain a usable Array.
0064 ///
0065 /// Note that in addition to parse errors (such as malformed JSON) some conversion
0066 /// errors are caught at parse time:
0067 /// - A null value in non-nullable column
0068 /// - Change in the JSON kind of a column. For example, if an explicit schema is provided
0069 ///   which stipulates that field "a" is integral, a row of {"a": "not a number"} will
0070 ///   result in an error. This also applies to fields outside an explicit schema.
0071 class ARROW_EXPORT BlockParser {
0072  public:
0073   virtual ~BlockParser() = default;
0074 
0075   /// \brief Reserve storage for scalars parsed from a block of json
0076   virtual Status ReserveScalarStorage(int64_t nbytes) = 0;
0077 
0078   /// \brief Parse a block of data
0079   virtual Status Parse(const std::shared_ptr<Buffer>& json) = 0;
0080 
0081   /// \brief Extract parsed data
0082   virtual Status Finish(std::shared_ptr<Array>* parsed) = 0;
0083 
0084   /// \brief Return the number of parsed rows
0085   int32_t num_rows() const { return num_rows_; }
0086 
0087   /// \brief Construct a BlockParser
0088   ///
0089   /// \param[in] pool MemoryPool to use when constructing parsed array
0090   /// \param[in] options ParseOptions to use when parsing JSON
0091   /// \param[out] out constructed BlockParser
0092   static Status Make(MemoryPool* pool, const ParseOptions& options,
0093                      std::unique_ptr<BlockParser>* out);
0094 
0095   static Status Make(const ParseOptions& options, std::unique_ptr<BlockParser>* out);
0096 
0097  protected:
0098   ARROW_DISALLOW_COPY_AND_ASSIGN(BlockParser);
0099 
0100   explicit BlockParser(MemoryPool* pool) : pool_(pool) {}
0101 
0102   MemoryPool* pool_;
0103   int32_t num_rows_ = 0;
0104 };
0105 
0106 }  // namespace json
0107 }  // namespace arrow