Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:57

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <memory>
0021 #include <optional>
0022 #include <string>
0023 
0024 #include "arrow/dataset/dataset.h"
0025 #include "arrow/dataset/file_base.h"
0026 #include "arrow/dataset/type_fwd.h"
0027 #include "arrow/dataset/visibility.h"
0028 #include "arrow/ipc/type_fwd.h"
0029 #include "arrow/json/options.h"
0030 #include "arrow/result.h"
0031 #include "arrow/status.h"
0032 #include "arrow/util/future.h"
0033 #include "arrow/util/macros.h"
0034 
0035 namespace arrow::dataset {
0036 
0037 /// \addtogroup dataset-file-formats
0038 ///
0039 /// @{
0040 
0041 constexpr char kJsonTypeName[] = "json";  ///< Returned by type_name() of the JSON classes below
0042 
0043 /// \brief A FileFormat implementation that reads from JSON files
     ///
     /// This format is read-only as declared here: MakeWriter() unconditionally returns
     /// Status::NotImplemented and DefaultWriteOptions() returns NULLPTR. All other
     /// members are only declared in this header; their definitions live out of line,
     /// so their exact behavior cannot be read off from this file.
0044 class ARROW_DS_EXPORT JsonFileFormat : public FileFormat {
0045  public:
       /// \brief Default constructor (defined out of line)
0046   JsonFileFormat();
0047 
       /// \brief Return the type name of this format, kJsonTypeName ("json")
0048   std::string type_name() const override { return kJsonTypeName; }
0049 
       /// \brief Return whether this format is considered equal to `other`
       ///
       /// The comparison criteria are defined out of line.
0050   bool Equals(const FileFormat& other) const override;
0051 
       /// \brief Override of FileFormat::IsSupported for `source`
       ///
       /// Yields a bool on success or an error Status through the Result.
       /// NOTE(review): presumably checks that `source` looks like JSON -- confirm
       /// against the implementation.
0052   Result<bool> IsSupported(const FileSource& source) const override;
0053 
       /// \brief Override of FileFormat::Inspect; yields a Schema for `source` on success
0054   Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override;
0055 
       /// \brief Asynchronously produce an InspectedFragment for `source`
       ///
       /// \param source the file to inspect
       /// \param format_options scan options passed as a base-class pointer
       ///        (presumably a JsonFragmentScanOptions; TODO confirm whether null is allowed)
       /// \param exec_context execution context forwarded to the implementation
       /// \return a Future resolving to the inspected fragment
0056   Future<std::shared_ptr<InspectedFragment>> InspectFragment(
0057       const FileSource& source, const FragmentScanOptions* format_options,
0058       compute::ExecContext* exec_context) const override;
0059 
       /// \brief Asynchronously create a FragmentScanner for a scan request
       ///
       /// \param scan_request description of the requested scan
       /// \param inspected a previously inspected fragment
       ///        (presumably the result of InspectFragment -- verify against callers)
       /// \param format_options scan options passed as a base-class pointer
       /// \param exec_context execution context forwarded to the implementation
       /// \return a Future resolving to the scanner
0060   Future<std::shared_ptr<FragmentScanner>> BeginScan(
0061       const FragmentScanRequest& scan_request, const InspectedFragment& inspected,
0062       const FragmentScanOptions* format_options,
0063       compute::ExecContext* exec_context) const override;
0064 
       /// \brief Create a RecordBatchGenerator over the batches of `file`
       ///
       /// \param scan_options options for the scan
       /// \param file the file fragment to scan
       /// \return the generator, or an error Status through the Result
0065   Result<RecordBatchGenerator> ScanBatchesAsync(
0066       const std::shared_ptr<ScanOptions>& scan_options,
0067       const std::shared_ptr<FileFragment>& file) const override;
0068 
       /// \brief Asynchronously count rows of `file` for `predicate`
       ///
       /// Unlike the other overrides in this class, this member is not const-qualified.
       /// The resulting optional may be empty.
       /// NOTE(review): presumably empty means the count is not available without a
       /// full scan -- confirm against FileFormat::CountRows.
0069   Future<std::optional<int64_t>> CountRows(
0070       const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
0071       const std::shared_ptr<ScanOptions>& scan_options) override;
0072 
       /// \brief Writing is not supported for JSON
       ///
       /// None of the arguments are used.
       /// \return always Status::NotImplemented
0073   Result<std::shared_ptr<FileWriter>> MakeWriter(
0074       std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
0075       std::shared_ptr<FileWriteOptions> options,
0076       fs::FileLocator destination_locator) const override {
0077     return Status::NotImplemented("Writing JSON files is not currently supported");
0078   }
0079 
       /// \brief Return NULLPTR; no write options object is created for this format
0080   std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override { return NULLPTR; }
0081 };
0082 
0083 /// \brief Per-scan options for JSON fragments
     ///
     /// Bundles the JSON parse options and read options. Both members are initialized
     /// from their type's Defaults() unless the caller overwrites them.
0084 struct ARROW_DS_EXPORT JsonFragmentScanOptions : public FragmentScanOptions {
       /// \brief Return kJsonTypeName ("json"), the same name JsonFileFormat reports
0085   std::string type_name() const override { return kJsonTypeName; }
0086 
0087   /// \brief Options that affect JSON parsing
0088   ///
0089   /// Note: `explicit_schema` and `unexpected_field_behavior` are ignored.
0090   json::ParseOptions parse_options = json::ParseOptions::Defaults();
0091 
0092   /// \brief Options that affect JSON reading
0093   json::ReadOptions read_options = json::ReadOptions::Defaults();
0094 };
0095 
0096 /// @}
0097 
0098 }  // namespace arrow::dataset