File indexing completed on 2025-08-28 08:26:57
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <memory>
0021 #include <optional>
0022 #include <string>
0023
0024 #include "arrow/dataset/dataset.h"
0025 #include "arrow/dataset/file_base.h"
0026 #include "arrow/dataset/type_fwd.h"
0027 #include "arrow/dataset/visibility.h"
0028 #include "arrow/ipc/type_fwd.h"
0029 #include "arrow/json/options.h"
0030 #include "arrow/result.h"
0031 #include "arrow/status.h"
0032 #include "arrow/util/future.h"
0033 #include "arrow/util/macros.h"
0034
0035 namespace arrow::dataset {
0036
0037
0038
0039
0040 /// Type name string returned by the type_name() overrides declared in this header.
0041 constexpr char kJsonTypeName[] = "json";
0042
0043 /// \brief FileFormat subclass for JSON files; identifies itself as kJsonTypeName.
0044 class ARROW_DS_EXPORT JsonFileFormat : public FileFormat {
0045 public:
0046 JsonFileFormat();
0047 /// Returns kJsonTypeName ("json") as a std::string.
0048 std::string type_name() const override { return kJsonTypeName; }
0049 // The overrides from Equals through CountRows are only declared here; their definitions are not in this header.
0050 bool Equals(const FileFormat& other) const override;
0051
0052 Result<bool> IsSupported(const FileSource& source) const override;
0053
0054 Result<std::shared_ptr<Schema>> Inspect(const FileSource& source) const override;
0055
0056 Future<std::shared_ptr<InspectedFragment>> InspectFragment(
0057 const FileSource& source, const FragmentScanOptions* format_options,
0058 compute::ExecContext* exec_context) const override;
0059
0060 Future<std::shared_ptr<FragmentScanner>> BeginScan(
0061 const FragmentScanRequest& scan_request, const InspectedFragment& inspected,
0062 const FragmentScanOptions* format_options,
0063 compute::ExecContext* exec_context) const override;
0064
0065 Result<RecordBatchGenerator> ScanBatchesAsync(
0066 const std::shared_ptr<ScanOptions>& scan_options,
0067 const std::shared_ptr<FileFragment>& file) const override;
0068 // Unlike the other overrides in this class, CountRows is not const-qualified.
0069 Future<std::optional<int64_t>> CountRows(
0070 const std::shared_ptr<FileFragment>& file, compute::Expression predicate,
0071 const std::shared_ptr<ScanOptions>& scan_options) override;
0072 /// Writing is unsupported: ignores every argument and always returns Status::NotImplemented.
0073 Result<std::shared_ptr<FileWriter>> MakeWriter(
0074 std::shared_ptr<io::OutputStream> destination, std::shared_ptr<Schema> schema,
0075 std::shared_ptr<FileWriteOptions> options,
0076 fs::FileLocator destination_locator) const override {
0077 return Status::NotImplemented("Writing JSON files is not currently supported");
0078 }
0079 /// Always returns NULLPTR, i.e. no default write options (MakeWriter is not implemented either).
0080 std::shared_ptr<FileWriteOptions> DefaultWriteOptions() override { return NULLPTR; }
0081 };
0082
0083 /// \brief FragmentScanOptions subclass holding JSON parse and read options; type_name() returns kJsonTypeName.
0084 struct ARROW_DS_EXPORT JsonFragmentScanOptions : public FragmentScanOptions {
0085 std::string type_name() const override { return kJsonTypeName; }
0086
0087
0088
0089 /// JSON parse options; initialized to json::ParseOptions::Defaults().
0090 json::ParseOptions parse_options = json::ParseOptions::Defaults();
0091
0092 /// JSON read options; initialized to json::ReadOptions::Defaults().
0093 json::ReadOptions read_options = json::ReadOptions::Defaults();
0094 };
0095
0096
0097
0098 }