Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:55

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <functional>
0022 #include <iosfwd>
0023 #include <memory>
0024 
0025 #include "arrow/array/array_base.h"
0026 #include "arrow/array/array_nested.h"
0027 #include "arrow/result.h"
0028 #include "arrow/status.h"
0029 #include "arrow/type.h"
0030 #include "arrow/util/visibility.h"
0031 
0032 namespace arrow {
0033 
0034 /// \brief Compare two arrays, returning an edit script which expresses the difference
0035 /// between them.
0036 ///
0037 /// An edit script is an array of struct(insert: bool, run_length: int64_t).
0038 /// Each element of "insert" indicates whether an element was inserted into (true)
0039 /// or deleted from (false) base. Each insertion or deletion is followed by a run of
0040 /// elements which are unchanged from base to target; the length of that run is stored
0041 /// in "run_length". (Note that the edit script begins and ends with a run of shared
0042 /// elements, but both fields of the struct must have the same length. To accommodate
0043 /// this, the first element of "insert" carries no meaning and should be ignored.)
0044 ///
0045 /// For example, for base "hlloo" and target "hello", the edit script would be
0046 /// [
0047 ///   {"insert": false, "run_length": 1}, // leading run of length 1 ("h"), "insert" ignored
0048 ///   {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo")
0049 ///   {"insert": false, "run_length": 0} // delete("o") then an empty run
0050 /// ]
0051 ///
0052 /// Diffing arrays containing nulls is not currently supported.
0053 ///
0054 /// \param[in] base baseline for comparison
0055 /// \param[in] target an array of identical type to base, to be compared against base
0056 /// \param[in] pool memory pool from which memory to store the result will be allocated
0057 /// \return an edit script array which can be applied to base to produce target
0058 ARROW_EXPORT
0059 Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
0060                                           MemoryPool* pool = default_memory_pool());
0061 
0062 /// \brief Visitor interface for easy traversal of an edit script.
0063 /// \param[in] edits the edit script to traverse
0064 /// \param[in] visitor callback invoked for each hunk of insertions and deletions
0065 ARROW_EXPORT Status VisitEditScript(
0066     const Array& edits,
0067     const std::function<Status(int64_t delete_begin, int64_t delete_end,
0068                                int64_t insert_begin, int64_t insert_end)>& visitor);
0069 
0070 /// \brief Return a function which will format an edit script in unified
0071 /// diff format to \p os, given base and target arrays of type \p type.
0072 ARROW_EXPORT Result<
0073     std::function<Status(const Array& edits, const Array& base, const Array& target)>>
0074 MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os);
0075 
0076 }  // namespace arrow