![]() |
|
|||
File indexing completed on 2025-08-28 08:26:55
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <functional>
#include <iosfwd>
#include <memory>

#include "arrow/array/array_base.h"
#include "arrow/array/array_nested.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/util/visibility.h"

namespace arrow {

/// \brief Compare two arrays, returning an edit script which expresses the difference
/// between them
///
/// An edit script is an array of struct(insert: bool, run_length: int64_t).
/// Each element of "insert" determines whether an element was inserted into (true)
/// or deleted from (false) base. Each insertion or deletion is followed by a run of
/// elements which are unchanged from base to target; the length of this run is stored
/// in "run_length". (Note that the edit script begins and ends with a run of shared
/// elements but both fields of the struct must have the same length. To accommodate this
/// the first element of "insert" should be ignored.)
///
/// For example for base "hlloo" and target "hello", the edit script would be
/// [
///   {"insert": false, "run_length": 1}, // leading run of length 1 ("h")
///   {"insert": true, "run_length": 3},  // insert("e") then a run of length 3 ("llo")
///   {"insert": false, "run_length": 0}  // delete("o") then an empty run
/// ]
///
/// Diffing arrays containing nulls is not currently supported.
///
/// \param[in] base baseline for comparison
/// \param[in] target an array of identical type to base whose elements differ from base's
/// \param[in] pool memory to store the result will be allocated from this memory pool
/// \return an edit script array which can be applied to base to produce target
ARROW_EXPORT
Result<std::shared_ptr<StructArray>> Diff(const Array& base, const Array& target,
                                          MemoryPool* pool = default_memory_pool());

/// \brief visitor interface for easy traversal of an edit script
///
/// visitor will be called for each hunk of insertions and deletions.
///
/// \param[in] edits the edit script to traverse (presumably one produced by Diff();
///     confirm against the implementation)
/// \param[in] visitor callback invoked for each hunk; it receives delete_begin,
///     delete_end, insert_begin and insert_end.
///     NOTE(review): these look like index ranges into base (deletions) and
///     target (insertions), presumably half-open -- confirm against the implementation
/// \return a Status; NOTE(review): presumably a non-OK Status returned by visitor is
///     propagated to the caller -- confirm against the implementation
ARROW_EXPORT Status VisitEditScript(
    const Array& edits,
    const std::function<Status(int64_t delete_begin, int64_t delete_end,
                               int64_t insert_begin, int64_t insert_end)>& visitor);

/// \brief return a function which will format an edit script in unified
/// diff format to os, given base and target arrays of type `type`
///
/// \param[in] type the data type of the base and target arrays that will be passed
///     to the returned function
/// \param[in] os output stream the returned function writes the formatted diff to;
///     NOTE(review): the stream is taken by pointer, so it presumably must outlive
///     the returned function -- confirm against the implementation
/// \return a function taking (edits, base, target) and returning a Status, or an
///     error Result (NOTE(review): presumably when type is not supported -- confirm)
ARROW_EXPORT Result<
    std::function<Status(const Array& edits, const Array& base, const Array& target)>>
MakeUnifiedDiffFormatter(const DataType& type, std::ostream* os);

}  // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |