|
|
|||
File indexing completed on 2026-05-10 08:43:13
0001 //===- IRSimilarityIdentifier.h - Find similarity in a module --------------==// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 // 0009 // \file 0010 // Interface file for the IRSimilarityIdentifier for identifying similarities in 0011 // IR including the IRInstructionMapper, which maps an Instruction to unsigned 0012 // integers. 0013 // 0014 // Two sequences of instructions are called "similar" if they perform the same 0015 // series of operations for all inputs. 0016 // 0017 // \code 0018 // %1 = add i32 %a, 10 0019 // %2 = add i32 %a, %1 0020 // %3 = icmp slt icmp %1, %2 0021 // \endcode 0022 // 0023 // and 0024 // 0025 // \code 0026 // %1 = add i32 11, %a 0027 // %2 = sub i32 %a, %1 0028 // %3 = icmp sgt icmp %2, %1 0029 // \endcode 0030 // 0031 // ultimately have the same result, even if the inputs, and structure are 0032 // slightly different. 0033 // 0034 // For instructions, we do not worry about operands that do not have fixed 0035 // semantic meaning to the program. We consider the opcode that the instruction 0036 // has, the types, parameters, and extra information such as the function name, 0037 // or comparison predicate. These are used to create a hash to map instructions 0038 // to integers to be used in similarity matching in sequences of instructions 0039 // 0040 // Terminology: 0041 // An IRSimilarityCandidate is a region of IRInstructionData (wrapped 0042 // Instructions), usually used to denote a region of similarity has been found. 0043 // 0044 // A SimilarityGroup is a set of IRSimilarityCandidates that are structurally 0045 // similar to one another. 0046 // 0047 //===----------------------------------------------------------------------===// 0048 0049 #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H 0050 #define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H 0051 0052 #include "llvm/IR/InstVisitor.h" 0053 #include "llvm/IR/Instructions.h" 0054 #include "llvm/IR/PassManager.h" 0055 #include "llvm/Pass.h" 0056 #include "llvm/Support/Allocator.h" 0057 #include <optional> 0058 0059 namespace llvm { 0060 0061 namespace IRSimilarity { 0062 0063 struct IRInstructionDataList; 0064 0065 /// This represents what is and is not supported when finding similarity in 0066 /// Instructions. 0067 /// 0068 /// Legal Instructions are considered when looking at similarity between 0069 /// Instructions. 0070 /// 0071 /// Illegal Instructions cannot be considered when looking for similarity 0072 /// between Instructions. They act as boundaries between similarity regions. 0073 /// 0074 /// Invisible Instructions are skipped over during analysis. 0075 // TODO: Shared with MachineOutliner 0076 enum InstrType { Legal, Illegal, Invisible }; 0077 0078 /// This provides the utilities for hashing an Instruction to an unsigned 0079 /// integer. Two IRInstructionDatas produce the same hash value when their 0080 /// underlying Instructions perform the same operation (even if they don't have 0081 /// the same input operands.) 0082 /// As a more concrete example, consider the following: 0083 /// 0084 /// \code 0085 /// %add1 = add i32 %a, %b 0086 /// %add2 = add i32 %c, %d 0087 /// %add3 = add i64 %e, %f 0088 /// \endcode 0089 /// 0090 // Then the IRInstructionData wrappers for these Instructions may be hashed like 0091 /// so: 0092 /// 0093 /// \code 0094 /// ; These two adds have the same types and operand types, so they hash to the 0095 /// ; same number. 0096 /// %add1 = add i32 %a, %b ; Hash: 1 0097 /// %add2 = add i32 %c, %d ; Hash: 1 0098 /// ; This add produces an i64. This differentiates it from %add1 and %add2. So, 0099 /// ; it hashes to a different number. 0100 /// %add3 = add i64 %e, %f; Hash: 2 0101 /// \endcode 0102 /// 0103 /// 0104 /// This hashing scheme will be used to represent the program as a very long 0105 /// string. This string can then be placed in a data structure which can be used 0106 /// for similarity queries. 0107 /// 0108 /// TODO: Handle types of Instructions which can be equal even with different 0109 /// operands. (E.g. comparisons with swapped predicates.) 0110 /// TODO: Handle CallInsts, which are only checked for function type 0111 /// by \ref isSameOperationAs. 0112 /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the 0113 /// exact same, and some do not. 0114 struct IRInstructionData 0115 : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> { 0116 0117 /// The source Instruction that is being wrapped. 0118 Instruction *Inst = nullptr; 0119 /// The values of the operands in the Instruction. 0120 SmallVector<Value *, 4> OperVals; 0121 /// The legality of the wrapped instruction. This is informed by InstrType, 0122 /// and is used when checking when two instructions are considered similar. 0123 /// If either instruction is not legal, the instructions are automatically not 0124 /// considered similar. 0125 bool Legal = false; 0126 0127 /// This is only relevant if we are wrapping a CmpInst where we needed to 0128 /// change the predicate of a compare instruction from a greater than form 0129 /// to a less than form. It is std::nullopt otherwise. 0130 std::optional<CmpInst::Predicate> RevisedPredicate; 0131 0132 /// This is only relevant if we are wrapping a CallInst. If we are requiring 0133 /// that the function calls have matching names as well as types, and the 0134 /// call is not an indirect call, this will hold the name of the function. If 0135 /// it is an indirect string, it will be the empty string. However, if this 0136 /// requirement is not in place it will be the empty string regardless of the 0137 /// function call type. The value held here is used to create the hash of the 0138 /// instruction, and check to make sure two instructions are close to one 0139 /// another. 0140 std::optional<std::string> CalleeName; 0141 0142 /// This structure holds the distances of how far "ahead of" or "behind" the 0143 /// target blocks of a branch, or the incoming blocks of a phi nodes are. 0144 /// If the value is negative, it means that the block was registered before 0145 /// the block of this instruction in terms of blocks in the function. 0146 /// Code Example: 0147 /// \code 0148 /// block_1: 0149 /// br i1 %0, label %block_2, label %block_3 0150 /// block_2: 0151 /// br i1 %1, label %block_1, label %block_2 0152 /// block_3: 0153 /// br i1 %2, label %block_2, label %block_1 0154 /// ; Replacing the labels with relative values, this becomes: 0155 /// block_1: 0156 /// br i1 %0, distance 1, distance 2 0157 /// block_2: 0158 /// br i1 %1, distance -1, distance 0 0159 /// block_3: 0160 /// br i1 %2, distance -1, distance -2 0161 /// \endcode 0162 /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is 0163 /// "ahead" of block_2. 0164 SmallVector<int, 4> RelativeBlockLocations; 0165 0166 /// Gather the information that is difficult to gather for an Instruction, or 0167 /// is changed. i.e. the operands of an Instruction and the Types of those 0168 /// operands. This extra information allows for similarity matching to make 0169 /// assertions that allow for more flexibility when checking for whether an 0170 /// Instruction performs the same operation. 0171 IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); 0172 IRInstructionData(IRInstructionDataList &IDL); 0173 0174 /// Fills data stuctures for IRInstructionData when it is constructed from a 0175 // reference or a pointer. 0176 void initializeInstruction(); 0177 0178 /// Get the predicate that the compare instruction is using for hashing the 0179 /// instruction. the IRInstructionData must be wrapping a CmpInst. 0180 CmpInst::Predicate getPredicate() const; 0181 0182 /// Get the callee name that the call instruction is using for hashing the 0183 /// instruction. The IRInstructionData must be wrapping a CallInst. 0184 StringRef getCalleeName() const; 0185 0186 /// A function that swaps the predicates to their less than form if they are 0187 /// in a greater than form. Otherwise, the predicate is unchanged. 0188 /// 0189 /// \param CI - The comparison operation to find a consistent preidcate for. 0190 /// \return the consistent comparison predicate. 0191 static CmpInst::Predicate predicateForConsistency(CmpInst *CI); 0192 0193 /// For an IRInstructionData containing a branch, finds the 0194 /// relative distances from the source basic block to the target by taking 0195 /// the difference of the number assigned to the current basic block and the 0196 /// target basic block of the branch. 0197 /// 0198 /// \param BasicBlockToInteger - The mapping of basic blocks to their location 0199 /// in the module. 0200 void 0201 setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger); 0202 0203 /// For an IRInstructionData containing a CallInst, set the function name 0204 /// appropriately. This will be an empty string if it is an indirect call, 0205 /// or we are not matching by name of the called function. It will be the 0206 /// name of the function if \p MatchByName is true and it is not an indirect 0207 /// call. We may decide not to match by name in order to expand the 0208 /// size of the regions we can match. If a function name has the same type 0209 /// signature, but the different name, the region of code is still almost the 0210 /// same. Since function names can be treated as constants, the name itself 0211 /// could be extrapolated away. However, matching by name provides a 0212 /// specificity and more "identical" code than not matching by name. 0213 /// 0214 /// \param MatchByName - A flag to mark whether we are using the called 0215 /// function name as a differentiating parameter. 0216 void setCalleeName(bool MatchByName = true); 0217 0218 /// For an IRInstructionData containing a PHINode, finds the 0219 /// relative distances from the incoming basic block to the current block by 0220 /// taking the difference of the number assigned to the current basic block 0221 /// and the incoming basic block of the branch. 0222 /// 0223 /// \param BasicBlockToInteger - The mapping of basic blocks to their location 0224 /// in the module. 0225 void 0226 setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger); 0227 0228 /// Get the BasicBlock based operands for PHINodes and BranchInsts. 0229 /// 0230 /// \returns A list of relevant BasicBlocks. 0231 ArrayRef<Value *> getBlockOperVals(); 0232 0233 /// Hashes \p Value based on its opcode, types, and operand types. 0234 /// Two IRInstructionData instances produce the same hash when they perform 0235 /// the same operation. 0236 /// 0237 /// As a simple example, consider the following instructions. 0238 /// 0239 /// \code 0240 /// %add1 = add i32 %x1, %y1 0241 /// %add2 = add i32 %x2, %y2 0242 /// 0243 /// %sub = sub i32 %x1, %y1 0244 /// 0245 /// %add_i64 = add i64 %x2, %y2 0246 /// \endcode 0247 /// 0248 /// Because the first two adds operate the same types, and are performing the 0249 /// same action, they will be hashed to the same value. 0250 /// 0251 /// However, the subtraction instruction is not the same as an addition, and 0252 /// will be hashed to a different value. 0253 /// 0254 /// Finally, the last add has a different type compared to the first two add 0255 /// instructions, so it will also be hashed to a different value that any of 0256 /// the previous instructions. 0257 /// 0258 /// \param [in] ID - The IRInstructionData instance to be hashed. 0259 /// \returns A hash_value of the IRInstructionData. 0260 friend hash_code hash_value(const IRInstructionData &ID) { 0261 SmallVector<Type *, 4> OperTypes; 0262 for (Value *V : ID.OperVals) 0263 OperTypes.push_back(V->getType()); 0264 0265 if (isa<CmpInst>(ID.Inst)) 0266 return llvm::hash_combine( 0267 llvm::hash_value(ID.Inst->getOpcode()), 0268 llvm::hash_value(ID.Inst->getType()), 0269 llvm::hash_value(ID.getPredicate()), 0270 llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); 0271 0272 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) { 0273 // To hash intrinsics, we use the opcode, and types like the other 0274 // instructions, but also, the Intrinsic ID, and the Name of the 0275 // intrinsic. 0276 Intrinsic::ID IntrinsicID = II->getIntrinsicID(); 0277 return llvm::hash_combine( 0278 llvm::hash_value(ID.Inst->getOpcode()), 0279 llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID), 0280 llvm::hash_value(*ID.CalleeName), 0281 llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); 0282 } 0283 0284 if (isa<CallInst>(ID.Inst)) { 0285 std::string FunctionName = *ID.CalleeName; 0286 return llvm::hash_combine( 0287 llvm::hash_value(ID.Inst->getOpcode()), 0288 llvm::hash_value(ID.Inst->getType()), 0289 llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName), 0290 llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); 0291 } 0292 0293 return llvm::hash_combine( 0294 llvm::hash_value(ID.Inst->getOpcode()), 0295 llvm::hash_value(ID.Inst->getType()), 0296 llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); 0297 } 0298 0299 IRInstructionDataList *IDL = nullptr; 0300 }; 0301 0302 struct IRInstructionDataList 0303 : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {}; 0304 0305 /// Compare one IRInstructionData class to another IRInstructionData class for 0306 /// whether they are performing a the same operation, and can mapped to the 0307 /// same value. For regular instructions if the hash value is the same, then 0308 /// they will also be close. 0309 /// 0310 /// \param A - The first IRInstructionData class to compare 0311 /// \param B - The second IRInstructionData class to compare 0312 /// \returns true if \p A and \p B are similar enough to be mapped to the same 0313 /// value. 0314 bool isClose(const IRInstructionData &A, const IRInstructionData &B); 0315 0316 struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> { 0317 static inline IRInstructionData *getEmptyKey() { return nullptr; } 0318 static inline IRInstructionData *getTombstoneKey() { 0319 return reinterpret_cast<IRInstructionData *>(-1); 0320 } 0321 0322 static unsigned getHashValue(const IRInstructionData *E) { 0323 using llvm::hash_value; 0324 assert(E && "IRInstructionData is a nullptr?"); 0325 return hash_value(*E); 0326 } 0327 0328 static bool isEqual(const IRInstructionData *LHS, 0329 const IRInstructionData *RHS) { 0330 if (RHS == getEmptyKey() || RHS == getTombstoneKey() || 0331 LHS == getEmptyKey() || LHS == getTombstoneKey()) 0332 return LHS == RHS; 0333 0334 assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?"); 0335 return isClose(*LHS, *RHS); 0336 } 0337 }; 0338 0339 /// Helper struct for converting the Instructions in a Module into a vector of 0340 /// unsigned integers. This vector of unsigned integers can be thought of as a 0341 /// "numeric string". This numeric string can then be queried by, for example, 0342 /// data structures that find repeated substrings. 0343 /// 0344 /// This hashing is done per BasicBlock in the module. To hash Instructions 0345 /// based off of their operations, each Instruction is wrapped in an 0346 /// IRInstructionData struct. The unsigned integer for an IRInstructionData 0347 /// depends on: 0348 /// - The hash provided by the IRInstructionData. 0349 /// - Which member of InstrType the IRInstructionData is classified as. 0350 // See InstrType for more details on the possible classifications, and how they 0351 // manifest in the numeric string. 0352 /// 0353 /// The numeric string for an individual BasicBlock is terminated by an unique 0354 /// unsigned integer. This prevents data structures which rely on repetition 0355 /// from matching across BasicBlocks. (For example, the SuffixTree.) 0356 /// As a concrete example, if we have the following two BasicBlocks: 0357 /// \code 0358 /// bb0: 0359 /// %add1 = add i32 %a, %b 0360 /// %add2 = add i32 %c, %d 0361 /// %add3 = add i64 %e, %f 0362 /// bb1: 0363 /// %sub = sub i32 %c, %d 0364 /// \endcode 0365 /// We may hash the Instructions like this (via IRInstructionData): 0366 /// \code 0367 /// bb0: 0368 /// %add1 = add i32 %a, %b ; Hash: 1 0369 /// %add2 = add i32 %c, %d; Hash: 1 0370 /// %add3 = add i64 %e, %f; Hash: 2 0371 /// bb1: 0372 /// %sub = sub i32 %c, %d; Hash: 3 0373 /// %add4 = add i32 %c, %d ; Hash: 1 0374 /// \endcode 0375 /// And produce a "numeric string representation" like so: 0376 /// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2 0377 /// 0378 /// TODO: This is very similar to the MachineOutliner, and should be 0379 /// consolidated into the same interface. 0380 struct IRInstructionMapper { 0381 /// The starting illegal instruction number to map to. 0382 /// 0383 /// Set to -3 for compatibility with DenseMapInfo<unsigned>. 0384 unsigned IllegalInstrNumber = static_cast<unsigned>(-3); 0385 0386 /// The next available integer to assign to a legal Instruction to. 0387 unsigned LegalInstrNumber = 0; 0388 0389 /// Correspondence from IRInstructionData to unsigned integers. 0390 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits> 0391 InstructionIntegerMap; 0392 0393 /// A mapping for a basic block in a module to its assigned number/location 0394 /// in the module. 0395 DenseMap<BasicBlock *, unsigned> BasicBlockToInteger; 0396 0397 /// Set if we added an illegal number in the previous step. 0398 /// Since each illegal number is unique, we only need one of them between 0399 /// each range of legal numbers. This lets us make sure we don't add more 0400 /// than one illegal number per range. 0401 bool AddedIllegalLastTime = false; 0402 0403 /// Marks whether we found a illegal instruction in the previous step. 0404 bool CanCombineWithPrevInstr = false; 0405 0406 /// Marks whether we have found a set of instructions that is long enough 0407 /// to be considered for similarity. 0408 bool HaveLegalRange = false; 0409 0410 /// Marks whether we should use exact function names, as well as types to 0411 /// find similarity between calls. 0412 bool EnableMatchCallsByName = false; 0413 0414 /// This allocator pointer is in charge of holding on to the IRInstructionData 0415 /// so it is not deallocated until whatever external tool is using it is done 0416 /// with the information. 0417 SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr; 0418 0419 /// This allocator pointer is in charge of creating the IRInstructionDataList 0420 /// so it is not deallocated until whatever external tool is using it is done 0421 /// with the information. 0422 SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr; 0423 0424 /// Get an allocated IRInstructionData struct using the InstDataAllocator. 0425 /// 0426 /// \param I - The Instruction to wrap with IRInstructionData. 0427 /// \param Legality - A boolean value that is true if the instruction is to 0428 /// be considered for similarity, and false if not. 0429 /// \param IDL - The InstructionDataList that the IRInstructionData is 0430 /// inserted into. 0431 /// \returns An allocated IRInstructionData struct. 0432 IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, 0433 IRInstructionDataList &IDL); 0434 0435 /// Get an empty allocated IRInstructionData struct using the 0436 /// InstDataAllocator. 0437 /// 0438 /// \param IDL - The InstructionDataList that the IRInstructionData is 0439 /// inserted into. 0440 /// \returns An allocated IRInstructionData struct. 0441 IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL); 0442 0443 /// Get an allocated IRInstructionDataList object using the IDLAllocator. 0444 /// 0445 /// \returns An allocated IRInstructionDataList object. 0446 IRInstructionDataList *allocateIRInstructionDataList(); 0447 0448 IRInstructionDataList *IDL = nullptr; 0449 0450 /// Assigns values to all the basic blocks in function \p F starting from 0451 /// integer \p BBNumber. 0452 /// 0453 /// \param F - The function containing the basic blocks to assign numbers to. 0454 /// \param BBNumber - The number to start from. 0455 void initializeForBBs(Function &F, unsigned &BBNumber) { 0456 for (BasicBlock &BB : F) 0457 BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); 0458 } 0459 0460 /// Assigns values to all the basic blocks in Module \p M. 0461 /// \param M - The module containing the basic blocks to assign numbers to. 0462 void initializeForBBs(Module &M) { 0463 unsigned BBNumber = 0; 0464 for (Function &F : M) 0465 initializeForBBs(F, BBNumber); 0466 } 0467 0468 /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers 0469 /// determined by \p InstrType. Two Instructions are mapped to the same value 0470 /// if they are close as defined by the InstructionData class above. 0471 /// 0472 /// \param [in] BB - The BasicBlock to be mapped to integers. 0473 /// \param [in,out] InstrList - Vector of IRInstructionData to append to. 0474 /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to. 0475 void convertToUnsignedVec(BasicBlock &BB, 0476 std::vector<IRInstructionData *> &InstrList, 0477 std::vector<unsigned> &IntegerMapping); 0478 0479 /// Maps an Instruction to a legal integer. 0480 /// 0481 /// \param [in] It - The Instruction to be mapped to an integer. 0482 /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to 0483 /// append to. 0484 /// \param [in,out] InstrListForBB - Vector of InstructionData to append to. 0485 /// \returns The integer \p It was mapped to. 0486 unsigned mapToLegalUnsigned(BasicBlock::iterator &It, 0487 std::vector<unsigned> &IntegerMappingForBB, 0488 std::vector<IRInstructionData *> &InstrListForBB); 0489 0490 /// Maps an Instruction to an illegal integer. 0491 /// 0492 /// \param [in] It - The \p Instruction to be mapped to an integer. 0493 /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to 0494 /// append to. 0495 /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to. 0496 /// \param End - true if creating a dummy IRInstructionData at the end of a 0497 /// basic block. 0498 /// \returns The integer \p It was mapped to. 0499 unsigned mapToIllegalUnsigned( 0500 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 0501 std::vector<IRInstructionData *> &InstrListForBB, bool End = false); 0502 0503 IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA, 0504 SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA) 0505 : InstDataAllocator(IDA), IDLAllocator(IDLA) { 0506 // Make sure that the implementation of DenseMapInfo<unsigned> hasn't 0507 // changed. 0508 assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) && 0509 "DenseMapInfo<unsigned>'s empty key isn't -1!"); 0510 assert(DenseMapInfo<unsigned>::getTombstoneKey() == 0511 static_cast<unsigned>(-2) && 0512 "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); 0513 0514 IDL = new (IDLAllocator->Allocate()) 0515 IRInstructionDataList(); 0516 } 0517 0518 /// Custom InstVisitor to classify different instructions for whether it can 0519 /// be analyzed for similarity. 0520 struct InstructionClassification 0521 : public InstVisitor<InstructionClassification, InstrType> { 0522 InstructionClassification() = default; 0523 0524 // TODO: Determine a scheme to resolve when the label is similar enough. 0525 InstrType visitBranchInst(BranchInst &BI) { 0526 if (EnableBranches) 0527 return Legal; 0528 return Illegal; 0529 } 0530 InstrType visitPHINode(PHINode &PN) { 0531 if (EnableBranches) 0532 return Legal; 0533 return Illegal; 0534 } 0535 // TODO: Handle allocas. 0536 InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; } 0537 // We exclude variable argument instructions since variable arguments 0538 // requires extra checking of the argument list. 0539 InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; } 0540 // We exclude all exception handling cases since they are so context 0541 // dependent. 0542 InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; } 0543 InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; } 0544 // DebugInfo should be included in the regions, but should not be 0545 // analyzed for similarity as it has no bearing on the outcome of the 0546 // program. 0547 InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } 0548 InstrType visitIntrinsicInst(IntrinsicInst &II) { 0549 // These are disabled due to complications in the CodeExtractor when 0550 // outlining these instructions. For instance, It is unclear what we 0551 // should do when moving only the start or end lifetime instruction into 0552 // an outlined function. Also, assume-like intrinsics could be removed 0553 // from the region, removing arguments, causing discrepencies in the 0554 // number of inputs between different regions. 0555 if (II.isAssumeLikeIntrinsic()) 0556 return Illegal; 0557 return EnableIntrinsics ? Legal : Illegal; 0558 } 0559 // We only allow call instructions where the function has a name and 0560 // is not an indirect call. 0561 InstrType visitCallInst(CallInst &CI) { 0562 Function *F = CI.getCalledFunction(); 0563 bool IsIndirectCall = CI.isIndirectCall(); 0564 if (IsIndirectCall && !EnableIndirectCalls) 0565 return Illegal; 0566 if (!F && !IsIndirectCall) 0567 return Illegal; 0568 // Functions marked with the swifttailcc and tailcc calling conventions 0569 // require special handling when outlining musttail functions. The 0570 // calling convention must be passed down to the outlined function as 0571 // well. Further, there is special handling for musttail calls as well, 0572 // requiring a return call directly after. For now, the outliner does not 0573 // support this, so we do not handle matching this case either. 0574 if ((CI.getCallingConv() == CallingConv::SwiftTail || 0575 CI.getCallingConv() == CallingConv::Tail) && 0576 !EnableMustTailCalls) 0577 return Illegal; 0578 if (CI.isMustTailCall() && !EnableMustTailCalls) 0579 return Illegal; 0580 return Legal; 0581 } 0582 // TODO: We do not current handle similarity that changes the control flow. 0583 InstrType visitInvokeInst(InvokeInst &II) { return Illegal; } 0584 // TODO: We do not current handle similarity that changes the control flow. 0585 InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; } 0586 // TODO: Handle interblock similarity. 0587 InstrType visitTerminator(Instruction &I) { return Illegal; } 0588 InstrType visitInstruction(Instruction &I) { return Legal; } 0589 0590 // The flag variable that lets the classifier know whether we should 0591 // allow branches to be checked for similarity. 0592 bool EnableBranches = false; 0593 0594 // The flag variable that lets the classifier know whether we should 0595 // allow indirect calls to be considered legal instructions. 0596 bool EnableIndirectCalls = false; 0597 0598 // Flag that lets the classifier know whether we should allow intrinsics to 0599 // be checked for similarity. 0600 bool EnableIntrinsics = false; 0601 0602 // Flag that lets the classifier know whether we should allow tail calls to 0603 // be checked for similarity. 0604 bool EnableMustTailCalls = false; 0605 }; 0606 0607 /// Maps an Instruction to a member of InstrType. 0608 InstructionClassification InstClassifier; 0609 }; 0610 0611 /// This is a class that wraps a range of IRInstructionData from one point to 0612 /// another in the vector of IRInstructionData, which is a region of the 0613 /// program. It is also responsible for defining the structure within this 0614 /// region of instructions. 0615 /// 0616 /// The structure of a region is defined through a value numbering system 0617 /// assigned to each unique value in a region at the creation of the 0618 /// IRSimilarityCandidate. 0619 /// 0620 /// For example, for each Instruction we add a mapping for each new 0621 /// value seen in that Instruction. 0622 /// IR: Mapping Added: 0623 /// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 0624 /// %add2 = add i32 %a, %1 %add2 -> 4 0625 /// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 0626 /// 0627 /// We can compare IRSimilarityCandidates against one another. 0628 /// The \ref isSimilar function compares each IRInstructionData against one 0629 /// another and if we have the same sequences of IRInstructionData that would 0630 /// create the same hash, we have similar IRSimilarityCandidates. 0631 /// 0632 /// We can also compare the structure of IRSimilarityCandidates. If we can 0633 /// create a mapping of registers in the region contained by one 0634 /// IRSimilarityCandidate to the region contained by different 0635 /// IRSimilarityCandidate, they can be considered structurally similar. 0636 /// 0637 /// IRSimilarityCandidate1: IRSimilarityCandidate2: 0638 /// %add1 = add i32 %a, %b %add1 = add i32 %d, %e 0639 /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f 0640 /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 0641 /// 0642 /// Can have the following mapping from candidate to candidate of: 0643 /// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4 0644 /// and can be considered similar. 0645 /// 0646 /// IRSimilarityCandidate1: IRSimilarityCandidate2: 0647 /// %add1 = add i32 %a, %b %add1 = add i32 %d, c4 0648 /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f 0649 /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 0650 /// 0651 /// We cannot create the same mapping since the use of c4 is not used in the 0652 /// same way as %b or c2. 0653 class IRSimilarityCandidate { 0654 private: 0655 /// The start index of this IRSimilarityCandidate in the instruction list. 0656 unsigned StartIdx = 0; 0657 0658 /// The number of instructions in this IRSimilarityCandidate. 0659 unsigned Len = 0; 0660 0661 /// The first instruction in this IRSimilarityCandidate. 0662 IRInstructionData *FirstInst = nullptr; 0663 0664 /// The last instruction in this IRSimilarityCandidate. 0665 IRInstructionData *LastInst = nullptr; 0666 0667 /// Global Value Numbering structures 0668 /// @{ 0669 /// Stores the mapping of the value to the number assigned to it in the 0670 /// IRSimilarityCandidate. 0671 DenseMap<Value *, unsigned> ValueToNumber; 0672 /// Stores the mapping of the number to the value assigned this number. 0673 DenseMap<unsigned, Value *> NumberToValue; 0674 /// Stores the mapping of a value's number to canonical numbering in the 0675 /// candidate's respective similarity group. 0676 DenseMap<unsigned, unsigned> NumberToCanonNum; 0677 /// Stores the mapping of canonical number in the candidate's respective 0678 /// similarity group to a value number. 0679 DenseMap<unsigned, unsigned> CanonNumToNumber; 0680 /// @} 0681 0682 public: 0683 /// \param StartIdx - The starting location of the region. 0684 /// \param Len - The length of the region. 0685 /// \param FirstInstIt - The starting IRInstructionData of the region. 0686 /// \param LastInstIt - The ending IRInstructionData of the region. 0687 IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 0688 IRInstructionData *FirstInstIt, 0689 IRInstructionData *LastInstIt); 0690 0691 /// \param A - The first IRInstructionCandidate to compare. 0692 /// \param B - The second IRInstructionCandidate to compare. 0693 /// \returns True when every IRInstructionData in \p A is similar to every 0694 /// IRInstructionData in \p B. 0695 static bool isSimilar(const IRSimilarityCandidate &A, 0696 const IRSimilarityCandidate &B); 0697 0698 /// \param [in] A - The first IRInstructionCandidate to compare. 0699 /// \param [in] B - The second IRInstructionCandidate to compare. 0700 /// \returns True when every IRInstructionData in \p A is structurally similar 0701 /// to \p B. 0702 static bool compareStructure(const IRSimilarityCandidate &A, 0703 const IRSimilarityCandidate &B); 0704 0705 /// \param [in] A - The first IRInstructionCandidate to compare. 0706 /// \param [in] B - The second IRInstructionCandidate to compare. 0707 /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from 0708 /// candidate \p A to candidate \B. 0709 /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from 0710 /// candidate \p B to candidate \A. 0711 /// \returns True when every IRInstructionData in \p A is structurally similar 0712 /// to \p B. 0713 static bool 0714 compareStructure(const IRSimilarityCandidate &A, 0715 const IRSimilarityCandidate &B, 0716 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 0717 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB); 0718 0719 struct OperandMapping { 0720 /// The IRSimilarityCandidate that holds the instruction the OperVals were 0721 /// pulled from. 0722 const IRSimilarityCandidate &IRSC; 0723 0724 /// The operand values to be analyzed. 0725 ArrayRef<Value *> &OperVals; 0726 0727 /// The current mapping of global value numbers from one IRSimilarityCandidate 0728 /// to another IRSimilarityCandidate. 0729 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping; 0730 }; 0731 0732 /// A helper struct to hold the candidate, for a branch instruction, the 0733 /// relative location of a label, and the label itself. This is mostly to 0734 /// group the values together before passing them as a bundle to a function. 0735 struct RelativeLocMapping { 0736 /// The IRSimilarityCandidate that holds the instruction the relative 0737 /// location was pulled from. 0738 const IRSimilarityCandidate &IRSC; 0739 0740 /// The relative location to be analyzed. 0741 int RelativeLocation; 0742 0743 /// The corresponding value. 0744 Value *OperVal; 0745 }; 0746 0747 /// Compare the operands in \p A and \p B and check that the current mapping 0748 /// of global value numbers from \p A to \p B and \p B to \A is consistent. 0749 /// 0750 /// \param A - The first IRInstructionCandidate, operand values, and current 0751 /// operand mappings to compare. 0752 /// \param B - The second IRInstructionCandidate, operand values, and current 0753 /// operand mappings to compare. 0754 /// \returns true if the IRSimilarityCandidates operands are compatible. 0755 static bool compareNonCommutativeOperandMapping(OperandMapping A, 0756 OperandMapping B); 0757 0758 /// Compare the operands in \p A and \p B and check that the current mapping 0759 /// of global value numbers from \p A to \p B and \p B to \A is consistent 0760 /// given that the operands are commutative. 0761 /// 0762 /// \param A - The first IRInstructionCandidate, operand values, and current 0763 /// operand mappings to compare. 0764 /// \param B - The second IRInstructionCandidate, operand values, and current 0765 /// operand mappings to compare. 0766 /// \returns true if the IRSimilarityCandidates operands are compatible. 0767 static bool compareCommutativeOperandMapping(OperandMapping A, 0768 OperandMapping B); 0769 0770 /// Compare the GVN of the assignment value in corresponding instructions in 0771 /// IRSimilarityCandidates \p A and \p B and check that there exists a mapping 0772 /// between the values and replaces the mapping with a one-to-one value if 0773 /// needed. 0774 /// 0775 /// \param InstValA - The assignment GVN from the first IRSimilarityCandidate. 0776 /// \param InstValB - The assignment GVN from the second 0777 /// IRSimilarityCandidate. 0778 /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from 0779 /// candidate \p A to candidate \B. 0780 /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from 0781 /// candidate \p B to candidate \A. 0782 /// \returns true if the IRSimilarityCandidates assignments are compatible. 0783 static bool compareAssignmentMapping( 0784 const unsigned InstValA, const unsigned &InstValB, 0785 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 0786 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB); 0787 0788 /// Compare the relative locations in \p A and \p B and check that the 0789 /// distances match if both locations are contained in the region, and that 0790 /// the branches both point outside the region if they do not. 0791 /// Example Region: 0792 /// \code 0793 /// entry: 0794 /// br i1 %0, label %block_1, label %block_3 0795 /// block_0: 0796 /// br i1 %0, label %block_1, label %block_2 0797 /// block_1: 0798 /// br i1 %0, label %block_2, label %block_3 0799 /// block_2: 0800 /// br i1 %1, label %block_1, label %block_4 0801 /// block_3: 0802 /// br i1 %2, label %block_2, label %block_5 0803 /// \endcode 0804 /// If we compare the branches in block_0 and block_1 the relative values are 0805 /// 1 and 2 for both, so we consider this a match. 0806 /// 0807 /// If we compare the branches in entry and block_0 the relative values are 0808 /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not 0809 /// consider them a match. 0810 /// 0811 /// If we compare the branches in block_1 and block_2 the relative values are 0812 /// 1 and 2, and -1 and None respectively. As a result we do not consider 0813 /// these to be the same 0814 /// 0815 /// If we compare the branches in block_2 and block_3 the relative values are 0816 /// -1 and None for both. We do consider these to be a match. 0817 /// 0818 /// \param A - The first IRInstructionCandidate, relative location value, 0819 /// and incoming block. 0820 /// \param B - The second IRInstructionCandidate, relative location value, 0821 /// and incoming block. 0822 /// \returns true if the relative locations match. 0823 static bool checkRelativeLocations(RelativeLocMapping A, 0824 RelativeLocMapping B); 0825 0826 /// Create a mapping from the value numbering to a different separate set of 0827 /// numbers. This will serve as a guide for relating one candidate to another. 0828 /// The canonical number gives use the ability identify which global value 0829 /// number in one candidate relates to the global value number in the other. 0830 /// 0831 /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a 0832 /// canonical numbering for. 0833 static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand); 0834 0835 /// Create a mapping for the value numbering of the calling 0836 /// IRSimilarityCandidate, to a different separate set of numbers, based on 0837 /// the canonical ordering in \p SourceCand. These are defined based on the 0838 /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of 0839 /// these relationships should have the same information, just in opposite 0840 /// directions. 0841 /// 0842 /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a 0843 /// canonical numbering from. 0844 /// \param ToSourceMapping - The mapping of value numbers from this candidate 0845 /// to \p SourceCand. 0846 /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand 0847 /// to this candidate. 0848 void createCanonicalRelationFrom( 0849 IRSimilarityCandidate &SourceCand, 0850 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 0851 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping); 0852 0853 /// Create a mapping for the value numbering of the calling 0854 /// IRSimilarityCandidate, to a different separate set of numbers, based on 0855 /// the canonical ordering in \p SourceCand. These are defined based on the 0856 /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of 0857 /// these relationships should have the same information, just in opposite 0858 /// directions. Uses the \p OneToOne mapping from target candidate to \p 0859 /// SourceCand GVNs to determine the mapping first for values with multiple 0860 /// mappings. This mapping is created by the ordering of operands in the 0861 /// instruction they are first seen in the candidates. 0862 /// 0863 /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a 0864 /// canonical numbering from. 0865 /// \param [in,out] OneToOne - A mapping of value numbers from candidate 0866 /// \p A to candidate \B using the structure of the original instructions. 0867 /// \param ToSourceMapping - The mapping of value numbers from this candidate 0868 /// to \p SourceCand. 0869 /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand 0870 /// to this candidate. 0871 void createCanonicalRelationFrom( 0872 IRSimilarityCandidate &SourceCand, 0873 DenseMap<unsigned, unsigned> &OneToOne, 0874 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 0875 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping); 0876 0877 /// Create a mapping for the value numbering of the calling 0878 /// IRSimilarityCandidate, to a different separate set of numbers, based on 0879 /// the canonical ordering in \p SourceCand. These are defined based on the 0880 /// canonical mapping defined between \p SoureCandLarge and 0881 /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally 0882 /// similar, and fully encapsulate the IRSimilarityCandidates in question. 0883 /// These are used as a "bridge" from the \p SourceCand to the target. 0884 /// 0885 /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a 0886 /// canonical numbering from. 0887 /// \param SoureCandLarge - The IRSimilarityCandidate fully containing 0888 /// \p SourceCand. 0889 /// \param TargetCandLarge - The IRSimilarityCandidate fully containing 0890 /// this Candidate. 0891 void createCanonicalRelationFrom( 0892 IRSimilarityCandidate &SourceCand, 0893 IRSimilarityCandidate &SourceCandLarge, 0894 IRSimilarityCandidate &TargetCandLarge); 0895 0896 /// \param [in,out] BBSet - The set to track the basic blocks. 0897 void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const { 0898 for (IRInstructionData &ID : *this) { 0899 BasicBlock *BB = ID.Inst->getParent(); 0900 BBSet.insert(BB); 0901 } 0902 } 0903 0904 /// \param [in,out] BBSet - The set to track the basic blocks. 0905 /// \param [in,out] BBList - A list in order of use to track the basic blocks. 0906 void getBasicBlocks(DenseSet<BasicBlock *> &BBSet, 0907 SmallVector<BasicBlock *> &BBList) const { 0908 for (IRInstructionData &ID : *this) { 0909 BasicBlock *BB = ID.Inst->getParent(); 0910 if (BBSet.insert(BB).second) 0911 BBList.push_back(BB); 0912 } 0913 } 0914 0915 /// Compare the start and end indices of the two IRSimilarityCandidates for 0916 /// whether they overlap. If the start instruction of one 0917 /// IRSimilarityCandidate is less than the end instruction of the other, and 0918 /// the start instruction of one is greater than the start instruction of the 0919 /// other, they overlap. 0920 /// 0921 /// \returns true if the IRSimilarityCandidates do not have overlapping 0922 /// instructions. 0923 static bool overlap(const IRSimilarityCandidate &A, 0924 const IRSimilarityCandidate &B); 0925 0926 /// \returns the number of instructions in this Candidate. 0927 unsigned getLength() const { return Len; } 0928 0929 /// \returns the start index of this IRSimilarityCandidate. 0930 unsigned getStartIdx() const { return StartIdx; } 0931 0932 /// \returns the end index of this IRSimilarityCandidate. 0933 unsigned getEndIdx() const { return StartIdx + Len - 1; } 0934 0935 /// \returns The first IRInstructionData. 0936 IRInstructionData *front() const { return FirstInst; } 0937 /// \returns The last IRInstructionData. 0938 IRInstructionData *back() const { return LastInst; } 0939 0940 /// \returns The first Instruction. 0941 Instruction *frontInstruction() { return FirstInst->Inst; } 0942 /// \returns The last Instruction 0943 Instruction *backInstruction() { return LastInst->Inst; } 0944 0945 /// \returns The BasicBlock the IRSimilarityCandidate starts in. 0946 BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); } 0947 /// \returns The BasicBlock the IRSimilarityCandidate ends in. 0948 BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } 0949 0950 /// \returns The Function that the IRSimilarityCandidate is located in. 0951 Function *getFunction() { return getStartBB()->getParent(); } 0952 0953 /// Finds the positive number associated with \p V if it has been mapped. 0954 /// \param [in] V - the Value to find. 0955 /// \returns The positive number corresponding to the value. 0956 /// \returns std::nullopt if not present. 0957 std::optional<unsigned> getGVN(Value *V) { 0958 assert(V != nullptr && "Value is a nullptr?"); 0959 DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V); 0960 if (VNIt == ValueToNumber.end()) 0961 return std::nullopt; 0962 return VNIt->second; 0963 } 0964 0965 /// Finds the Value associate with \p Num if it exists. 0966 /// \param [in] Num - the number to find. 0967 /// \returns The Value associated with the number. 0968 /// \returns std::nullopt if not present. 0969 std::optional<Value *> fromGVN(unsigned Num) { 0970 DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num); 0971 if (VNIt == NumberToValue.end()) 0972 return std::nullopt; 0973 assert(VNIt->second != nullptr && "Found value is a nullptr!"); 0974 return VNIt->second; 0975 } 0976 0977 /// Find the canonical number from the global value number \p N stored in the 0978 /// candidate. 0979 /// 0980 /// \param N - The global value number to find the canonical number for. 0981 /// \returns An optional containing the value, and std::nullopt if it could 0982 /// not be found. 0983 std::optional<unsigned> getCanonicalNum(unsigned N) { 0984 DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N); 0985 if (NCIt == NumberToCanonNum.end()) 0986 return std::nullopt; 0987 return NCIt->second; 0988 } 0989 0990 /// Find the global value number from the canonical number \p N stored in the 0991 /// candidate. 0992 /// 0993 /// \param N - The canonical number to find the global vlaue number for. 0994 /// \returns An optional containing the value, and std::nullopt if it could 0995 /// not be found. 0996 std::optional<unsigned> fromCanonicalNum(unsigned N) { 0997 DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N); 0998 if (CNIt == CanonNumToNumber.end()) 0999 return std::nullopt; 1000 return CNIt->second; 1001 } 1002 1003 /// \param RHS -The IRSimilarityCandidate to compare against 1004 /// \returns true if the IRSimilarityCandidate is occurs after the 1005 /// IRSimilarityCandidate in the program. 1006 bool operator<(const IRSimilarityCandidate &RHS) const { 1007 return getStartIdx() > RHS.getStartIdx(); 1008 } 1009 1010 using iterator = IRInstructionDataList::iterator; 1011 iterator begin() const { return iterator(front()); } 1012 iterator end() const { return std::next(iterator(back())); } 1013 }; 1014 1015 typedef DenseMap<IRSimilarityCandidate *, 1016 DenseMap<unsigned, DenseSet<unsigned>>> 1017 CandidateGVNMapping; 1018 typedef std::vector<IRSimilarityCandidate> SimilarityGroup; 1019 typedef std::vector<SimilarityGroup> SimilarityGroupList; 1020 1021 /// This class puts all the pieces of the IRInstructionData, 1022 /// IRInstructionMapper, IRSimilarityCandidate together. 1023 /// 1024 /// It first feeds the Module or vector of Modules into the IRInstructionMapper, 1025 /// and puts all the mapped instructions into a single long list of 1026 /// IRInstructionData. 1027 /// 1028 /// The list of unsigned integers is given to the Suffix Tree or similar data 1029 /// structure to find repeated subsequences. We construct an 1030 /// IRSimilarityCandidate for each instance of the subsequence. We compare them 1031 /// against one another since These repeated subsequences can have different 1032 /// structure. For each different kind of structure found, we create a 1033 /// similarity group. 1034 /// 1035 /// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are 1036 /// structurally similar to one another, while C is different we would have two 1037 /// SimilarityGroups: 1038 /// 1039 /// SimilarityGroup 1: SimilarityGroup 2 1040 /// A, B, D C 1041 /// 1042 /// A list of the different similarity groups is then returned after 1043 /// analyzing the module. 1044 class IRSimilarityIdentifier { 1045 public: 1046 IRSimilarityIdentifier(bool MatchBranches = true, 1047 bool MatchIndirectCalls = true, 1048 bool MatchCallsWithName = false, 1049 bool MatchIntrinsics = true, 1050 bool MatchMustTailCalls = true) 1051 : Mapper(&InstDataAllocator, &InstDataListAllocator), 1052 EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls), 1053 EnableMatchingCallsByName(MatchCallsWithName), 1054 EnableIntrinsics(MatchIntrinsics), 1055 EnableMustTailCalls(MatchMustTailCalls) {} 1056 1057 private: 1058 /// Map the instructions in the module to unsigned integers, using mapping 1059 /// already present in the Mapper if possible. 1060 /// 1061 /// \param [in] M Module - To map to integers. 1062 /// \param [in,out] InstrList - The vector to append IRInstructionData to. 1063 /// \param [in,out] IntegerMapping - The vector to append integers to. 1064 void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList, 1065 std::vector<unsigned> &IntegerMapping); 1066 1067 /// Map the instructions in the modules vector to unsigned integers, using 1068 /// mapping already present in the mapper if possible. 1069 /// 1070 /// \param [in] Modules - The list of modules to use to populate the mapper 1071 /// \param [in,out] InstrList - The vector to append IRInstructionData to. 1072 /// \param [in,out] IntegerMapping - The vector to append integers to. 1073 void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules, 1074 std::vector<IRInstructionData *> &InstrList, 1075 std::vector<unsigned> &IntegerMapping); 1076 1077 /// Find the similarity candidates in \p InstrList and corresponding 1078 /// \p UnsignedVec 1079 /// 1080 /// \param [in,out] InstrList - The vector to append IRInstructionData to. 1081 /// \param [in,out] IntegerMapping - The vector to append integers to. 1082 /// candidates found in the program. 1083 void findCandidates(std::vector<IRInstructionData *> &InstrList, 1084 std::vector<unsigned> &IntegerMapping); 1085 1086 public: 1087 // Find the IRSimilarityCandidates in the \p Modules and group by structural 1088 // similarity in a SimilarityGroup, each group is returned in a 1089 // SimilarityGroupList. 1090 // 1091 // \param [in] Modules - the modules to analyze. 1092 // \returns The groups of similarity ranges found in the modules. 1093 SimilarityGroupList & 1094 findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules); 1095 1096 // Find the IRSimilarityCandidates in the given Module grouped by structural 1097 // similarity in a SimilarityGroup, contained inside a SimilarityGroupList. 1098 // 1099 // \param [in] M - the module to analyze. 1100 // \returns The groups of similarity ranges found in the module. 1101 SimilarityGroupList &findSimilarity(Module &M); 1102 1103 // Clears \ref SimilarityCandidates if it is already filled by a previous run. 1104 void resetSimilarityCandidates() { 1105 // If we've already analyzed a Module or set of Modules, so we must clear 1106 // the SimilarityCandidates to make sure we do not have only old values 1107 // hanging around. 1108 if (SimilarityCandidates) 1109 SimilarityCandidates->clear(); 1110 else 1111 SimilarityCandidates = SimilarityGroupList(); 1112 } 1113 1114 // \returns The groups of similarity ranges found in the most recently passed 1115 // set of modules. 1116 std::optional<SimilarityGroupList> &getSimilarity() { 1117 return SimilarityCandidates; 1118 } 1119 1120 private: 1121 /// The allocator for IRInstructionData. 1122 SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; 1123 1124 /// The allocator for IRInstructionDataLists. 1125 SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator; 1126 1127 /// Map Instructions to unsigned integers and wraps the Instruction in an 1128 /// instance of IRInstructionData. 1129 IRInstructionMapper Mapper; 1130 1131 /// The flag variable that marks whether we should check branches for 1132 /// similarity, or only look within basic blocks. 1133 bool EnableBranches = true; 1134 1135 /// The flag variable that marks whether we allow indirect calls to be checked 1136 /// for similarity, or exclude them as a legal instruction. 1137 bool EnableIndirectCalls = true; 1138 1139 /// The flag variable that marks whether we allow calls to be marked as 1140 /// similar if they do not have the same name, only the same calling 1141 /// convention, attributes and type signature. 1142 bool EnableMatchingCallsByName = true; 1143 1144 /// The flag variable that marks whether we should check intrinsics for 1145 /// similarity. 1146 bool EnableIntrinsics = true; 1147 1148 // The flag variable that marks whether we should allow tailcalls 1149 // to be checked for similarity. 1150 bool EnableMustTailCalls = false; 1151 1152 /// The SimilarityGroups found with the most recent run of \ref 1153 /// findSimilarity. std::nullopt if there is no recent run. 1154 std::optional<SimilarityGroupList> SimilarityCandidates; 1155 }; 1156 1157 } // end namespace IRSimilarity 1158 1159 /// An analysis pass based on legacy pass manager that runs and returns 1160 /// IRSimilarityIdentifier run on the Module. 1161 class IRSimilarityIdentifierWrapperPass : public ModulePass { 1162 std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI; 1163 1164 public: 1165 static char ID; 1166 IRSimilarityIdentifierWrapperPass(); 1167 1168 IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; } 1169 const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; } 1170 1171 bool doInitialization(Module &M) override; 1172 bool doFinalization(Module &M) override; 1173 bool runOnModule(Module &M) override; 1174 void getAnalysisUsage(AnalysisUsage &AU) const override { 1175 AU.setPreservesAll(); 1176 } 1177 }; 1178 1179 /// An analysis pass that runs and returns the IRSimilarityIdentifier run on the 1180 /// Module. 1181 class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> { 1182 public: 1183 typedef IRSimilarity::IRSimilarityIdentifier Result; 1184 1185 Result run(Module &M, ModuleAnalysisManager &); 1186 1187 private: 1188 friend AnalysisInfoMixin<IRSimilarityAnalysis>; 1189 static AnalysisKey Key; 1190 }; 1191 1192 /// Printer pass that uses \c IRSimilarityAnalysis. 1193 class IRSimilarityAnalysisPrinterPass 1194 : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> { 1195 raw_ostream &OS; 1196 1197 public: 1198 explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} 1199 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 1200 static bool isRequired() { return true; } 1201 }; 1202 1203 } // end namespace llvm 1204 1205 #endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|