Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:37:06

0001 //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // We wish to encode a SourceLocation that comes from another module file
0010 // without depending on that module file's contents, so that source location
0011 // changes there do not affect the contents of the current module file. Users
0012 // then don't need to recompile the whole project because of a new line in a
0013 // module unit at the root of the dependency graph.
0014 //
0015 // To achieve this, we need to encode the index of the module file into the
0016 // encoding of the source location. The encoding of the source location may be:
0017 //
0018 //      |-----------------------|-----------------------|
0019 //      |          A            |         B         | C |
0020 //
0021 //  * A: 32 bit. The index of the module file in the module manager + 1. The +1
0022 //  here is necessary since we wish 0 stands for the current module file.
0023 //  * B: 31 bit. The offset of the source location to the module file containing
0024 //  it.
0025 //  * C: The macro bit. We rotate it to the lowest bit so that we can save some
0026 //  space in case the index of the module file is 0.
0027 //
0028 // Specially, if the index of the module file is 0, a sequence of locations can
0029 // be encoded by storing only the differences between successive elements.
0030 //
0031 //===----------------------------------------------------------------------===//
0032 
0033 #include "clang/Basic/SourceLocation.h"
0034 #include "llvm/Support/MathExtras.h"
0035 #include <climits>
0036 
0037 #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
0038 #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
0039 
0040 namespace clang {
0041 class SourceLocationSequence;
0042 
0043 /// Serialized encoding of SourceLocations without context.
0044 /// Optimized to have small unsigned values (=> small after VBR encoding).
0045 ///
0046 // Macro locations have the top bit set, we rotate by one so it is the low bit.
0047 class SourceLocationEncoding {
0048   using UIntTy = SourceLocation::UIntTy;
0049   constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
0050 
0051   static UIntTy encodeRaw(UIntTy Raw) {
0052     return (Raw << 1) | (Raw >> (UIntBits - 1));
0053   }
0054   static UIntTy decodeRaw(UIntTy Raw) {
0055     return (Raw >> 1) | (Raw << (UIntBits - 1));
0056   }
0057   friend SourceLocationSequence;
0058 
0059 public:
0060   using RawLocEncoding = uint64_t;
0061 
0062   static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
0063                                unsigned BaseModuleFileIndex,
0064                                SourceLocationSequence * = nullptr);
0065   static std::pair<SourceLocation, unsigned>
0066   decode(RawLocEncoding, SourceLocationSequence * = nullptr);
0067 };
0068 
0069 /// Serialized encoding of a sequence of SourceLocations.
0070 ///
0071 /// Optimized to produce small values when locations with the sequence are
0072 /// similar. Each element can be delta-encoded against the last nonzero element.
0073 ///
0074 /// Sequences should be started by creating a SourceLocationSequence::State,
0075 /// and then passed around as SourceLocationSequence*. Example:
0076 ///
0077 ///   // establishes a sequence
0078 ///   void EmitTopLevelThing() {
0079 ///     SourceLocationSequence::State Seq;
0080 ///     EmitContainedThing(Seq);
0081 ///     EmitRecursiveThing(Seq);
0082 ///   }
0083 ///
0084 ///   // optionally part of a sequence
0085 ///   void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
0086 ///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
0087 ///   }
0088 ///
0089 ///   // establishes a sequence if there isn't one already
0090 ///   void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
0091 ///     SourceLocationSequence::State Seq(ParentSeq);
0092 ///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
0093 ///     EmitRecursiveThing(Seq);
0094 ///   }
0095 ///
0096 class SourceLocationSequence {
0097   using UIntTy = SourceLocation::UIntTy;
0098   using EncodedTy = uint64_t;
0099   constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
0100   static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
0101 
0102   // Prev stores the rotated last nonzero location.
0103   UIntTy &Prev;
0104 
0105   // Zig-zag encoding turns small signed integers into small unsigned integers.
0106   // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
0107   static UIntTy zigZag(UIntTy V) {
0108     UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
0109     return Sign ^ (V << 1);
0110   }
0111   static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
0112 
0113   SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
0114 
0115   EncodedTy encodeRaw(UIntTy Raw) {
0116     if (Raw == 0)
0117       return 0;
0118     UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
0119     if (Prev == 0)
0120       return Prev = Rotated;
0121     UIntTy Delta = Rotated - Prev;
0122     Prev = Rotated;
0123     // Exactly one 33 bit value is possible! (1 << 32).
0124     // This is because we have two representations of zero: trivial & relative.
0125     return 1 + EncodedTy{zigZag(Delta)};
0126   }
0127   UIntTy decodeRaw(EncodedTy Encoded) {
0128     if (Encoded == 0)
0129       return 0;
0130     if (Prev == 0)
0131       return SourceLocationEncoding::decodeRaw(Prev = Encoded);
0132     return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
0133   }
0134 
0135 public:
0136   SourceLocation decode(EncodedTy Encoded) {
0137     return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
0138   }
0139   EncodedTy encode(SourceLocation Loc) {
0140     return encodeRaw(Loc.getRawEncoding());
0141   }
0142 
0143   class State;
0144 };
0145 
0146 /// This object establishes a SourceLocationSequence.
0147 class SourceLocationSequence::State {
0148   UIntTy Prev = 0;
0149   SourceLocationSequence Seq;
0150 
0151 public:
0152   // If Parent is provided and non-null, then this root becomes part of that
0153   // enclosing sequence instead of establishing a new one.
0154   State(SourceLocationSequence *Parent = nullptr)
0155       : Seq(Parent ? Parent->Prev : Prev) {}
0156 
0157   // Implicit conversion for uniform use of roots vs propagated sequences.
0158   operator SourceLocationSequence *() { return &Seq; }
0159 };
0160 
0161 inline SourceLocationEncoding::RawLocEncoding
0162 SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
0163                                unsigned BaseModuleFileIndex,
0164                                SourceLocationSequence *Seq) {
0165   // If the source location is a local source location, we can try to optimize
0166   // the similar sequences to only record the differences.
0167   if (!BaseOffset)
0168     return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
0169 
0170   if (Loc.isInvalid())
0171     return 0;
0172 
0173   // Otherwise, the higher bits are used to store the module file index,
0174   // so it is meaningless to optimize the source locations into small
0175   // integers. Let's try to always use the raw encodings.
0176   assert(Loc.getOffset() >= BaseOffset);
0177   Loc = Loc.getLocWithOffset(-BaseOffset);
0178   RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
0179 
0180   // 16 bits should be sufficient to store the module file index.
0181   assert(BaseModuleFileIndex < (1 << 16));
0182   Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
0183   return Encoded;
0184 }
0185 inline std::pair<SourceLocation, unsigned>
0186 SourceLocationEncoding::decode(RawLocEncoding Encoded,
0187                                SourceLocationSequence *Seq) {
0188   unsigned ModuleFileIndex = Encoded >> 32;
0189 
0190   if (!ModuleFileIndex)
0191     return {Seq ? Seq->decode(Encoded)
0192                 : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
0193             ModuleFileIndex};
0194 
0195   Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
0196   SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
0197 
0198   return {Loc, ModuleFileIndex};
0199 }
0200 
0201 } // namespace clang
0202 #endif