Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:36

0001 //===- StringToOffsetTable.h - Emit a big concatenated string ---*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_TABLEGEN_STRINGTOOFFSETTABLE_H
0010 #define LLVM_TABLEGEN_STRINGTOOFFSETTABLE_H
0011 
0012 #include "llvm/ADT/SmallString.h"
0013 #include "llvm/ADT/StringExtras.h"
0014 #include "llvm/ADT/StringMap.h"
0015 #include "llvm/Support/FormatVariadic.h"
0016 #include "llvm/Support/raw_ostream.h"
0017 #include <optional>
0018 
0019 namespace llvm {
0020 
0021 /// StringToOffsetTable - This class uniques a bunch of nul-terminated strings
0022 /// and keeps track of their offset in a massive contiguous string allocation.
0023 /// It can then output this string blob and use indexes into the string to
0024 /// reference each piece.
0025 class StringToOffsetTable {
0026   StringMap<unsigned> StringOffset;
0027   std::string AggregateString;
0028 
0029 public:
0030   StringToOffsetTable() {
0031     // Ensure we always put the empty string at offset zero. That lets empty
0032     // initialization also be zero initialization for offsets into the table.
0033     GetOrAddStringOffset("");
0034   }
0035 
0036   bool empty() const { return StringOffset.empty(); }
0037   size_t size() const { return AggregateString.size(); }
0038 
0039   unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
0040     auto [II, Inserted] = StringOffset.insert({Str, size()});
0041     if (Inserted) {
0042       // Add the string to the aggregate if this is the first time found.
0043       AggregateString.append(Str.begin(), Str.end());
0044       if (appendZero)
0045         AggregateString += '\0';
0046     }
0047 
0048     return II->second;
0049   }
0050 
0051   // Returns the offset of `Str` in the table if its preset, else return
0052   // std::nullopt.
0053   std::optional<unsigned> GetStringOffset(StringRef Str) const {
0054     auto II = StringOffset.find(Str);
0055     if (II == StringOffset.end())
0056       return std::nullopt;
0057     return II->second;
0058   }
0059 
0060   // Emit a string table definition with the provided name and indent.
0061   //
0062   // When possible, this uses string-literal concatenation to emit the string
0063   // contents in a readable and searchable way. However, for (very) large string
0064   // tables MSVC cannot reliably use string literals and so there we use a large
0065   // character array. We still use a line oriented emission and add comments to
0066   // provide searchability even in this case.
0067   //
0068   // The string table, and its input string contents, are always emitted as both
0069   // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
0070   // valid identifiers to declare.
0071   void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
0072                           const Twine &Indent = "") const {
0073     OS << formatv(R"(
0074 #ifdef __GNUC__
0075 #pragma GCC diagnostic push
0076 #pragma GCC diagnostic ignored "-Woverlength-strings"
0077 #endif
0078 {0}static constexpr char {1}Storage[] = )",
0079                   Indent, Name);
0080 
0081     // MSVC silently miscompiles string literals longer than 64k in some
0082     // circumstances. When the string table is longer, emit it as an array of
0083     // character literals.
0084     bool UseChars = AggregateString.size() > (64 * 1024);
0085     OS << (UseChars ? "{\n" : "\n");
0086 
0087     llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
0088     llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
0089     // We should always have an empty string at the start, and because these are
0090     // null terminators rather than separators, we'll have one at the end as
0091     // well. Skip the end one.
0092     assert(Strings.front().empty() && "Expected empty initial string!");
0093     assert(Strings.back().empty() &&
0094            "Expected empty string at the end due to terminators!");
0095     Strings.pop_back();
0096     for (StringRef Str : Strings) {
0097       OS << LineSep << Indent << "  ";
0098       // If we can, just emit this as a string literal to be concatenated.
0099       if (!UseChars) {
0100         OS << "\"";
0101         OS.write_escaped(Str);
0102         OS << "\\0\"";
0103         continue;
0104       }
0105 
0106       llvm::ListSeparator CharSep(", ");
0107       for (char C : Str) {
0108         OS << CharSep << "'";
0109         OS.write_escaped(StringRef(&C, 1));
0110         OS << "'";
0111       }
0112       OS << CharSep << "'\\0'";
0113     }
0114     OS << LineSep << Indent << (UseChars ? "};" : "  ;");
0115 
0116     OS << formatv(R"(
0117 #ifdef __GNUC__
0118 #pragma GCC diagnostic pop
0119 #endif
0120 
0121 {0}static constexpr llvm::StringTable {1} =
0122 {0}    {1}Storage;
0123 )",
0124                   Indent, Name);
0125   }
0126 
0127   // Emit the string as one single string.
0128   void EmitString(raw_ostream &O) const {
0129     // Escape the string.
0130     SmallString<256> EscapedStr;
0131     raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
0132 
0133     O << "    \"";
0134     unsigned CharsPrinted = 0;
0135     for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
0136       if (CharsPrinted > 70) {
0137         O << "\"\n    \"";
0138         CharsPrinted = 0;
0139       }
0140       O << EscapedStr[i];
0141       ++CharsPrinted;
0142 
0143       // Print escape sequences all together.
0144       if (EscapedStr[i] != '\\')
0145         continue;
0146 
0147       assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
0148       if (isDigit(EscapedStr[i + 1])) {
0149         assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
0150                "Expected 3 digit octal escape!");
0151         O << EscapedStr[++i];
0152         O << EscapedStr[++i];
0153         O << EscapedStr[++i];
0154         CharsPrinted += 3;
0155       } else {
0156         O << EscapedStr[++i];
0157         ++CharsPrinted;
0158       }
0159     }
0160     O << "\"";
0161   }
0162 };
0163 
0164 } // end namespace llvm
0165 
0166 #endif