Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-05-12 09:08:10

0001 #ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
0002 #define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
0003 
0004 #if defined(_MSC_VER) ||                                            \
0005     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
0006      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
0007 #pragma once
0008 #endif
0009 
0010 #include <cstddef>
0011 #include <ios>
0012 #include <map>
0013 #include <queue>
0014 #include <set>
0015 #include <stack>
0016 #include <string>
0017 
0018 #include "ptr_vector.h"
0019 #include "stream.h"
0020 #include "token.h"
0021 #include "ATOOLS/YAML/yaml-cpp/mark.h"
0022 
0023 namespace SHERPA_YAML {
0024 class Node;
0025 class RegEx;
0026 
0027 /**
0028  * A scanner transforms a stream of characters into a stream of tokens.
0029  */
0030 class Scanner {
0031  public:
0032   explicit Scanner(std::istream &in);
0033   ~Scanner();
0034 
0035   /** Returns true if there are no more tokens to be read. */
0036   bool empty();
0037 
0038   /** Removes the next token in the queue. */
0039   void pop();
0040 
0041   /** Returns, but does not remove, the next token in the queue. */
0042   Token &peek();
0043 
0044   /** Returns the current mark in the input stream. */
0045   Mark mark() const;
0046 
0047  private:
0048   struct IndentMarker {
0049     enum INDENT_TYPE { MAP, SEQ, NONE };
0050     enum STATUS { VALID, INVALID, UNKNOWN };
0051     IndentMarker(int column_, INDENT_TYPE type_)
0052         : column(column_), type(type_), status(VALID), pStartToken(nullptr) {}
0053 
0054     int column;
0055     INDENT_TYPE type;
0056     STATUS status;
0057     Token *pStartToken;
0058   };
0059 
0060   enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
0061 
0062  private:
0063   // scanning
0064 
0065   /**
0066    * Scans until there's a valid token at the front of the queue, or the queue
0067    * is empty. The state can be checked by {@link #empty}, and the next token
0068    * retrieved by {@link #peek}.
0069    */
0070   void EnsureTokensInQueue();
0071 
0072   /**
0073    * The main scanning function; this method branches out to scan whatever the
0074    * next token should be.
0075    */
0076   void ScanNextToken();
0077 
0078   /** Eats the input stream until it reaches the next token-like thing. */
0079   void ScanToNextToken();
0080 
0081   /** Sets the initial conditions for starting a stream. */
0082   void StartStream();
0083 
0084   /** Closes out the stream, finish up, etc. */
0085   void EndStream();
0086 
0087   Token *PushToken(Token::TYPE type);
0088 
0089   bool InFlowContext() const { return !m_flows.empty(); }
0090   bool InBlockContext() const { return m_flows.empty(); }
0091   std::size_t GetFlowLevel() const { return m_flows.size(); }
0092 
0093   Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
0094 
0095   /**
0096    * Pushes an indentation onto the stack, and enqueues the proper token
0097    * (sequence start or mapping start).
0098    *
0099    * @return the indent marker it generates (if any).
0100    */
0101   IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
0102 
0103   /**
0104    * Pops indentations off the stack until it reaches the current indentation
0105    * level, and enqueues the proper token each time. Then pops all invalid
0106    * indentations off.
0107    */
0108   void PopIndentToHere();
0109 
0110   /**
0111    * Pops all indentations (except for the base empty one) off the stack, and
0112    * enqueues the proper token each time.
0113    */
0114   void PopAllIndents();
0115 
0116   /** Pops a single indent, pushing the proper token. */
0117   void PopIndent();
0118   int GetTopIndent() const;
0119 
0120   // checking input
0121   bool CanInsertPotentialSimpleKey() const;
0122   bool ExistsActiveSimpleKey() const;
0123   void InsertPotentialSimpleKey();
0124   void InvalidateSimpleKey();
0125   bool VerifySimpleKey();
0126   void PopAllSimpleKeys();
0127 
0128   /**
0129    * Throws a ParserException with the current token location (if available),
0130    * and does not parse any more tokens.
0131    */
0132   void ThrowParserException(const std::string &msg) const;
0133 
0134   bool IsWhitespaceToBeEaten(char ch);
0135 
0136   /**
0137    * Returns the appropriate regex to check if the next token is a value token.
0138    */
0139   const RegEx &GetValueRegex() const;
0140 
0141   struct SimpleKey {
0142     SimpleKey(const Mark &mark_, std::size_t flowLevel_);
0143 
0144     void Validate();
0145     void Invalidate();
0146 
0147     Mark mark;
0148     std::size_t flowLevel;
0149     IndentMarker *pIndent;
0150     Token *pMapStart, *pKey;
0151   };
0152 
0153   // and the tokens
0154   void ScanDirective();
0155   void ScanDocStart();
0156   void ScanDocEnd();
0157   void ScanBlockSeqStart();
0158   void ScanBlockMapSTart();
0159   void ScanBlockEnd();
0160   void ScanBlockEntry();
0161   void ScanFlowStart();
0162   void ScanFlowEnd();
0163   void ScanFlowEntry();
0164   void ScanKey();
0165   void ScanValue();
0166   void ScanAnchorOrAlias();
0167   void ScanTag();
0168   void ScanPlainScalar();
0169   void ScanQuotedScalar();
0170   void ScanBlockScalar();
0171 
0172  private:
0173   // the stream
0174   Stream INPUT;
0175 
0176   // the output (tokens)
0177   std::queue<Token> m_tokens;
0178 
0179   // state info
0180   bool m_startedStream, m_endedStream;
0181   bool m_simpleKeyAllowed;
0182   bool m_canBeJSONFlow;
0183   std::stack<SimpleKey> m_simpleKeys;
0184   std::stack<IndentMarker *> m_indents;
0185   ptr_vector<IndentMarker> m_indentRefs;  // for "garbage collection"
0186   std::stack<FLOW_MARKER> m_flows;
0187 };
0188 }
0189 
0190 #endif  // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66