|
|
|||
File indexing completed on 2025-12-16 10:34:30
0001 /* 0002 * Licensed to the Apache Software Foundation (ASF) under one or more 0003 * contributor license agreements. See the NOTICE file distributed with 0004 * this work for additional information regarding copyright ownership. 0005 * The ASF licenses this file to You under the Apache License, Version 2.0 0006 * (the "License"); you may not use this file except in compliance with 0007 * the License. You may obtain a copy of the License at 0008 * 0009 * http://www.apache.org/licenses/LICENSE-2.0 0010 * 0011 * Unless required by applicable law or agreed to in writing, software 0012 * distributed under the License is distributed on an "AS IS" BASIS, 0013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0014 * See the License for the specific language governing permissions and 0015 * limitations under the License. 0016 */ 0017 0018 /* 0019 * $Id$ 0020 */ 0021 0022 #if !defined(XERCESC_INCLUDE_GUARD_DFACONTENTMODEL_HPP) 0023 #define XERCESC_INCLUDE_GUARD_DFACONTENTMODEL_HPP 0024 0025 #include <xercesc/util/XercesDefs.hpp> 0026 #include <xercesc/util/ArrayIndexOutOfBoundsException.hpp> 0027 #include <xercesc/framework/XMLContentModel.hpp> 0028 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp> 0029 0030 XERCES_CPP_NAMESPACE_BEGIN 0031 0032 class ContentSpecNode; 0033 class CMLeaf; 0034 class CMRepeatingLeaf; 0035 class CMNode; 0036 class CMStateSet; 0037 0038 // 0039 // DFAContentModel is the heavy weight derivative of ContentModel that does 0040 // all of the non-trivial element content validation. This guy does the full 0041 // bore regular expression to DFA conversion to create a DFA that it then 0042 // uses in its validation algorithm. 0043 // 0044 // NOTE: Upstream work insures that this guy will never see a content model 0045 // with PCDATA in it. Any model with PCDATA is 'mixed' and is handled 0046 // via the MixedContentModel class, since mixed models are very 0047 // constrained in form and easily handled via a special case. This 0048 // also makes our life much easier here. 0049 // 0050 class DFAContentModel : public XMLContentModel 0051 { 0052 public: 0053 // ----------------------------------------------------------------------- 0054 // Constructors and Destructor 0055 // ----------------------------------------------------------------------- 0056 DFAContentModel 0057 ( 0058 const bool dtd 0059 , ContentSpecNode* const elemContentSpec 0060 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 0061 ); 0062 DFAContentModel 0063 ( 0064 const bool dtd 0065 , ContentSpecNode* const elemContentSpec 0066 , const bool isMixed 0067 , MemoryManager* const manager 0068 ); 0069 0070 virtual ~DFAContentModel(); 0071 0072 0073 // ----------------------------------------------------------------------- 0074 // Implementation of the virtual content model interface 0075 // ----------------------------------------------------------------------- 0076 virtual bool validateContent 0077 ( 0078 QName** const children 0079 , XMLSize_t childCount 0080 , unsigned int emptyNamespaceId 0081 , XMLSize_t* indexFailingChild 0082 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 0083 ) const; 0084 0085 virtual bool validateContentSpecial 0086 ( 0087 QName** const children 0088 , XMLSize_t childCount 0089 , unsigned int emptyNamespaceId 0090 , GrammarResolver* const pGrammarResolver 0091 , XMLStringPool* const pStringPool 0092 , XMLSize_t* indexFailingChild 0093 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 0094 ) const; 0095 0096 virtual void checkUniqueParticleAttribution 0097 ( 0098 SchemaGrammar* const pGrammar 0099 , GrammarResolver* const pGrammarResolver 0100 , XMLStringPool* const pStringPool 0101 , XMLValidator* const pValidator 0102 , unsigned int* const pContentSpecOrgURI 0103 , const XMLCh* pComplexTypeName = 0 0104 ) ; 0105 0106 virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ; 0107 0108 virtual unsigned int getNextState(unsigned int currentState, 0109 XMLSize_t elementIndex) const; 0110 0111 virtual bool handleRepetitions( const QName* const curElem, 0112 unsigned int curState, 0113 unsigned int currentLoop, 0114 unsigned int& nextState, 0115 unsigned int& nextLoop, 0116 XMLSize_t elementIndex, 0117 SubstitutionGroupComparator * comparator) const; 0118 0119 private : 0120 // ----------------------------------------------------------------------- 0121 // Unimplemented constructors and operators 0122 // ----------------------------------------------------------------------- 0123 DFAContentModel(); 0124 DFAContentModel(const DFAContentModel&); 0125 DFAContentModel& operator=(const DFAContentModel&); 0126 0127 0128 // ----------------------------------------------------------------------- 0129 // Private helper methods 0130 // ----------------------------------------------------------------------- 0131 void cleanup(); 0132 void buildDFA(ContentSpecNode* const curNode); 0133 CMNode* buildSyntaxTree(ContentSpecNode* const curNode, unsigned int& curIndex); 0134 unsigned int* makeDefStateList() const; 0135 unsigned int countLeafNodes(ContentSpecNode* const curNode); 0136 0137 class Occurence : public XMemory 0138 { 0139 public: 0140 Occurence(int minOcc, int maxOcc, int eltIndex); 0141 0142 int minOccurs; 0143 int maxOccurs; 0144 int elemIndex; 0145 }; 0146 0147 // ----------------------------------------------------------------------- 0148 // Private data members 0149 // 0150 // fElemMap 0151 // fElemMapSize 0152 // This is the map of unique input symbol elements to indices into 0153 // each state's per-input symbol transition table entry. This is part 0154 // of the built DFA information that must be kept around to do the 0155 // actual validation. 0156 // 0157 // fElemMapType 0158 // This is a map of whether the element map contains information 0159 // related to ANY models. 0160 // 0161 // fEmptyOk 0162 // This is an optimization. While building the transition table we 0163 // can see whether this content model would approve of an empty 0164 // content (which could happen if everything was optional.) So we 0165 // set this flag and short circuit that check, which would otherwise 0166 // be ugly and time consuming if we tried to determine it at each 0167 // validation call. 0168 // 0169 // fEOCPos 0170 // The NFA position of the special EOC (end of content) node. This 0171 // is saved away since its used during the DFA build. 0172 // 0173 // fFinalStateFlags 0174 // This is an array of booleans, one per state (there are 0175 // fTransTableSize states in the DFA) that indicates whether that 0176 // state is a final state. 0177 // 0178 // fFollowList 0179 // The list of follow positions for each NFA position (i.e. for each 0180 // non-epsilon leaf node.) This is only used during the building of 0181 // the DFA, and is let go afterwards. 0182 // 0183 // fHeadNode 0184 // This is the head node of our intermediate representation. It is 0185 // only non-null during the building of the DFA (just so that it 0186 // does not have to be passed all around.) Once the DFA is built, 0187 // this is no longer required so its deleted. 0188 // 0189 // fLeafCount 0190 // The count of leaf nodes. This is an important number that set some 0191 // limits on the sizes of data structures in the DFA process. 0192 // 0193 // fLeafList 0194 // An array of non-epsilon leaf nodes, which is used during the DFA 0195 // build operation, then dropped. These are just references to nodes 0196 // pointed to by fHeadNode, so we don't have to clean them up, just 0197 // the actually leaf list array itself needs cleanup. 0198 // 0199 // fLeafListType 0200 // Array mapping ANY types to the leaf list. 0201 // 0202 // fTransTable 0203 // fTransTableSize 0204 // This is the transition table that is the main by product of all 0205 // of the effort here. It is an array of arrays of ints. The first 0206 // dimension is the number of states we end up with in the DFA. The 0207 // second dimensions is the number of unique elements in the content 0208 // model (fElemMapSize). Each entry in the second dimension indicates 0209 // the new state given that input for the first dimension's start 0210 // state. 0211 // 0212 // The fElemMap array handles mapping from element indexes to 0213 // positions in the second dimension of the transition table. 0214 // 0215 // fTransTableSize is the number of valid entries in the transition 0216 // table, and in the other related tables such as fFinalStateFlags. 0217 // 0218 // fCountingStates 0219 // This is the table holding the minOccurs/maxOccurs for elements 0220 // that can be repeated a finite number of times. 0221 // 0222 // fDTD 0223 // Boolean to allow DTDs to validate even with namespace support. 0224 // 0225 // fIsMixed 0226 // DFA ContentModel with mixed PCDATA. 0227 // ----------------------------------------------------------------------- 0228 QName** fElemMap; 0229 ContentSpecNode::NodeTypes* fElemMapType; 0230 unsigned int fElemMapSize; 0231 bool fEmptyOk; 0232 unsigned int fEOCPos; 0233 bool* fFinalStateFlags; 0234 CMStateSet** fFollowList; 0235 CMNode* fHeadNode; 0236 unsigned int fLeafCount; 0237 CMLeaf** fLeafList; 0238 ContentSpecNode::NodeTypes* fLeafListType; 0239 unsigned int** fTransTable; 0240 unsigned int fTransTableSize; 0241 Occurence** fCountingStates; 0242 bool fDTD; 0243 bool fIsMixed; 0244 ContentLeafNameTypeVector * fLeafNameTypeVector; 0245 MemoryManager* fMemoryManager; 0246 }; 0247 0248 0249 inline unsigned int 0250 DFAContentModel::getNextState(unsigned int currentState, 0251 XMLSize_t elementIndex) const { 0252 0253 if (currentState == XMLContentModel::gInvalidTrans) { 0254 return XMLContentModel::gInvalidTrans; 0255 } 0256 0257 if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) { 0258 ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager); 0259 } 0260 0261 return fTransTable[currentState][elementIndex]; 0262 } 0263 0264 inline 0265 DFAContentModel::Occurence::Occurence(int minOcc, int maxOcc, int eltIndex) 0266 { 0267 minOccurs = minOcc; 0268 maxOccurs = maxOcc; 0269 elemIndex = eltIndex; 0270 } 0271 0272 XERCES_CPP_NAMESPACE_END 0273 0274 #endif 0275
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|