Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:34:30

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  * 
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  * 
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_DFACONTENTMODEL_HPP)
0023 #define XERCESC_INCLUDE_GUARD_DFACONTENTMODEL_HPP
0024 
0025 #include <xercesc/util/XercesDefs.hpp>
0026 #include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
0027 #include <xercesc/framework/XMLContentModel.hpp>
0028 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
0029 
0030 XERCES_CPP_NAMESPACE_BEGIN
0031 
0032 class ContentSpecNode;
0033 class CMLeaf;
0034 class CMRepeatingLeaf;
0035 class CMNode;
0036 class CMStateSet;
0037 
0038 //
0039 //  DFAContentModel is the heavy weight derivative of ContentModel that does
0040 //  all of the non-trivial element content validation. This guy does the full
0041 //  bore regular expression to DFA conversion to create a DFA that it then
0042 //  uses in its validation algorithm.
0043 //
0044 //  NOTE:   Upstream work insures that this guy will never see a content model
0045 //          with PCDATA in it. Any model with PCDATA is 'mixed' and is handled
0046 //          via the MixedContentModel class, since mixed models are very
0047 //          constrained in form and easily handled via a special case. This
0048 //          also makes our life much easier here.
0049 //
0050 class DFAContentModel : public XMLContentModel
0051 {
0052 public:
0053     // -----------------------------------------------------------------------
0054     //  Constructors and Destructor
0055     // -----------------------------------------------------------------------
0056     DFAContentModel
0057     ( 
0058           const bool             dtd
0059         , ContentSpecNode* const elemContentSpec
0060         , MemoryManager* const   manager = XMLPlatformUtils::fgMemoryManager
0061     );
0062     DFAContentModel
0063     (
0064           const bool             dtd
0065         , ContentSpecNode* const elemContentSpec
0066         , const bool             isMixed
0067         , MemoryManager* const   manager
0068     );
0069 
0070     virtual ~DFAContentModel();
0071 
0072 
0073     // -----------------------------------------------------------------------
0074     //  Implementation of the virtual content model interface
0075     // -----------------------------------------------------------------------
0076     virtual bool validateContent
0077     (
0078         QName** const         children
0079       , XMLSize_t             childCount
0080       , unsigned int          emptyNamespaceId
0081       , XMLSize_t*            indexFailingChild
0082       , MemoryManager*  const manager = XMLPlatformUtils::fgMemoryManager
0083     ) const;
0084 
0085     virtual bool validateContentSpecial
0086     (
0087         QName** const           children
0088       , XMLSize_t               childCount
0089       , unsigned int            emptyNamespaceId
0090       , GrammarResolver*  const pGrammarResolver
0091       , XMLStringPool*    const pStringPool
0092       , XMLSize_t*              indexFailingChild
0093       , MemoryManager*    const manager = XMLPlatformUtils::fgMemoryManager
0094     ) const;
0095 
0096     virtual void checkUniqueParticleAttribution
0097     (
0098         SchemaGrammar*    const pGrammar
0099       , GrammarResolver*  const pGrammarResolver
0100       , XMLStringPool*    const pStringPool
0101       , XMLValidator*     const pValidator
0102       , unsigned int*     const pContentSpecOrgURI
0103       , const XMLCh*            pComplexTypeName = 0
0104     ) ;
0105 
0106     virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ;
0107 
0108     virtual unsigned int getNextState(unsigned int currentState,
0109                                       XMLSize_t    elementIndex) const;
0110 
0111     virtual bool handleRepetitions( const QName* const curElem,
0112                                     unsigned int curState,
0113                                     unsigned int currentLoop,
0114                                     unsigned int& nextState,
0115                                     unsigned int& nextLoop,
0116                                     XMLSize_t elementIndex,
0117                                     SubstitutionGroupComparator * comparator) const;
0118 
0119 private :
0120     // -----------------------------------------------------------------------
0121     //  Unimplemented constructors and operators
0122     // -----------------------------------------------------------------------
0123     DFAContentModel();
0124     DFAContentModel(const DFAContentModel&);
0125     DFAContentModel& operator=(const DFAContentModel&);
0126 
0127 
0128     // -----------------------------------------------------------------------
0129     //  Private helper methods
0130     // -----------------------------------------------------------------------
0131     void cleanup();
0132     void buildDFA(ContentSpecNode* const curNode);
0133     CMNode* buildSyntaxTree(ContentSpecNode* const curNode, unsigned int& curIndex);
0134     unsigned int* makeDefStateList() const;
0135     unsigned int countLeafNodes(ContentSpecNode* const curNode);
0136 
0137     class Occurence : public XMemory
0138     {
0139     public:
0140         Occurence(int minOcc, int maxOcc, int eltIndex);
0141 
0142         int minOccurs;
0143         int maxOccurs;
0144         int elemIndex;
0145     };
0146 
0147     // -----------------------------------------------------------------------
0148     //  Private data members
0149     //
0150     //  fElemMap
0151     //  fElemMapSize
0152     //      This is the map of unique input symbol elements to indices into
0153     //      each state's per-input symbol transition table entry. This is part
0154     //      of the built DFA information that must be kept around to do the
0155     //      actual validation.
0156     //
0157     //  fElemMapType
0158     //      This is a map of whether the element map contains information
0159     //      related to ANY models.
0160     //
0161     //  fEmptyOk
0162     //      This is an optimization. While building the transition table we
0163     //      can see whether this content model would approve of an empty
0164     //      content (which could happen if everything was optional.) So we
0165     //      set this flag and short circuit that check, which would otherwise
0166     //      be ugly and time consuming if we tried to determine it at each
0167     //      validation call.
0168     //
0169     //  fEOCPos
0170     //      The NFA position of the special EOC (end of content) node. This
0171     //      is saved away since its used during the DFA build.
0172     //
0173     //  fFinalStateFlags
0174     //      This is an array of booleans, one per state (there are
0175     //      fTransTableSize states in the DFA) that indicates whether that
0176     //      state is a final state.
0177     //
0178     //  fFollowList
0179     //      The list of follow positions for each NFA position (i.e. for each
0180     //      non-epsilon leaf node.) This is only used during the building of
0181     //      the DFA, and is let go afterwards.
0182     //
0183     //  fHeadNode
0184     //      This is the head node of our intermediate representation. It is
0185     //      only non-null during the building of the DFA (just so that it
0186     //      does not have to be passed all around.) Once the DFA is built,
0187     //      this is no longer required so its deleted.
0188     //
0189     //  fLeafCount
0190     //      The count of leaf nodes. This is an important number that set some
0191     //      limits on the sizes of data structures in the DFA process.
0192     //
0193     //  fLeafList
0194     //      An array of non-epsilon leaf nodes, which is used during the DFA
0195     //      build operation, then dropped. These are just references to nodes
0196     //      pointed to by fHeadNode, so we don't have to clean them up, just
0197     //      the actually leaf list array itself needs cleanup.
0198     //
0199     //  fLeafListType
0200     //      Array mapping ANY types to the leaf list.
0201     //
0202     //  fTransTable
0203     //  fTransTableSize
0204     //      This is the transition table that is the main by product of all
0205     //      of the effort here. It is an array of arrays of ints. The first
0206     //      dimension is the number of states we end up with in the DFA. The
0207     //      second dimensions is the number of unique elements in the content
0208     //      model (fElemMapSize). Each entry in the second dimension indicates
0209     //      the new state given that input for the first dimension's start
0210     //      state.
0211     //
0212     //      The fElemMap array handles mapping from element indexes to
0213     //      positions in the second dimension of the transition table.
0214     //
0215     //      fTransTableSize is the number of valid entries in the transition
0216     //      table, and in the other related tables such as fFinalStateFlags.
0217     //
0218     //  fCountingStates
0219     //      This is the table holding the minOccurs/maxOccurs for elements
0220     //      that can be repeated a finite number of times.
0221     //
0222     //  fDTD
0223     //      Boolean to allow DTDs to validate even with namespace support.
0224     //
0225     //  fIsMixed
0226     //      DFA ContentModel with mixed PCDATA.
0227     // -----------------------------------------------------------------------
0228     QName**                     fElemMap;
0229     ContentSpecNode::NodeTypes* fElemMapType;
0230     unsigned int                fElemMapSize;
0231     bool                        fEmptyOk;
0232     unsigned int                fEOCPos;
0233     bool*                       fFinalStateFlags;
0234     CMStateSet**                fFollowList;
0235     CMNode*                     fHeadNode;
0236     unsigned int                fLeafCount;
0237     CMLeaf**                    fLeafList;
0238     ContentSpecNode::NodeTypes* fLeafListType;
0239     unsigned int**              fTransTable;
0240     unsigned int                fTransTableSize;
0241     Occurence**                 fCountingStates;
0242     bool                        fDTD;
0243     bool                        fIsMixed;
0244     ContentLeafNameTypeVector * fLeafNameTypeVector;
0245     MemoryManager*              fMemoryManager;
0246 };
0247 
0248 
0249 inline unsigned int
0250 DFAContentModel::getNextState(unsigned int currentState,
0251                               XMLSize_t    elementIndex) const {
0252 
0253     if (currentState == XMLContentModel::gInvalidTrans) {
0254         return XMLContentModel::gInvalidTrans;
0255     }
0256 
0257     if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) {
0258         ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
0259     }
0260 
0261     return fTransTable[currentState][elementIndex];
0262 }
0263 
0264 inline
0265 DFAContentModel::Occurence::Occurence(int minOcc, int maxOcc, int eltIndex)
0266 {
0267     minOccurs = minOcc;
0268     maxOccurs = maxOcc;
0269     elemIndex = eltIndex;
0270 }
0271 
0272 XERCES_CPP_NAMESPACE_END
0273 
0274 #endif
0275