Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:14:55

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_IGXMLSCANNER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_IGXMLSCANNER_HPP
0024 
0025 #include <xercesc/internal/XMLScanner.hpp>
0026 #include <xercesc/util/KVStringPair.hpp>
0027 #include <xercesc/util/NameIdPool.hpp>
0028 #include <xercesc/util/RefHash2KeysTableOf.hpp>
0029 #include <xercesc/util/RefHash3KeysIdPool.hpp>
0030 #include <xercesc/util/Hash2KeysSetOf.hpp>
0031 #include <xercesc/validators/common/Grammar.hpp>
0032 #include <xercesc/validators/schema/SchemaInfo.hpp>
0033 #include <xercesc/validators/schema/SchemaElementDecl.hpp>
0034 
0035 XERCES_CPP_NAMESPACE_BEGIN
0036 
0037 class DTDElementDecl;
0038 class DTDGrammar;
0039 class DTDValidator;
0040 class SchemaValidator;
0041 class IdentityConstraintHandler;
0042 class IdentityConstraint;
0043 class ContentLeafNameTypeVector;
0044 class SchemaAttDef;
0045 class XMLContentModel;
0046 class XSModel;
0047 class PSVIAttributeList;
0048 class PSVIElement;
0049 
0050 //  This is an integrated scanner class, which does DTD/XML Schema grammar
0051 //  processing.
     //
     //  It derives publicly from XMLScanner and declares the XMLScanner virtual
     //  methods listed below. Only declarations appear inside the class body;
     //  getName(), toCheckIdentityConstraint() and getCurrentGrammarType() are
     //  defined inline after the class in this header.
0052 class XMLPARSER_EXPORT IGXMLScanner : public XMLScanner
0053 {
0054 public :
0055     // -----------------------------------------------------------------------
0056     //  Constructors and Destructor
         //
         //  Both constructors take a validator, a grammar resolver and a memory
         //  manager that defaults to XMLPlatformUtils::fgMemoryManager. The
         //  second overload additionally takes the document, doctype, entity
         //  and error handlers.
         //  NOTE(review): the validator parameter is named valToAdopt, so the
         //  scanner presumably takes ownership of it - confirm in the
         //  implementation file.
0057     // -----------------------------------------------------------------------
0058     IGXMLScanner
0059     (
0060         XMLValidator* const valToAdopt
0061         , GrammarResolver* const grammarResolver
0062         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
0063     );
0064     IGXMLScanner
0065     (
0066           XMLDocumentHandler* const docHandler
0067         , DocTypeHandler* const     docTypeHandler
0068         , XMLEntityHandler* const   entityHandler
0069         , XMLErrorReporter* const   errReporter
0070         , XMLValidator* const       valToAdopt
0071         , GrammarResolver* const    grammarResolver
0072         , MemoryManager* const      manager = XMLPlatformUtils::fgMemoryManager
0073     );
0074     virtual ~IGXMLScanner();
0075 
0076     // -----------------------------------------------------------------------
0077     //  XMLScanner public virtual methods
         //
         //  getEntityDeclPool() is declared in both a non-const and a const
         //  form. loadGrammar() receives the grammar type as a short, and its
         //  toCache flag defaults to false.
0078     // -----------------------------------------------------------------------
0079     virtual const XMLCh* getName() const;
0080     virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool();
0081     virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const;
0082     virtual void scanDocument
0083     (
0084         const   InputSource&    src
0085     );
0086     virtual bool scanNext(XMLPScanToken& toFill);
0087     virtual Grammar* loadGrammar
0088     (
0089         const   InputSource&    src
0090         , const short           grammarType
0091         , const bool            toCache = false
0092     );
0093 
0094     virtual void resetCachedGrammar ();
0095     virtual Grammar::GrammarType getCurrentGrammarType() const;
0096 
0097 private :
0098     // -----------------------------------------------------------------------
0099     //  Unimplemented constructors and operators
         //
         //  Declared private and, per the heading above, left without a
         //  definition, so the scanner cannot be default-constructed, copied
         //  or assigned by client code.
0100     // -----------------------------------------------------------------------
0101     IGXMLScanner();
0102     IGXMLScanner(const IGXMLScanner&);
0103     IGXMLScanner& operator=(const IGXMLScanner&);
0104 
0105     // -----------------------------------------------------------------------
0106     //  XMLScanner virtual methods
         //
         //  These are private here; they are reachable only through the
         //  XMLScanner interface or from within this class.
0107     // -----------------------------------------------------------------------
0108     virtual void scanCDSection();
0109     virtual void scanCharData(XMLBuffer& toToUse);
0110     virtual EntityExpRes scanEntityRef
0111     (
0112         const   bool    inAttVal
0113         ,       XMLCh&  firstCh
0114         ,       XMLCh&  secondCh
0115         ,       bool&   escaped
0116     );
0117     virtual void scanDocTypeDecl();
0118     virtual void scanReset(const InputSource& src);
0119     virtual void sendCharData(XMLBuffer& toSend);
0120     virtual InputSource* resolveSystemId(const XMLCh* const sysId
0121                                         ,const XMLCh* const pubId);
0122 
0123     // -----------------------------------------------------------------------
0124     //  Private helper methods
         //
         //  updateNSMap() has two overloads; the second also receives the
         //  colon position as an int. parseSchemaLocation() and
         //  resolveSchemaGrammar() both default ignoreLoadSchema to false.
0125     // -----------------------------------------------------------------------
0126     void commonInit();
0127     void cleanUp();
0128 
0129     XMLSize_t buildAttList
0130     (
0131         const   RefVectorOf<KVStringPair>&  providedAttrs
0132         , const XMLSize_t                   attCount
0133         ,       XMLElementDecl*             elemDecl
0134         ,       RefVectorOf<XMLAttr>&       toFill
0135     );
0136     bool normalizeAttValue
0137     (
0138         const   XMLAttDef* const    attDef
0139         , const XMLCh* const       name
0140         , const XMLCh* const        value
0141         ,       XMLBuffer&          toFill
0142     );
0143     bool normalizeAttRawValue
0144     (
0145         const   XMLCh* const        attrName
0146         , const XMLCh* const        value
0147         ,       XMLBuffer&          toFill
0148     );
0149     void updateNSMap
0150     (
0151         const   XMLCh* const    attrName
0152         , const XMLCh* const    attrValue
0153     );
0154     void updateNSMap
0155     (
0156         const   XMLCh* const    attrName
0157         , const XMLCh* const    attrValue
0158         , const int             colonPosition
0159     );
0160     void scanRawAttrListforNameSpaces(XMLSize_t attCount);
0161     void parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema = false);
0162     void resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema = false);
0163     bool switchGrammar(const XMLCh* const newGrammarNameSpace);
0164     bool laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
0165                               const XMLContentModel* const cm,
0166                               const XMLSize_t parentElemDepth);
0167     bool anyAttributeValidation(SchemaAttDef* attWildCard,
0168                                 unsigned int uriId,
0169                                 bool& skipThisOne,
0170                                 bool& laxThisOne);
0171     void resizeElemState();
0172     void processSchemaLocation(XMLCh* const schemaLoc);
0173 
0174     void resizeRawAttrColonList();
0175 
0176     // -----------------------------------------------------------------------
0177     //  Private scanning methods
         //
         //  Results come back through the reference parameters (toFill,
         //  isEmpty, gotData) in addition to the return values.
0178     // -----------------------------------------------------------------------
0179     bool basicAttrValueScan
0180     (
0181         const   XMLCh* const    attrName
0182         ,       XMLBuffer&      toFill
0183     );
0184     XMLSize_t rawAttrScan
0185     (
0186         const   XMLCh* const                elemName
0187         ,       RefVectorOf<KVStringPair>&  toFill
0188         ,       bool&                       isEmpty
0189     );
0190     bool scanAttValue
0191     (
0192         const   XMLAttDef* const    attDef
0193         , const   XMLCh* const      attrName
0194         ,       XMLBuffer&          toFill
0195     );
0196     bool scanContent();
0197     void scanEndTag(bool& gotData);
0198     bool scanStartTag(bool& gotData);
0199     bool scanStartTagNS(bool& gotData);
0200 
0201     // -----------------------------------------------------------------------
0202     //  IdentityConstraints Activation methods
         //
         //  toCheckIdentityConstraint() is defined inline after the class.
0203     // -----------------------------------------------------------------------
0204     inline bool toCheckIdentityConstraint()  const;
0205 
0206     // -----------------------------------------------------------------------
0207     //  Grammar preparsing methods
         //
         //  Both loaders default toCache to false.
0208     // -----------------------------------------------------------------------
0209     Grammar* loadXMLSchemaGrammar(const InputSource& src, const bool toCache = false);
0210     Grammar* loadDTDGrammar(const InputSource& src, const bool toCache = false);
0211 
0212     // -----------------------------------------------------------------------
0213     //  PSVI handling methods
0214     // -----------------------------------------------------------------------
0215     void endElementPSVI(SchemaElementDecl* const elemDecl,
0216                         DatatypeValidator* const memberDV);
0217     void resetPSVIElemContext();
0218 
0219     // -----------------------------------------------------------------------
0220     //  Data members
0221     //
0222     //  fRawAttrList
0223     //      During the initial scan of the attributes we can only do a raw
0224     //      scan for key/value pairs. So this vector is used to store them
0225     //      until they can be processed (and put into fAttrList.)
0226     //
0227     //  fDTDValidator
0228     //      The DTD validator instance.
0229     //
0230     //  fSchemaValidator
0231     //      The Schema validator instance.
0232     //
0233     //  fSeeXsi
0234     //      This flag indicates a schema has been seen.
0235     //
0236     //  fElemState
0237     //  fElemLoopState
0238     //  fElemStateSize
0239     //      Stores an element next state from DFA content model - used for
0240     //      wildcard validation
0241     //
0242     // fDTDElemNonDeclPool
0243     //      registry of "faulted-in" DTD element decls
0244     // fSchemaElemNonDeclPool
0245     //      registry for elements without decls in the grammar
0246     // fElemCount
0247     //      count of the number of start tags seen so far (starts at 1).
0248     //      Used for duplicate attribute detection/processing of required/defaulted attributes
0249     // fAttDefRegistry
0250     //      mapping from XMLAttDef instances to the count of the last
0251     //      start tag where they were utilized.
0252     // fUndeclaredAttrRegistry
0253     //      set of attr QNames to detect duplicates
0254     // fPSVIAttrList
0255     //      PSVI attribute list implementation that needs to be
0256     //      filled when a PSVIHandler is registered
0257     // fSchemaInfoList
0258     //      Transient schema info list that is passed to TraverseSchema instances.
0259     // fCachedSchemaInfoList
0260     //      Cached Schema info list that is passed to TraverseSchema instances.
0261     //
         //  fGrammarType
         //      The grammar type handed back by getCurrentGrammarType().
         //
         //  fICHandler
         //      Identity constraint handler. toCheckIdentityConstraint() yields
         //      true only when this pointer is non-null.
         //
         //  fRawAttrColonList
         //  fRawAttrColonListSize
         //      An int array and its size. NOTE(review): presumably the colon
         //      positions of the raw attribute names, grown by
         //      resizeRawAttrColonList() - confirm in the implementation file.
         //
         //  fContent, fDTDGrammar, fLocationPairs, fModel, fPSVIElement,
         //  fErrorStack, fPSVIElemContext
         //      Not described in this header; see the implementation file for
         //      how they are used.
         //
0262     // -----------------------------------------------------------------------
0263     bool                                    fSeeXsi;
0264     Grammar::GrammarType                    fGrammarType;
0265     unsigned int                            fElemStateSize;
0266     unsigned int*                           fElemState;
0267     unsigned int*                           fElemLoopState;
0268     XMLBuffer                               fContent;
0269     RefVectorOf<KVStringPair>*              fRawAttrList;
0270     unsigned int                            fRawAttrColonListSize;
0271     int*                                    fRawAttrColonList;
0272     DTDValidator*                           fDTDValidator;
0273     SchemaValidator*                        fSchemaValidator;
0274     DTDGrammar*                             fDTDGrammar;
0275     IdentityConstraintHandler*              fICHandler;
0276     ValueVectorOf<XMLCh*>*                  fLocationPairs;
0277     NameIdPool<DTDElementDecl>*             fDTDElemNonDeclPool;
0278     RefHash3KeysIdPool<SchemaElementDecl>*  fSchemaElemNonDeclPool;
0279     unsigned int                            fElemCount;
0280     RefHashTableOf<unsigned int, PtrHasher>*fAttDefRegistry;
0281     Hash2KeysSetOf<StringHasher>*           fUndeclaredAttrRegistry;
0282     PSVIAttributeList *                     fPSVIAttrList;
0283     XSModel*                                fModel;
0284     PSVIElement*                            fPSVIElement;
0285     ValueStackOf<bool>*                     fErrorStack;
0286     PSVIElemContext                         fPSVIElemContext;
0287     RefHash2KeysTableOf<SchemaInfo>*        fSchemaInfoList;
0288     RefHash2KeysTableOf<SchemaInfo>*        fCachedSchemaInfoList;
0289 };
0290 
0291 inline const XMLCh* IGXMLScanner::getName() const
0292 {
         // This scanner identifies itself with the fgIGXMLScanner constant
         // published by XMLUni.
0293     const XMLCh* const scannerName = XMLUni::fgIGXMLScanner;
         return scannerName;
0294 }
0295 
0296 inline bool IGXMLScanner::toCheckIdentityConstraint()  const
0297 {
         // Identity constraints are checked only when validation is on, the
         // identity-constraint checking flag is set, and a handler exists.
         // The flags are tested in the same order as a chained && would.
         if (!fValidate || !fIdentityConstraintChecking)
             return false;
0298     return fICHandler != 0;
0299 }
0300 
0301 inline Grammar::GrammarType IGXMLScanner::getCurrentGrammarType() const
0302 {
         // Plain accessor: hand back the stored grammar type by value.
0303     const Grammar::GrammarType currentType = fGrammarType;
         return currentType;
0304 }
0305 
0306 XERCES_CPP_NAMESPACE_END
0307 
0308 #endif