Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:14:57

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP
0024 
0025 
0026 #include <xercesc/parsers/AbstractDOMParser.hpp>
0027 #include <xercesc/validators/common/Grammar.hpp>
0028 
0029 XERCES_CPP_NAMESPACE_BEGIN
0030 
0031 
0032 class EntityResolver;
0033 class ErrorHandler;
0034 class XMLEntityResolver;
0035 class XMLResourceIdentifier;
0036 
0037  /**
0038   * This class implements the Document Object Model (DOM) interface.
0039   * It should be used by applications which choose to parse and
0040   * process the XML document using the DOM APIs. This implementation
0041   * also allows the applications to install an error and an entity
0042   * handler (useful extensions to the DOM specification).
0043   *
0044   * <p>It can be used to instantiate a validating or non-validating
0045   * parser, by setting a member flag.</p>
0046   */
0047 class PARSERS_EXPORT XercesDOMParser : public AbstractDOMParser
0048 {
0049 public :
0050     // -----------------------------------------------------------------------
0051     //  Constructors and Destructor
0052     // -----------------------------------------------------------------------
0053 
0054     /** @name Constructors and Destructor */
0055     //@{
0056     /** Construct a XercesDOMParser, with an optional validator
0057       *
0058       * Constructor with an instance of validator class to use for
0059       * validation. If you don't provide a validator, a default one will
0060       * be created for you in the scanner.
0061       *
0062       * @param valToAdopt Pointer to the validator instance to use. The
0063       *                   parser is responsible for freeing the memory.
0064       *
0065       * @param manager    Pointer to the memory manager to be used to
0066       *                   allocate objects.
0067       *
0068       * @param gramPool   Pointer to the grammar pool instance from the
0069       *                   external application. The parser does NOT own it.
0070       */
0071     XercesDOMParser
0072     (
0073           XMLValidator* const   valToAdopt = 0
0074         , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
0075         , XMLGrammarPool* const gramPool = 0
0076     );
0077 
0078     /**
0079       * Destructor
0080       */
0081     virtual ~XercesDOMParser();
0082 
0083     //@}
0084 
0085 
0086     // -----------------------------------------------------------------------
0087     //  Getter methods
0088     // -----------------------------------------------------------------------
0089 
0090     /** @name Getter methods */
0091     //@{
0092 
0093     /** Get a pointer to the error handler
0094       *
0095       * This method returns the installed error handler. If no handler
0096       * has been installed, then it will be a zero pointer.
0097       *
0098       * @return The pointer to the installed error handler object.
0099       */
0100     ErrorHandler* getErrorHandler();
0101 
0102     /** Get a const pointer to the error handler
0103       *
0104       * This method returns the installed error handler.  If no handler
0105       * has been installed, then it will be a zero pointer.
0106       *
0107       * @return A const pointer to the installed error handler object.
0108       */
0109     const ErrorHandler* getErrorHandler() const;
0110 
0111     /** Get a pointer to the entity resolver
0112       *
0113       * This method returns the installed entity resolver.  If no resolver
0114       * has been installed, then it will be a zero pointer.
0115       *
0116       * @return The pointer to the installed entity resolver object.
0117       */
0118     EntityResolver* getEntityResolver();
0119 
0120     /** Get a const pointer to the entity resolver
0121       *
0122       * This method returns the installed entity resolver. If no resolver
0123       * has been installed, then it will be a zero pointer.
0124       *
0125       * @return A const pointer to the installed entity resolver object.
0126       */
0127     const EntityResolver* getEntityResolver() const;
0128 
0129     /**
0130       * Get a pointer to the XML entity resolver
0131       *
0132       * This method returns the installed XML entity resolver.  If no resolver
0133       * has been installed, then it will be a zero pointer.
0134       *
0135       * @return The pointer to the installed XML entity resolver object.
0136       */
0137     XMLEntityResolver* getXMLEntityResolver();
0138 
0139     /**
0140       * Get a const pointer to the XML entity resolver
0141       *
0142       * This method returns the installed XML entity resolver. If no resolver
0143       * has been installed, then it will be a zero pointer.
0144       *
0145       * @return A const pointer to the installed XML entity resolver object.
0146       */
0147     const XMLEntityResolver* getXMLEntityResolver() const;
0148 
0149     /** Get the 'Grammar caching' flag
0150       *
0151       * This method returns the state of the parser's grammar caching when
0152       * parsing an XML document.
0153       *
0154       * @return true, if the parser is currently configured to
0155       *         cache grammars, false otherwise.
0156       *
0157       * @see #cacheGrammarFromParse
0158       */
0159     bool isCachingGrammarFromParse() const;
0160 
0161     /** Get the 'Use cached grammar' flag
0162       *
0163       * This method returns the state of the parser's use of cached grammar
0164       * when parsing an XML document.
0165       *
0166       * @return true, if the parser is currently configured to
0167       *         use cached grammars, false otherwise.
0168       *
0169       * @see #useCachedGrammarInParse
0170       */
0171     bool isUsingCachedGrammarInParse() const;
0172 
0173     /**
0174      * Retrieve the grammar that is associated with the specified namespace key
0175      *
0176      * @param  nameSpaceKey Namespace key
0177      * @return Grammar associated with the Namespace key.
0178      */
0179     Grammar* getGrammar(const XMLCh* const nameSpaceKey);
0180 
0181     /**
0182      * Retrieve the grammar where the root element is declared.
0183      *
0184      * @return Grammar where root element declared
0185      */
0186     Grammar* getRootGrammar();
0187 
0188     /**
0189      * Returns the string corresponding to a URI id from the URI string pool.
0190      *
0191      * @param uriId id of the string in the URI string pool.
0192      * @return URI string corresponding to the URI id.
0193      */
0194     const XMLCh* getURIText(unsigned int uriId) const;
0195 
0196     /**
0197      * Returns the current src offset within the input source.
0198      * To be used only while parsing is in progress.
0199      *
0200      * @return offset within the input source
0201      */
0202     XMLFilePos getSrcOffset() const;
0203 
0204     /** Get the 'ignore cached DTD grammar' flag
0205       *
0206       * @return true, if the parser is currently configured to
0207       *         ignore cached DTD, false otherwise.
0208       *
0209       * @see #setIgnoreCachedDTD
0210       */
0211     bool getIgnoreCachedDTD() const;
0212 
0213     //@}
0214 
0215 
0216     // -----------------------------------------------------------------------
0217     //  Setter methods
0218     // -----------------------------------------------------------------------
0219 
0220     /** @name Setter methods */
0221     //@{
0222 
0223     /** Set the error handler
0224       *
0225       * This method allows applications to install their own error handler
0226       * to trap error and warning messages.
0227       *
0228       * <i>Any previously set handler is merely dropped, since the parser
0229       * does not own them.</i>
0230       *
0231       * @param handler  A const pointer to the user supplied error
0232       *                 handler.
0233       *
0234       * @see #getErrorHandler
0235       */
0236     void setErrorHandler(ErrorHandler* const handler);
0237 
0238     /** Set the entity resolver
0239       *
0240       * This method allows applications to install their own entity
0241       * resolver. By installing an entity resolver, the applications
0242       * can trap and potentially redirect references to external
0243       * entities.
0244       *
0245       * <i>Any previously set entity resolver is merely dropped, since the parser
0246       * does not own them.  If both setEntityResolver and setXMLEntityResolver
0247       * are called, then the last one set is used.</i>
0248       *
0249       * @param handler  A const pointer to the user supplied entity
0250       *                 resolver.
0251       *
0252       * @see #getEntityResolver
0253       */
0254     void setEntityResolver(EntityResolver* const handler);
0255 
0256     /**
0257       * Set the XML entity resolver
0258       *
0259       * This method allows applications to install their own entity
0260       * resolver. By installing an entity resolver, the applications
0261       * can trap and potentially redirect references to external
0262       * entities.
0263       *
0264       * <i>Any previously set entity resolver is merely dropped, since the parser
0265       * does not own them.  If both setEntityResolver and setXMLEntityResolver
0266       * are called, then the last one set is used.</i>
0267       *
0268       * @param handler  A const pointer to the user supplied entity
0269       *                 resolver.
0270       *
0271       * @see #getXMLEntityResolver
0272       */
0273     void setXMLEntityResolver(XMLEntityResolver* const handler);
0274 
0275     /** Set the 'Grammar caching' flag
0276       *
0277       * This method allows users to enable or disable caching of grammar when
0278       * parsing XML documents. When set to true, the parser will cache the
0279       * resulting grammar for use in subsequent parses.
0280       *
0281       * If the flag is set to true, the 'Use cached grammar' flag will also be
0282       * set to true.
0283       *
0284       * The parser's default state is: false.
0285       *
0286       * @param newState The value specifying whether we should cache grammars
0287       *                 or not.
0288       *
0289       * @see #isCachingGrammarFromParse
0290       * @see #useCachedGrammarInParse
0291       */
0292     void cacheGrammarFromParse(const bool newState);
0293 
0294     /** Set the 'Use cached grammar' flag
0295       *
0296       * This method allows users to enable or disable the use of cached
0297       * grammars.  When set to true, the parser will use the cached grammar,
0298       * instead of building the grammar from scratch, to validate XML
0299       * documents.
0300       *
0301       * If the 'Grammar caching' flag is set to true, this method ignores the
0302       * value passed in.
0303       *
0304       * The parser's default state is: false.
0305       *
0306       * @param newState The value specifying whether we should use the cached
0307       *                 grammar or not.
0308       *
0309       * @see #isUsingCachedGrammarInParse
0310       * @see #cacheGrammarFromParse
0311       */
0312     void useCachedGrammarInParse(const bool newState);
0313 
0314     /** Set the 'ignore cached DTD grammar' flag
0315       *
0316       * This method gives users the option to ignore a cached DTD grammar, when
0317       * an XML document contains both an internal and external DTD, and the use
0318       * cached grammar from parse option is enabled. Currently, we do not allow
0319       * using cached DTD grammar when an internal subset is present in the
0320       * document. This option will only affect the behavior of the parser when
0321       * an internal and external DTD both exist in a document (i.e. no effect
0322       * if document has no internal subset).
0323       *
0324       * The parser's default state is false
0325       *
0326       * @param newValue The state to set
0327       */
0328     void setIgnoreCachedDTD(const bool newValue);
0329 
0330     //@}
0331 
0332     // -----------------------------------------------------------------------
0333     //  Utility methods
0334     // -----------------------------------------------------------------------
0335 
0336     /** @name Utility methods */
0337     //@{
0338     /** Reset the documents vector pool and release all the associated memory
0339       * back to the system.
0340       *
0341       * When parsing a document using a DOM parser, all memory allocated
0342       * for a DOM tree is associated to the DOM document.
0343       *
0344       * If you do multiple parses using the same DOM parser instance, then
0345       * multiple DOM documents will be generated and saved in a vector pool.
0346       * All these documents (and thus all the allocated memory)
0347       * won't be deleted until the parser instance is destroyed.
0348       *
0349       * If you don't need these DOM documents anymore and don't want to
0350       * destroy the DOM parser instance at this moment, then you can call this method
0351       * to reset the document vector pool and release all the allocated memory
0352       * back to the system.
0353       *
0354       * It is an error to call this method if you are in the middle of a
0355       * parse (e.g. in the middle of a progressive parse).
0356       *
0357       * @exception IOException An exception from the parser if this function
0358       *            is called when a parse is in progress.
0359       *
0360       */
0361     void resetDocumentPool();
0362 
0363     //@}
0364 
0365     // -----------------------------------------------------------------------
0366     //  Implementation of the XMLErrorReporter interface.
0367     // -----------------------------------------------------------------------
0368 
0369     /** @name Implementation of the XMLErrorReporter interface. */
0370     //@{
0371 
0372     /** Handle errors reported from the parser
0373       *
0374       * This method is used to report back errors found while parsing the
0375       * XML file. This method is also borrowed from the SAX specification.
0376       * It calls the corresponding user installed Error Handler method:
0377       * 'fatal', 'error', 'warning' depending on the severity of the error.
0378       * This classification is defined by the XML specification.
0379       *
0380       * @param errCode An integer code for the error.
0381       * @param msgDomain A const pointer to a Unicode string representing
0382       *                  the message domain to use.
0383       * @param errType An enumeration classifying the severity of the error.
0384       * @param errorText A const pointer to a Unicode string representing
0385       *                  the text of the error message.
0386       * @param systemId  A const pointer to a Unicode string representing
0387       *                  the system id of the XML file where this error
0388       *                  was discovered.
0389       * @param publicId  A const pointer to a Unicode string representing
0390       *                  the public id of the XML file where this error
0391       *                  was discovered.
0392       * @param lineNum   The line number where the error occurred.
0393       * @param colNum    The column number where the error occurred.
0394       * @see ErrorHandler
0395       */
0396     virtual void error
0397     (
0398         const   unsigned int                errCode
0399         , const XMLCh* const                msgDomain
0400         , const XMLErrorReporter::ErrTypes  errType
0401         , const XMLCh* const                errorText
0402         , const XMLCh* const                systemId
0403         , const XMLCh* const                publicId
0404         , const XMLFileLoc                  lineNum
0405         , const XMLFileLoc                  colNum
0406     );
0407 
0408     /** Reset any error data before a new parse
0409       *
0410       * This method allows the user installed Error Handler callback to
0411       * 'reset' itself.
0412       *
0413       * <b>This method is a no-op for this DOM
0414       * implementation.</b>
0415       */
0416     virtual void resetErrors();
0417     //@}
0418 
0419 
0420     // -----------------------------------------------------------------------
0421     //  Implementation of the XMLEntityHandler interface.
0422     // -----------------------------------------------------------------------
0423 
0424     /** @name Implementation of the XMLEntityHandler interface. */
0425     //@{
0426 
0427     /** Handle an end of input source event
0428       *
0429       * This method is used to indicate the end of parsing of an external
0430       * entity file.
0431       *
0432       * <b>This method is a no-op for this DOM
0433       * implementation.</b>
0434       *
0435       * @param inputSource A const reference to the InputSource object
0436       *                    which points to the XML file being parsed.
0437       * @see InputSource
0438       */
0439     virtual void endInputSource(const InputSource& inputSource);
0440 
0441     /** Expand a system id
0442       *
0443       * This method allows an installed XMLEntityHandler to further
0444       * process any system id's of external entities encountered in
0445       * the XML file being parsed, such as redirection etc.
0446       *
0447       * <b>This method always returns 'false'
0448       * for this DOM implementation.</b>
0449       *
0450       * @param systemId  A const pointer to a Unicode string representing
0451       *                  the system id scanned by the parser.
0452       * @param toFill    A pointer to a buffer in which the application
0453       *                  processed system id is stored.
0454       * @return 'true', if any processing is done, 'false' otherwise.
0455       */
0456     virtual bool expandSystemId
0457     (
0458         const   XMLCh* const    systemId
0459         ,       XMLBuffer&      toFill
0460     );
0461 
0462     /** Reset any entity handler information
0463       *
0464       * This method allows the installed XMLEntityHandler to reset
0465       * itself.
0466       *
0467       * <b>This method is a no-op for this DOM
0468       * implementation.</b>
0469       */
0470     virtual void resetEntities();
0471 
0472     /** Resolve a public/system id
0473       *
0474       * This method allows a user installed entity handler to further
0475       * process any pointers to external entities. The applications can
0476       * implement 'redirection' via this callback.
0477       *
0478       * @param resourceIdentifier An object containing the type of
0479       *        resource to be resolved and the associated data members
0480       *        corresponding to this type.
0481       * @return The value returned by the user installed resolveEntity
0482       *         method or NULL otherwise to indicate no processing was done.
0483       *         The returned InputSource is owned by the parser which is
0484       *         responsible to clean up the memory.
0485       * @see XMLEntityHandler
0486       * @see XMLEntityResolver
0487       */
0488     virtual InputSource* resolveEntity
0489     (
0490         XMLResourceIdentifier* resourceIdentifier
0491     );
0492 
0493     /** Handle a 'start input source' event
0494       *
0495       * This method is used to indicate the start of parsing an external
0496       * entity file.
0497       *
0498       * <b>This method is a no-op for this DOM parser
0499       * implementation.</b>
0500       *
0501       * @param inputSource A const reference to the InputSource object
0502       *                    which points to the external entity
0503       *                    being parsed.
0504       */
0505     virtual void startInputSource(const InputSource& inputSource);
0506 
0507     //@}
0508 
0509     // -----------------------------------------------------------------------
0510     //  Grammar preparsing interface
0511     // -----------------------------------------------------------------------
0512 
0513     /** @name Implementation of the Grammar preparsing interface. */
0514     //@{
0515     /**
0516       * Preparse schema grammar (XML Schema, DTD, etc.) via an input source
0517       * object.
0518       *
0519       * This method invokes the preparsing process on a schema grammar XML
0520       * file specified by the SAX InputSource parameter. If the 'toCache' flag
0521       * is enabled, the parser will cache the grammars for re-use. If a grammar
0522       * key is found in the pool, no caching of any grammar will take place.
0523       *
0524       *
0525       * @param source A const reference to the SAX InputSource object which
0526       *               points to the schema grammar file to be preparsed.
0527       * @param grammarType The grammar type (Schema or DTD).
0528       * @param toCache If <code>true</code>, we cache the preparsed grammar,
0529       *                otherwise, no caching. Default is <code>false</code>.
0530       * @return The preparsed schema grammar object (SchemaGrammar or
0531       *         DTDGrammar). That grammar object is owned by the parser.
0532       *
0533       * @exception SAXException Any SAX exception, possibly
0534       *            wrapping another exception.
0535       * @exception XMLException An exception from the parser or client
0536       *            handler code.
0537       * @exception DOMException A DOM exception as per DOM spec.
0538       *
0539       * @see InputSource#InputSource
0540       */
0541     Grammar* loadGrammar(const InputSource& source,
0542                          const Grammar::GrammarType grammarType,
0543                          const bool toCache = false);
0544 
0545     /**
0546       * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
0547       *
0548       * This method invokes the preparsing process on a schema grammar XML
0549       * file specified by the file path parameter. If the 'toCache' flag
0550       * is enabled, the parser will cache the grammars for re-use. If a grammar
0551       * key is found in the pool, no caching of any grammar will take place.
0552       *
0553       *
0554       * @param systemId A const XMLCh pointer to the Unicode string which
0555       *                 contains the path to the XML grammar file to be
0556       *                 preparsed.
0557       * @param grammarType The grammar type (Schema or DTD).
0558       * @param toCache If <code>true</code>, we cache the preparsed grammar,
0559       *                otherwise, no caching. Default is <code>false</code>.
0560       * @return The preparsed schema grammar object (SchemaGrammar or
0561       *         DTDGrammar). That grammar object is owned by the parser.
0562       *
0563       * @exception SAXException Any SAX exception, possibly
0564       *            wrapping another exception.
0565       * @exception XMLException An exception from the parser or client
0566       *            handler code.
0567       * @exception DOMException A DOM exception as per DOM spec.
0568       */
0569     Grammar* loadGrammar(const XMLCh* const systemId,
0570                          const Grammar::GrammarType grammarType,
0571                          const bool toCache = false);
0572 
0573     /**
0574       * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
0575       *
0576       * This method invokes the preparsing process on a schema grammar XML
0577       * file specified by the file path parameter. If the 'toCache' flag
0578       * is enabled, the parser will cache the grammars for re-use. If a grammar
0579       * key is found in the pool, no caching of any grammar will take place.
0580       *
0581       *
0582       * @param systemId A const char pointer to a native string which contains
0583       *                 the path to the XML grammar file to be preparsed.
0584       * @param grammarType The grammar type (Schema or DTD).
0585       * @param toCache If <code>true</code>, we cache the preparsed grammar,
0586       *                otherwise, no caching. Default is <code>false</code>.
0587       * @return The preparsed schema grammar object (SchemaGrammar or
0588       *         DTDGrammar). That grammar object is owned by the parser.
0589       *
0590       * @exception SAXException Any SAX exception, possibly
0591       *            wrapping another exception.
0592       * @exception XMLException An exception from the parser or client
0593       *            handler code.
0594       * @exception DOMException A DOM exception as per DOM spec.
0595       */
0596     Grammar* loadGrammar(const char* const systemId,
0597                          const Grammar::GrammarType grammarType,
0598                          const bool toCache = false);
0599 
0600     /**
0601       * This method allows the user to reset the pool of cached grammars.
0602       */
0603     void resetCachedGrammarPool();
0604 
0605     //@}
0606 
0607 
0608 private :
0609     // -----------------------------------------------------------------------
0610     //  Initialize/Cleanup methods
0611     // -----------------------------------------------------------------------
0612     void resetParse();
0613 
0614     // -----------------------------------------------------------------------
0615     //  Unimplemented copy constructor and assignment operator (kept private)
0616     // -----------------------------------------------------------------------
0617     XercesDOMParser(const XercesDOMParser&);
0618     XercesDOMParser& operator=(const XercesDOMParser&);
0619 
0620     // -----------------------------------------------------------------------
0621     //  Private data members
0622     //  fEntityResolver
0623     //      The installed SAX entity resolver, if any. Null if none.
0624     //  fXMLEntityResolver
0625     //      The installed XML entity resolver, if any. Null if none.
0626     //  fErrorHandler
0627     //      The installed SAX error handler, if any. Null if none.
0628     //-----------------------------------------------------------------------
0629     EntityResolver*          fEntityResolver;
0630     XMLEntityResolver*       fXMLEntityResolver;
0631     ErrorHandler*            fErrorHandler;
0632 };
0633 
0634 
0635 
0636 // ---------------------------------------------------------------------------
0637 //  XercesDOMParser: Handlers for the XMLEntityHandler interface
0638 // ---------------------------------------------------------------------------
0639 inline void XercesDOMParser::endInputSource(const InputSource&)
0640 {
0641     // Intentionally empty: this parser takes no action when an input source ends
0642 }
0643 
0644 inline bool XercesDOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
0645 {
0646     // No expansion is performed here; 'false' reports that nothing was processed
0647     return false;
0648 }
0649 
0650 inline void XercesDOMParser::resetEntities()
0651 {
0652     // Intentionally empty: nothing to reset in this implementation
0653 }
0654 
0655 inline void XercesDOMParser::startInputSource(const InputSource&)
0656 {
0657     // Intentionally empty: this parser takes no action when an input source starts
0658 }
0659 
0660 
0661 // ---------------------------------------------------------------------------
0662 //  XercesDOMParser: Getter methods
0663 // ---------------------------------------------------------------------------
0664 inline ErrorHandler* XercesDOMParser::getErrorHandler()
0665 {
0666     return fErrorHandler;   // installed error handler; zero pointer if none was installed
0667 }
0668 
0669 inline const ErrorHandler* XercesDOMParser::getErrorHandler() const
0670 {
0671     return fErrorHandler;   // read-only view of the installed error handler; zero pointer if none
0672 }
0673 
0674 inline EntityResolver* XercesDOMParser::getEntityResolver()
0675 {
0676     return fEntityResolver;   // installed SAX entity resolver; zero pointer if none was installed
0677 }
0678 
0679 inline const EntityResolver* XercesDOMParser::getEntityResolver() const
0680 {
0681     return fEntityResolver;   // read-only view of the installed SAX entity resolver; zero pointer if none
0682 }
0683 
0684 inline XMLEntityResolver* XercesDOMParser::getXMLEntityResolver()
0685 {
0686     return fXMLEntityResolver;   // installed XML entity resolver; zero pointer if none was installed
0687 }
0688 
0689 inline const XMLEntityResolver* XercesDOMParser::getXMLEntityResolver() const
0690 {
0691     return fXMLEntityResolver;   // read-only view of the installed XML entity resolver; zero pointer if none
0692 }
0693 
0694 XERCES_CPP_NAMESPACE_END
0695 
0696 #endif