|
||||
File indexing completed on 2025-01-18 10:14:57
0001 /* 0002 * Licensed to the Apache Software Foundation (ASF) under one or more 0003 * contributor license agreements. See the NOTICE file distributed with 0004 * this work for additional information regarding copyright ownership. 0005 * The ASF licenses this file to You under the Apache License, Version 2.0 0006 * (the "License"); you may not use this file except in compliance with 0007 * the License. You may obtain a copy of the License at 0008 * 0009 * http://www.apache.org/licenses/LICENSE-2.0 0010 * 0011 * Unless required by applicable law or agreed to in writing, software 0012 * distributed under the License is distributed on an "AS IS" BASIS, 0013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0014 * See the License for the specific language governing permissions and 0015 * limitations under the License. 0016 */ 0017 0018 /* 0019 * $Id$ 0020 */ 0021 0022 #if !defined(XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP) 0023 #define XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP 0024 0025 0026 #include <xercesc/parsers/AbstractDOMParser.hpp> 0027 #include <xercesc/validators/common/Grammar.hpp> 0028 0029 XERCES_CPP_NAMESPACE_BEGIN 0030 0031 0032 class EntityResolver; 0033 class ErrorHandler; 0034 class XMLEntityResolver; 0035 class XMLResourceIdentifier; 0036 0037 /** 0038 * This class implements the Document Object Model (DOM) interface. 0039 * It should be used by applications which choose to parse and 0040 * process the XML document using the DOM api's. This implementation 0041 * also allows the applications to install an error and an entity 0042 * handler (useful extensions to the DOM specification). 0043 * 0044 * <p>It can be used to instantiate a validating or non-validating 0045 * parser, by setting a member flag.</p> 0046 */ 0047 class PARSERS_EXPORT XercesDOMParser : public AbstractDOMParser 0048 { 0049 public : 0050 // ----------------------------------------------------------------------- 0051 // Constructors and Destructor 0052 // ----------------------------------------------------------------------- 0053 0054 /** @name Constructors and Destructor */ 0055 //@{ 0056 /** Construct a XercesDOMParser, with an optional validator 0057 * 0058 * Constructor with an instance of validator class to use for 0059 * validation. If you don't provide a validator, a default one will 0060 * be created for you in the scanner. 0061 * 0062 * @param gramPool Pointer to the grammar pool instance from 0063 * external application. 0064 * The parser does NOT own it. 0065 * 0066 * @param valToAdopt Pointer to the validator instance to use. The 0067 * parser is responsible for freeing the memory. 0068 * @param manager Pointer to the memory manager to be used to 0069 * allocate objects. 0070 */ 0071 XercesDOMParser 0072 ( 0073 XMLValidator* const valToAdopt = 0 0074 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager 0075 , XMLGrammarPool* const gramPool = 0 0076 ); 0077 0078 /** 0079 * Destructor 0080 */ 0081 virtual ~XercesDOMParser(); 0082 0083 //@} 0084 0085 0086 // ----------------------------------------------------------------------- 0087 // Getter methods 0088 // ----------------------------------------------------------------------- 0089 0090 /** @name Getter methods */ 0091 //@{ 0092 0093 /** Get a pointer to the error handler 0094 * 0095 * This method returns the installed error handler. If no handler 0096 * has been installed, then it will be a zero pointer. 0097 * 0098 * @return The pointer to the installed error handler object. 0099 */ 0100 ErrorHandler* getErrorHandler(); 0101 0102 /** Get a const pointer to the error handler 0103 * 0104 * This method returns the installed error handler. If no handler 0105 * has been installed, then it will be a zero pointer. 0106 * 0107 * @return A const pointer to the installed error handler object. 0108 */ 0109 const ErrorHandler* getErrorHandler() const; 0110 0111 /** Get a pointer to the entity resolver 0112 * 0113 * This method returns the installed entity resolver. If no resolver 0114 * has been installed, then it will be a zero pointer. 0115 * 0116 * @return The pointer to the installed entity resolver object. 0117 */ 0118 EntityResolver* getEntityResolver(); 0119 0120 /** Get a const pointer to the entity resolver 0121 * 0122 * This method returns the installed entity resolver. If no resolver 0123 * has been installed, then it will be a zero pointer. 0124 * 0125 * @return A const pointer to the installed entity resolver object. 0126 */ 0127 const EntityResolver* getEntityResolver() const; 0128 0129 /** 0130 * Get a pointer to the entity resolver 0131 * 0132 * This method returns the installed entity resolver. If no resolver 0133 * has been installed, then it will be a zero pointer. 0134 * 0135 * @return The pointer to the installed entity resolver object. 0136 */ 0137 XMLEntityResolver* getXMLEntityResolver(); 0138 0139 /** 0140 * Get a const pointer to the entity resolver 0141 * 0142 * This method returns the installed entity resolver. If no resolver 0143 * has been installed, then it will be a zero pointer. 0144 * 0145 * @return A const pointer to the installed entity resolver object. 0146 */ 0147 const XMLEntityResolver* getXMLEntityResolver() const; 0148 0149 /** Get the 'Grammar caching' flag 0150 * 0151 * This method returns the state of the parser's grammar caching when 0152 * parsing an XML document. 0153 * 0154 * @return true, if the parser is currently configured to 0155 * cache grammars, false otherwise. 0156 * 0157 * @see #cacheGrammarFromParse 0158 */ 0159 bool isCachingGrammarFromParse() const; 0160 0161 /** Get the 'Use cached grammar' flag 0162 * 0163 * This method returns the state of the parser's use of cached grammar 0164 * when parsing an XML document. 0165 * 0166 * @return true, if the parser is currently configured to 0167 * use cached grammars, false otherwise. 0168 * 0169 * @see #useCachedGrammarInParse 0170 */ 0171 bool isUsingCachedGrammarInParse() const; 0172 0173 /** 0174 * Retrieve the grammar that is associated with the specified namespace key 0175 * 0176 * @param nameSpaceKey Namespace key 0177 * @return Grammar associated with the Namespace key. 0178 */ 0179 Grammar* getGrammar(const XMLCh* const nameSpaceKey); 0180 0181 /** 0182 * Retrieve the grammar where the root element is declared. 0183 * 0184 * @return Grammar where root element declared 0185 */ 0186 Grammar* getRootGrammar(); 0187 0188 /** 0189 * Returns the string corresponding to a URI id from the URI string pool. 0190 * 0191 * @param uriId id of the string in the URI string pool. 0192 * @return URI string corresponding to the URI id. 0193 */ 0194 const XMLCh* getURIText(unsigned int uriId) const; 0195 0196 /** 0197 * Returns the current src offset within the input source. 0198 * To be used only while parsing is in progress. 0199 * 0200 * @return offset within the input source 0201 */ 0202 XMLFilePos getSrcOffset() const; 0203 0204 /** Get the 'ignore cached DTD grammar' flag 0205 * 0206 * @return true, if the parser is currently configured to 0207 * ignore cached DTD, false otherwise. 0208 * 0209 * @see #setIgnoreCachedDTD 0210 */ 0211 bool getIgnoreCachedDTD() const; 0212 0213 //@} 0214 0215 0216 // ----------------------------------------------------------------------- 0217 // Setter methods 0218 // ----------------------------------------------------------------------- 0219 0220 /** @name Setter methods */ 0221 //@{ 0222 0223 /** Set the error handler 0224 * 0225 * This method allows applications to install their own error handler 0226 * to trap error and warning messages. 0227 * 0228 * <i>Any previously set handler is merely dropped, since the parser 0229 * does not own them.</i> 0230 * 0231 * @param handler A const pointer to the user supplied error 0232 * handler. 0233 * 0234 * @see #getErrorHandler 0235 */ 0236 void setErrorHandler(ErrorHandler* const handler); 0237 0238 /** Set the entity resolver 0239 * 0240 * This method allows applications to install their own entity 0241 * resolver. By installing an entity resolver, the applications 0242 * can trap and potentially redirect references to external 0243 * entities. 0244 * 0245 * <i>Any previously set entity resolver is merely dropped, since the parser 0246 * does not own them. If both setEntityResolver and setXMLEntityResolver 0247 * are called, then the last one is used.</i> 0248 * 0249 * @param handler A const pointer to the user supplied entity 0250 * resolver. 0251 * 0252 * @see #getEntityResolver 0253 */ 0254 void setEntityResolver(EntityResolver* const handler); 0255 0256 /** 0257 * Set the entity resolver 0258 * 0259 * This method allows applications to install their own entity 0260 * resolver. By installing an entity resolver, the applications 0261 * can trap and potentially redirect references to external 0262 * entities. 0263 * 0264 * <i>Any previously set entity resolver is merely dropped, since the parser 0265 * does not own them. If both setEntityResolver and setXMLEntityResolver 0266 * are called, then the last one set is used.</i> 0267 * 0268 * @param handler A const pointer to the user supplied entity 0269 * resolver. 0270 * 0271 * @see #getXMLEntityResolver 0272 */ 0273 void setXMLEntityResolver(XMLEntityResolver* const handler); 0274 0275 /** Set the 'Grammar caching' flag 0276 * 0277 * This method allows users to enable or disable caching of grammar when 0278 * parsing XML documents. When set to true, the parser will cache the 0279 * resulting grammar for use in subsequent parses. 0280 * 0281 * If the flag is set to true, the 'Use cached grammar' flag will also be 0282 * set to true. 0283 * 0284 * The parser's default state is: false. 0285 * 0286 * @param newState The value specifying whether we should cache grammars 0287 * or not. 0288 * 0289 * @see #isCachingGrammarFromParse 0290 * @see #useCachedGrammarInParse 0291 */ 0292 void cacheGrammarFromParse(const bool newState); 0293 0294 /** Set the 'Use cached grammar' flag 0295 * 0296 * This method allows users to enable or disable the use of cached 0297 * grammars. When set to true, the parser will use the cached grammar, 0298 * instead of building the grammar from scratch, to validate XML 0299 * documents. 0300 * 0301 * If the 'Grammar caching' flag is set to true, this method ignore the 0302 * value passed in. 0303 * 0304 * The parser's default state is: false. 0305 * 0306 * @param newState The value specifying whether we should use the cached 0307 * grammar or not. 0308 * 0309 * @see #isUsingCachedGrammarInParse 0310 * @see #cacheGrammarFromParse 0311 */ 0312 void useCachedGrammarInParse(const bool newState); 0313 0314 /** Set the 'ignore cached DTD grammar' flag 0315 * 0316 * This method gives users the option to ignore a cached DTD grammar, when 0317 * an XML document contains both an internal and external DTD, and the use 0318 * cached grammar from parse option is enabled. Currently, we do not allow 0319 * using cached DTD grammar when an internal subset is present in the 0320 * document. This option will only affect the behavior of the parser when 0321 * an internal and external DTD both exist in a document (i.e. no effect 0322 * if document has no internal subset). 0323 * 0324 * The parser's default state is false 0325 * 0326 * @param newValue The state to set 0327 */ 0328 void setIgnoreCachedDTD(const bool newValue); 0329 0330 //@} 0331 0332 // ----------------------------------------------------------------------- 0333 // Utility methods 0334 // ----------------------------------------------------------------------- 0335 0336 /** @name Utility methods */ 0337 //@{ 0338 /** Reset the documents vector pool and release all the associated memory 0339 * back to the system. 0340 * 0341 * When parsing a document using a DOM parser, all memory allocated 0342 * for a DOM tree is associated to the DOM document. 0343 * 0344 * If you do multiple parse using the same DOM parser instance, then 0345 * multiple DOM documents will be generated and saved in a vector pool. 0346 * All these documents (and thus all the allocated memory) 0347 * won't be deleted until the parser instance is destroyed. 0348 * 0349 * If you don't need these DOM documents anymore and don't want to 0350 * destroy the DOM parser instance at this moment, then you can call this method 0351 * to reset the document vector pool and release all the allocated memory 0352 * back to the system. 0353 * 0354 * It is an error to call this method if you are in the middle of a 0355 * parse (e.g. in the mid of a progressive parse). 0356 * 0357 * @exception IOException An exception from the parser if this function 0358 * is called when a parse is in progress. 0359 * 0360 */ 0361 void resetDocumentPool(); 0362 0363 //@} 0364 0365 // ----------------------------------------------------------------------- 0366 // Implementation of the XMLErrorReporter interface. 0367 // ----------------------------------------------------------------------- 0368 0369 /** @name Implementation of the XMLErrorReporter interface. */ 0370 //@{ 0371 0372 /** Handle errors reported from the parser 0373 * 0374 * This method is used to report back errors found while parsing the 0375 * XML file. This method is also borrowed from the SAX specification. 0376 * It calls the corresponding user installed Error Handler method: 0377 * 'fatal', 'error', 'warning' depending on the severity of the error. 0378 * This classification is defined by the XML specification. 0379 * 0380 * @param errCode An integer code for the error. 0381 * @param msgDomain A const pointer to an Unicode string representing 0382 * the message domain to use. 0383 * @param errType An enumeration classifying the severity of the error. 0384 * @param errorText A const pointer to an Unicode string representing 0385 * the text of the error message. 0386 * @param systemId A const pointer to an Unicode string representing 0387 * the system id of the XML file where this error 0388 * was discovered. 0389 * @param publicId A const pointer to an Unicode string representing 0390 * the public id of the XML file where this error 0391 * was discovered. 0392 * @param lineNum The line number where the error occurred. 0393 * @param colNum The column number where the error occurred. 0394 * @see ErrorHandler 0395 */ 0396 virtual void error 0397 ( 0398 const unsigned int errCode 0399 , const XMLCh* const msgDomain 0400 , const XMLErrorReporter::ErrTypes errType 0401 , const XMLCh* const errorText 0402 , const XMLCh* const systemId 0403 , const XMLCh* const publicId 0404 , const XMLFileLoc lineNum 0405 , const XMLFileLoc colNum 0406 ); 0407 0408 /** Reset any error data before a new parse 0409 * 0410 * This method allows the user installed Error Handler callback to 0411 * 'reset' itself. 0412 * 0413 * <b>This method is a no-op for this DOM 0414 * implementation.</b> 0415 */ 0416 virtual void resetErrors(); 0417 //@} 0418 0419 0420 // ----------------------------------------------------------------------- 0421 // Implementation of the XMLEntityHandler interface. 0422 // ----------------------------------------------------------------------- 0423 0424 /** @name Implementation of the XMLEntityHandler interface. */ 0425 //@{ 0426 0427 /** Handle an end of input source event 0428 * 0429 * This method is used to indicate the end of parsing of an external 0430 * entity file. 0431 * 0432 * <b>This method is a no-op for this DOM 0433 * implementation.</b> 0434 * 0435 * @param inputSource A const reference to the InputSource object 0436 * which points to the XML file being parsed. 0437 * @see InputSource 0438 */ 0439 virtual void endInputSource(const InputSource& inputSource); 0440 0441 /** Expand a system id 0442 * 0443 * This method allows an installed XMLEntityHandler to further 0444 * process any system id's of external entities encountered in 0445 * the XML file being parsed, such as redirection etc. 0446 * 0447 * <b>This method always returns 'false' 0448 * for this DOM implementation.</b> 0449 * 0450 * @param systemId A const pointer to an Unicode string representing 0451 * the system id scanned by the parser. 0452 * @param toFill A pointer to a buffer in which the application 0453 * processed system id is stored. 0454 * @return 'true', if any processing is done, 'false' otherwise. 0455 */ 0456 virtual bool expandSystemId 0457 ( 0458 const XMLCh* const systemId 0459 , XMLBuffer& toFill 0460 ); 0461 0462 /** Reset any entity handler information 0463 * 0464 * This method allows the installed XMLEntityHandler to reset 0465 * itself. 0466 * 0467 * <b>This method is a no-op for this DOM 0468 * implementation.</b> 0469 */ 0470 virtual void resetEntities(); 0471 0472 /** Resolve a public/system id 0473 * 0474 * This method allows a user installed entity handler to further 0475 * process any pointers to external entities. The applications can 0476 * implement 'redirection' via this callback. 0477 * 0478 * @param resourceIdentifier An object containing the type of 0479 * resource to be resolved and the associated data members 0480 * corresponding to this type. 0481 * @return The value returned by the user installed resolveEntity 0482 * method or NULL otherwise to indicate no processing was done. 0483 * The returned InputSource is owned by the parser which is 0484 * responsible to clean up the memory. 0485 * @see XMLEntityHandler 0486 * @see XMLEntityResolver 0487 */ 0488 virtual InputSource* resolveEntity 0489 ( 0490 XMLResourceIdentifier* resourceIdentifier 0491 ); 0492 0493 /** Handle a 'start input source' event 0494 * 0495 * This method is used to indicate the start of parsing an external 0496 * entity file. 0497 * 0498 * <b>This method is a no-op for this DOM parse 0499 * implementation.</b> 0500 * 0501 * @param inputSource A const reference to the InputSource object 0502 * which points to the external entity 0503 * being parsed. 0504 */ 0505 virtual void startInputSource(const InputSource& inputSource); 0506 0507 //@} 0508 0509 // ----------------------------------------------------------------------- 0510 // Grammar preparsing interface 0511 // ----------------------------------------------------------------------- 0512 0513 /** @name Implementation of Grammar preparsing interface's. */ 0514 //@{ 0515 /** 0516 * Preparse schema grammar (XML Schema, DTD, etc.) via an input source 0517 * object. 0518 * 0519 * This method invokes the preparsing process on a schema grammar XML 0520 * file specified by the SAX InputSource parameter. If the 'toCache' flag 0521 * is enabled, the parser will cache the grammars for re-use. If a grammar 0522 * key is found in the pool, no caching of any grammar will take place. 0523 * 0524 * 0525 * @param source A const reference to the SAX InputSource object which 0526 * points to the schema grammar file to be preparsed. 0527 * @param grammarType The grammar type (Schema or DTD). 0528 * @param toCache If <code>true</code>, we cache the preparsed grammar, 0529 * otherwise, no caching. Default is <code>false</code>. 0530 * @return The preparsed schema grammar object (SchemaGrammar or 0531 * DTDGrammar). That grammar object is owned by the parser. 0532 * 0533 * @exception SAXException Any SAX exception, possibly 0534 * wrapping another exception. 0535 * @exception XMLException An exception from the parser or client 0536 * handler code. 0537 * @exception DOMException A DOM exception as per DOM spec. 0538 * 0539 * @see InputSource#InputSource 0540 */ 0541 Grammar* loadGrammar(const InputSource& source, 0542 const Grammar::GrammarType grammarType, 0543 const bool toCache = false); 0544 0545 /** 0546 * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL 0547 * 0548 * This method invokes the preparsing process on a schema grammar XML 0549 * file specified by the file path parameter. If the 'toCache' flag 0550 * is enabled, the parser will cache the grammars for re-use. If a grammar 0551 * key is found in the pool, no caching of any grammar will take place. 0552 * 0553 * 0554 * @param systemId A const XMLCh pointer to the Unicode string which 0555 * contains the path to the XML grammar file to be 0556 * preparsed. 0557 * @param grammarType The grammar type (Schema or DTD). 0558 * @param toCache If <code>true</code>, we cache the preparsed grammar, 0559 * otherwise, no caching. Default is <code>false</code>. 0560 * @return The preparsed schema grammar object (SchemaGrammar or 0561 * DTDGrammar). That grammar object is owned by the parser. 0562 * 0563 * @exception SAXException Any SAX exception, possibly 0564 * wrapping another exception. 0565 * @exception XMLException An exception from the parser or client 0566 * handler code. 0567 * @exception DOMException A DOM exception as per DOM spec. 0568 */ 0569 Grammar* loadGrammar(const XMLCh* const systemId, 0570 const Grammar::GrammarType grammarType, 0571 const bool toCache = false); 0572 0573 /** 0574 * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL 0575 * 0576 * This method invokes the preparsing process on a schema grammar XML 0577 * file specified by the file path parameter. If the 'toCache' flag 0578 * is enabled, the parser will cache the grammars for re-use. If a grammar 0579 * key is found in the pool, no caching of any grammar will take place. 0580 * 0581 * 0582 * @param systemId A const char pointer to a native string which contains 0583 * the path to the XML grammar file to be preparsed. 0584 * @param grammarType The grammar type (Schema or DTD). 0585 * @param toCache If <code>true</code>, we cache the preparsed grammar, 0586 * otherwise, no caching. Default is <code>false</code>. 0587 * @return The preparsed schema grammar object (SchemaGrammar or 0588 * DTDGrammar). That grammar object is owned by the parser. 0589 * 0590 * @exception SAXException Any SAX exception, possibly 0591 * wrapping another exception. 0592 * @exception XMLException An exception from the parser or client 0593 * handler code. 0594 * @exception DOMException A DOM exception as per DOM spec. 0595 */ 0596 Grammar* loadGrammar(const char* const systemId, 0597 const Grammar::GrammarType grammarType, 0598 const bool toCache = false); 0599 0600 /** 0601 * This method allows the user to reset the pool of cached grammars. 0602 */ 0603 void resetCachedGrammarPool(); 0604 0605 //@} 0606 0607 0608 private : 0609 // ----------------------------------------------------------------------- 0610 // Initialize/Cleanup methods 0611 // ----------------------------------------------------------------------- 0612 void resetParse(); 0613 0614 // ----------------------------------------------------------------------- 0615 // Unimplemented constructors and operators 0616 // ----------------------------------------------------------------------- 0617 XercesDOMParser(const XercesDOMParser&); 0618 XercesDOMParser& operator=(const XercesDOMParser&); 0619 0620 // ----------------------------------------------------------------------- 0621 // Private data members 0622 // 0623 // fEntityResolver 0624 // The installed SAX entity resolver, if any. Null if none. 0625 // 0626 // fErrorHandler 0627 // The installed SAX error handler, if any. Null if none. 0628 //----------------------------------------------------------------------- 0629 EntityResolver* fEntityResolver; 0630 XMLEntityResolver* fXMLEntityResolver; 0631 ErrorHandler* fErrorHandler; 0632 }; 0633 0634 0635 0636 // --------------------------------------------------------------------------- 0637 // XercesDOMParser: Handlers for the XMLEntityHandler interface 0638 // --------------------------------------------------------------------------- 0639 inline void XercesDOMParser::endInputSource(const InputSource&) 0640 { 0641 // The DOM entity resolver doesn't handle this 0642 } 0643 0644 inline bool XercesDOMParser::expandSystemId(const XMLCh* const, XMLBuffer&) 0645 { 0646 // The DOM entity resolver doesn't handle this 0647 return false; 0648 } 0649 0650 inline void XercesDOMParser::resetEntities() 0651 { 0652 // Nothing to do on this one 0653 } 0654 0655 inline void XercesDOMParser::startInputSource(const InputSource&) 0656 { 0657 // The DOM entity resolver doesn't handle this 0658 } 0659 0660 0661 // --------------------------------------------------------------------------- 0662 // XercesDOMParser: Getter methods 0663 // --------------------------------------------------------------------------- 0664 inline ErrorHandler* XercesDOMParser::getErrorHandler() 0665 { 0666 return fErrorHandler; 0667 } 0668 0669 inline const ErrorHandler* XercesDOMParser::getErrorHandler() const 0670 { 0671 return fErrorHandler; 0672 } 0673 0674 inline EntityResolver* XercesDOMParser::getEntityResolver() 0675 { 0676 return fEntityResolver; 0677 } 0678 0679 inline const EntityResolver* XercesDOMParser::getEntityResolver() const 0680 { 0681 return fEntityResolver; 0682 } 0683 0684 inline XMLEntityResolver* XercesDOMParser::getXMLEntityResolver() 0685 { 0686 return fXMLEntityResolver; 0687 } 0688 0689 inline const XMLEntityResolver* XercesDOMParser::getXMLEntityResolver() const 0690 { 0691 return fXMLEntityResolver; 0692 } 0693 0694 XERCES_CPP_NAMESPACE_END 0695 0696 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |