Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-03-13 09:30:14

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP
0024 
0025 #include <xercesc/util/PlatformUtils.hpp>
0026 
0027 XERCES_CPP_NAMESPACE_BEGIN
0028 
0029 class XMLFormatTarget;
0030 class XMLTranscoder;
0031 
0032 /**
0033  *  This class provides the basic formatting capabilities that are required
0034  *  to turn the Unicode based XML data from the parsers into a form that can
0035  *  be used on non-Unicode based systems, that is, into local or generic text
0036  *  encodings.
0037  *
0038  *  A number of flags are provided to control whether various optional
0039  *  formatting operations are performed.
0040  */
0041 class XMLPARSER_EXPORT XMLFormatter : public XMemory
0042 {
0043 public:
0044     // -----------------------------------------------------------------------
0045     //  Class types
0046     // -----------------------------------------------------------------------
0047     /** @name Public Constants */
0048     //@{
0049     /**
0050      * EscapeFlags - Different styles of escape flags to control various formatting.
0051      *
0052      * <p><code>NoEscapes:</code>
0053      * No character needs to be escaped.   Just write them out as is.</p>
0054      * <p><code>StdEscapes:</code>
0055      * The following characters need to be escaped:</p>
0056      * <table border='1'>
0057      * <tr>
0058      * <td>character</td>
0059      * <td>should be escaped and written as</td>
0060      * </tr>
0061      * <tr>
0062      * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
0063      * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
0064      * </tr>
0065      * <tr>
0066      * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
0067      * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
0068      * </tr>
0069      * <tr>
0070      * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
0071      * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
0072      * </tr>
0073      * <tr>
0074      * <td valign='top' rowspan='1' colspan='1'>&lt;</td>
0075      * <td valign='top' rowspan='1' colspan='1'>&amp;lt;</td>
0076      * </tr>
0077      * <tr>
0078      * <td valign='top' rowspan='1' colspan='1'>&apos;</td>
0079      * <td valign='top' rowspan='1' colspan='1'>&amp;apos;</td>
0080      * </tr>
0081      * </table>
0082      * <p><code>AttrEscapes:</code>
0083      * The following characters need to be escaped:</p>
0084      * <table border='1'>
0085      * <tr>
0086      * <td>character</td>
0087      * <td>should be escaped and written as</td>
0088      * </tr>
0089      * <tr>
0090      * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
0091      * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
0092      * </tr>
0093      * <tr>
0094      * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
0095      * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
0096      * </tr>
0097      * <tr>
0098      * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
0099      * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
0100      * </tr>
0101      * </table>
0102      * <p><code>CharEscapes:</code>
0103      * The following characters need to be escaped:</p>
0104      * <table border='1'>
0105      * <tr>
0106      * <td>character</td>
0107      * <td>should be escaped and written as</td>
0108      * </tr>
0109      * <tr>
0110      * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
0111      * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
0112      * </tr>
0113      * <tr>
0114      * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
0115      * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
0116      * </tr>
0117      * </table>
0118      * <p><code>EscapeFlags_Count:</code>
0119      * Special value, do not use directly.</p>
0120      * <p><code>DefaultEscape:</code>
0121      * Special value, do not use directly.</p>
0122      *
0123      */
0124     enum EscapeFlags
0125     {
0126         NoEscapes
0127         , StdEscapes
0128         , AttrEscapes
0129         , CharEscapes
0130 
0131         // Special values, don't use directly
0132         , EscapeFlags_Count
0133         , DefaultEscape     = 999
0134     };
0135 
0136     /**
0137      * UnRepFlags
0138      *
0139      * The unrepresentable flags that indicate how to react when a
0140      * character cannot be represented in the target encoding.
0141      *
0142      * <p><code>UnRep_Fail:</code>
0143      * Fail the operation.</p>
0144      * <p><code>UnRep_CharRef:</code>
0145      * Display the unrepresented character as reference.</p>
0146      * <p><code>UnRep_Replace:</code>
0147      * Replace the unrepresented character with the replacement character.</p>
0148      * <p><code>DefaultUnRep:</code>
0149      * Special value, do not use directly.</p>
0150      *
0151      */
0152     enum UnRepFlags
0153     {
0154         UnRep_Fail
0155         , UnRep_CharRef
0156         , UnRep_Replace
0157 
0158         , DefaultUnRep      = 999
0159     };
0160     //@}
0161 
0162 
0163     // -----------------------------------------------------------------------
0164     //  Constructors and Destructor
0165     // -----------------------------------------------------------------------
0166     /** @name Constructor and Destructor */
0167     //@{
0168     /**
0169      * @param outEncoding the encoding for the formatted content.
0170      * @param docVersion  the document version.
0171      * @param target      the formatTarget where the formatted content is written to.
0172      * @param escapeFlags the escape style applied to certain characters.
0173      * @param unrepFlags  the reaction to unrepresentable characters.
0174      * @param manager     Pointer to the memory manager to be used to
0175      *                    allocate objects.
0176      */
0177     XMLFormatter
0178     (
0179         const   XMLCh* const            outEncoding
0180         , const XMLCh* const            docVersion
0181         ,       XMLFormatTarget* const  target
0182         , const EscapeFlags             escapeFlags = NoEscapes
0183         , const UnRepFlags              unrepFlags = UnRep_Fail
0184         ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
0185     );
0186 
         /**
          * Overload of the constructor above that takes the encoding name and
          * the document version as char strings instead of XMLCh strings.
          */
0187     XMLFormatter
0188     (
0189         const   char* const             outEncoding
0190         , const char* const             docVersion
0191         ,       XMLFormatTarget* const  target
0192         , const EscapeFlags             escapeFlags = NoEscapes
0193         , const UnRepFlags              unrepFlags = UnRep_Fail
0194         ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
0195     );
0196 
         /**
          * Overload that takes an XMLCh encoding name and has no document
          * version parameter.
          */
0197     XMLFormatter
0198     (
0199         const   XMLCh* const            outEncoding
0200         ,       XMLFormatTarget* const  target
0201         , const EscapeFlags             escapeFlags = NoEscapes
0202         , const UnRepFlags              unrepFlags = UnRep_Fail
0203         ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
0204     );
0205 
         /**
          * Overload that takes a char encoding name and has no document
          * version parameter.
          */
0206     XMLFormatter
0207     (
0208         const   char* const             outEncoding
0209         ,       XMLFormatTarget* const  target
0210         , const EscapeFlags             escapeFlags = NoEscapes
0211         , const UnRepFlags              unrepFlags = UnRep_Fail
0212         ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
0213     );
0214 
0215     ~XMLFormatter();
0216     //@}
0217 
0218 
0219     // -----------------------------------------------------------------------
0220     //  Formatting methods
0221     // -----------------------------------------------------------------------
0222     /** @name Formatting methods */
0223     //@{
0224     /**
0225      * @param toFormat the string to be formatted
0226      * @param count    length of the string
0227      * @param escapeFlags the escape style for formatting toFormat
0228      * @param unrepFlags the reaction for any unrepresentable character in toFormat
0229      *
0230      */
0231     void formatBuf
0232     (
0233         const   XMLCh* const    toFormat
0234         , const XMLSize_t       count
0235         , const EscapeFlags     escapeFlags = DefaultEscape
0236         , const UnRepFlags      unrepFlags = DefaultUnRep
0237     );
0238 
0239     /**
0240      * @see formatBuf
0241      */
0242     XMLFormatter& operator<<
0243     (
0244         const   XMLCh* const    toFormat
0245     );
0246 
0247     XMLFormatter& operator<<
0248     (
0249         const   XMLCh           toFormat
0250     );
0251 
         /**
          * Judging by its name this writes a byte order mark; the implementation
          * is not visible in this header, so confirm before relying on it.
          *
          * @param toFormat the bytes to be written
          * @param count    presumably the number of bytes in toFormat
          */
0252     void writeBOM(const XMLByte* const toFormat
0253                 , const XMLSize_t      count);
0254 
0255     //@}
0256 
0257     // -----------------------------------------------------------------------
0258     //  Getter methods
0259     // -----------------------------------------------------------------------
0260     /** @name Getter methods */
0261     //@{
0262     /**
0263      * @return return the encoding set for the formatted content
0264      */
0265 
0266     const XMLCh* getEncodingName() const;
0267 
0268     /**
0269      * @return return the transcoder, as a pointer to const, used internally for transcoding the formatter content
0270      */
0271     inline const XMLTranscoder*   getTranscoder() const;
0272 
0273     /**
0274      * @return return the transcoder used internally for transcoding the formatter content
0275      */
0276     inline XMLTranscoder*   getTranscoder();
0277 
0278    //@}
0279 
0280     // -----------------------------------------------------------------------
0281     //  Setter methods
0282     // -----------------------------------------------------------------------
0283     /** @name Setter methods */
0284     //@{
0285     /**
0286      * @param newFlags set the escape style for the follow-on formatted content
0287      */
0288     void setEscapeFlags
0289     (
0290         const   EscapeFlags     newFlags
0291     );
0292 
0293     /**
0294      * @param newFlags set the reaction for unrepresentable characters
0295      */
0296     void setUnRepFlags
0297     (
0298         const   UnRepFlags      newFlags
0299     );
0300 
0301     /**
0302      * @param newFlags set the escape style for the follow-on formatted content
0303      * @see setEscapeFlags
0304      */
0305     XMLFormatter& operator<<
0306     (
0307         const   EscapeFlags     newFlags
0308     );
0309 
0310     /**
0311      * @param newFlags set the reaction for unrepresentable characters
0312      * @see setUnRepFlags
0313      */
0314     XMLFormatter& operator<<
0315     (
0316         const   UnRepFlags      newFlags
0317     );
0318     //@}
0319 
0320     // -----------------------------------------------------------------------
0321     //  Getter methods
0322     // -----------------------------------------------------------------------
0323     /** @name Flag getter methods */
0324     //@{
0325     /**
0326      * @return return the escape style for the formatted content
0327      */
0328     EscapeFlags getEscapeFlags() const;
0329 
0330     /**
0331      * @return return the reaction for unrepresentable characters
0332      */
0333     UnRepFlags getUnRepFlags() const;
0334     //@}
0335 
0336 private :
0337     // -----------------------------------------------------------------------
0338     //  Unimplemented constructors and operators
0339     // -----------------------------------------------------------------------
0340     XMLFormatter();
0341     XMLFormatter(const XMLFormatter&);
0342     XMLFormatter& operator=(const XMLFormatter&);
0343 
0344 
0345     // -----------------------------------------------------------------------
0346     //  Private class constants
0347     // -----------------------------------------------------------------------
0348     enum Constants
0349     {
0350         kTmpBufSize     = 16 * 1024
0351     };
0352 
0353 
0354     // -----------------------------------------------------------------------
0355     //  Private helper methods
0356     // -----------------------------------------------------------------------
0357     const XMLByte* getCharRef(XMLSize_t     &count,
0358                               XMLByte*      &ref,
0359                               const XMLCh *  stdRef);
0360 
0361     void writeCharRef(const XMLCh &toWrite);
0362     void writeCharRef(XMLSize_t toWrite);
0363 
0364     bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
0365                     , const XMLCh                     toCheck);
0366 
0367 
0368     XMLSize_t handleUnEscapedChars(const XMLCh *      srcPtr,
0369                                    const XMLSize_t    count,
0370                                    const UnRepFlags   unrepFlags);
0371 
0372     void specialFormat
0373     (
0374         const   XMLCh* const    toFormat
0375         , const XMLSize_t       count
0376         , const EscapeFlags     escapeFlags
0377     );
0378 
0379 
0380     // -----------------------------------------------------------------------
0381     //  Private data members
0382     //
0383     //  fEscapeFlags
0384     //      The escape flags we were told to use in formatting. These are
0385     //      defaults set in the ctor, which can be overridden on a particular
0386     //      call.
0387     //
0388     //  fOutEncoding
0389     //      This is the name of the output encoding. Saved mainly for
0390     //      meaningful error messages.
0391     //
0392     //  fTarget
0393     //      This is the target object for the formatting operation.
0394     //
0395     //  fUnRepFlags
0396     //      The unrepresentable flags that indicate how to react when a
0397     //      character cannot be represented in the target encoding.
0398     //
0399     //  fXCoder
0400     //      This is the transcoder that we will use. It is created using the
0401     //      encoding name we were told to use.
0402     //
0403     //  fTmpBuf
0404     //      An output buffer that we use to transcode chars into before we
0405     //      send them off to be output.
     //      It is declared with kTmpBufSize + 4 elements; the reason for the
     //      four extra elements is not visible in this header.
0406     //
0407     //  fAposRef
0408     //  fAmpRef
0409     //  fGTRef
0410     //  fLTRef
0411     //  fQuoteRef
0412     //      These are character refs for the standard char refs, in the
0413     //      output encoding. They are faulted in as required, by transcoding
0414     //      them from fixed Unicode versions.
     //      Each fXxxRef pointer is declared together with an fXxxLen member;
     //      presumably that is the length of the transcoded ref -- TODO confirm.
0415     //
0416     //  fIsXML11
0417     //      for performance reason, we do not store the actual version string
0418     //      and do the string comparison again and again.
0419     //
     //  fMemoryManager
     //      Presumably the memory manager handed to the ctor through its
     //      'manager' parameter -- TODO confirm against the implementation.
     //
0420     // -----------------------------------------------------------------------
0421     EscapeFlags                 fEscapeFlags;
0422     XMLCh*                      fOutEncoding;
0423     XMLFormatTarget*            fTarget;
0424     UnRepFlags                  fUnRepFlags;
0425     XMLTranscoder*              fXCoder;
0426     XMLByte                     fTmpBuf[kTmpBufSize + 4];
0427     XMLByte*                    fAposRef;
0428     XMLSize_t                   fAposLen;
0429     XMLByte*                    fAmpRef;
0430     XMLSize_t                   fAmpLen;
0431     XMLByte*                    fGTRef;
0432     XMLSize_t                   fGTLen;
0433     XMLByte*                    fLTRef;
0434     XMLSize_t                   fLTLen;
0435     XMLByte*                    fQuoteRef;
0436     XMLSize_t                   fQuoteLen;
0437     bool                        fIsXML11;
0438     MemoryManager*              fMemoryManager;
0439 };
0440 
0441 
0442 class XMLPARSER_EXPORT XMLFormatTarget : public XMemory
0443 {
     // Abstract output sink used with XMLFormatter. writeChars() is pure
     // virtual, so this class cannot be instantiated on its own: a concrete
     // target has to derive from it and implement writeChars().
0444 public:
0445     // -----------------------------------------------------------------------
0446     //  Constructors and Destructor
0447     // -----------------------------------------------------------------------
         /** Virtual destructor with an empty body. */
0448     virtual ~XMLFormatTarget() {}
0449 
0450 
0451     // -----------------------------------------------------------------------
0452     //  Virtual interface
0453     // -----------------------------------------------------------------------
         /**
          * Pure virtual output hook that every concrete target must implement.
          *
          * @param toWrite   the bytes to be written
          * @param count     presumably the number of bytes in toWrite -- confirm
          *                  against the callers
          * @param formatter presumably the formatter issuing the write --
          *                  confirm against the callers
          */
0454     virtual void writeChars
0455     (
0456           const XMLByte* const      toWrite
0457         , const XMLSize_t           count
0458         ,       XMLFormatter* const formatter
0459     ) = 0;
0460 
         /**
          * The implementation provided here is empty, i.e. it does nothing.
          * Being virtual, it can be overridden by a derived target.
          */
0461     virtual void flush() {};
0462 
0463 
0464 protected :
0465     // -----------------------------------------------------------------------
0466     //  Hidden constructors and operators
0467     // -----------------------------------------------------------------------
         // Protected, empty default ctor: only derived classes can construct
         // the base part.
0468     XMLFormatTarget() {};
0469 
0470 private:
0471     // -----------------------------------------------------------------------
0472     //  Unimplemented constructors and operators
0473     // -----------------------------------------------------------------------
         // Copy construction and copy assignment are declared private, which
         // makes targets non-copyable from outside the class.
0474     XMLFormatTarget(const XMLFormatTarget&);
0475     XMLFormatTarget& operator=(const XMLFormatTarget&);
0476 };
0477 
0478 
0479 // ---------------------------------------------------------------------------
0480 //  XMLFormatter: Getter methods
0481 // ---------------------------------------------------------------------------
0482 inline const XMLCh* XMLFormatter::getEncodingName() const
0483 {
         // Hands back the stored output-encoding name; the string is not copied.
0484     return this->fOutEncoding;
0485 }
0486 
0487 inline const XMLTranscoder* XMLFormatter::getTranscoder() const
0488 {
         // Const overload: the transcoder is exposed through a pointer to const.
0489     return this->fXCoder;
0490 }
0491 
0492 inline XMLTranscoder* XMLFormatter::getTranscoder()
0493 {
         // Non-const overload: the caller receives a mutable transcoder pointer.
0494     return this->fXCoder;
0495 }
0496 
0497 // ---------------------------------------------------------------------------
0498 //  XMLFormatter: Setter methods
0499 // ---------------------------------------------------------------------------
0500 inline void XMLFormatter::setEscapeFlags(const EscapeFlags newFlags)
0501 {
         // Replaces the stored escape style with the one supplied.
0502     this->fEscapeFlags = newFlags;
0503 }
0504 
0505 inline void XMLFormatter::setUnRepFlags(const UnRepFlags newFlags)
0506 {
         // Replaces the stored unrepresentable-character reaction.
0507     this->fUnRepFlags = newFlags;
0508 }
0509 
0510 
0511 inline XMLFormatter& XMLFormatter::operator<<(const EscapeFlags newFlags)
0512 {
         // Stream-style spelling of setEscapeFlags(); returning *this lets
         // further << calls be chained.
0513     setEscapeFlags(newFlags);
0514     return *this;
0515 }
0516 
0517 inline XMLFormatter& XMLFormatter::operator<<(const UnRepFlags newFlags)
0518 {
         // Stream-style spelling of setUnRepFlags(); returning *this lets
         // further << calls be chained.
0519     setUnRepFlags(newFlags);
0520     return *this;
0521 }
0522 
0523 // ---------------------------------------------------------------------------
0524 //  XMLFormatter: Getter methods
0525 // ---------------------------------------------------------------------------
0526 inline XMLFormatter::EscapeFlags XMLFormatter::getEscapeFlags() const
0527 {
         // Reports the escape style currently stored in the formatter.
0528     return this->fEscapeFlags;
0529 }
0530 
0531 inline XMLFormatter::UnRepFlags XMLFormatter::getUnRepFlags() const
0532 {
         // Reports the unrepresentable-character reaction currently stored.
0533     return this->fUnRepFlags;
0534 }
0535 
0536 XERCES_CPP_NAMESPACE_END
0537 
0538 #endif