Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-10-26 09:01:35

0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  * 
0009  *      http://www.apache.org/licenses/LICENSE-2.0
0010  * 
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  *  $Id$
0020  */
0021 
0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLRECOGNIZER_HPP)
0023 #define XERCESC_INCLUDE_GUARD_XMLRECOGNIZER_HPP
0024 
0025 #include <xercesc/util/XercesDefs.hpp>
0026 #include <xercesc/util/PlatformUtils.hpp>
0027 
0028 XERCES_CPP_NAMESPACE_BEGIN
0029 
0030 /**
0031  *  Static-only helper that makes a coarse first guess at the encoding of
0032  *  an XML entity. The guess identifies an encoding family rather than an
0033  *  exact encoding; it is just enough to let us get started and read the
0034  *  XMLDecl line. The scanner, once it reads the XMLDecl, will tell the
0035  *  reader any actual encoding string it found, and the reader can update
0036  *  itself to be more specific at that point.
0037  */
0038 class XMLPARSER_EXPORT XMLRecognizer
0039 {
0040 public :
0041     // -----------------------------------------------------------------------
0042     //  Class types
0043     //
0044     //  This enum represents the various encoding families that we have to
0045     //  deal with individually at the scanner level. It does not indicate
0046     //  the exact encoding, just the rough family that lets us scan the
0047     //  XML/TextDecl to find the encoding string.
0048     //
0049     //  The 'L' and 'B' suffixes stand for little and big endian.
0050     //
0051     //  OtherEncoding means that it is some transcoder-based encoding, i.e.
0052     //  not one of the ones that we do internally. It is a special case and
0053     //  should never be used directly outside of the reader.
0054     //
0055     //  NOTE: Keep this in sync with the name map array in the Cpp file!!
0056     // -----------------------------------------------------------------------
0057     enum Encodings
0058     {
0059         EBCDIC          = 0
0060         , UCS_4B        = 1
0061         , UCS_4L        = 2
0062         , US_ASCII      = 3
0063         , UTF_8         = 4
0064         , UTF_16B       = 5
0065         , UTF_16L       = 6
0066         , XERCES_XMLCH  = 7
0067 
0068         , Encodings_Count                 // implicitly 8, i.e. XERCES_XMLCH + 1
0069         , Encodings_Min = EBCDIC          // lowest value of the 0..7 family range
0070         , Encodings_Max = XERCES_XMLCH    // highest value of the 0..7 family range
0071 
0072         , OtherEncoding = 999             // lies outside Encodings_Min..Encodings_Max
0073     };
0074 
0075 
0076     // -----------------------------------------------------------------------
0077     //  Public, const static data
0078     //
0079     //  These are the byte sequences for each of the encodings that we can
0080     //  auto sense, and their lengths. Only declared here; defined elsewhere.
0081     // -----------------------------------------------------------------------
0082     static const char           fgASCIIPre[];
0083     static const XMLSize_t      fgASCIIPreLen;
0084     static const XMLByte        fgEBCDICPre[];
0085     static const XMLSize_t      fgEBCDICPreLen;
0086     static const XMLByte        fgUTF16BPre[];
0087     static const XMLByte        fgUTF16LPre[];
0088     static const XMLSize_t      fgUTF16PreLen;
0089     static const XMLByte        fgUCS4BPre[];
0090     static const XMLByte        fgUCS4LPre[];
0091     static const XMLSize_t      fgUCS4PreLen;
0092     static const char           fgUTF8BOM[];
0093     static const XMLSize_t      fgUTF8BOMLen;
0094 
0095 
0096     // -----------------------------------------------------------------------
0097     //  Encoding recognition methods
0098     // -----------------------------------------------------------------------
0099     static Encodings basicEncodingProbe
0100     (
0101         const   XMLByte* const      rawBuffer       // bytes to examine; pointee is const, so never modified
0102         , const XMLSize_t           rawByteCount    // presumably the number of bytes in rawBuffer - confirm
0103     );
0104     //  Encoding name in, Encodings value out; handling of unknown names is not visible here.
0105     static Encodings encodingForName
0106     (
0107         const   XMLCh* const    theEncName
0108     );
0109     //  Encodings value in, name out; manager defaults to XMLPlatformUtils::fgMemoryManager.
0110     static const XMLCh* nameForEncoding(const Encodings theEncoding
0111         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
0112 
0113 
0114 protected :
0115     // -----------------------------------------------------------------------
0116     //  Unimplemented constructors, operators, and destructor
0117     //
0118     //  This class is effectively being used as a namespace for some static
0119     //  methods, so it is never meant to be instantiated.
0120     //
0121     //   (These functions are protected rather than private only to get rid
0122     //    of some annoying compiler warnings.)
0123     //
0124     // -----------------------------------------------------------------------
0125     XMLRecognizer();
0126     ~XMLRecognizer();
0127 
0128 private:
0129     // -----------------------------------------------------------------------
0130     //  Unimplemented copy constructor and assignment operator
0131     // -----------------------------------------------------------------------
0132     XMLRecognizer(const XMLRecognizer&);    
0133     XMLRecognizer& operator=(const XMLRecognizer&);
0134 };
0135 
0136 XERCES_CPP_NAMESPACE_END
0137 
0138 #endif