// File indexing completed on 2025-01-18 10:14:53
0001 /* 0002 * Licensed to the Apache Software Foundation (ASF) under one or more 0003 * contributor license agreements. See the NOTICE file distributed with 0004 * this work for additional information regarding copyright ownership. 0005 * The ASF licenses this file to You under the Apache License, Version 2.0 0006 * (the "License"); you may not use this file except in compliance with 0007 * the License. You may obtain a copy of the License at 0008 * 0009 * http://www.apache.org/licenses/LICENSE-2.0 0010 * 0011 * Unless required by applicable law or agreed to in writing, software 0012 * distributed under the License is distributed on an "AS IS" BASIS, 0013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0014 * See the License for the specific language governing permissions and 0015 * limitations under the License. 0016 */ 0017 0018 /* 0019 * $Id$ 0020 */ 0021 0022 #if !defined(XERCESC_INCLUDE_GUARD_XMLRECOGNIZER_HPP) 0023 #define XERCESC_INCLUDE_GUARD_XMLRECOGNIZER_HPP 0024 0025 #include <xercesc/util/XercesDefs.hpp> 0026 #include <xercesc/util/PlatformUtils.hpp> 0027 0028 XERCES_CPP_NAMESPACE_BEGIN 0029 0030 /** 0031 * This class provides some simple code to recognize the encodings of 0032 * XML files. This recognition only does very basic sensing of the encoding 0033 * in a broad sense. Basically its just enough to let us get started and 0034 * read the XMLDecl line. The scanner, once it reads the XMLDecl, will 0035 * tell the reader any actual encoding string it found and the reader can 0036 * update itself to be more specific at that point. 0037 */ 0038 class XMLPARSER_EXPORT XMLRecognizer 0039 { 0040 public : 0041 // ----------------------------------------------------------------------- 0042 // Class types 0043 // 0044 // This enum represents the various encoding families that we have to 0045 // deal with individually at the scanner level. 
This does not indicate 0046 // the exact encoding, just the rough family that would let us scan 0047 // the XML/TextDecl to find the encoding string. 0048 // 0049 // The 'L's and 'B's stand for little or big endian. 0050 // 0051 // OtherEncoding means that its some transcoder based encoding, i.e. not 0052 // one of the ones that we do internally. Its a special case and should 0053 // never be used directly outside of the reader. 0054 // 0055 // NOTE: Keep this in sync with the name map array in the Cpp file!! 0056 // ----------------------------------------------------------------------- 0057 enum Encodings 0058 { 0059 EBCDIC = 0 0060 , UCS_4B = 1 0061 , UCS_4L = 2 0062 , US_ASCII = 3 0063 , UTF_8 = 4 0064 , UTF_16B = 5 0065 , UTF_16L = 6 0066 , XERCES_XMLCH = 7 0067 0068 , Encodings_Count 0069 , Encodings_Min = EBCDIC 0070 , Encodings_Max = XERCES_XMLCH 0071 0072 , OtherEncoding = 999 0073 }; 0074 0075 0076 // ----------------------------------------------------------------------- 0077 // Public, const static data 0078 // 0079 // These are the byte sequences for each of the encodings that we can 0080 // auto sense, and their lengths. 
0081 // ----------------------------------------------------------------------- 0082 static const char fgASCIIPre[]; 0083 static const XMLSize_t fgASCIIPreLen; 0084 static const XMLByte fgEBCDICPre[]; 0085 static const XMLSize_t fgEBCDICPreLen; 0086 static const XMLByte fgUTF16BPre[]; 0087 static const XMLByte fgUTF16LPre[]; 0088 static const XMLSize_t fgUTF16PreLen; 0089 static const XMLByte fgUCS4BPre[]; 0090 static const XMLByte fgUCS4LPre[]; 0091 static const XMLSize_t fgUCS4PreLen; 0092 static const char fgUTF8BOM[]; 0093 static const XMLSize_t fgUTF8BOMLen; 0094 0095 0096 // ----------------------------------------------------------------------- 0097 // Encoding recognition methods 0098 // ----------------------------------------------------------------------- 0099 static Encodings basicEncodingProbe 0100 ( 0101 const XMLByte* const rawBuffer 0102 , const XMLSize_t rawByteCount 0103 ); 0104 0105 static Encodings encodingForName 0106 ( 0107 const XMLCh* const theEncName 0108 ); 0109 0110 static const XMLCh* nameForEncoding(const Encodings theEncoding 0111 , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); 0112 0113 0114 protected : 0115 // ----------------------------------------------------------------------- 0116 // Unimplemented constructors, operators, and destructor 0117 // 0118 // This class is effectively being used as a namespace for some static 0119 // methods. 0120 // 0121 // (these functions are protected rather than private only to get rid of 0122 // some annoying compiler warnings.) 
0123 // 0124 // ----------------------------------------------------------------------- 0125 XMLRecognizer(); 0126 ~XMLRecognizer(); 0127 0128 private: 0129 // ----------------------------------------------------------------------- 0130 // Unimplemented constructors and operators 0131 // ----------------------------------------------------------------------- 0132 XMLRecognizer(const XMLRecognizer&); 0133 XMLRecognizer& operator=(const XMLRecognizer&); 0134 }; 0135 0136 XERCES_CPP_NAMESPACE_END 0137 0138 #endif
// [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ]
// This page was automatically generated by the 2.3.7 LXR engine. The LXR team