Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:03

0001 // Tencent is pleased to support the open source community by making RapidJSON available.
0002 // 
0003 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
0004 //
0005 // Licensed under the MIT License (the "License"); you may not use this file except
0006 // in compliance with the License. You may obtain a copy of the License at
0007 //
0008 // http://opensource.org/licenses/MIT
0009 //
0010 // Unless required by applicable law or agreed to in writing, software distributed 
0011 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
0012 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 
0013 // specific language governing permissions and limitations under the License.
0014 
0015 #ifndef RAPIDJSON_ENCODEDSTREAM_H_
0016 #define RAPIDJSON_ENCODEDSTREAM_H_
0017 
0018 #include "stream.h"
0019 #include "memorystream.h"
0020 
0021 #ifdef __GNUC__
0022 RAPIDJSON_DIAG_PUSH
0023 RAPIDJSON_DIAG_OFF(effc++)
0024 #endif
0025 
0026 #ifdef __clang__
0027 RAPIDJSON_DIAG_PUSH
0028 RAPIDJSON_DIAG_OFF(padded)
0029 #endif
0030 
0031 RAPIDJSON_NAMESPACE_BEGIN
0032 
0033 //! Input byte stream wrapper with a statically bound encoding.
0034 /*!
0035     \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
0036     \tparam InputByteStream Type of input byte stream. For example, FileReadStream.
0037 */
0038 template <typename Encoding, typename InputByteStream>
0039 class EncodedInputStream {
0040     RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
0041 public:
0042     typedef typename Encoding::Ch Ch;
0043 
0044     EncodedInputStream(InputByteStream& is) : is_(is) { 
0045         current_ = Encoding::TakeBOM(is_);
0046     }
0047 
0048     Ch Peek() const { return current_; }
0049     Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
0050     size_t Tell() const { return is_.Tell(); }
0051 
0052     // Not implemented
0053     void Put(Ch) { RAPIDJSON_ASSERT(false); }
0054     void Flush() { RAPIDJSON_ASSERT(false); } 
0055     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
0056     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
0057 
0058 private:
0059     EncodedInputStream(const EncodedInputStream&);
0060     EncodedInputStream& operator=(const EncodedInputStream&);
0061 
0062     InputByteStream& is_;
0063     Ch current_;
0064 };
0065 
0066 //! Specialized for UTF8 MemoryStream.
0067 template <>
0068 class EncodedInputStream<UTF8<>, MemoryStream> {
0069 public:
0070     typedef UTF8<>::Ch Ch;
0071 
0072     EncodedInputStream(MemoryStream& is) : is_(is) {
0073         if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
0074         if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
0075         if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
0076     }
0077     Ch Peek() const { return is_.Peek(); }
0078     Ch Take() { return is_.Take(); }
0079     size_t Tell() const { return is_.Tell(); }
0080 
0081     // Not implemented
0082     void Put(Ch) {}
0083     void Flush() {} 
0084     Ch* PutBegin() { return 0; }
0085     size_t PutEnd(Ch*) { return 0; }
0086 
0087     MemoryStream& is_;
0088 
0089 private:
0090     EncodedInputStream(const EncodedInputStream&);
0091     EncodedInputStream& operator=(const EncodedInputStream&);
0092 };
0093 
0094 //! Output byte stream wrapper with statically bound encoding.
0095 /*!
0096     \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
0097     \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream.
0098 */
0099 template <typename Encoding, typename OutputByteStream>
0100 class EncodedOutputStream {
0101     RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
0102 public:
0103     typedef typename Encoding::Ch Ch;
0104 
0105     EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { 
0106         if (putBOM)
0107             Encoding::PutBOM(os_);
0108     }
0109 
0110     void Put(Ch c) { Encoding::Put(os_, c);  }
0111     void Flush() { os_.Flush(); }
0112 
0113     // Not implemented
0114     Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
0115     Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
0116     size_t Tell() const { RAPIDJSON_ASSERT(false);  return 0; }
0117     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
0118     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
0119 
0120 private:
0121     EncodedOutputStream(const EncodedOutputStream&);
0122     EncodedOutputStream& operator=(const EncodedOutputStream&);
0123 
0124     OutputByteStream& os_;
0125 };
0126 
// Expands to a comma-separated list naming the static member `x` of each
// supported encoding, instantiated for the enclosing class's Ch type. It is used
// to build function-pointer tables that are indexed with a UTFType value
// (f[type_]), so the order of the entries must stay in step with that
// enumeration -- presumably kUTF8, kUTF16LE, kUTF16BE, kUTF32LE, kUTF32BE.
#define RAPIDJSON_ENCODINGS_FUNC(x) \
    UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
0128 
0129 //! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
0130 /*!
0131     \tparam CharType Type of character for reading.
0132     \tparam InputByteStream type of input byte stream to be wrapped.
0133 */
0134 template <typename CharType, typename InputByteStream>
0135 class AutoUTFInputStream {
0136     RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
0137 public:
0138     typedef CharType Ch;
0139 
0140     //! Constructor.
0141     /*!
0142         \param is input stream to be wrapped.
0143         \param type UTF encoding type if it is not detected from the stream.
0144     */
0145     AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
0146         RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);        
0147         DetectType();
0148         static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
0149         takeFunc_ = f[type_];
0150         current_ = takeFunc_(*is_);
0151     }
0152 
0153     UTFType GetType() const { return type_; }
0154     bool HasBOM() const { return hasBOM_; }
0155 
0156     Ch Peek() const { return current_; }
0157     Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
0158     size_t Tell() const { return is_->Tell(); }
0159 
0160     // Not implemented
0161     void Put(Ch) { RAPIDJSON_ASSERT(false); }
0162     void Flush() { RAPIDJSON_ASSERT(false); } 
0163     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
0164     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
0165 
0166 private:
0167     AutoUTFInputStream(const AutoUTFInputStream&);
0168     AutoUTFInputStream& operator=(const AutoUTFInputStream&);
0169 
0170     // Detect encoding type with BOM or RFC 4627
0171     void DetectType() {
0172         // BOM (Byte Order Mark):
0173         // 00 00 FE FF  UTF-32BE
0174         // FF FE 00 00  UTF-32LE
0175         // FE FF        UTF-16BE
0176         // FF FE        UTF-16LE
0177         // EF BB BF     UTF-8
0178 
0179         const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
0180         if (!c)
0181             return;
0182 
0183         unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
0184         hasBOM_ = false;
0185         if (bom == 0xFFFE0000)                  { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
0186         else if (bom == 0x0000FEFF)             { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
0187         else if ((bom & 0xFFFF) == 0xFFFE)      { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take();                           }
0188         else if ((bom & 0xFFFF) == 0xFEFF)      { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take();                           }
0189         else if ((bom & 0xFFFFFF) == 0xBFBBEF)  { type_ = kUTF8;    hasBOM_ = true; is_->Take(); is_->Take(); is_->Take();              }
0190 
0191         // RFC 4627: Section 3
0192         // "Since the first two characters of a JSON text will always be ASCII
0193         // characters [RFC0020], it is possible to determine whether an octet
0194         // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
0195         // at the pattern of nulls in the first four octets."
0196         // 00 00 00 xx  UTF-32BE
0197         // 00 xx 00 xx  UTF-16BE
0198         // xx 00 00 00  UTF-32LE
0199         // xx 00 xx 00  UTF-16LE
0200         // xx xx xx xx  UTF-8
0201 
0202         if (!hasBOM_) {
0203             int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
0204             switch (pattern) {
0205             case 0x08: type_ = kUTF32BE; break;
0206             case 0x0A: type_ = kUTF16BE; break;
0207             case 0x01: type_ = kUTF32LE; break;
0208             case 0x05: type_ = kUTF16LE; break;
0209             case 0x0F: type_ = kUTF8;    break;
0210             default: break; // Use type defined by user.
0211             }
0212         }
0213 
0214         // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
0215         if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
0216         if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
0217     }
0218 
0219     typedef Ch (*TakeFunc)(InputByteStream& is);
0220     InputByteStream* is_;
0221     UTFType type_;
0222     Ch current_;
0223     TakeFunc takeFunc_;
0224     bool hasBOM_;
0225 };
0226 
0227 //! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
0228 /*!
0229     \tparam CharType Type of character for writing.
0230     \tparam OutputByteStream type of output byte stream to be wrapped.
0231 */
0232 template <typename CharType, typename OutputByteStream>
0233 class AutoUTFOutputStream {
0234     RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
0235 public:
0236     typedef CharType Ch;
0237 
0238     //! Constructor.
0239     /*!
0240         \param os output stream to be wrapped.
0241         \param type UTF encoding type.
0242         \param putBOM Whether to write BOM at the beginning of the stream.
0243     */
0244     AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
0245         RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
0246 
0247         // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
0248         if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
0249         if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
0250 
0251         static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
0252         putFunc_ = f[type_];
0253 
0254         if (putBOM)
0255             PutBOM();
0256     }
0257 
0258     UTFType GetType() const { return type_; }
0259 
0260     void Put(Ch c) { putFunc_(*os_, c); }
0261     void Flush() { os_->Flush(); } 
0262 
0263     // Not implemented
0264     Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
0265     Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
0266     size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
0267     Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
0268     size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
0269 
0270 private:
0271     AutoUTFOutputStream(const AutoUTFOutputStream&);
0272     AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
0273 
0274     void PutBOM() { 
0275         typedef void (*PutBOMFunc)(OutputByteStream&);
0276         static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
0277         f[type_](*os_);
0278     }
0279 
0280     typedef void (*PutFunc)(OutputByteStream&, Ch);
0281 
0282     OutputByteStream* os_;
0283     UTFType type_;
0284     PutFunc putFunc_;
0285 };
0286 
0287 #undef RAPIDJSON_ENCODINGS_FUNC
0288 
0289 RAPIDJSON_NAMESPACE_END
0290 
0291 #ifdef __clang__
0292 RAPIDJSON_DIAG_POP
0293 #endif
0294 
0295 #ifdef __GNUC__
0296 RAPIDJSON_DIAG_POP
0297 #endif
0298 
0299 #endif // RAPIDJSON_ENCODEDSTREAM_H_