Geant4/tools/toojpeg.icc

0001
0002 // G.Barrand: pure header version of toojpeg found at https://github.com/stbrumme/toojpeg
0003
0004 // //////////////////////////////////////////////////////////
0005 // toojpeg.cpp
0006 // written by Stephan Brumme, 2018-2019
0007 // see https://create.stephan-brumme.com/toojpeg/
0008 //
0009
0010 #include <cstddef> //size_t
0011
0012 // - the "official" specifications: https://www.w3.org/Graphics/JPEG/itu-t81.pdf and https://www.w3.org/Graphics/JPEG/jfif3.pdf
0013 // - Wikipedia has a short description of the JFIF/JPEG file format: https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format
0014 // - the popular STB Image library includes Jon's JPEG encoder as well: https://github.com/nothings/stb/blob/master/stb_image_write.h
0015 // - the most readable JPEG book (from a developer's perspective) is Miano's "Compressed Image File Formats" (1999, ISBN 0-201-60443-4),
0016 //   used copies are really cheap nowadays and include a CD with C++ sources as well (plus great format descriptions of GIF & PNG)
// - much more detailed is Mitchell/Pennebaker's "JPEG: Still Image Data Compression Standard" (1993, ISBN 0-442-01272-1)
0018 //   which contains the official JPEG standard, too - fun fact: I bought a signed copy in a second-hand store without noticing
0019
0020 namespace tools {
0021 namespace toojpeg {
0022 // ////////////////////////////////////////
0023 // data types
// integer aliases used throughout this encoder, declared locally instead of pulling in <cstdint>
// NOTE(review): the widths are assumed from the usual sizes of these fundamental types, nothing here enforces them
typedef unsigned char uint8_t;   // one byte of image or output data
typedef unsigned short uint16_t; // e.g. Huffman codes and marker lengths
typedef short int16_t;           // e.g. quantized DCT coefficients
typedef int int32_t; // at least four bytes (assumed, the C++ standard itself only guarantees two for int)
0028
0029 // ////////////////////////////////////////
0030 // constants
0031
// quantization tables from JPEG Standard, Annex K
// luminance (Y) table: 64 entries, looked up via ZigZagInv[] and scaled by the quality factor in writeJpeg()
const uint8_t DefaultQuantLuminance[8*8] =
    { 16, 11, 10, 16, 24, 40, 51, 61, // there are a few experts proposing slightly more efficient values,
      12, 12, 14, 19, 26, 58, 60, 55, // e.g. https://www.imagemagick.org/discourse-server/viewtopic.php?t=20333
      14, 13, 16, 24, 40, 57, 69, 56, // btw: Google's Guetzli project optimizes the quantization tables per image
      14, 17, 22, 29, 51, 87, 80, 62,
      18, 22, 37, 56, 68,109,103, 77,
      24, 35, 55, 64, 81,104,113, 92,
      49, 64, 78, 87,103,121,120,101,
      72, 92, 95, 98,112,100,103, 99 };
// chrominance (Cb and Cr) table: same layout as DefaultQuantLuminance,
// writeJpeg() scales it by the quality factor exactly like the luminance table
const uint8_t DefaultQuantChrominance[8*8] =
    { 17, 18, 24, 47, 99, 99, 99, 99,
      18, 21, 26, 66, 99, 99, 99, 99,
      24, 26, 56, 99, 99, 99, 99, 99,
      47, 66, 99, 99, 99, 99, 99, 99,
      99, 99, 99, 99, 99, 99, 99, 99,
      99, 99, 99, 99, 99, 99, 99, 99,
      99, 99, 99, 99, 99, 99, 99, 99,
      99, 99, 99, 99, 99, 99, 99, 99 };
0051
// 8x8 blocks are processed in zig-zag order
// most encoders use a zig-zag "forward" table, I switched to its inverse for performance reasons
// note: ZigZagInv[ZigZag[i]] = i
// ZigZagInv[i] is the position inside the 8x8 block (row = value / 8, column = value % 8)
// of the i-th coefficient in zig-zag order; the forward table is shown in the comments on the right
const uint8_t ZigZagInv[8*8] =
    {  0, 1, 8,16, 9, 2, 3,10,   // ZigZag[] =  0, 1, 5, 6,14,15,27,28,
      17,24,32,25,18,11, 4, 5,   //             2, 4, 7,13,16,26,29,42,
      12,19,26,33,40,48,41,34,   //             3, 8,12,17,25,30,41,43,
      27,20,13, 6, 7,14,21,28,   //             9,11,18,24,31,40,44,53,
      35,42,49,56,57,50,43,36,   //            10,19,23,32,39,45,52,54,
      29,22,15,23,30,37,44,51,   //            20,22,33,38,46,51,55,60,
      58,59,52,45,38,31,39,46,   //            21,34,37,47,50,56,59,61,
      53,60,61,54,47,55,62,63 }; //            35,36,48,49,57,58,62,63
0064
0065 // static Huffman code tables from JPEG standard Annex K
// - CodesPerBitsize tables define how many Huffman codes will have a certain bitsize (plus 1 because there is nothing with zero bits),
//   e.g. DcLuminanceCodesPerBitsize[2] = 5 because there are 5 Huffman codes being 2+1=3 bits long
// - Values tables are a list of values ordered by their Huffman code bitsize,
//   e.g. AcLuminanceValues => Huffman(0x01 and 0x02) will have 2 bits, Huffman(0x03) will have 3 bits, Huffman(0x00,0x04 and 0x11) will have 4 bits, ...
0070
// Huffman definitions for first DC/AC tables (luminance / Y channel)
// each CodesPerBitsize/Values pair is written verbatim into the DHT segment by writeJpeg()
// and is also the input of generateHuffmanTable()
const uint8_t DcLuminanceCodesPerBitsize[16]   = { 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };   // sum = 12
const uint8_t DcLuminanceValues         [12]   = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes
const uint8_t AcLuminanceCodesPerBitsize[16]   = { 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,125 }; // sum = 162
const uint8_t AcLuminanceValues        [162]   =                                        // => 162 codes
    { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xA1,0x08, // 16*10+2 symbols because
      0x23,0x42,0xB1,0xC1,0x15,0x52,0xD1,0xF0,0x24,0x33,0x62,0x72,0x82,0x09,0x0A,0x16,0x17,0x18,0x19,0x1A,0x25,0x26,0x27,0x28, // upper 4 bits can be 0..F
      0x29,0x2A,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,0x59, // while lower 4 bits can be 1..A
      0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x83,0x84,0x85,0x86,0x87,0x88,0x89, // plus two special codes 0x00 and 0xF0
      0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,0xB5,0xB6, // order of these symbols was determined empirically by JPEG committee
      0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xE1,0xE2,
      0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
// Huffman definitions for second DC/AC tables (chrominance / Cb and Cr channels)
// writeJpeg() only emits and uses these tables for color (RGB) input
const uint8_t DcChrominanceCodesPerBitsize[16] = { 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };   // sum = 12
const uint8_t DcChrominanceValues         [12] = { 0,1,2,3,4,5,6,7,8,9,10,11 };         // => 12 codes (identical to DcLuminanceValues)
const uint8_t AcChrominanceCodesPerBitsize[16] = { 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,119 }; // sum = 162
const uint8_t AcChrominanceValues        [162] =                                        // => 162 codes
    { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, // same number of symbols, just different order
      0xA1,0xB1,0xC1,0x09,0x23,0x33,0x52,0xF0,0x15,0x62,0x72,0xD1,0x0A,0x16,0x24,0x34,0xE1,0x25,0xF1,0x17,0x18,0x19,0x1A,0x26, // (which is more efficient for AC coding)
      0x27,0x28,0x29,0x2A,0x35,0x36,0x37,0x38,0x39,0x3A,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x53,0x54,0x55,0x56,0x57,0x58,
      0x59,0x5A,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x82,0x83,0x84,0x85,0x86,0x87,
      0x88,0x89,0x8A,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xB2,0xB3,0xB4,
      0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,
      0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA };
// writeJpeg() precomputes one BitCode per value in -(CodeWordLimit-1) ... +(CodeWordLimit-1)
// and uses CodeWordLimit as the offset that makes negative indices into that table possible
const int16_t CodeWordLimit = 2048; // +/-2^11, maximum value after DCT
0096
0097 // ////////////////////////////////////////
0098 // structs
0099
// one Huffman code word: the bit pattern together with the count of bits that are valid
struct BitCode
{
  // empty code (no bits); table entries start out like this and are filled in later
  BitCode()
  : code(0), numBits(0) {}

  // code word with an explicit bit pattern and length
  BitCode(uint16_t code_, uint8_t numBits_)
  : code(code_), numBits(numBits_) {}

  // plain member-wise copy
  BitCode(const BitCode& other)
  : code(other.code), numBits(other.numBits) {}

  // plain member-wise assignment
  BitCode& operator=(const BitCode& other)
  {
    code    = other.code;
    numBits = other.numBits;
    return *this;
  }

  uint16_t code;       // the bits themselves, JPEG limits Huffman codes to 16 bits
  uint8_t  numBits;    // how many of those bits are valid
};
0117
0118 // wrapper for bit output operations
0119 struct BitWriter
0120 {
0121   // user-supplied callback that writes/stores one byte
0122   WRITE_ONE_BYTE output;
0123   void* tag;
0124   // initialize writer
0125   explicit BitWriter(WRITE_ONE_BYTE output_,void* tag_) : output(output_),tag(tag_) {
0126     buffer.data = 0;
0127     buffer.numBits = 0;
0128   }
0129
0130   // store the most recently encoded bits that are not written yet
0131   struct BitBuffer
0132   {
0133     int32_t data    /*= 0*/; // actually only at most 24 bits are used
0134     uint8_t numBits /*= 0*/; // number of valid bits (the right-most bits)
0135   } buffer;
0136
0137   // write Huffman bits stored in BitCode, keep excess bits in BitBuffer
0138   BitWriter& operator<<(const BitCode& data)
0139   {
0140     // append the new bits to those bits leftover from previous call(s)
0141     buffer.numBits += data.numBits;
0142     buffer.data   <<= data.numBits;
0143     buffer.data    |= data.code;
0144
0145     // write all "full" bytes
0146     while (buffer.numBits >= 8)
0147     {
0148       // extract highest 8 bits
0149       buffer.numBits -= 8;
0150       uint8_t oneByte = uint8_t(buffer.data >> buffer.numBits);
0151       output(oneByte,tag);
0152
0153       if (oneByte == 0xFF) // 0xFF has a special meaning for JPEGs (it's a block marker)
0154         output(0,tag);         // therefore pad a zero to indicate "nope, this one ain't a marker, it's just a coincidence"
0155
0156       // note: I don't clear those written bits, therefore buffer.bits may contain garbage in the high bits
0157       //       if you really want to "clean up" (e.g. for debugging purposes) then uncomment the following line
0158       //buffer.bits &= (1 << buffer.numBits) - 1;
0159     }
0160     return *this;
0161   }
0162
0163   // write all non-yet-written bits, fill gaps with 1s (that's a strange JPEG thing)
0164   void flush()
0165   {
0166     // at most seven set bits needed to "fill" the last byte: 0x7F = binary 0111 1111
0167     *this << BitCode(0x7F, 7); // I should set buffer.numBits = 0 but since there are no single bits written after flush() I can safely ignore it
0168   }
0169
0170   // NOTE: all the following BitWriter functions IGNORE the BitBuffer and write straight to output !
0171   // write a single byte
0172   BitWriter& operator<<(uint8_t oneByte)
0173   {
0174     output(oneByte,tag);
0175     return *this;
0176   }
0177
0178   // write an array of bytes
0179   template <typename T, int Size>
0180   BitWriter& operator<<(T (&manyBytes)[Size])
0181   {
0182   //for (auto c : manyBytes)
0183   //  output(c);
0184     for(size_t i=0;i<Size;i++) output(manyBytes[i],tag);
0185     return *this;
0186   }
0187
0188   // start a new JFIF block
0189   void addMarker(uint8_t id, uint16_t length)
0190   {
0191     output(0xFF,tag); output(id,tag);     // ID, always preceded by 0xFF
0192     output(uint8_t(length >> 8),tag); // length of the block (big-endian, includes the 2 length bytes as well)
0193     output(uint8_t(length & 0xFF),tag);
0194   }
0195 };
0196
0197 // ////////////////////////////////////////
0198 // functions / templates
0199
// smaller of the two arguments (like std::min()); value wins when both compare equal
template <typename Number>
inline Number minimum(Number value, Number maximum)
{
  if (value <= maximum)
    return value;
  return maximum;
}
0206
// force value into the interval [minValue, maxValue]; the lower bound is checked first,
// the result always has the type of value
template <typename Number, typename Limit>
inline Number clamp(Number value, Limit minValue, Limit maxValue)
{
  return value <= minValue ? static_cast<Number>(minValue)  // at or below the interval
       : value >= maxValue ? static_cast<Number>(maxValue)  // at or above the interval
       :                     value;                         // inside, keep it
}
0215
// convert from RGB to YCbCr, constants are similar to ITU-R, see https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion
// each function is a weighted sum of the three color channels, no offset is added here

// luminance
inline float rgb2y (float r, float g, float b)
{
  const float weightR = 0.299f;
  const float weightG = 0.587f;
  const float weightB = 0.114f;
  return weightR * r + weightG * g + weightB * b;
}

// blue-difference chrominance
inline float rgb2cb(float r, float g, float b)
{
  const float weightR = -0.16874f;
  const float weightG = -0.33126f;
  const float weightB =  0.5f;
  return weightR * r + weightG * g + weightB * b;
}

// red-difference chrominance
inline float rgb2cr(float r, float g, float b)
{
  const float weightR =  0.5f;
  const float weightG = -0.41869f;
  const float weightB = -0.08131f;
  return weightR * r + weightG * g + weightB * b;
}
0220
// forward DCT computation "in one dimension" (fast AAN algorithm by Arai, Agui and Nakajima: "A fast DCT-SQ scheme for images")
// transforms the eight samples block[0], block[stride], ..., block[7*stride] in place
// based on https://dev.w3.org/Amaya/libjpeg/jfdctflt.c , the temporaries carry the names used there
inline void DCT(float block[8*8], uint8_t stride) // stride must be 1 (=horizontal) or 8 (=vertical)
{
  const float InvSqrt      = 0.707106781f; // 1 / sqrt(2)                = cos(pi * 2 / 8)
  const float HalfSqrtSqrt = 0.382683432f; //     sqrt(2 - sqrt(2)) / 2  = cos(pi * 3 / 8)
  const float InvSqrtSqrt  = 0.541196100f; // 1 / sqrt(2 - sqrt(2))      = cos(pi * 3 / 8) * sqrt(2)
  const float SqrtHalfSqrt = 1.306562965f; //    sqrt((2 + sqrt(2)) / 2) = cos(pi * 1 / 8) * sqrt(2)

  // distance between two consecutive samples
  const int step = stride;

  // fetch all eight samples before any of them is overwritten
  const float in0 = block[0 * step];
  const float in1 = block[1 * step];
  const float in2 = block[2 * step];
  const float in3 = block[3 * step];
  const float in4 = block[4 * step];
  const float in5 = block[5 * step];
  const float in6 = block[6 * step];
  const float in7 = block[7 * step];

  // sums and differences of mirrored samples
  const float tmp0 = in0 + in7;
  const float tmp7 = in0 - in7;
  const float tmp1 = in1 + in6;
  const float tmp6 = in1 - in6;
  const float tmp2 = in2 + in5;
  const float tmp5 = in2 - in5;
  const float tmp3 = in3 + in4;
  const float tmp4 = in3 - in4;

  // "even part" / "phase 2"
  const float tmp10 = tmp0 + tmp3;
  const float tmp13 = tmp0 - tmp3;
  const float tmp11 = tmp1 + tmp2;
  const float tmp12 = tmp1 - tmp2;

  // "phase 3"
  block[0 * step] = tmp10 + tmp11;
  block[4 * step] = tmp10 - tmp11;

  // "phase 5"
  const float z1 = (tmp12 + tmp13) * InvSqrt;
  block[2 * step] = tmp13 + z1;
  block[6 * step] = tmp13 - z1;

  // "odd part" / "phase 2"
  const float odd10 = tmp5 + tmp4;
  const float odd11 = tmp6 + tmp5;
  const float odd12 = tmp6 + tmp7;

  const float z5  = (odd10 - odd12) * HalfSqrtSqrt;
  const float z2  = odd10 * InvSqrtSqrt  + z5;
  const float z3  = odd11 * InvSqrt;
  const float z4  = odd12 * SqrtHalfSqrt + z5;

  // "phase 5"
  const float z11 = tmp7 + z3;
  const float z13 = tmp7 - z3;

  // "phase 6"
  block[1 * step] = z11 + z4;
  block[7 * step] = z11 - z4;
  block[5 * step] = z13 + z2;
  block[3 * step] = z13 - z2;
}
0266
0267 // run DCT, quantize and write Huffman bit codes
0268 inline int16_t encodeBlock(BitWriter& writer, float block[8][8], const float scaled[8*8], int16_t lastDC,
0269                     const BitCode huffmanDC[256], const BitCode huffmanAC[256], const BitCode* codewords)
0270 {
0271   // "linearize" the 8x8 block, treat it as a flat array of 64 floats
0272   float* block64 = (float*) block;
0273
0274   // DCT: rows
0275   for (size_t offset = 0; offset < 8; offset++)
0276     DCT(block64 + offset*8, 1);
0277   // DCT: columns
0278   for (size_t offset = 0; offset < 8; offset++)
0279     DCT(block64 + offset*1, 8);
0280
0281   // scale
0282   for (size_t i = 0; i < 8*8; i++)
0283     block64[i] *= scaled[i];
0284
0285   // encode DC (the first coefficient is the "average color" of the 8x8 block)
0286   int DC = int(block64[0] + (block64[0] >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect
0287
0288   // quantize and zigzag the other 63 coefficients
0289   size_t posNonZero = 0; // find last coefficient which is not zero (because trailing zeros are encoded differently)
0290   int16_t quantized[8*8];
0291   for (size_t i = 1; i < 8*8; i++) // start at 1 because block64[0]=DC was already processed
0292   {
0293     float value = block64[ZigZagInv[i]];
0294     // round to nearest integer
0295     quantized[i] = int(value + (value >= 0 ? +0.5f : -0.5f)); // C++11's nearbyint() achieves a similar effect
0296     // remember offset of last non-zero coefficient
0297     if (quantized[i] != 0)
0298       posNonZero = i;
0299   }
0300
0301   // same "average color" as previous block ?
0302   int diff = DC - lastDC;
0303   if (diff == 0)
0304     writer << huffmanDC[0x00];   // yes, write a special short symbol
0305   else
0306   {
0307     const BitCode bits = codewords[diff]; // nope, encode the difference to previous block's average color
0308     writer << huffmanDC[bits.numBits] << bits;
0309   }
0310
0311   // encode ACs (quantized[1..63])
0312   size_t offset = 0; // upper 4 bits count the number of consecutive zeros
0313   for (size_t i = 1; i <= posNonZero; i++) // quantized[0] was already written, skip all trailing zeros, too
0314   {
0315     // zeros are encoded in a special way
0316     while (quantized[i] == 0) // found another zero ?
0317     {
0318       offset    += 0x10; // add 1 to the upper 4 bits
0319       // split into blocks of at most 16 consecutive zeros
0320       if (offset > 0xF0) // remember, the counter is in the upper 4 bits, 0xF = 15
0321       {
0322         writer << huffmanAC[0xF0]; // 0xF0 is a special code for "16 zeros"
0323         offset = 0;
0324       }
0325       i++;
0326     }
0327
0328     const BitCode encoded = codewords[quantized[i]];
0329     // combine number of zeros with the number of bits of the next non-zero value
0330     writer << huffmanAC[offset + encoded.numBits] << encoded; // and the value itself
0331     offset = 0;
0332   }
0333
0334   // send end-of-block code (0x00), only needed if there are trailing zeros
0335   if (posNonZero < 8*8 - 1) // = 63
0336     writer << huffmanAC[0x00];
0337
0338   return DC;
0339 }
0340
0341 // Jon's code includes the pre-generated Huffman codes
0342 // I don't like these "magic constants" and compute them on my own :-)
0343 inline void generateHuffmanTable(const uint8_t numCodes[16], const uint8_t* values, BitCode result[256])
0344 {
0345   // process all bitsizes 1 thru 16, no JPEG Huffman code is allowed to exceed 16 bits
0346   uint16_t huffmanCode = 0;
0347   for (uint8_t numBits = 1; numBits <= 16; numBits++)
0348   {
0349     // ... and each code of these bitsizes
0350     for (uint8_t i = 0; i < numCodes[numBits - 1]; i++) // note: numCodes array starts at zero, but smallest bitsize is 1
0351       result[*values++] = BitCode(huffmanCode++, numBits);
0352
0353     // next Huffman code needs to be one bit wider
0354     huffmanCode <<= 1;
0355   }
0356 }
0357
0358 // -------------------- externally visible code --------------------
0359
0360 // the only exported function ...
0361 inline bool writeJpeg(WRITE_ONE_BYTE output, void* tag,const void* pixels_, unsigned short width, unsigned short height,
0362                bool isRGB, unsigned char quality_, bool downsample, const char* comment)
0363 {
0364   // reject invalid pointers
0365   if (output == 0/*nullptr*/ || pixels_ == 0/*nullptr*/)
0366     return false;
0367   // check image format
0368   if (width == 0 || height == 0)
0369     return false;
0370
0371   // number of components
0372   const uint16_t numComponents = isRGB ? 3 : 1;
0373   // note: if there is just one component (=grayscale), then only luminance needs to be stored in the file
0374   //       thus everything related to chrominance need not to be written to the JPEG
0375   //       I still compute a few things, like quantization tables to avoid a complete code mess
0376
0377   // grayscale images can't be downsampled (because there are no Cb + Cr channels)
0378   if (!isRGB)
0379     downsample = false;
0380
0381   // wrapper for all output operations
0382   BitWriter bitWriter(output,tag);
0383
0384   // ////////////////////////////////////////
0385   // JFIF headers
0386   const uint8_t HeaderJfif[2+2+16] =
0387       { 0xFF,0xD8,         // SOI marker (start of image)
0388         0xFF,0xE0,         // JFIF APP0 tag
0389         0,16,              // length: 16 bytes (14 bytes payload + 2 bytes for this length field)
0390         'J','F','I','F',0, // JFIF identifier, zero-terminated
0391         1,1,               // JFIF version 1.1
0392         0,                 // no density units specified
0393         0,1,0,1,           // density: 1 pixel "per pixel" horizontally and vertically
0394         0,0 };             // no thumbnail (size 0 x 0)
0395   bitWriter << HeaderJfif;
0396
0397   // ////////////////////////////////////////
0398   // comment (optional)
0399   if (comment != 0/*nullptr*/)
0400   {
0401     // look for zero terminator
0402     uint16_t length = 0; // = strlen(comment);
0403     while (comment[length] != 0)
0404       length++;
0405
0406     // write COM marker
0407     bitWriter.addMarker(0xFE, 2+length); // block size is number of bytes (without zero terminator) + 2 bytes for this length field
0408     // ... and write the comment itself
0409     for (uint16_t i = 0; i < length; i++)
0410       bitWriter << comment[i];
0411   }
0412
0413   // ////////////////////////////////////////
0414   // adjust quantization tables to desired quality
0415
0416   // quality level must be in 1 ... 100
0417   uint16_t quality = clamp<uint16_t>(quality_, 1, 100);
0418   // convert to an internal JPEG quality factor, formula taken from libjpeg
0419   quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
0420
0421   uint8_t quantLuminance  [8*8];
0422   uint8_t quantChrominance[8*8];
0423   for (size_t i = 0; i < 8*8; i++)
0424   {
0425     int luminance   = (DefaultQuantLuminance  [ZigZagInv[i]] * quality + 50) / 100;
0426     int chrominance = (DefaultQuantChrominance[ZigZagInv[i]] * quality + 50) / 100;
0427
0428     // clamp to 1..255
0429     quantLuminance  [i] = clamp(luminance,   1, 255);
0430     quantChrominance[i] = clamp(chrominance, 1, 255);
0431   }
0432
0433   // write quantization tables
0434   bitWriter.addMarker(0xDB, 2 + (isRGB ? 2 : 1) * (1 + 8*8)); // length: 65 bytes per table + 2 bytes for this length field
0435                                                               // each table has 64 entries and is preceded by an ID byte
0436
0437   bitWriter   << 0x00 << quantLuminance;   // first  quantization table
0438   if (isRGB)
0439     bitWriter << 0x01 << quantChrominance; // second quantization table, only relevant for color images
0440
0441   // ////////////////////////////////////////
0442   // write image infos (SOF0 - start of frame)
0443   bitWriter.addMarker(0xC0, 2+6+3*numComponents); // length: 6 bytes general info + 3 per channel + 2 bytes for this length field
0444
0445   // 8 bits per channel
0446   bitWriter << 0x08
0447   // image dimensions (big-endian)
0448             << (height >> 8) << (height & 0xFF)
0449             << (width  >> 8) << (width  & 0xFF);
0450
0451   // sampling and quantization tables for each component
0452   bitWriter << numComponents;       // 1 component (grayscale, Y only) or 3 components (Y,Cb,Cr)
0453   for (uint16_t id = 1; id <= numComponents; id++)
0454     bitWriter <<  id                // component ID (Y=1, Cb=2, Cr=3)
0455     // bitmasks for sampling: highest 4 bits: horizontal, lowest 4 bits: vertical
0456               << (id == 1 && downsample ? 0x22 : 0x11) // 0x11 is default YCbCr 4:4:4 and 0x22 stands for YCbCr 4:2:0
0457               << (id == 1 ? 0 : 1); // use quantization table 0 for Y, table 1 for Cb and Cr
0458
0459   // ////////////////////////////////////////
0460   // Huffman tables
0461   // DHT marker - define Huffman tables
0462   bitWriter.addMarker(0xC4, isRGB ? (2+208+208) : (2+208));
0463                             // 2 bytes for the length field, store chrominance only if needed
0464                             //   1+16+12  for the DC luminance
0465                             //   1+16+162 for the AC luminance   (208 = 1+16+12 + 1+16+162)
0466                             //   1+16+12  for the DC chrominance
0467                             //   1+16+162 for the AC chrominance (208 = 1+16+12 + 1+16+162, same as above)
0468
0469   // store luminance's DC+AC Huffman table definitions
0470   bitWriter << 0x00 // highest 4 bits: 0 => DC, lowest 4 bits: 0 => Y (baseline)
0471             << DcLuminanceCodesPerBitsize
0472             << DcLuminanceValues;
0473   bitWriter << 0x10 // highest 4 bits: 1 => AC, lowest 4 bits: 0 => Y (baseline)
0474             << AcLuminanceCodesPerBitsize
0475             << AcLuminanceValues;
0476
0477   // compute actual Huffman code tables (see Jon's code for precalculated tables)
0478   BitCode huffmanLuminanceDC[256];
0479   BitCode huffmanLuminanceAC[256];
0480   generateHuffmanTable(DcLuminanceCodesPerBitsize, DcLuminanceValues, huffmanLuminanceDC);
0481   generateHuffmanTable(AcLuminanceCodesPerBitsize, AcLuminanceValues, huffmanLuminanceAC);
0482
0483   // chrominance is only relevant for color images
0484   BitCode huffmanChrominanceDC[256];
0485   BitCode huffmanChrominanceAC[256];
0486   if (isRGB)
0487   {
0488     // store luminance's DC+AC Huffman table definitions
0489     bitWriter << 0x01 // highest 4 bits: 0 => DC, lowest 4 bits: 1 => Cr,Cb (baseline)
0490               << DcChrominanceCodesPerBitsize
0491               << DcChrominanceValues;
0492     bitWriter << 0x11 // highest 4 bits: 1 => AC, lowest 4 bits: 1 => Cr,Cb (baseline)
0493               << AcChrominanceCodesPerBitsize
0494               << AcChrominanceValues;
0495
0496     // compute actual Huffman code tables (see Jon's code for precalculated tables)
0497     generateHuffmanTable(DcChrominanceCodesPerBitsize, DcChrominanceValues, huffmanChrominanceDC);
0498     generateHuffmanTable(AcChrominanceCodesPerBitsize, AcChrominanceValues, huffmanChrominanceAC);
0499   }
0500
0501   // ////////////////////////////////////////
0502   // start of scan (there is only a single scan for baseline JPEGs)
0503   bitWriter.addMarker(0xDA, 2+1+2*numComponents+3); // 2 bytes for the length field, 1 byte for number of components,
0504                                                     // then 2 bytes for each component and 3 bytes for spectral selection
0505
0506   // assign Huffman tables to each component
0507   bitWriter << numComponents;
0508   for (uint16_t id = 1; id <= numComponents; id++)
0509     // highest 4 bits: DC Huffman table, lowest 4 bits: AC Huffman table
0510     bitWriter << id << (id == 1 ? 0x00 : 0x11); // Y: tables 0 for DC and AC; Cb + Cr: tables 1 for DC and AC
0511
0512   // constant values for our baseline JPEGs (which have a single sequential scan)
0513   static const uint8_t Spectral[3] = { 0, 63, 0 }; // spectral selection: must be from 0 to 63; successive approximation must be 0
0514   bitWriter << Spectral;
0515
0516   // ////////////////////////////////////////
0517   // adjust quantization tables with AAN scaling factors to simplify DCT
0518   float scaledLuminance  [8*8];
0519   float scaledChrominance[8*8];
0520   for (size_t i = 0; i < 8*8; i++)
0521   {
0522     size_t row    = ZigZagInv[i] / 8; // same as ZigZagInv[i] >> 3
0523     size_t column = ZigZagInv[i] % 8; // same as ZigZagInv[i] &  7
0524
0525     // scaling constants for AAN DCT algorithm: AanScaleFactors[0] = 1, AanScaleFactors[k=1..7] = cos(k*PI/16) * sqrt(2)
0526     static const float AanScaleFactors[8] = { 1, 1.387039845f, 1.306562965f, 1.175875602f, 1, 0.785694958f, 0.541196100f, 0.275899379f };
0527     float factor = 1 / (AanScaleFactors[row] * AanScaleFactors[column] * 8);
0528     scaledLuminance  [ZigZagInv[i]] = factor / quantLuminance  [i];
0529     scaledChrominance[ZigZagInv[i]] = factor / quantChrominance[i];
0530     // if you really want JPEGs that are bitwise identical to Jon Olick's code then you need slightly different formulas (note: sqrt(8) = 2.828427125f)
0531     //static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; // line 240 of jo_jpeg.cpp
0532     //scaledLuminance  [ZigZagInv[i]] = 1 / (quantLuminance  [i] * aasf[row] * aasf[column]); // lines 266-267 of jo_jpeg.cpp
0533     //scaledChrominance[ZigZagInv[i]] = 1 / (quantChrominance[i] * aasf[row] * aasf[column]);
0534   }
0535
0536   // ////////////////////////////////////////
0537   // precompute JPEG codewords for quantized DCT
0538   BitCode  codewordsArray[2 * CodeWordLimit];          // note: quantized[i] is found at codewordsArray[quantized[i] + CodeWordLimit]
0539   BitCode* codewords = &codewordsArray[CodeWordLimit]; // allow negative indices, so quantized[i] is at codewords[quantized[i]]
0540   uint8_t numBits = 1; // each codeword has at least one bit (value == 0 is undefined)
0541   int32_t mask    = 1; // mask is always 2^numBits - 1, initial value 2^1-1 = 2-1 = 1
0542   for (int16_t value = 1; value < CodeWordLimit; value++)
0543   {
0544     // numBits = position of highest set bit (ignoring the sign)
0545     // mask    = (2^numBits) - 1
0546     if (value > mask) // one more bit ?
0547     {
0548       numBits++;
0549       mask = (mask << 1) | 1; // append a set bit
0550     }
0551     codewords[-value] = BitCode(mask - value, numBits); // note that I use a negative index => codewords[-value] = codewordsArray[CodeWordLimit  value]
0552     codewords[+value] = BitCode(       value, numBits);
0553   }
0554
0555   // just convert image data from void*
0556   const uint8_t* pixels = (const uint8_t*)pixels_;
0557
0558   // the next two variables are frequently used when checking for image borders
0559   const unsigned short maxWidth  = width  - 1; // "last row"
0560   const unsigned short maxHeight = height - 1; // "bottom line"
0561
0562   // process MCUs (minimum codes units) => image is subdivided into a grid of 8x8 or 16x16 tiles
0563   const unsigned short sampling = downsample ? 2 : 1; // 1x1 or 2x2 sampling
0564   const unsigned short mcuSize  = 8 * sampling;
0565
0566   // average color of the previous MCU
0567   int16_t lastYDC = 0, lastCbDC = 0, lastCrDC = 0;
0568   // convert from RGB to YCbCr
0569   float Y[8][8], Cb[8][8], Cr[8][8];
0570
0571   for (unsigned short mcuY = 0; mcuY < height; mcuY += mcuSize) // each step is either 8 or 16 (=mcuSize)
0572     for (unsigned short mcuX = 0; mcuX < width; mcuX += mcuSize)
0573     {
0574       // YCbCr 4:4:4 format: each MCU is a 8x8 block - the same applies to grayscale images, too
0575       // YCbCr 4:2:0 format: each MCU represents a 16x16 block, stored as 4x 8x8 Y-blocks plus 1x 8x8 Cb and 1x 8x8 Cr block)
0576       for (unsigned short blockY = 0; blockY < mcuSize; blockY += 8) // iterate once (YCbCr444 and grayscale) or twice (YCbCr420)
0577         for (unsigned short blockX = 0; blockX < mcuSize; blockX += 8)
0578         {
0579           // now we finally have an 8x8 block ...
0580           for (unsigned short deltaY = 0; deltaY < 8; deltaY++)
0581           {
0582             size_t column = minimum(uint16_t(mcuX + blockX)         , maxWidth); // must not exceed image borders, replicate last row/column if needed
0583             size_t row    = minimum(uint16_t(mcuY + blockY + deltaY), maxHeight);
0584             for (size_t deltaX = 0; deltaX < 8; deltaX++)
0585             {
0586               // find actual pixel position within the current image
0587               size_t pixelPos = row * int(width) + column; // the cast ensures that we don't run into multiplication overflows
0588               if (column < maxWidth)
0589                 column++;
0590
0591               // grayscale images have solely a Y channel which can be easily derived from the input pixel by shifting it by 128
0592               if (!isRGB)
0593               {
0594                 Y[deltaY][deltaX] = pixels[pixelPos] - 128.f;
0595                 continue;
0596               }
0597
0598               // RGB: 3 bytes per pixel (whereas grayscale images have only 1 byte per pixel)
0599               uint8_t r = pixels[3 * pixelPos    ];
0600               uint8_t g = pixels[3 * pixelPos + 1];
0601               uint8_t b = pixels[3 * pixelPos + 2];
0602
0603               Y   [deltaY][deltaX] = rgb2y (r, g, b) - 128; // again, the JPEG standard requires Y to be shifted by 128
0604               // YCbCr444 is easy - the more complex YCbCr420 has to be computed about 20 lines below in a second pass
0605               if (!downsample)
0606               {
0607                 Cb[deltaY][deltaX] = rgb2cb(r, g, b); // standard RGB-to-YCbCr conversion
0608                 Cr[deltaY][deltaX] = rgb2cr(r, g, b);
0609               }
0610             }
0611           }
0612
0613         // encode Y channel
0614         lastYDC = encodeBlock(bitWriter, Y, scaledLuminance, lastYDC, huffmanLuminanceDC, huffmanLuminanceAC, codewords);
0615         // Cb and Cr are encoded about 50 lines below
0616       }
0617
0618       // grayscale images don't need any Cb and Cr information
0619       if (!isRGB)
0620         continue;
0621
0622       // ////////////////////////////////////////
0623       // the following lines are only relevant for YCbCr420:
0624       // average/downsample chrominance of four pixels while respecting the image borders
0625       if (downsample)
0626         for (short deltaY = 7; downsample && deltaY >= 0; deltaY--) // iterating loop in reverse increases cache read efficiency
0627         {
0628           size_t row      = minimum(uint16_t(mcuY + 2*deltaY), maxHeight); // each deltaX/Y step covers a 2x2 area
0629           size_t column   =         mcuX;                        // column is updated inside next loop
0630           size_t pixelPos = (row * int(width) + column) * 3;     // numComponents = 3
0631
0632           // deltas (in bytes) to next row / column, must not exceed image borders
0633           size_t rowStep    = (row    < maxHeight) ? 3 * int(width) : 0; // always numComponents*width except for bottom    line
0634           size_t columnStep = (column < maxWidth ) ? 3              : 0; // always numComponents       except for rightmost pixel
0635
0636           for (short deltaX = 0; deltaX < 8; deltaX++)
0637           {
0638             // let's add all four samples (2x2 area)
0639             size_t right     = pixelPos + columnStep;
0640             size_t down      = pixelPos +              rowStep;
0641             size_t downRight = pixelPos + columnStep + rowStep;
0642
0643             // note: cast from 8 bits to >8 bits to avoid overflows when adding
0644             short r = short(pixels[pixelPos    ]) + pixels[right    ] + pixels[down    ] + pixels[downRight    ];
0645             short g = short(pixels[pixelPos + 1]) + pixels[right + 1] + pixels[down + 1] + pixels[downRight + 1];
0646             short b = short(pixels[pixelPos + 2]) + pixels[right + 2] + pixels[down + 2] + pixels[downRight + 2];
0647
0648             // convert to Cb and Cr
0649             Cb[deltaY][deltaX] = rgb2cb(r, g, b) / 4; // I still have to divide r,g,b by 4 to get their average values
0650             Cr[deltaY][deltaX] = rgb2cr(r, g, b) / 4; // it's a bit faster if done AFTER CbCr conversion
0651
0652             // step forward to next 2x2 area
0653             pixelPos += 2*3; // 2 pixels => 6 bytes (2*numComponents)
0654             column   += 2;
0655
0656             // reached right border ?
0657             if (column >= maxWidth)
0658             {
0659               columnStep = 0;
0660               pixelPos = ((row + 1) * int(width) - 1) * 3; // same as (row * width + maxWidth) * numComponents => current's row last pixel
0661             }
0662           }
0663         } // end of YCbCr420 code for Cb and Cr
0664
0665       // encode Cb and Cr
0666       lastCbDC = encodeBlock(bitWriter, Cb, scaledChrominance, lastCbDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords);
0667       lastCrDC = encodeBlock(bitWriter, Cr, scaledChrominance, lastCrDC, huffmanChrominanceDC, huffmanChrominanceAC, codewords);
0668     }
0669
0670   bitWriter.flush(); // now image is completely encoded, write any bits still left in the buffer
0671
0672   // ///////////////////////////
0673   // EOI marker
0674   bitWriter << 0xFF << 0xD9; // this marker has no length, therefore I can't use addMarker()
0675   return true;
0676 } // writeJpeg()
0677
0678 }}