crc/internal/crc_internal.h

0001 // Copyright 2022 The Abseil Authors.
0002 //
0003 // Licensed under the Apache License, Version 2.0 (the "License");
0004 // you may not use this file except in compliance with the License.
0005 // You may obtain a copy of the License at
0006 //
0007 //      https://www.apache.org/licenses/LICENSE-2.0
0008 //
0009 // Unless required by applicable law or agreed to in writing, software
0010 // distributed under the License is distributed on an "AS IS" BASIS,
0011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0012 // See the License for the specific language governing permissions and
0013 // limitations under the License.
0014
0015 #ifndef ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
0016 #define ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
0017
0018 #include <cstdint>
0019 #include <memory>
0020 #include <vector>
0021
0022 #include "absl/base/internal/raw_logging.h"
0023 #include "absl/crc/internal/crc.h"
0024
0025 namespace absl {
0026 ABSL_NAMESPACE_BEGIN
0027
0028 namespace crc_internal {
0029
0030 // Prefetch constants used in some Extend() implementations
0031 constexpr int kPrefetchHorizon = ABSL_CACHELINE_SIZE * 4;  // Prefetch this far
0032 // Shorter prefetch distance for smaller buffers
0033 constexpr int kPrefetchHorizonMedium = ABSL_CACHELINE_SIZE * 1;
0034 static_assert(kPrefetchHorizon >= 64, "CRCPrefetchHorizon less than loop len");
0035
0036 // We require the Scramble() function:
0037 //  - to be reversible (Unscramble() must exist)
0038 //  - to be non-linear in the polynomial's Galois field (so the CRC of a
0039 //    scrambled CRC is not linearly affected by the scrambled CRC, even if
0040 //    using the same polynomial)
0041 //  - not to be its own inverse.  Preferably, if X=Scramble^N(X) and N!=0, then
0042 //    N is large.
0043 //  - to be fast.
0044 //  - not to change once defined.
0045 // We introduce non-linearity in two ways:
0046 //     Addition of a constant.
0047 //         - The carries introduce non-linearity; we use bits of an irrational
0048 //           (phi) to make it unlikely that we introduce no carries.
0049 //     Rotate by a constant number of bits.
0050 //         - We use floor(degree/2)+1, which does not divide the degree, and
0051 //           splits the bits nearly evenly, which makes it less likely the
0052 //           halves will be the same or one will be all zeroes.
0053 // We do both things to improve the chances of non-linearity in the face of
0054 // bit patterns with low numbers of bits set, while still being fast.
0055 // Below is the constant that we add.  The bits are the first 128 bits of the
0056 // fractional part of phi, with a 1 ored into the bottom bit to maximize the
0057 // cycle length of repeated adds.
0058 constexpr uint64_t kScrambleHi = (static_cast<uint64_t>(0x4f1bbcdcU) << 32) |
0059                                  static_cast<uint64_t>(0xbfa53e0aU);
0060 constexpr uint64_t kScrambleLo = (static_cast<uint64_t>(0xf9ce6030U) << 32) |
0061                                  static_cast<uint64_t>(0x2e76e41bU);
0062
0063 class CRCImpl : public CRC {  // Implementation of the abstract class CRC
0064  public:
0065   using Uint32By256 = uint32_t[256];
0066
0067   CRCImpl() = default;
0068   ~CRCImpl() override = default;
0069
0070   // The internal version of CRC::New().
0071   static CRCImpl* NewInternal();
0072
0073   // Fill in a table for updating a CRC by one word of 'word_size' bytes
0074   // [last_lo, last_hi] contains the answer if the last bit in the word
0075   // is set.
0076   static void FillWordTable(uint32_t poly, uint32_t last, int word_size,
0077                             Uint32By256* t);
0078
0079   // Build the table for extending by zeroes, returning the number of entries.
0080   // For a in {1, 2, ..., ZEROES_BASE-1}, b in {0, 1, 2, 3, ...},
0081   // entry j=a-1+(ZEROES_BASE-1)*b
0082   // contains a polynomial Pi such that multiplying
0083   // a CRC by Pi mod P, where P is the CRC polynomial, is equivalent to
0084   // appending a*2**(ZEROES_BASE_LG*b) zero bytes to the original string.
0085   static int FillZeroesTable(uint32_t poly, Uint32By256* t);
0086
0087   virtual void InitTables() = 0;
0088
0089  private:
0090   CRCImpl(const CRCImpl&) = delete;
0091   CRCImpl& operator=(const CRCImpl&) = delete;
0092 };
0093
0094 // This is the 32-bit implementation.  It handles all sizes from 8 to 32.
0095 class CRC32 : public CRCImpl {
0096  public:
0097   CRC32() = default;
0098   ~CRC32() override = default;
0099
0100   void Extend(uint32_t* crc, const void* bytes, size_t length) const override;
0101   void ExtendByZeroes(uint32_t* crc, size_t length) const override;
0102   void Scramble(uint32_t* crc) const override;
0103   void Unscramble(uint32_t* crc) const override;
0104   void UnextendByZeroes(uint32_t* crc, size_t length) const override;
0105
0106   void InitTables() override;
0107
0108  private:
0109   // Common implementation guts for ExtendByZeroes and UnextendByZeroes().
0110   //
0111   // zeroes_table is a table as returned by FillZeroesTable(), containing
0112   // polynomials representing CRCs of strings-of-zeros of various lengths,
0113   // and which can be combined by polynomial multiplication.  poly_table is
0114   // a table of CRC byte extension values.  These tables are determined by
0115   // the generator polynomial.
0116   //
0117   // These will be set to reverse_zeroes_ and reverse_table0_ for Unextend, and
0118   // CRC32::zeroes_ and CRC32::table0_ for Extend.
0119   static void ExtendByZeroesImpl(uint32_t* crc, size_t length,
0120                                  const uint32_t zeroes_table[256],
0121                                  const uint32_t poly_table[256]);
0122
0123   uint32_t table0_[256];  // table of byte extensions
0124   uint32_t zeroes_[256];  // table of zero extensions
0125
0126   // table of 4-byte extensions shifted by 12 bytes of zeroes
0127   uint32_t table_[4][256];
0128
0129   // Reverse lookup tables, using the alternate polynomial used by
0130   // UnextendByZeroes().
0131   uint32_t reverse_table0_[256];  // table of reverse byte extensions
0132   uint32_t reverse_zeroes_[256];  // table of reverse zero extensions
0133
0134   CRC32(const CRC32&) = delete;
0135   CRC32& operator=(const CRC32&) = delete;
0136 };
0137
0138 // Helpers
0139
0140 // Return a bit mask containing len 1-bits.
0141 // Requires 0 < len <= sizeof(T)
0142 template <typename T>
0143 T MaskOfLength(int len) {
0144   // shift 2 by len-1 rather than 1 by len because shifts of wordsize
0145   // are undefined.
0146   return (T(2) << (len - 1)) - 1;
0147 }
0148
0149 // Rotate low-order "width" bits of "in" right by "r" bits,
0150 // setting other bits in word to arbitrary values.
0151 template <typename T>
0152 T RotateRight(T in, int width, int r) {
0153   return (in << (width - r)) | ((in >> r) & MaskOfLength<T>(width - r));
0154 }
0155
0156 // RoundUp<N>(p) returns the lowest address >= p aligned to an N-byte
0157 // boundary.  Requires that N is a power of 2.
0158 template <int alignment>
0159 const uint8_t* RoundUp(const uint8_t* p) {
0160   static_assert((alignment & (alignment - 1)) == 0, "alignment is not 2^n");
0161   constexpr uintptr_t mask = alignment - 1;
0162   const uintptr_t as_uintptr = reinterpret_cast<uintptr_t>(p);
0163   return reinterpret_cast<const uint8_t*>((as_uintptr + mask) & ~mask);
0164 }
0165
0166 // Return a newly created CRC32AcceleratedX86ARMCombined if we can use Intel's
0167 // or ARM's CRC acceleration for a given polynomial.  Return nullptr otherwise.
0168 CRCImpl* TryNewCRC32AcceleratedX86ARMCombined();
0169
0170 // Return all possible hardware accelerated implementations. For testing only.
0171 std::vector<std::unique_ptr<CRCImpl>> NewCRC32AcceleratedX86ARMCombinedAll();
0172
0173 }  // namespace crc_internal
0174 ABSL_NAMESPACE_END
0175 }  // namespace absl
0176
0177 #endif  // ABSL_CRC_INTERNAL_CRC_INTERNAL_H_