Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-15 09:01:39

0001 /**********************************************************************
0002   Copyright(c) 2011-2015 Intel Corporation All rights reserved.
0003 
0004   Redistribution and use in source and binary forms, with or without
0005   modification, are permitted provided that the following conditions
0006   are met:
0007     * Redistributions of source code must retain the above copyright
0008       notice, this list of conditions and the following disclaimer.
0009     * Redistributions in binary form must reproduce the above copyright
0010       notice, this list of conditions and the following disclaimer in
0011       the documentation and/or other materials provided with the
0012       distribution.
0013     * Neither the name of Intel Corporation nor the names of its
0014       contributors may be used to endorse or promote products derived
0015       from this software without specific prior written permission.
0016 
0017   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0018   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0019   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0020   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0021   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0022   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0023   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0024   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0025   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0026   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0027   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0028 **********************************************************************/
0029 
0030 #ifndef _ERASURE_CODE_H_
0031 #define _ERASURE_CODE_H_
0032 
0033 /**
0034  *  @file erasure_code.h
0035  *  @brief Interface to functions supporting erasure code encode and decode.
0036  *
0037  *  This file defines the interface to optimized functions used in erasure
0038  *  codes.  Encode and decode of erasures in GF(2^8) are made by calculating the
0039  *  dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
0040  *  set of coefficients.  Values for the coefficients are determined by the type
0041  *  of erasure code.  Using a general dot product means that any sequence of
0042  *  coefficients may be used including erasure codes based on random
0043  *  coefficients.
0044  *  Multiple versions of dot product are supplied to calculate 1-6 output
0045  *  vectors in one pass.
0046  *  Base GF multiply and divide functions can be sped up by defining
0047  *  GF_LARGE_TABLES at the expense of memory size.
0048  *
0049  */
0050 
0051 #include "gf_vect_mul.h"
0052 
0053 #ifdef __cplusplus
0054 extern "C" {
0055 #endif
0056 
0057 /**
0058  * @brief Initialize tables for fast Erasure Code encode and decode.
0059  *
0060  * Generates the expanded tables needed for fast encode or decode for erasure
0061  * codes on blocks of data.  32 bytes are generated for each input coefficient.
0062  *
0063  * @param k      The number of vector sources or rows in the generator matrix
0064  *               for coding.
0065  * @param rows   The number of output vectors to concurrently encode/decode.
0066  * @param a      Pointer to sets of arrays of input coefficients used to encode
0067  *               or decode data.
0068  * @param gftbls Pointer to start of space for concatenated output tables
0069  *               generated from input coefficients.  Must be of size 32*k*rows.
0070  * @returns none
0071  */
0072 
0073 void
0074 ec_init_tables(int k, int rows, unsigned char *a, unsigned char *gftbls);
0075 
0076 /**
0077  * @brief Initialize tables for fast Erasure Code encode and decode, runs baseline version.
0078  *
0079  * Baseline version of ec_init_tables() with same parameters.
0080  */
0081 
0082 void
0083 ec_init_tables_base(int k, int rows, unsigned char *a, unsigned char *gftbls);
0084 
0085 /**
0086  * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
0087  *
0088  * Given a list of source data blocks, generate one or multiple blocks of
0089  * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
0090  * suitable set of coefficients, this function will perform the fast generation
0091  * or decoding of Reed-Solomon type erasure codes.
0092  *
0093  * This function determines what instruction sets are enabled and
0094  * selects the appropriate version at runtime.
0095  *
0096  * @param len    Length of each block of data (vector) of source or dest data.
0097  * @param k      The number of vector sources or rows in the generator matrix
0098  *       for coding.
0099  * @param rows   The number of output vectors to concurrently encode/decode.
0100  * @param gftbls Pointer to array of input tables generated from coding
0101  *       coefficients in ec_init_tables(). Must be of size 32*k*rows
0102  * @param data   Array of pointers to source input buffers.
0103  * @param coding Array of pointers to coded output buffers.
0104  * @returns none
0105  */
0106 
0107 void
0108 ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
0109                unsigned char **coding);
0110 
0111 /**
0112  * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
0113  *
0114  * Baseline version of ec_encode_data() with same parameters.
0115  */
0116 void
0117 ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
0118                     unsigned char **dest);
0119 
0120 /**
0121  * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate
0122  * version.
0123  *
0124  * Given one source data block, update one or multiple blocks of encoded data as
0125  * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
0126  * coefficients, this function will perform the fast generation or decoding of
0127  * Reed-Solomon type erasure codes from one input source at a time.
0128  *
0129  * This function determines what instruction sets are enabled and selects the
0130  * appropriate version at runtime.
0131  *
0132  * @param len    Length of each block of data (vector) of source or dest data.
0133  * @param k      The number of vector sources or rows in the generator matrix
0134  *       for coding.
0135  * @param rows   The number of output vectors to concurrently encode/decode.
0136  * @param vec_i  The vector index corresponding to the single input source.
0137  * @param g_tbls Pointer to array of input tables generated from coding
0138  *       coefficients in ec_init_tables(). Must be of size 32*k*rows
0139  * @param data   Pointer to single input source used to update output parity.
0140  * @param coding Array of pointers to coded output buffers.
0141  * @returns none
0142  */
0143 void
0144 ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
0145                       unsigned char *data, unsigned char **coding);
0146 
0147 /**
0148  * @brief Generate update for encode or decode of erasure codes from single source.
0149  *
0150  * Baseline version of ec_encode_data_update().
0151  */
0152 
0153 void
0154 ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
0155                            unsigned char *data, unsigned char **dest);
0156 
0157 /**
0158  * @brief GF(2^8) vector dot product, runs baseline version.
0159  *
0160  * Does a GF(2^8) dot product across each byte of the input array and a constant
0161  * set of coefficients to produce each byte of the output. Can be used for
0162  * erasure coding encode and decode. Function requires pre-calculation of a
0163  * 32*vlen byte constant array based on the input coefficients.
0164  *
0165  * @param len    Length of each vector in bytes. Must be >= 16.
0166  * @param vlen   Number of vector sources.
0167  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
0168  *               on the array of input coefficients. Only elements 32*CONST*j + 1
0169  *               of this array are used, where j = (0, 1, 2...) and CONST is the
0170  *               number of elements in the array of input coefficients. The
0171  *               elements used correspond to the original input coefficients.
0172  * @param src    Array of pointers to source inputs.
0173  * @param dest   Pointer to destination data array.
0174  * @returns none
0175  */
0176 
0177 void
0178 gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0179                       unsigned char *dest);
0180 
0181 /**
0182  * @brief GF(2^8) vector dot product, runs appropriate version.
0183  *
0184  * Does a GF(2^8) dot product across each byte of the input array and a constant
0185  * set of coefficients to produce each byte of the output. Can be used for
0186  * erasure coding encode and decode. Function requires pre-calculation of a
0187  * 32*vlen byte constant array based on the input coefficients.
0188  *
0189  * This function determines what instruction sets are enabled and
0190  * selects the appropriate version at runtime.
0191  *
0192  * @param len    Length of each vector in bytes. Must be >= 32.
0193  * @param vlen   Number of vector sources.
0194  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
0195  *               on the array of input coefficients.
0196  * @param src    Array of pointers to source inputs.
0197  * @param dest   Pointer to destination data array.
0198  * @returns none
0199  */
0200 
0201 void
0202 gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0203                  unsigned char *dest);
0204 
0205 /**
0206  * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
0207  *
0208  * Does a GF(2^8) multiply across each byte of input source with expanded
0209  * constant and add to destination array. Can be used for erasure coding encode
0210  * and decode update when only one source is available at a time. Function
0211  * requires pre-calculation of a 32*vec byte constant array based on the input
0212  * coefficients.
0213  *
0214  * This function determines what instruction sets are enabled and selects the
0215  * appropriate version at runtime.
0216  *
0217  * @param len    Length of each vector in bytes. Must be >= 64.
0218  * @param vec    The number of vector sources or rows in the generator matrix
0219  *       for coding.
0220  * @param vec_i  The vector index corresponding to the single input source.
0221  * @param gftbls Pointer to array of input tables generated from coding
0222  *       coefficients in ec_init_tables(). Must be of size 32*vec.
0223  * @param src    Pointer to source input array.
0224  * @param dest   Pointer to destination data array.
0225  * @returns none
0226  */
0227 
0228 void
0229 gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0230             unsigned char *dest);
0231 
0232 /**
0233  * @brief GF(2^8) vector multiply accumulate, baseline version.
0234  *
0235  * Baseline version of gf_vect_mad() with same parameters.
0236  */
0237 
0238 void
0239 gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
0240                  unsigned char *dest);
0241 
0242 // x86 only
0243 #if defined(__i386__) || defined(__x86_64__)
0244 
0245 /**
0246  * @brief Generate or decode erasure codes on blocks of data.
0247  *
0248  * Arch specific version of ec_encode_data() with same parameters.
0249  * @requires SSE4.1
0250  */
0251 void
0252 ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
0253                    unsigned char **coding);
0254 
0255 /**
0256  * @brief Generate or decode erasure codes on blocks of data.
0257  *
0258  * Arch specific version of ec_encode_data() with same parameters.
0259  * @requires AVX
0260  */
0261 void
0262 ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
0263                    unsigned char **coding);
0264 
0265 /**
0266  * @brief Generate or decode erasure codes on blocks of data.
0267  *
0268  * Arch specific version of ec_encode_data() with same parameters.
0269  * @requires AVX2
0270  */
0271 void
0272 ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
0273                     unsigned char **coding);
0274 
0275 /**
0276  * @brief Generate update for encode or decode of erasure codes from single source.
0277  *
0278  * Arch specific version of ec_encode_data_update() with same parameters.
0279  * @requires SSE4.1
0280  */
0281 
0282 void
0283 ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
0284                           unsigned char *data, unsigned char **coding);
0285 
0286 /**
0287  * @brief Generate update for encode or decode of erasure codes from single source.
0288  *
0289  * Arch specific version of ec_encode_data_update() with same parameters.
0290  * @requires AVX
0291  */
0292 
0293 void
0294 ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
0295                           unsigned char *data, unsigned char **coding);
0296 
0297 /**
0298  * @brief Generate update for encode or decode of erasure codes from single source.
0299  *
0300  * Arch specific version of ec_encode_data_update() with same parameters.
0301  * @requires AVX2
0302  */
0303 
0304 void
0305 ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
0306                            unsigned char *data, unsigned char **coding);
0307 
0308 /**
0309  * @brief GF(2^8) vector dot product.
0310  *
0311  * Does a GF(2^8) dot product across each byte of the input array and a constant
0312  * set of coefficients to produce each byte of the output. Can be used for
0313  * erasure coding encode and decode. Function requires pre-calculation of a
0314  * 32*vlen byte constant array based on the input coefficients.
0315  * @requires SSE4.1
0316  *
0317  * @param len    Length of each vector in bytes. Must be >= 16.
0318  * @param vlen   Number of vector sources.
0319  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
0320  *               on the array of input coefficients.
0321  * @param src    Array of pointers to source inputs.
0322  * @param dest   Pointer to destination data array.
0323  * @returns none
0324  */
0325 
0326 void
0327 gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0328                      unsigned char *dest);
0329 
0330 /**
0331  * @brief GF(2^8) vector dot product.
0332  *
0333  * Does a GF(2^8) dot product across each byte of the input array and a constant
0334  * set of coefficients to produce each byte of the output. Can be used for
0335  * erasure coding encode and decode. Function requires pre-calculation of a
0336  * 32*vlen byte constant array based on the input coefficients.
0337  * @requires AVX
0338  *
0339  * @param len    Length of each vector in bytes. Must be >= 16.
0340  * @param vlen   Number of vector sources.
0341  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
0342  *               on the array of input coefficients.
0343  * @param src    Array of pointers to source inputs.
0344  * @param dest   Pointer to destination data array.
0345  * @returns none
0346  */
0347 
0348 void
0349 gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0350                      unsigned char *dest);
0351 
0352 /**
0353  * @brief GF(2^8) vector dot product.
0354  *
0355  * Does a GF(2^8) dot product across each byte of the input array and a constant
0356  * set of coefficients to produce each byte of the output. Can be used for
0357  * erasure coding encode and decode. Function requires pre-calculation of a
0358  * 32*vlen byte constant array based on the input coefficients.
0359  * @requires AVX2
0360  *
0361  * @param len    Length of each vector in bytes. Must be >= 32.
0362  * @param vlen   Number of vector sources.
0363  * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
0364  *               on the array of input coefficients.
0365  * @param src    Array of pointers to source inputs.
0366  * @param dest   Pointer to destination data array.
0367  * @returns none
0368  */
0369 
0370 void
0371 gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0372                       unsigned char *dest);
0373 
0374 /**
0375  * @brief GF(2^8) vector dot product with two outputs.
0376  *
0377  * Vector dot product optimized to calculate two outputs at a time. Does two
0378  * GF(2^8) dot products across each byte of the input array and two constant
0379  * sets of coefficients to produce each byte of the outputs. Can be used for
0380  * erasure coding encode and decode. Function requires pre-calculation of a
0381  * 2*32*vlen byte constant array based on the two sets of input coefficients.
0382  * @requires SSE4.1
0383  *
0384  * @param len    Length of each vector in bytes. Must be >= 16.
0385  * @param vlen   Number of vector sources.
0386  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
0387  *               based on the array of input coefficients.
0388  * @param src    Array of pointers to source inputs.
0389  * @param dest   Array of pointers to destination data buffers.
0390  * @returns none
0391  */
0392 
0393 void
0394 gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0395                       unsigned char **dest);
0396 
0397 /**
0398  * @brief GF(2^8) vector dot product with two outputs.
0399  *
0400  * Vector dot product optimized to calculate two outputs at a time. Does two
0401  * GF(2^8) dot products across each byte of the input array and two constant
0402  * sets of coefficients to produce each byte of the outputs. Can be used for
0403  * erasure coding encode and decode. Function requires pre-calculation of a
0404  * 2*32*vlen byte constant array based on the two sets of input coefficients.
0405  * @requires AVX
0406  *
0407  * @param len    Length of each vector in bytes. Must be >= 16.
0408  * @param vlen   Number of vector sources.
0409  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
0410  *               based on the array of input coefficients.
0411  * @param src    Array of pointers to source inputs.
0412  * @param dest   Array of pointers to destination data buffers.
0413  * @returns none
0414  */
0415 
0416 void
0417 gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0418                       unsigned char **dest);
0419 
0420 /**
0421  * @brief GF(2^8) vector dot product with two outputs.
0422  *
0423  * Vector dot product optimized to calculate two outputs at a time. Does two
0424  * GF(2^8) dot products across each byte of the input array and two constant
0425  * sets of coefficients to produce each byte of the outputs. Can be used for
0426  * erasure coding encode and decode. Function requires pre-calculation of a
0427  * 2*32*vlen byte constant array based on the two sets of input coefficients.
0428  * @requires AVX2
0429  *
0430  * @param len    Length of each vector in bytes. Must be >= 32.
0431  * @param vlen   Number of vector sources.
0432  * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
0433  *               based on the array of input coefficients.
0434  * @param src    Array of pointers to source inputs.
0435  * @param dest   Array of pointers to destination data buffers.
0436  * @returns none
0437  */
0438 
0439 void
0440 gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0441                        unsigned char **dest);
0442 
0443 /**
0444  * @brief GF(2^8) vector dot product with three outputs.
0445  *
0446  * Vector dot product optimized to calculate three outputs at a time. Does three
0447  * GF(2^8) dot products across each byte of the input array and three constant
0448  * sets of coefficients to produce each byte of the outputs. Can be used for
0449  * erasure coding encode and decode. Function requires pre-calculation of a
0450  * 3*32*vlen byte constant array based on the three sets of input coefficients.
0451  * @requires SSE4.1
0452  *
0453  * @param len    Length of each vector in bytes. Must be >= 16.
0454  * @param vlen   Number of vector sources.
0455  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
0456  *               based on the array of input coefficients.
0457  * @param src    Array of pointers to source inputs.
0458  * @param dest   Array of pointers to destination data buffers.
0459  * @returns none
0460  */
0461 
0462 void
0463 gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0464                       unsigned char **dest);
0465 
0466 /**
0467  * @brief GF(2^8) vector dot product with three outputs.
0468  *
0469  * Vector dot product optimized to calculate three outputs at a time. Does three
0470  * GF(2^8) dot products across each byte of the input array and three constant
0471  * sets of coefficients to produce each byte of the outputs. Can be used for
0472  * erasure coding encode and decode. Function requires pre-calculation of a
0473  * 3*32*vlen byte constant array based on the three sets of input coefficients.
0474  * @requires AVX
0475  *
0476  * @param len    Length of each vector in bytes. Must be >= 16.
0477  * @param vlen   Number of vector sources.
0478  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
0479  *               based on the array of input coefficients.
0480  * @param src    Array of pointers to source inputs.
0481  * @param dest   Array of pointers to destination data buffers.
0482  * @returns none
0483  */
0484 
0485 void
0486 gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0487                       unsigned char **dest);
0488 
0489 /**
0490  * @brief GF(2^8) vector dot product with three outputs.
0491  *
0492  * Vector dot product optimized to calculate three outputs at a time. Does three
0493  * GF(2^8) dot products across each byte of the input array and three constant
0494  * sets of coefficients to produce each byte of the outputs. Can be used for
0495  * erasure coding encode and decode. Function requires pre-calculation of a
0496  * 3*32*vlen byte constant array based on the three sets of input coefficients.
0497  * @requires AVX2
0498  *
0499  * @param len    Length of each vector in bytes. Must be >= 32.
0500  * @param vlen   Number of vector sources.
0501  * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
0502  *               based on the array of input coefficients.
0503  * @param src    Array of pointers to source inputs.
0504  * @param dest   Array of pointers to destination data buffers.
0505  * @returns none
0506  */
0507 
0508 void
0509 gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0510                        unsigned char **dest);
0511 
0512 /**
0513  * @brief GF(2^8) vector dot product with four outputs.
0514  *
0515  * Vector dot product optimized to calculate four outputs at a time. Does four
0516  * GF(2^8) dot products across each byte of the input array and four constant
0517  * sets of coefficients to produce each byte of the outputs. Can be used for
0518  * erasure coding encode and decode. Function requires pre-calculation of a
0519  * 4*32*vlen byte constant array based on the four sets of input coefficients.
0520  * @requires SSE4.1
0521  *
0522  * @param len    Length of each vector in bytes. Must be >= 16.
0523  * @param vlen   Number of vector sources.
0524  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
0525  *               based on the array of input coefficients.
0526  * @param src    Array of pointers to source inputs.
0527  * @param dest   Array of pointers to destination data buffers.
0528  * @returns none
0529  */
0530 
0531 void
0532 gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0533                       unsigned char **dest);
0534 
0535 /**
0536  * @brief GF(2^8) vector dot product with four outputs.
0537  *
0538  * Vector dot product optimized to calculate four outputs at a time. Does four
0539  * GF(2^8) dot products across each byte of the input array and four constant
0540  * sets of coefficients to produce each byte of the outputs. Can be used for
0541  * erasure coding encode and decode. Function requires pre-calculation of a
0542  * 4*32*vlen byte constant array based on the four sets of input coefficients.
0543  * @requires AVX
0544  *
0545  * @param len    Length of each vector in bytes. Must be >= 16.
0546  * @param vlen   Number of vector sources.
0547  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
0548  *               based on the array of input coefficients.
0549  * @param src    Array of pointers to source inputs.
0550  * @param dest   Array of pointers to destination data buffers.
0551  * @returns none
0552  */
0553 
0554 void
0555 gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0556                       unsigned char **dest);
0557 
0558 /**
0559  * @brief GF(2^8) vector dot product with four outputs.
0560  *
0561  * Vector dot product optimized to calculate four outputs at a time. Does four
0562  * GF(2^8) dot products across each byte of the input array and four constant
0563  * sets of coefficients to produce each byte of the outputs. Can be used for
0564  * erasure coding encode and decode. Function requires pre-calculation of a
0565  * 4*32*vlen byte constant array based on the four sets of input coefficients.
0566  * @requires AVX2
0567  *
0568  * @param len    Length of each vector in bytes. Must be >= 32.
0569  * @param vlen   Number of vector sources.
0570  * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
0571  *               based on the array of input coefficients.
0572  * @param src    Array of pointers to source inputs.
0573  * @param dest   Array of pointers to destination data buffers.
0574  * @returns none
0575  */
0576 
0577 void
0578 gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0579                        unsigned char **dest);
0580 
0581 /**
0582  * @brief GF(2^8) vector dot product with five outputs.
0583  *
0584  * Vector dot product optimized to calculate five outputs at a time. Does five
0585  * GF(2^8) dot products across each byte of the input array and five constant
0586  * sets of coefficients to produce each byte of the outputs. Can be used for
0587  * erasure coding encode and decode. Function requires pre-calculation of a
0588  * 5*32*vlen byte constant array based on the five sets of input coefficients.
0589  * @requires SSE4.1
0590  *
0591  * @param len    Length of each vector in bytes. Must be >= 16.
0592  * @param vlen   Number of vector sources.
0593  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
0594  *               based on the array of input coefficients.
0595  * @param src    Array of pointers to source inputs.
0596  * @param dest   Array of pointers to destination data buffers.
0597  * @returns none
0598  */
0599 
0600 void
0601 gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0602                       unsigned char **dest);
0603 
0604 /**
0605  * @brief GF(2^8) vector dot product with five outputs.
0606  *
0607  * Vector dot product optimized to calculate five outputs at a time. Does five
0608  * GF(2^8) dot products across each byte of the input array and five constant
0609  * sets of coefficients to produce each byte of the outputs. Can be used for
0610  * erasure coding encode and decode. Function requires pre-calculation of a
0611  * 5*32*vlen byte constant array based on the five sets of input coefficients.
0612  * @requires AVX
0613  *
0614  * @param len    Length of each vector in bytes. Must be >= 16.
0615  * @param vlen   Number of vector sources.
0616  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
0617  *               based on the array of input coefficients.
0618  * @param src    Array of pointers to source inputs.
0619  * @param dest   Array of pointers to destination data buffers.
0620  * @returns none
0621  */
0622 
0623 void
0624 gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0625                       unsigned char **dest);
0626 
0627 /**
0628  * @brief GF(2^8) vector dot product with five outputs.
0629  *
0630  * Vector dot product optimized to calculate five outputs at a time. Does five
0631  * GF(2^8) dot products across each byte of the input array and five constant
0632  * sets of coefficients to produce each byte of the outputs. Can be used for
0633  * erasure coding encode and decode. Function requires pre-calculation of a
0634  * 5*32*vlen byte constant array based on the five sets of input coefficients.
0635  * @requires AVX2
0636  *
0637  * @param len    Length of each vector in bytes. Must be >= 32.
0638  * @param vlen   Number of vector sources.
0639  * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
0640  *               based on the array of input coefficients.
0641  * @param src    Array of pointers to source inputs.
0642  * @param dest   Array of pointers to destination data buffers.
0643  * @returns none
0644  */
0645 
0646 void
0647 gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0648                        unsigned char **dest);
0649 
0650 /**
0651  * @brief GF(2^8) vector dot product with six outputs.
0652  *
0653  * Vector dot product optimized to calculate six outputs at a time. Does six
0654  * GF(2^8) dot products across each byte of the input array and six constant
0655  * sets of coefficients to produce each byte of the outputs. Can be used for
0656  * erasure coding encode and decode. Function requires pre-calculation of a
0657  * 6*32*vlen byte constant array based on the six sets of input coefficients.
0658  * @requires SSE4.1
0659  *
0660  * @param len    Length of each vector in bytes. Must be >= 16.
0661  * @param vlen   Number of vector sources.
0662  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
0663  *               based on the array of input coefficients.
0664  * @param src    Array of pointers to source inputs.
0665  * @param dest   Array of pointers to destination data buffers.
0666  * @returns none
0667  */
0668 
0669 void
0670 gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0671                       unsigned char **dest);
0672 
0673 /**
0674  * @brief GF(2^8) vector dot product with six outputs.
0675  *
0676  * Vector dot product optimized to calculate six outputs at a time. Does six
0677  * GF(2^8) dot products across each byte of the input array and six constant
0678  * sets of coefficients to produce each byte of the outputs. Can be used for
0679  * erasure coding encode and decode. Function requires pre-calculation of a
0680  * 6*32*vlen byte constant array based on the six sets of input coefficients.
0681  * @requires AVX
0682  *
0683  * @param len    Length of each vector in bytes. Must be >= 16.
0684  * @param vlen   Number of vector sources.
0685  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
0686  *               based on the array of input coefficients.
0687  * @param src    Array of pointers to source inputs.
0688  * @param dest   Array of pointers to destination data buffers.
0689  * @returns none
0690  */
0691 
0692 void
0693 gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0694                       unsigned char **dest);
0695 
0696 /**
0697  * @brief GF(2^8) vector dot product with six outputs.
0698  *
0699  * Vector dot product optimized to calculate six outputs at a time. Does six
0700  * GF(2^8) dot products across each byte of the input array and six constant
0701  * sets of coefficients to produce each byte of the outputs. Can be used for
0702  * erasure coding encode and decode. Function requires pre-calculation of a
0703  * 6*32*vlen byte constant array based on the six sets of input coefficients.
0704  * @requires AVX2
0705  *
0706  * @param len    Length of each vector in bytes. Must be >= 32.
0707  * @param vlen   Number of vector sources.
0708  * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
0709  *               based on the array of input coefficients.
0710  * @param src    Array of pointers to source inputs.
0711  * @param dest   Array of pointers to destination data buffers.
0712  * @returns none
0713  */
0714 
0715 void
0716 gf_6vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, unsigned char **src,
0717                        unsigned char **dest);
0718 
0719 /**
0720  * @brief GF(2^8) vector multiply accumulate, arch specific version.
0721  *
0722  * Arch specific version of gf_vect_mad() with same parameters.
0723  * @requires SSE4.1
0724  */
0725 
0726 void
0727 gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0728                 unsigned char *dest);
0729 /**
0730  * @brief GF(2^8) vector multiply accumulate, arch specific version.
0731  *
0732  * Arch specific version of gf_vect_mad() with same parameters.
0733  * @requires AVX
0734  */
0735 
0736 void
0737 gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0738                 unsigned char *dest);
0739 
0740 /**
0741  * @brief GF(2^8) vector multiply accumulate, arch specific version.
0742  *
0743  * Arch specific version of gf_vect_mad() with same parameters.
0744  * @requires AVX2
0745  */
0746 
0747 void
0748 gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0749                  unsigned char *dest);
0750 
0751 /**
0752  * @brief GF(2^8) vector multiply with 2 accumulate.  SSE version.
0753  *
0754  * Does a GF(2^8) multiply across each byte of input source with expanded
0755  * constants and add to destination arrays. Can be used for erasure coding
0756  * encode and decode update when only one source is available at a
0757  * time. Function requires pre-calculation of a 32*vec byte constant array based
0758  * on the input coefficients.
0759  * @requires SSE4.1
0760  *
0761  * @param len    Length of each vector in bytes. Must be >= 32.
0762  * @param vec    The number of vector sources or rows in the generator matrix
0763  *       for coding.
0764  * @param vec_i  The vector index corresponding to the single input source.
0765  * @param gftbls Pointer to array of input tables generated from coding
0766  *       coefficients in ec_init_tables(). Must be of size 32*vec.
0767  * @param src    Pointer to source input array.
0768  * @param dest   Array of pointers to destination input/outputs.
0769  * @returns none
0770  */
0771 
0772 void
0773 gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0774                  unsigned char **dest);
0775 
0776 /**
0777  * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse().
0778  * @requires AVX
0779  */
0780 void
0781 gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0782                  unsigned char **dest);
0783 /**
0784  * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse().
0785  * @requires AVX2
0786  */
0787 void
0788 gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0789                   unsigned char **dest);
0790 
0791 /**
0792  * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
0793  *
0794  * Does a GF(2^8) multiply across each byte of input source with expanded
0795  * constants and add to destination arrays. Can be used for erasure coding
0796  * encode and decode update when only one source is available at a
0797  * time. Function requires pre-calculation of a 32*vec byte constant array based
0798  * on the input coefficients.
0799  * @requires SSE4.1
0800  *
0801  * @param len    Length of each vector in bytes. Must be >= 32.
0802  * @param vec    The number of vector sources or rows in the generator matrix
0803  *       for coding.
0804  * @param vec_i  The vector index corresponding to the single input source.
0805  * @param gftbls Pointer to array of input tables generated from coding
0806  *       coefficients in ec_init_tables(). Must be of size 32*vec.
0807  * @param src    Pointer to source input array.
0808  * @param dest   Array of pointers to destination input/outputs.
0809  * @returns none
0810  */
0811 
0812 void
0813 gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0814                  unsigned char **dest);
0815 
0816 /**
0817  * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
0818  * @requires AVX
0819  */
0820 void
0821 gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0822                  unsigned char **dest);
0823 
0824 /**
0825  * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
0826  * @requires AVX2
0827  */
0828 void
0829 gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0830                   unsigned char **dest);
0831 
0832 /**
0833  * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
0834  *
0835  * Does a GF(2^8) multiply across each byte of input source with expanded
0836  * constants and add to destination arrays. Can be used for erasure coding
0837  * encode and decode update when only one source is available at a
0838  * time. Function requires pre-calculation of a 32*vec byte constant array based
0839  * on the input coefficients.
0840  * @requires SSE4.1
0841  *
0842  * @param len    Length of each vector in bytes. Must be >= 32.
0843  * @param vec    The number of vector sources or rows in the generator matrix
0844  *       for coding.
0845  * @param vec_i  The vector index corresponding to the single input source.
0846  * @param gftbls Pointer to array of input tables generated from coding
0847  *       coefficients in ec_init_tables(). Must be of size 32*vec.
0848  * @param src    Pointer to source input array.
0849  * @param dest   Array of pointers to destination input/outputs.
0850  * @returns none
0851  */
0852 
0853 void
0854 gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0855                  unsigned char **dest);
0856 
0857 /**
0858  * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
0859  * @requires AVX
0860  */
0861 void
0862 gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0863                  unsigned char **dest);
0864 /**
0865  * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
0866  * @requires AVX2
0867  */
0868 void
0869 gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0870                   unsigned char **dest);
0871 
0872 /**
0873  * @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
0874  * @requires SSE4.1
0875  */
0876 void
0877 gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0878                  unsigned char **dest);
0879 
0880 /**
0881  * @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
0882  * @requires AVX
0883  */
0884 void
0885 gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0886                  unsigned char **dest);
0887 /**
0888  * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
0889  * @requires AVX2
0890  */
0891 void
0892 gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0893                   unsigned char **dest);
0894 
0895 /**
0896  * @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
0897  * @requires SSE4.1
0898  */
0899 void
0900 gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0901                  unsigned char **dest);
0902 /**
0903  * @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
0904  * @requires AVX
0905  */
0906 void
0907 gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0908                  unsigned char **dest);
0909 
0910 /**
0911  * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
0912  * @requires AVX2
0913  */
0914 void
0915 gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
0916                   unsigned char **dest);
0917 
0918 #endif
0919 
0920 /**********************************************************************
0921  * The remaining functions are library support routines used in GF(2^8) operations.
0922  */
0923 
0924 /**
0925  * @brief Single element GF(2^8) multiply.
0926  *
0927  * @param a  Multiplicand a
0928  * @param b  Multiplicand b
0929  * @returns  Product of a and b in GF(2^8)
0930  */
0931 
0932 unsigned char
0933 gf_mul(unsigned char a, unsigned char b);
0934 
0935 /**
0936  * @brief Single element GF(2^8) inverse.
0937  *
0938  * @param a  Input element
0939  * @returns  Field element b such that a x b = {1}
0940  */
0941 
0942 unsigned char
0943 gf_inv(unsigned char a);
0944 
0945 /**
0946  * @brief Generate a matrix of coefficients to be used for encoding.
0947  *
0948  * Vandermonde matrix example of encoding coefficients where high portion of
0949  * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
0950  * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
0951  * erasure encoding, but it does not guarantee that every sub-matrix is
0952  * invertible. For large pairs of m and k it is possible to find cases where
0953  * the decode matrix chosen from sources and parity is not invertible. Users may
0954  * want to adjust for certain pairs m and k. If m and k satisfy one of the
0955  * following conditions, no adjustment is required:
0956  *
0957  * - k <= 3
0958  * - k = 4, m <= 25
0959  * - k = 5, m <= 10
0960  * - k <= 21, m-k = 4
0961  * - m - k <= 3.
0962  *
0963  * @param a  [m x k] array to hold coefficients
0964  * @param m  number of rows in matrix corresponding to srcs + parity.
0965  * @param k  number of columns in matrix corresponding to srcs.
0966  * @returns  none
0967  */
0968 
0969 void
0970 gf_gen_rs_matrix(unsigned char *a, int m, int k);
0971 
0972 /**
0973  * @brief Generate a Cauchy matrix of coefficients to be used for encoding.
0974  *
0975  * Cauchy matrix example of encoding coefficients where high portion of matrix
0976  * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
0977  * i:{0,k-1} j:{k,m-1}.  Any sub-matrix of a Cauchy matrix should be invertible.
0978  *
0979  * @param a  [m x k] array to hold coefficients
0980  * @param m  number of rows in matrix corresponding to srcs + parity.
0981  * @param k  number of columns in matrix corresponding to srcs.
0982  * @returns  none
0983  */
0984 
0985 void
0986 gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
0987 
0988 /**
0989  * @brief Invert a matrix in GF(2^8)
0990  *
0991  * Attempts to construct an n x n inverse of the input matrix. Returns non-zero
0992  * if singular. Will always destroy input matrix in process.
0993  *
0994  * @param in  input matrix, destroyed by invert process
0995  * @param out output matrix such that [in] x [out] = [I] - identity matrix
0996  * @param n   size of matrix [nxn]
0997  * @returns 0 on success, non-zero if the input matrix is singular
0998  */
0999 
1000 int
1001 gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
1002 
1003 /*************************************************************/
1004 
1005 #ifdef __cplusplus
1006 }
1007 #endif
1008 
1009 #endif //_ERASURE_CODE_H_