root/Math/CholeskyDecomp.h

0001 // @(#)root/smatrix:$Id$
0002 // Author: M. Schiller    2009
0003
0004 #ifndef ROOT_Math_CholeskyDecomp
0005 #define ROOT_Math_CholeskyDecomp
0006
0007 /** @file
0008  * header file containing the templated implementation of matrix inversion
0009  * routines for use with ROOT's SMatrix classes (symmetric positive
0010  * definite case)
0011  *
0012  * @author Manuel Schiller
0013  * @date Aug 29 2008
0014  *    initial release inside LHCb
0015  * @date May 7 2009
0016  * factored code to provide a nice Cholesky decomposition class, along
0017  * with separate methods for solving a single linear system and to
0018  * obtain the inverse matrix from the decomposition
0019  * @date July 15th 2013
0020  *    provide a version of that class which works if the dimension of the
0021  *    problem is only known at run time
0022  * @date September 30th 2013
0023  *    provide routines to access the result of the decomposition L and its
0024  *    inverse
0025  * @date January 20th 2024
0026  *    CholeskyDecompGenDim allowed copying and moving, but did not provide copy
0027  *    and move constructors or assignment operators, resulting in incorrect
0028  *    code and potential memory corruption, if somebody tried to copy/move a
0029  *    CholeskyDecompGenDim instance. Since we're in a C++17 world now, use the
0030  *    excellent movable std::vector<F> to get rid of explicit memory management
0031  *    in CholeskyDecompGenDim, and thus provide correct code for copy/move
0032  *    construction and assignment. Moreover, the provided releaseWorkspace
0033  *    method allows reuse of the allocated vector inside a CholeskyDecompGenDim
0034  *    to reduce the cost of repeated matrix decomposition.
0035  */
0036
0037 #include <cmath>
0038 #include <vector>
0039 #include <algorithm>
0040
0041 namespace ROOT {
0042
0043 namespace Math {
0044
0045 /// helpers for CholeskyDecomp
0046 namespace CholeskyDecompHelpers {
0047 // forward decls
0048 template <class F, class M>
0049 struct _decomposerGenDim;
0050 template <class F, unsigned N, class M>
0051 struct _decomposer;
0052 template <class F, class M>
0053 struct _inverterGenDim;
0054 template <class F, unsigned N, class M>
0055 struct _inverter;
0056 template <class F, class V>
0057 struct _solverGenDim;
0058 template <class F, unsigned N, class V>
0059 struct _solver;
0060 template <typename G>
0061 class PackedArrayAdapter;
0062 } // namespace CholeskyDecompHelpers
0063
0064 /// class to compute the Cholesky decomposition of a matrix
0065 /** class to compute the Cholesky decomposition of a symmetric
0066  * positive definite matrix
0067  *
0068  * provides routines to check if the decomposition succeeded (i.e. if
0069  * matrix is positive definite and non-singular), to solve a linear
0070  * system for the given matrix and to obtain its inverse
0071  *
0072  * the actual functionality is implemented in templated helper
0073  * classes which have specializations for dimensions N = 1 to 6
0074  * to achieve a gain in speed for common matrix sizes
0075  *
0076  * usage example:
0077  * @code
0078  * // let m be a symmetric positive definite SMatrix (use type float
0079  * // for internal computations, matrix size is 4x4)
0080  * CholeskyDecomp<float, 4> decomp(m);
0081  * // check if the decomposition succeeded
0082  * if (!decomp) {
0083  *   std::cerr << "decomposition failed!" << std::endl;
0084  * } else {
0085  *   // let rhs be a vector; we seek a vector x such that m * x = rhs
0086  *   decomp.Solve(rhs);
0087  *   // rhs now contains the solution we are looking for
0088  *
0089  *   // obtain the inverse of m, put it into m itself
0090  *   decomp.Invert(m);
0091  * }
0092  * @endcode
0093  */
0094 template <class F, unsigned N>
0095 class CholeskyDecomp {
0096 private:
0097    /// lower triangular matrix L
0098    /** lower triangular matrix L, packed storage, with diagonal
0099     * elements pre-inverted */
0100    F fL[N * (N + 1) / 2];
0101    /// flag indicating a successful decomposition
0102    bool fOk;
0103
0104 public:
0105    /// perform a Cholesky decomposition
0106    /** perform a Cholesky decomposition of a symmetric positive
0107     * definite matrix m
0108     *
0109     * this is the constructor to uses with an SMatrix (and objects
0110     * that behave like an SMatrix in terms of using
0111     * operator()(int i, int j) for access to elements)
0112     */
0113    template <class M>
0114    CholeskyDecomp(const M &m) : fOk(false)
0115    {
0116       using CholeskyDecompHelpers::_decomposer;
0117       fOk = _decomposer<F, N, M>()(fL, m);
0118    }
0119
0120    /// perform a Cholesky decomposition
0121    /** perform a Cholesky decomposition of a symmetric positive
0122     * definite matrix m
0123     *
0124     * this is the constructor to use in special applications where
0125     * plain arrays are used
0126     *
0127     * NOTE: the matrix is given in packed representation, matrix
0128     * element m(i,j) (j <= i) is supposed to be in array element
0129     * (i * (i + 1)) / 2 + j
0130     */
0131    template <typename G>
0132    CholeskyDecomp(G *m) : fOk(false)
0133    {
0134       using CholeskyDecompHelpers::_decomposer;
0135       using CholeskyDecompHelpers::PackedArrayAdapter;
0136       fOk = _decomposer<F, N, PackedArrayAdapter<G>>()(fL, PackedArrayAdapter<G>(m));
0137    }
0138
0139    /// returns true if decomposition was successful
0140    /** @returns true if decomposition was successful */
0141    bool ok() const { return fOk; }
0142    /// returns true if decomposition was successful
0143    /** @returns true if decomposition was successful */
0144    operator bool() const { return fOk; }
0145
0146    /** @brief solves a linear system for the given right hand side
0147     *
0148     * Note that you can use both SVector classes and plain arrays for
0149     * rhs. (Make sure that the sizes match!). It will work with any vector
0150     * implementing the operator [i]
0151     *
0152     * @returns if the decomposition was successful
0153     */
0154    template <class V>
0155    bool Solve(V &rhs) const
0156    {
0157       using CholeskyDecompHelpers::_solver;
0158       if (fOk)
0159          _solver<F, N, V>()(rhs, fL);
0160       return fOk;
0161    }
0162
0163    /** @brief place the inverse into m
0164     *
0165     * This is the method to use with an SMatrix.
0166     *
0167     * @returns if the decomposition was successful
0168     */
0169    template <class M>
0170    bool Invert(M &m) const
0171    {
0172       using CholeskyDecompHelpers::_inverter;
0173       if (fOk)
0174          _inverter<F, N, M>()(m, fL);
0175       return fOk;
0176    }
0177
0178    /** @brief place the inverse into m
0179     *
0180     * This is the method to use with a plain array.
0181     *
0182     * @returns if the decomposition was successful
0183     *
0184     * NOTE: the matrix is given in packed representation, matrix
0185     * element m(i,j) (j <= i) is supposed to be in array element
0186     * (i * (i + 1)) / 2 + j
0187     */
0188    template <typename G>
0189    bool Invert(G *m) const
0190    {
0191       using CholeskyDecompHelpers::_inverter;
0192       using CholeskyDecompHelpers::PackedArrayAdapter;
0193       if (fOk) {
0194          PackedArrayAdapter<G> adapted(m);
0195          _inverter<F, N, PackedArrayAdapter<G>>()(adapted, fL);
0196       }
0197       return fOk;
0198    }
0199
0200    /** @brief obtain the decomposed matrix L
0201     *
0202     * This is the method to use with a plain array.
0203     *
0204     * @returns if the decomposition was successful
0205     */
0206    template <class M>
0207    bool getL(M &m) const
0208    {
0209       if (!fOk)
0210          return false;
0211       for (unsigned i = 0; i < N; ++i) {
0212          // zero upper half of matrix
0213          for (unsigned j = i + 1; j < N; ++j)
0214             m(i, j) = F(0);
0215          // copy the rest
0216          for (unsigned j = 0; j <= i; ++j)
0217             m(i, j) = fL[i * (i + 1) / 2 + j];
0218          // adjust the diagonal - we save 1/L(i, i) in that position, so
0219          // convert to what caller expects
0220          m(i, i) = F(1) / m(i, i);
0221       }
0222       return true;
0223    }
0224
0225    /** @brief obtain the decomposed matrix L
0226     *
0227     * @returns if the decomposition was successful
0228     *
0229     * NOTE: the matrix is given in packed representation, matrix
0230     * element m(i,j) (j <= i) is supposed to be in array element
0231     * (i * (i + 1)) / 2 + j
0232     */
0233    template <typename G>
0234    bool getL(G *m) const
0235    {
0236       if (!fOk)
0237          return false;
0238       // copy L
0239       for (unsigned i = 0; i < (N * (N + 1)) / 2; ++i)
0240          m[i] = fL[i];
0241       // adjust diagonal - we save 1/L(i, i) in that position, so convert to
0242       // what caller expects
0243       for (unsigned i = 0; i < N; ++i)
0244          m[(i * (i + 1)) / 2 + i] = F(1) / fL[(i * (i + 1)) / 2 + i];
0245       return true;
0246    }
0247
0248    /** @brief obtain the inverse of the decomposed matrix L
0249     *
0250     * This is the method to use with a plain array.
0251     *
0252     * @returns if the decomposition was successful
0253     */
0254    template <class M>
0255    bool getLi(M &m) const
0256    {
0257       if (!fOk)
0258          return false;
0259       for (unsigned i = 0; i < N; ++i) {
0260          // zero lower half of matrix
0261          for (unsigned j = i + 1; j < N; ++j)
0262             m(j, i) = F(0);
0263          // copy the rest
0264          for (unsigned j = 0; j <= i; ++j)
0265             m(j, i) = fL[i * (i + 1) / 2 + j];
0266       }
0267       // invert the off-diagonal part of what we just copied
0268       for (unsigned i = 1; i < N; ++i) {
0269          for (unsigned j = 0; j < i; ++j) {
0270             typename M::value_type tmp = F(0);
0271             for (unsigned k = i; k-- > j;)
0272                tmp -= m(k, i) * m(j, k);
0273             m(j, i) = tmp * m(i, i);
0274          }
0275       }
0276       return true;
0277    }
0278
0279    /** @brief obtain the inverse of the decomposed matrix L
0280     *
0281     * @returns if the decomposition was successful
0282     *
0283     * NOTE: the matrix is given in packed representation, matrix
0284     * element m(j,i) (j <= i) is supposed to be in array element
0285     * (i * (i + 1)) / 2 + j
0286     */
0287    template <typename G>
0288    bool getLi(G *m) const
0289    {
0290       if (!fOk)
0291          return false;
0292       // copy L
0293       for (unsigned i = 0; i < (N * (N + 1)) / 2; ++i)
0294          m[i] = fL[i];
0295       // invert the off-diagonal part of what we just copied
0296       G *base1 = &m[1];
0297       for (unsigned i = 1; i < N; base1 += ++i) {
0298          for (unsigned j = 0; j < i; ++j) {
0299             G tmp = F(0);
0300             const G *base2 = &m[(i * (i - 1)) / 2];
0301             for (unsigned k = i; k-- > j; base2 -= k)
0302                tmp -= base1[k] * base2[j];
0303             base1[j] = tmp * base1[i];
0304          }
0305       }
0306       return true;
0307    }
0308 };
0309
0310 /// class to compute the Cholesky decomposition of a matrix
0311 /** class to compute the Cholesky decomposition of a symmetric
0312  * positive definite matrix when the dimensionality of the problem is not known
0313  * at compile time
0314  *
0315  * provides routines to check if the decomposition succeeded (i.e. if
0316  * matrix is positive definite and non-singular), to solve a linear
0317  * system for the given matrix and to obtain its inverse
0318  *
0319  * the actual functionality is implemented in templated helper
0320  * classes which have specializations for dimensions N = 1 to 6
0321  * to achieve a gain in speed for common matrix sizes
0322  *
0323  * usage example:
0324  * @code
0325  * // let m be a symmetric positive definite SMatrix (use type float
0326  * // for internal computations, matrix size is 4x4)
0327  * CholeskyDecompGenDim<float> decomp(4, m);
0328  * // check if the decomposition succeeded
0329  * if (!decomp) {
0330  *   std::cerr << "decomposition failed!" << std::endl;
0331  * } else {
0332  *   // let rhs be a vector; we seek a vector x such that m * x = rhs
0333  *   decomp.Solve(rhs);
0334  *   // rhs now contains the solution we are looking for
0335  *
0336  *   // obtain the inverse of m, put it into m itself
0337  *   decomp.Invert(m);
0338  * }
0339  * @endcode
0340  */
0341 template <class F>
0342 class CholeskyDecompGenDim {
0343 private:
0344    /** @brief dimensionality
0345     * dimensionality of the problem */
0346    unsigned fN = 0;
0347    /// lower triangular matrix L
0348    /** lower triangular matrix L, packed storage, with diagonal
0349     * elements pre-inverted */
0350    std::vector<F> fL;
0351    /// flag indicating a successful decomposition
0352    bool fOk = false;
0353
0354 public:
0355    /// perform a Cholesky decomposition
0356    /** perform a Cholesky decomposition of a symmetric positive
0357     * definite matrix m
0358     *
0359     * this is the constructor to uses with an SMatrix (and objects
0360     * that behave like an SMatrix in terms of using
0361     * operator()(int i, int j) for access to elements)
0362     *
0363     * The optional wksp parameter that can be used to avoid (re-)allocations
0364     * on repeated decompositions of the same size (by passing a workspace of
0365     * size N * (N + 1), or reusing one returned from releaseWorkspace by a
0366     * previous decomposition).
0367     */
0368    template <class M>
0369    CholeskyDecompGenDim(unsigned N, const M &m, std::vector<F> wksp = {}) : fN(N), fL(std::move(wksp))
0370    {
0371       using CholeskyDecompHelpers::_decomposerGenDim;
0372       if (fL.size() < fN * (fN + 1) / 2)
0373          fL.resize(fN * (fN + 1) / 2);
0374       fOk = _decomposerGenDim<F, M>()(fL.data(), m, fN);
0375    }
0376
0377    /// perform a Cholesky decomposition
0378    /** perform a Cholesky decomposition of a symmetric positive
0379     * definite matrix m
0380     *
0381     * this is the constructor to use in special applications where
0382     * plain arrays are used
0383     *
0384     * NOTE: the matrix is given in packed representation, matrix
0385     * element m(i,j) (j <= i) is supposed to be in array element
0386     * (i * (i + 1)) / 2 + j
0387     *
0388     * The optional wksp parameter that can be used to avoid (re-)allocations
0389     * on repeated decompositions of the same size (by passing a workspace of
0390     * size N * (N + 1), or reusing one returned from releaseWorkspace by a
0391     * previous decomposition).
0392     */
0393    template <typename G>
0394    CholeskyDecompGenDim(unsigned N, G *m, std::vector<F> wksp = {}) : fN(N), fL(std::move(wksp))
0395    {
0396       using CholeskyDecompHelpers::_decomposerGenDim;
0397       using CholeskyDecompHelpers::PackedArrayAdapter;
0398       if (fL.size() < fN * (fN + 1) / 2)
0399          fL.resize(fN * (fN + 1) / 2);
0400       fOk = _decomposerGenDim<F, PackedArrayAdapter<G>>()(fL.data(), PackedArrayAdapter<G>(m), fN);
0401    }
0402
0403    /// release the workspace of the decomposition for reuse
0404    /** release the workspace of the decomposition for reuse
0405     *
0406     * If you use CholeskyDecompGenDim repeatedly with the same size N, it is
0407     * possible to avoid repeatedly allocating the internal workspace. The
0408     * constructors take an optional wksp argument, and this routine can be
0409     * called to get the workspace out of a decomposition when you are done
0410     * with it.
0411     *
0412     * Please note that once you call releaseWorkspace, you cannot do further
0413     * operations on the decomposition object (and this is indicated by having
0414     * it return false when you call ok()).
0415     *
0416     * Here is an example snippet of (pseudo-)code which illustrates the use of
0417     * releaseWorkspace for inverting the covariance matrices of a number of
0418     * items (which must all be of the same size):
0419     *
0420     * @code
0421     * std::vector<float> wksp;
0422     * for (const auto& item: items) {
0423     *     CholeskyDecompGenDim decomp(item.cov().size(), item.cov(),
0424     *                                 std::move(wksp));
0425     *     if (decomp.ok()) {
0426     *         decomp.Invert(item.invcov());
0427     *     }
0428     *     wksp = std::move(decomp.releaseWorkspace());
0429     * }
0430     * @endcode
0431     *
0432     * In the above code snippet, only the first CholeskyDecompGenDim
0433     * constructor call will allocate memory. Subsequent calls in the loop will
0434     * reuse the buffer from the first iteration.
0435     */
0436    std::vector<F> releaseWorkspace()
0437    {
0438       fN = 0;
0439       fOk = false;
0440       return std::move(fL);
0441    }
0442
0443    /// returns true if decomposition was successful
0444    /** @returns true if decomposition was successful */
0445    bool ok() const { return fOk; }
0446    /// returns true if decomposition was successful
0447    /** @returns true if decomposition was successful */
0448    operator bool() const { return fOk; }
0449
0450    /** @brief solves a linear system for the given right hand side
0451     *
0452     * Note that you can use both SVector classes and plain arrays for
0453     * rhs. (Make sure that the sizes match!). It will work with any vector
0454     * implementing the operator [i]
0455     *
0456     * @returns if the decomposition was successful
0457     */
0458    template <class V>
0459    bool Solve(V &rhs) const
0460    {
0461       using CholeskyDecompHelpers::_solverGenDim;
0462       if (fOk)
0463          _solverGenDim<F, V>()(rhs, fL.data(), fN);
0464       return fOk;
0465    }
0466
0467    /** @brief place the inverse into m
0468     *
0469     * This is the method to use with an SMatrix.
0470     *
0471     * @returns if the decomposition was successful
0472     */
0473    template <class M>
0474    bool Invert(M &m) const
0475    {
0476       using CholeskyDecompHelpers::_inverterGenDim;
0477       if (fOk) {
0478          auto wksp = fL;
0479          _inverterGenDim<F, M>()(m, fN, wksp.data());
0480       }
0481       return fOk;
0482    }
0483
0484    /** @brief place the inverse into m
0485     *
0486     * This is the method to use with a plain array.
0487     *
0488     * @returns if the decomposition was successful
0489     *
0490     * NOTE: the matrix is given in packed representation, matrix
0491     * element m(i,j) (j <= i) is supposed to be in array element
0492     * (i * (i + 1)) / 2 + j
0493     */
0494    template <typename G>
0495    bool Invert(G *m) const
0496    {
0497       using CholeskyDecompHelpers::_inverterGenDim;
0498       using CholeskyDecompHelpers::PackedArrayAdapter;
0499       if (fOk) {
0500          auto wksp = fL;
0501          PackedArrayAdapter<G> adapted(m);
0502          _inverterGenDim<F, PackedArrayAdapter<G>>()(adapted, fN, wksp.data());
0503       }
0504       return fOk;
0505    }
0506
0507    /** @brief obtain the decomposed matrix L
0508     *
0509     * This is the method to use with a plain array.
0510     *
0511     * @returns if the decomposition was successful
0512     */
0513    template <class M>
0514    bool getL(M &m) const
0515    {
0516       if (!fOk)
0517          return false;
0518       const F *L = fL.data();
0519       for (unsigned i = 0; i < fN; ++i) {
0520          // zero upper half of matrix
0521          for (unsigned j = i + 1; j < fN; ++j)
0522             m(i, j) = F(0);
0523          // copy the rest
0524          for (unsigned j = 0; j <= i; ++j)
0525             m(i, j) = L[i * (i + 1) / 2 + j];
0526          // adjust the diagonal - we save 1/L(i, i) in that position, so
0527          // convert to what caller expects
0528          m(i, i) = F(1) / m(i, i);
0529       }
0530       return true;
0531    }
0532
0533    /** @brief obtain the decomposed matrix L
0534     *
0535     * @returns if the decomposition was successful
0536     *
0537     * NOTE: the matrix is given in packed representation, matrix
0538     * element m(i,j) (j <= i) is supposed to be in array element
0539     * (i * (i + 1)) / 2 + j
0540     */
0541    template <typename G>
0542    bool getL(G *m) const
0543    {
0544       if (!fOk)
0545          return false;
0546       const F *L = fL.data();
0547       // copy L
0548       for (unsigned i = 0; i < (fN * (fN + 1)) / 2; ++i)
0549          m[i] = L[i];
0550       // adjust diagonal - we save 1/L(i, i) in that position, so convert to
0551       // what caller expects
0552       for (unsigned i = 0; i < fN; ++i)
0553          m[(i * (i + 1)) / 2 + i] = F(1) / L[(i * (i + 1)) / 2 + i];
0554       return true;
0555    }
0556
0557    /** @brief obtain the inverse of the decomposed matrix L
0558     *
0559     * This is the method to use with a plain array.
0560     *
0561     * @returns if the decomposition was successful
0562     */
0563    template <class M>
0564    bool getLi(M &m) const
0565    {
0566       if (!fOk)
0567          return false;
0568       const F *L = fL.data();
0569       for (unsigned i = 0; i < fN; ++i) {
0570          // zero lower half of matrix
0571          for (unsigned j = i + 1; j < fN; ++j)
0572             m(j, i) = F(0);
0573          // copy the rest
0574          for (unsigned j = 0; j <= i; ++j)
0575             m(j, i) = L[i * (i + 1) / 2 + j];
0576       }
0577       // invert the off-diagonal part of what we just copied
0578       for (unsigned i = 1; i < fN; ++i) {
0579          for (unsigned j = 0; j < i; ++j) {
0580             typename M::value_type tmp = F(0);
0581             for (unsigned k = i; k-- > j;)
0582                tmp -= m(k, i) * m(j, k);
0583             m(j, i) = tmp * m(i, i);
0584          }
0585       }
0586       return true;
0587    }
0588
0589    /** @brief obtain the inverse of the decomposed matrix L
0590     *
0591     * @returns if the decomposition was successful
0592     *
0593     * NOTE: the matrix is given in packed representation, matrix
0594     * element m(j,i) (j <= i) is supposed to be in array element
0595     * (i * (i + 1)) / 2 + j
0596     */
0597    template <typename G>
0598    bool getLi(G *m) const
0599    {
0600       if (!fOk)
0601          return false;
0602       const F *L = fL.data();
0603       // copy L
0604       for (unsigned i = 0; i < (fN * (fN + 1)) / 2; ++i)
0605          m[i] = L[i];
0606       // invert the off-diagonal part of what we just copied
0607       G *base1 = &m[1];
0608       for (unsigned i = 1; i < fN; base1 += ++i) {
0609          for (unsigned j = 0; j < i; ++j) {
0610             G tmp = F(0);
0611             const G *base2 = &m[(i * (i - 1)) / 2];
0612             for (unsigned k = i; k-- > j; base2 -= k)
0613                tmp -= base1[k] * base2[j];
0614             base1[j] = tmp * base1[i];
0615          }
0616       }
0617       return true;
0618    }
0619 };
0620
0621 namespace CholeskyDecompHelpers {
0622 /// adapter for packed arrays (to SMatrix indexing conventions)
0623 template <typename G>
0624 class PackedArrayAdapter {
0625 private:
0626    G *fArr; ///< pointer to first array element
0627 public:
0628    /// constructor
0629    PackedArrayAdapter(G *arr) : fArr(arr) {}
0630    /// read access to elements (make sure that j <= i)
0631    const G operator()(unsigned i, unsigned j) const { return fArr[((i * (i + 1)) / 2) + j]; }
0632    /// write access to elements (make sure that j <= i)
0633    G &operator()(unsigned i, unsigned j) { return fArr[((i * (i + 1)) / 2) + j]; }
0634 };
0635 /// struct to do a Cholesky decomposition (general dimensionality)
0636 template <class F, class M>
0637 struct _decomposerGenDim {
0638    /// method to do the decomposition
0639    /** @returns if the decomposition was successful */
0640    bool operator()(F *dst, const M &src, unsigned N) const
0641    {
0642       // perform Cholesky decomposition of matrix: M = L L^T
0643       // only thing that can go wrong: trying to take square
0644       // root of negative number or zero (matrix is
0645       // ill-conditioned or singular in these cases)
0646
0647       // element L(i,j) is at array position (i * (i+1)) / 2 + j
0648
0649       // quirk: we may need to invert L later anyway, so we can
0650       // invert elements on diagonal straight away (we only
0651       // ever need their reciprocals!)
0652
0653       // cache starting address of rows of L for speed reasons
0654       F *base1 = &dst[0];
0655       for (unsigned i = 0; i < N; base1 += ++i) {
0656          F tmpdiag = F(0.0); // for element on diagonal
0657          // calculate off-diagonal elements
0658          F *base2 = &dst[0];
0659          for (unsigned j = 0; j < i; base2 += ++j) {
0660             F tmp = src(i, j);
0661             for (unsigned k = j; k--;)
0662                tmp -= base1[k] * base2[k];
0663             base1[j] = tmp *= base2[j];
0664             // keep track of contribution to element on diagonal
0665             tmpdiag += tmp * tmp;
0666          }
0667          // keep truncation error small
0668          tmpdiag = src(i, i) - tmpdiag;
0669          // check if positive definite
0670          if (tmpdiag <= F(0.0))
0671             return false;
0672          else
0673             base1[i] = std::sqrt(F(1.0) / tmpdiag);
0674       }
0675       return true;
0676    }
0677 };
0678
0679 /// struct to do a Cholesky decomposition
0680 template <class F, unsigned N, class M>
0681 struct _decomposer {
0682    /// method to do the decomposition
0683    /** @returns if the decomposition was successful */
0684    bool operator()(F *dst, const M &src) const { return _decomposerGenDim<F, M>()(dst, src, N); }
0685 };
0686
0687 /// struct to obtain the inverse from a Cholesky decomposition (general dimensionality)
0688 template <class F, class M>
0689 struct _inverterGenDim {
0690    /// method to do the inversion
0691    void operator()(M &dst, unsigned N, F *wksp) const
0692    {
0693       // invert off-diagonal part of matrix
0694       F *base1 = &wksp[1];
0695       for (unsigned i = 1; i < N; base1 += ++i) {
0696          for (unsigned j = 0; j < i; ++j) {
0697             F tmp = F(0.0);
0698             const F *base2 = &wksp[(i * (i - 1)) / 2];
0699             for (unsigned k = i; k-- > j; base2 -= k)
0700                tmp -= base1[k] * base2[j];
0701             base1[j] = tmp * base1[i];
0702          }
0703       }
0704
0705       // Li = L^(-1) formed, now calculate M^(-1) = Li^T Li
0706       for (unsigned i = N; i--;) {
0707          for (unsigned j = i + 1; j--;) {
0708             F tmp = F(0.0);
0709             base1 = &wksp[(N * (N - 1)) / 2];
0710             for (unsigned k = N; k-- > i; base1 -= k)
0711                tmp += base1[i] * base1[j];
0712             dst(i, j) = tmp;
0713          }
0714       }
0715    }
0716 };
0717
0718 /// struct to obtain the inverse from a Cholesky decomposition
0719 template <class F, unsigned N, class M>
0720 struct _inverter {
0721    /// method to do the inversion
0722    void operator()(M &dst, const F *src) const
0723    {
0724       // make working copy
0725       F wksp[N * (N + 1) / 2];
0726       std::copy(src, src + ((N * (N + 1)) / 2), wksp);
0727       return _inverterGenDim<F, M>()(dst, N, wksp);
0728    }
0729 };
0730
0731 /// struct to solve a linear system using its Cholesky decomposition (generalised dimensionality)
0732 template <class F, class V>
0733 struct _solverGenDim {
0734    /// method to solve the linear system
0735    void operator()(V &rhs, const F *l, unsigned N) const
0736    {
0737       // solve Ly = rhs
0738       for (unsigned k = 0; k < N; ++k) {
0739          const unsigned base = (k * (k + 1)) / 2;
0740          F sum = F(0.0);
0741          for (unsigned i = k; i--;)
0742             sum += rhs[i] * l[base + i];
0743          // elements on diagonal are pre-inverted!
0744          rhs[k] = (rhs[k] - sum) * l[base + k];
0745       }
0746       // solve L^Tx = y
0747       for (unsigned k = N; k--;) {
0748          F sum = F(0.0);
0749          for (unsigned i = N; --i > k;)
0750             sum += rhs[i] * l[(i * (i + 1)) / 2 + k];
0751          // elements on diagonal are pre-inverted!
0752          rhs[k] = (rhs[k] - sum) * l[(k * (k + 1)) / 2 + k];
0753       }
0754    }
0755 };
0756
0757 /// struct to solve a linear system using its Cholesky decomposition
0758 template <class F, unsigned N, class V>
0759 struct _solver {
0760    /// method to solve the linear system
0761    void operator()(V &rhs, const F *l) const { _solverGenDim<F, V>()(rhs, l, N); }
0762 };
0763
0764 /// struct to do a Cholesky decomposition (specialized, N = 6)
0765 template <class F, class M>
0766 struct _decomposer<F, 6, M> {
0767    /// method to do the decomposition
0768    bool operator()(F *dst, const M &src) const
0769    {
0770       if (src(0, 0) <= F(0.0))
0771          return false;
0772       dst[0] = std::sqrt(F(1.0) / src(0, 0));
0773       dst[1] = src(1, 0) * dst[0];
0774       dst[2] = src(1, 1) - dst[1] * dst[1];
0775       if (dst[2] <= F(0.0))
0776          return false;
0777       else
0778          dst[2] = std::sqrt(F(1.0) / dst[2]);
0779       dst[3] = src(2, 0) * dst[0];
0780       dst[4] = (src(2, 1) - dst[1] * dst[3]) * dst[2];
0781       dst[5] = src(2, 2) - (dst[3] * dst[3] + dst[4] * dst[4]);
0782       if (dst[5] <= F(0.0))
0783          return false;
0784       else
0785          dst[5] = std::sqrt(F(1.0) / dst[5]);
0786       dst[6] = src(3, 0) * dst[0];
0787       dst[7] = (src(3, 1) - dst[1] * dst[6]) * dst[2];
0788       dst[8] = (src(3, 2) - dst[3] * dst[6] - dst[4] * dst[7]) * dst[5];
0789       dst[9] = src(3, 3) - (dst[6] * dst[6] + dst[7] * dst[7] + dst[8] * dst[8]);
0790       if (dst[9] <= F(0.0))
0791          return false;
0792       else
0793          dst[9] = std::sqrt(F(1.0) / dst[9]);
0794       dst[10] = src(4, 0) * dst[0];
0795       dst[11] = (src(4, 1) - dst[1] * dst[10]) * dst[2];
0796       dst[12] = (src(4, 2) - dst[3] * dst[10] - dst[4] * dst[11]) * dst[5];
0797       dst[13] = (src(4, 3) - dst[6] * dst[10] - dst[7] * dst[11] - dst[8] * dst[12]) * dst[9];
0798       dst[14] = src(4, 4) - (dst[10] * dst[10] + dst[11] * dst[11] + dst[12] * dst[12] + dst[13] * dst[13]);
0799       if (dst[14] <= F(0.0))
0800          return false;
0801       else
0802          dst[14] = std::sqrt(F(1.0) / dst[14]);
0803       dst[15] = src(5, 0) * dst[0];
0804       dst[16] = (src(5, 1) - dst[1] * dst[15]) * dst[2];
0805       dst[17] = (src(5, 2) - dst[3] * dst[15] - dst[4] * dst[16]) * dst[5];
0806       dst[18] = (src(5, 3) - dst[6] * dst[15] - dst[7] * dst[16] - dst[8] * dst[17]) * dst[9];
0807       dst[19] = (src(5, 4) - dst[10] * dst[15] - dst[11] * dst[16] - dst[12] * dst[17] - dst[13] * dst[18]) * dst[14];
0808       dst[20] = src(5, 5) -
0809                 (dst[15] * dst[15] + dst[16] * dst[16] + dst[17] * dst[17] + dst[18] * dst[18] + dst[19] * dst[19]);
0810       if (dst[20] <= F(0.0))
0811          return false;
0812       else
0813          dst[20] = std::sqrt(F(1.0) / dst[20]);
0814       return true;
0815    }
0816 };
0817 /// struct to do a Cholesky decomposition (specialized, N = 5)
0818 template <class F, class M>
0819 struct _decomposer<F, 5, M> {
0820    /// method to do the decomposition
0821    bool operator()(F *dst, const M &src) const
0822    {
0823       if (src(0, 0) <= F(0.0))
0824          return false;
0825       dst[0] = std::sqrt(F(1.0) / src(0, 0));
0826       dst[1] = src(1, 0) * dst[0];
0827       dst[2] = src(1, 1) - dst[1] * dst[1];
0828       if (dst[2] <= F(0.0))
0829          return false;
0830       else
0831          dst[2] = std::sqrt(F(1.0) / dst[2]);
0832       dst[3] = src(2, 0) * dst[0];
0833       dst[4] = (src(2, 1) - dst[1] * dst[3]) * dst[2];
0834       dst[5] = src(2, 2) - (dst[3] * dst[3] + dst[4] * dst[4]);
0835       if (dst[5] <= F(0.0))
0836          return false;
0837       else
0838          dst[5] = std::sqrt(F(1.0) / dst[5]);
0839       dst[6] = src(3, 0) * dst[0];
0840       dst[7] = (src(3, 1) - dst[1] * dst[6]) * dst[2];
0841       dst[8] = (src(3, 2) - dst[3] * dst[6] - dst[4] * dst[7]) * dst[5];
0842       dst[9] = src(3, 3) - (dst[6] * dst[6] + dst[7] * dst[7] + dst[8] * dst[8]);
0843       if (dst[9] <= F(0.0))
0844          return false;
0845       else
0846          dst[9] = std::sqrt(F(1.0) / dst[9]);
0847       dst[10] = src(4, 0) * dst[0];
0848       dst[11] = (src(4, 1) - dst[1] * dst[10]) * dst[2];
0849       dst[12] = (src(4, 2) - dst[3] * dst[10] - dst[4] * dst[11]) * dst[5];
0850       dst[13] = (src(4, 3) - dst[6] * dst[10] - dst[7] * dst[11] - dst[8] * dst[12]) * dst[9];
0851       dst[14] = src(4, 4) - (dst[10] * dst[10] + dst[11] * dst[11] + dst[12] * dst[12] + dst[13] * dst[13]);
0852       if (dst[14] <= F(0.0))
0853          return false;
0854       else
0855          dst[14] = std::sqrt(F(1.0) / dst[14]);
0856       return true;
0857    }
0858 };
0859 /// struct to do a Cholesky decomposition (specialized, N = 4)
0860 template <class F, class M>
0861 struct _decomposer<F, 4, M> {
0862    /// method to do the decomposition
0863    bool operator()(F *dst, const M &src) const
0864    {
0865       if (src(0, 0) <= F(0.0))
0866          return false;
0867       dst[0] = std::sqrt(F(1.0) / src(0, 0));
0868       dst[1] = src(1, 0) * dst[0];
0869       dst[2] = src(1, 1) - dst[1] * dst[1];
0870       if (dst[2] <= F(0.0))
0871          return false;
0872       else
0873          dst[2] = std::sqrt(F(1.0) / dst[2]);
0874       dst[3] = src(2, 0) * dst[0];
0875       dst[4] = (src(2, 1) - dst[1] * dst[3]) * dst[2];
0876       dst[5] = src(2, 2) - (dst[3] * dst[3] + dst[4] * dst[4]);
0877       if (dst[5] <= F(0.0))
0878          return false;
0879       else
0880          dst[5] = std::sqrt(F(1.0) / dst[5]);
0881       dst[6] = src(3, 0) * dst[0];
0882       dst[7] = (src(3, 1) - dst[1] * dst[6]) * dst[2];
0883       dst[8] = (src(3, 2) - dst[3] * dst[6] - dst[4] * dst[7]) * dst[5];
0884       dst[9] = src(3, 3) - (dst[6] * dst[6] + dst[7] * dst[7] + dst[8] * dst[8]);
0885       if (dst[9] <= F(0.0))
0886          return false;
0887       else
0888          dst[9] = std::sqrt(F(1.0) / dst[9]);
0889       return true;
0890    }
0891 };
0892 /// struct to do a Cholesky decomposition (specialized, N = 3)
0893 template <class F, class M>
0894 struct _decomposer<F, 3, M> {
0895    /// method to do the decomposition
0896    bool operator()(F *dst, const M &src) const
0897    {
0898       if (src(0, 0) <= F(0.0))
0899          return false;
0900       dst[0] = std::sqrt(F(1.0) / src(0, 0));
0901       dst[1] = src(1, 0) * dst[0];
0902       dst[2] = src(1, 1) - dst[1] * dst[1];
0903       if (dst[2] <= F(0.0))
0904          return false;
0905       else
0906          dst[2] = std::sqrt(F(1.0) / dst[2]);
0907       dst[3] = src(2, 0) * dst[0];
0908       dst[4] = (src(2, 1) - dst[1] * dst[3]) * dst[2];
0909       dst[5] = src(2, 2) - (dst[3] * dst[3] + dst[4] * dst[4]);
0910       if (dst[5] <= F(0.0))
0911          return false;
0912       else
0913          dst[5] = std::sqrt(F(1.0) / dst[5]);
0914       return true;
0915    }
0916 };
0917 /// struct to do a Cholesky decomposition (specialized, N = 2)
0918 template <class F, class M>
0919 struct _decomposer<F, 2, M> {
0920    /// method to do the decomposition
0921    bool operator()(F *dst, const M &src) const
0922    {
0923       if (src(0, 0) <= F(0.0))
0924          return false;
0925       dst[0] = std::sqrt(F(1.0) / src(0, 0));
0926       dst[1] = src(1, 0) * dst[0];
0927       dst[2] = src(1, 1) - dst[1] * dst[1];
0928       if (dst[2] <= F(0.0))
0929          return false;
0930       else
0931          dst[2] = std::sqrt(F(1.0) / dst[2]);
0932       return true;
0933    }
0934 };
0935 /// struct to do a Cholesky decomposition (specialized, N = 1)
0936 template <class F, class M>
0937 struct _decomposer<F, 1, M> {
0938    /// method to do the decomposition
0939    bool operator()(F *dst, const M &src) const
0940    {
0941       if (src(0, 0) <= F(0.0))
0942          return false;
0943       dst[0] = std::sqrt(F(1.0) / src(0, 0));
0944       return true;
0945    }
0946 };
0947 /// struct to do a Cholesky decomposition (specialized, N = 0)
0948 template <class F, class M>
0949 struct _decomposer<F, 0, M> {
0950 private:
0951    _decomposer(){};
0952    bool operator()(F *dst, const M &src) const;
0953 };
0954
0955 /// struct to obtain the inverse from a Cholesky decomposition (N = 6)
0956 template <class F, class M>
0957 struct _inverter<F, 6, M> {
0958    /// method to do the inversion
0959    inline void operator()(M &dst, const F *src) const
0960    {
0961       const F li21 = -src[1] * src[0] * src[2];
0962       const F li32 = -src[4] * src[2] * src[5];
0963       const F li31 = (src[1] * src[4] * src[2] - src[3]) * src[0] * src[5];
0964       const F li43 = -src[8] * src[9] * src[5];
0965       const F li42 = (src[4] * src[8] * src[5] - src[7]) * src[2] * src[9];
0966       const F li41 =
0967          (-src[1] * src[4] * src[8] * src[2] * src[5] + src[1] * src[7] * src[2] + src[3] * src[8] * src[5] - src[6]) *
0968          src[0] * src[9];
0969       const F li54 = -src[13] * src[14] * src[9];
0970       const F li53 = (src[13] * src[8] * src[9] - src[12]) * src[5] * src[14];
0971       const F li52 = (-src[4] * src[8] * src[13] * src[5] * src[9] + src[4] * src[12] * src[5] +
0972                       src[7] * src[13] * src[9] - src[11]) *
0973                      src[2] * src[14];
0974       const F li51 =
0975          (src[1] * src[4] * src[8] * src[13] * src[2] * src[5] * src[9] - src[13] * src[8] * src[3] * src[9] * src[5] -
0976           src[12] * src[4] * src[1] * src[2] * src[5] - src[13] * src[7] * src[1] * src[9] * src[2] +
0977           src[11] * src[1] * src[2] + src[12] * src[3] * src[5] + src[13] * src[6] * src[9] - src[10]) *
0978          src[0] * src[14];
0979       const F li65 = -src[19] * src[20] * src[14];
0980       const F li64 = (src[19] * src[13] * src[14] - src[18]) * src[9] * src[20];
0981       const F li63 = (-src[8] * src[13] * src[19] * src[9] * src[14] + src[8] * src[18] * src[9] +
0982                       src[12] * src[19] * src[14] - src[17]) *
0983                      src[5] * src[20];
0984       const F li62 = (src[4] * src[8] * src[13] * src[19] * src[5] * src[9] * src[14] -
0985                       src[18] * src[8] * src[4] * src[9] * src[5] - src[19] * src[12] * src[4] * src[14] * src[5] -
0986                       src[19] * src[13] * src[7] * src[14] * src[9] + src[17] * src[4] * src[5] +
0987                       src[18] * src[7] * src[9] + src[19] * src[11] * src[14] - src[16]) *
0988                      src[2] * src[20];
0989       const F li61 = (-src[19] * src[13] * src[8] * src[4] * src[1] * src[2] * src[5] * src[9] * src[14] +
0990                       src[18] * src[8] * src[4] * src[1] * src[2] * src[5] * src[9] +
0991                       src[19] * src[12] * src[4] * src[1] * src[2] * src[5] * src[14] +
0992                       src[19] * src[13] * src[7] * src[1] * src[2] * src[9] * src[14] +
0993                       src[19] * src[13] * src[8] * src[3] * src[5] * src[9] * src[14] -
0994                       src[17] * src[4] * src[1] * src[2] * src[5] - src[18] * src[7] * src[1] * src[2] * src[9] -
0995                       src[19] * src[11] * src[1] * src[2] * src[14] - src[18] * src[8] * src[3] * src[5] * src[9] -
0996                       src[19] * src[12] * src[3] * src[5] * src[14] - src[19] * src[13] * src[6] * src[9] * src[14] +
0997                       src[16] * src[1] * src[2] + src[17] * src[3] * src[5] + src[18] * src[6] * src[9] +
0998                       src[19] * src[10] * src[14] - src[15]) *
0999                      src[0] * src[20];
1000
1001       dst(0, 0) = li61 * li61 + li51 * li51 + li41 * li41 + li31 * li31 + li21 * li21 + src[0] * src[0];
1002       dst(1, 0) = li61 * li62 + li51 * li52 + li41 * li42 + li31 * li32 + li21 * src[2];
1003       dst(1, 1) = li62 * li62 + li52 * li52 + li42 * li42 + li32 * li32 + src[2] * src[2];
1004       dst(2, 0) = li61 * li63 + li51 * li53 + li41 * li43 + li31 * src[5];
1005       dst(2, 1) = li62 * li63 + li52 * li53 + li42 * li43 + li32 * src[5];
1006       dst(2, 2) = li63 * li63 + li53 * li53 + li43 * li43 + src[5] * src[5];
1007       dst(3, 0) = li61 * li64 + li51 * li54 + li41 * src[9];
1008       dst(3, 1) = li62 * li64 + li52 * li54 + li42 * src[9];
1009       dst(3, 2) = li63 * li64 + li53 * li54 + li43 * src[9];
1010       dst(3, 3) = li64 * li64 + li54 * li54 + src[9] * src[9];
1011       dst(4, 0) = li61 * li65 + li51 * src[14];
1012       dst(4, 1) = li62 * li65 + li52 * src[14];
1013       dst(4, 2) = li63 * li65 + li53 * src[14];
1014       dst(4, 3) = li64 * li65 + li54 * src[14];
1015       dst(4, 4) = li65 * li65 + src[14] * src[14];
1016       dst(5, 0) = li61 * src[20];
1017       dst(5, 1) = li62 * src[20];
1018       dst(5, 2) = li63 * src[20];
1019       dst(5, 3) = li64 * src[20];
1020       dst(5, 4) = li65 * src[20];
1021       dst(5, 5) = src[20] * src[20];
1022    }
1023 };
1024 /// struct to obtain the inverse from a Cholesky decomposition (N = 5)
1025 template <class F, class M>
1026 struct _inverter<F, 5, M> {
1027    /// method to do the inversion
1028    inline void operator()(M &dst, const F *src) const
1029    {
1030       const F li21 = -src[1] * src[0] * src[2];
1031       const F li32 = -src[4] * src[2] * src[5];
1032       const F li31 = (src[1] * src[4] * src[2] - src[3]) * src[0] * src[5];
1033       const F li43 = -src[8] * src[9] * src[5];
1034       const F li42 = (src[4] * src[8] * src[5] - src[7]) * src[2] * src[9];
1035       const F li41 =
1036          (-src[1] * src[4] * src[8] * src[2] * src[5] + src[1] * src[7] * src[2] + src[3] * src[8] * src[5] - src[6]) *
1037          src[0] * src[9];
1038       const F li54 = -src[13] * src[14] * src[9];
1039       const F li53 = (src[13] * src[8] * src[9] - src[12]) * src[5] * src[14];
1040       const F li52 = (-src[4] * src[8] * src[13] * src[5] * src[9] + src[4] * src[12] * src[5] +
1041                       src[7] * src[13] * src[9] - src[11]) *
1042                      src[2] * src[14];
1043       const F li51 =
1044          (src[1] * src[4] * src[8] * src[13] * src[2] * src[5] * src[9] - src[13] * src[8] * src[3] * src[9] * src[5] -
1045           src[12] * src[4] * src[1] * src[2] * src[5] - src[13] * src[7] * src[1] * src[9] * src[2] +
1046           src[11] * src[1] * src[2] + src[12] * src[3] * src[5] + src[13] * src[6] * src[9] - src[10]) *
1047          src[0] * src[14];
1048
1049       dst(0, 0) = li51 * li51 + li41 * li41 + li31 * li31 + li21 * li21 + src[0] * src[0];
1050       dst(1, 0) = li51 * li52 + li41 * li42 + li31 * li32 + li21 * src[2];
1051       dst(1, 1) = li52 * li52 + li42 * li42 + li32 * li32 + src[2] * src[2];
1052       dst(2, 0) = li51 * li53 + li41 * li43 + li31 * src[5];
1053       dst(2, 1) = li52 * li53 + li42 * li43 + li32 * src[5];
1054       dst(2, 2) = li53 * li53 + li43 * li43 + src[5] * src[5];
1055       dst(3, 0) = li51 * li54 + li41 * src[9];
1056       dst(3, 1) = li52 * li54 + li42 * src[9];
1057       dst(3, 2) = li53 * li54 + li43 * src[9];
1058       dst(3, 3) = li54 * li54 + src[9] * src[9];
1059       dst(4, 0) = li51 * src[14];
1060       dst(4, 1) = li52 * src[14];
1061       dst(4, 2) = li53 * src[14];
1062       dst(4, 3) = li54 * src[14];
1063       dst(4, 4) = src[14] * src[14];
1064    }
1065 };
1066 /// struct to obtain the inverse from a Cholesky decomposition (N = 4)
1067 template <class F, class M>
1068 struct _inverter<F, 4, M> {
1069    /// method to do the inversion
1070    inline void operator()(M &dst, const F *src) const
1071    {
1072       const F li21 = -src[1] * src[0] * src[2];
1073       const F li32 = -src[4] * src[2] * src[5];
1074       const F li31 = (src[1] * src[4] * src[2] - src[3]) * src[0] * src[5];
1075       const F li43 = -src[8] * src[9] * src[5];
1076       const F li42 = (src[4] * src[8] * src[5] - src[7]) * src[2] * src[9];
1077       const F li41 =
1078          (-src[1] * src[4] * src[8] * src[2] * src[5] + src[1] * src[7] * src[2] + src[3] * src[8] * src[5] - src[6]) *
1079          src[0] * src[9];
1080
1081       dst(0, 0) = li41 * li41 + li31 * li31 + li21 * li21 + src[0] * src[0];
1082       dst(1, 0) = li41 * li42 + li31 * li32 + li21 * src[2];
1083       dst(1, 1) = li42 * li42 + li32 * li32 + src[2] * src[2];
1084       dst(2, 0) = li41 * li43 + li31 * src[5];
1085       dst(2, 1) = li42 * li43 + li32 * src[5];
1086       dst(2, 2) = li43 * li43 + src[5] * src[5];
1087       dst(3, 0) = li41 * src[9];
1088       dst(3, 1) = li42 * src[9];
1089       dst(3, 2) = li43 * src[9];
1090       dst(3, 3) = src[9] * src[9];
1091    }
1092 };
1093 /// struct to obtain the inverse from a Cholesky decomposition (N = 3)
1094 template <class F, class M>
1095 struct _inverter<F, 3, M> {
1096    /// method to do the inversion
1097    inline void operator()(M &dst, const F *src) const
1098    {
1099       const F li21 = -src[1] * src[0] * src[2];
1100       const F li32 = -src[4] * src[2] * src[5];
1101       const F li31 = (src[1] * src[4] * src[2] - src[3]) * src[0] * src[5];
1102
1103       dst(0, 0) = li31 * li31 + li21 * li21 + src[0] * src[0];
1104       dst(1, 0) = li31 * li32 + li21 * src[2];
1105       dst(1, 1) = li32 * li32 + src[2] * src[2];
1106       dst(2, 0) = li31 * src[5];
1107       dst(2, 1) = li32 * src[5];
1108       dst(2, 2) = src[5] * src[5];
1109    }
1110 };
1111 /// struct to obtain the inverse from a Cholesky decomposition (N = 2)
1112 template <class F, class M>
1113 struct _inverter<F, 2, M> {
1114    /// method to do the inversion
1115    inline void operator()(M &dst, const F *src) const
1116    {
1117       const F li21 = -src[1] * src[0] * src[2];
1118
1119       dst(0, 0) = li21 * li21 + src[0] * src[0];
1120       dst(1, 0) = li21 * src[2];
1121       dst(1, 1) = src[2] * src[2];
1122    }
1123 };
1124 /// struct to obtain the inverse from a Cholesky decomposition (N = 1)
1125 template <class F, class M>
1126 struct _inverter<F, 1, M> {
1127    /// method to do the inversion
1128    inline void operator()(M &dst, const F *src) const { dst(0, 0) = src[0] * src[0]; }
1129 };
1130 /// struct to obtain the inverse from a Cholesky decomposition (N = 0)
1131 template <class F, class M>
1132 struct _inverter<F, 0, M> {
1133 private:
1134    _inverter();
1135    void operator()(M &dst, const F *src) const;
1136 };
1137
1138 /// struct to solve a linear system using its Cholesky decomposition (N=6)
1139 template <class F, class V>
1140 struct _solver<F, 6, V> {
1141    /// method to solve the linear system
1142    void operator()(V &rhs, const F *l) const
1143    {
1144       // solve Ly = rhs
1145       const F y0 = rhs[0] * l[0];
1146       const F y1 = (rhs[1] - l[1] * y0) * l[2];
1147       const F y2 = (rhs[2] - (l[3] * y0 + l[4] * y1)) * l[5];
1148       const F y3 = (rhs[3] - (l[6] * y0 + l[7] * y1 + l[8] * y2)) * l[9];
1149       const F y4 = (rhs[4] - (l[10] * y0 + l[11] * y1 + l[12] * y2 + l[13] * y3)) * l[14];
1150       const F y5 = (rhs[5] - (l[15] * y0 + l[16] * y1 + l[17] * y2 + l[18] * y3 + l[19] * y4)) * l[20];
1151       // solve L^Tx = y, and put x into rhs
1152       rhs[5] = y5 * l[20];
1153       rhs[4] = (y4 - l[19] * rhs[5]) * l[14];
1154       rhs[3] = (y3 - (l[18] * rhs[5] + l[13] * rhs[4])) * l[9];
1155       rhs[2] = (y2 - (l[17] * rhs[5] + l[12] * rhs[4] + l[8] * rhs[3])) * l[5];
1156       rhs[1] = (y1 - (l[16] * rhs[5] + l[11] * rhs[4] + l[7] * rhs[3] + l[4] * rhs[2])) * l[2];
1157       rhs[0] = (y0 - (l[15] * rhs[5] + l[10] * rhs[4] + l[6] * rhs[3] + l[3] * rhs[2] + l[1] * rhs[1])) * l[0];
1158    }
1159 };
1160 /// struct to solve a linear system using its Cholesky decomposition (N=5)
1161 template <class F, class V>
1162 struct _solver<F, 5, V> {
1163    /// method to solve the linear system
1164    void operator()(V &rhs, const F *l) const
1165    {
1166       // solve Ly = rhs
1167       const F y0 = rhs[0] * l[0];
1168       const F y1 = (rhs[1] - l[1] * y0) * l[2];
1169       const F y2 = (rhs[2] - (l[3] * y0 + l[4] * y1)) * l[5];
1170       const F y3 = (rhs[3] - (l[6] * y0 + l[7] * y1 + l[8] * y2)) * l[9];
1171       const F y4 = (rhs[4] - (l[10] * y0 + l[11] * y1 + l[12] * y2 + l[13] * y3)) * l[14];
1172       // solve L^Tx = y, and put x into rhs
1173       rhs[4] = (y4)*l[14];
1174       rhs[3] = (y3 - (l[13] * rhs[4])) * l[9];
1175       rhs[2] = (y2 - (l[12] * rhs[4] + l[8] * rhs[3])) * l[5];
1176       rhs[1] = (y1 - (l[11] * rhs[4] + l[7] * rhs[3] + l[4] * rhs[2])) * l[2];
1177       rhs[0] = (y0 - (l[10] * rhs[4] + l[6] * rhs[3] + l[3] * rhs[2] + l[1] * rhs[1])) * l[0];
1178    }
1179 };
1180 /// struct to solve a linear system using its Cholesky decomposition (N=4)
1181 template <class F, class V>
1182 struct _solver<F, 4, V> {
1183    /// method to solve the linear system
1184    void operator()(V &rhs, const F *l) const
1185    {
1186       // solve Ly = rhs
1187       const F y0 = rhs[0] * l[0];
1188       const F y1 = (rhs[1] - l[1] * y0) * l[2];
1189       const F y2 = (rhs[2] - (l[3] * y0 + l[4] * y1)) * l[5];
1190       const F y3 = (rhs[3] - (l[6] * y0 + l[7] * y1 + l[8] * y2)) * l[9];
1191       // solve L^Tx = y, and put x into rhs
1192       rhs[3] = (y3)*l[9];
1193       rhs[2] = (y2 - (l[8] * rhs[3])) * l[5];
1194       rhs[1] = (y1 - (l[7] * rhs[3] + l[4] * rhs[2])) * l[2];
1195       rhs[0] = (y0 - (l[6] * rhs[3] + l[3] * rhs[2] + l[1] * rhs[1])) * l[0];
1196    }
1197 };
1198 /// struct to solve a linear system using its Cholesky decomposition (N=3)
1199 template <class F, class V>
1200 struct _solver<F, 3, V> {
1201    /// method to solve the linear system
1202    void operator()(V &rhs, const F *l) const
1203    {
1204       // solve Ly = rhs
1205       const F y0 = rhs[0] * l[0];
1206       const F y1 = (rhs[1] - l[1] * y0) * l[2];
1207       const F y2 = (rhs[2] - (l[3] * y0 + l[4] * y1)) * l[5];
1208       // solve L^Tx = y, and put x into rhs
1209       rhs[2] = (y2)*l[5];
1210       rhs[1] = (y1 - (l[4] * rhs[2])) * l[2];
1211       rhs[0] = (y0 - (l[3] * rhs[2] + l[1] * rhs[1])) * l[0];
1212    }
1213 };
1214 /// struct to solve a linear system using its Cholesky decomposition (N=2)
1215 template <class F, class V>
1216 struct _solver<F, 2, V> {
1217    /// method to solve the linear system
1218    void operator()(V &rhs, const F *l) const
1219    {
1220       // solve Ly = rhs
1221       const F y0 = rhs[0] * l[0];
1222       const F y1 = (rhs[1] - l[1] * y0) * l[2];
1223       // solve L^Tx = y, and put x into rhs
1224       rhs[1] = (y1)*l[2];
1225       rhs[0] = (y0 - (l[1] * rhs[1])) * l[0];
1226    }
1227 };
1228 /// struct to solve a linear system using its Cholesky decomposition (N=1)
1229 template <class F, class V>
1230 struct _solver<F, 1, V> {
1231    /// method to solve the linear system
1232    void operator()(V &rhs, const F *l) const
1233    {
1234       // solve LL^T x = rhs, put y into rhs
1235       rhs[0] *= l[0] * l[0];
1236    }
1237 };
1238 /// struct to solve a linear system using its Cholesky decomposition (N=0)
1239 template <class F, class V>
1240 struct _solver<F, 0, V> {
1241 private:
1242    _solver();
1243    void operator()(V &rhs, const F *l) const;
1244 };
1245 } // namespace CholeskyDecompHelpers
1246
1247 } // namespace Math
1248
1249 } // namespace ROOT
1250
1251 #endif // ROOT_Math_CHOLESKYDECOMP