File indexing completed on 2025-04-19 09:06:43
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031 #ifndef SPARSELU_PANEL_BMOD_H
0032 #define SPARSELU_PANEL_BMOD_H
0033
0034 namespace RivetEigen {
0035 namespace internal {
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055 template <typename Scalar, typename StorageIndex>
0056 void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w, const Index jcol,
0057 const Index nseg, ScalarVector& dense, ScalarVector& tempv,
0058 IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
0059 {
0060
0061 Index ksub,jj,nextl_col;
0062 Index fsupc, nsupc, nsupr, nrow;
0063 Index krep, kfnz;
0064 Index lptr;
0065 Index luptr;
0066 Index segsize,no_zeros ;
0067
0068 Index k = nseg - 1;
0069 const Index PacketSize = internal::packet_traits<Scalar>::size;
0070
0071 for (ksub = 0; ksub < nseg; ksub++)
0072 {
0073
0074
0075
0076
0077
0078 krep = segrep(k); k--;
0079 fsupc = glu.xsup(glu.supno(krep));
0080 nsupc = krep - fsupc + 1;
0081 nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
0082 nrow = nsupr - nsupc;
0083 lptr = glu.xlsub(fsupc);
0084
0085
0086 Index u_rows = 0;
0087 Index u_cols = 0;
0088 for (jj = jcol; jj < jcol + w; jj++)
0089 {
0090 nextl_col = (jj-jcol) * m;
0091 VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m);
0092
0093 kfnz = repfnz_col(krep);
0094 if ( kfnz == emptyIdxLU )
0095 continue;
0096
0097 segsize = krep - kfnz + 1;
0098 u_cols++;
0099 u_rows = (std::max)(segsize,u_rows);
0100 }
0101
0102 if(nsupc >= 2)
0103 {
0104 Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
0105 Map<ScalarMatrix, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
0106
0107
0108 Index u_col = 0;
0109 for (jj = jcol; jj < jcol + w; jj++)
0110 {
0111 nextl_col = (jj-jcol) * m;
0112 VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m);
0113 VectorBlock<ScalarVector> dense_col(dense, nextl_col, m);
0114
0115 kfnz = repfnz_col(krep);
0116 if ( kfnz == emptyIdxLU )
0117 continue;
0118
0119 segsize = krep - kfnz + 1;
0120 luptr = glu.xlusup(fsupc);
0121 no_zeros = kfnz - fsupc;
0122
0123 Index isub = lptr + no_zeros;
0124 Index off = u_rows-segsize;
0125 for (Index i = 0; i < off; i++) U(i,u_col) = 0;
0126 for (Index i = 0; i < segsize; i++)
0127 {
0128 Index irow = glu.lsub(isub);
0129 U(i+off,u_col) = dense_col(irow);
0130 ++isub;
0131 }
0132 u_col++;
0133 }
0134
0135 luptr = glu.xlusup(fsupc);
0136 Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
0137 no_zeros = (krep - u_rows + 1) - fsupc;
0138 luptr += lda * no_zeros + no_zeros;
0139 MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
0140 U = A.template triangularView<UnitLower>().solve(U);
0141
0142
0143 luptr += u_rows;
0144 MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
0145 eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
0146
0147 Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
0148 Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
0149 MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
0150
0151 L.setZero();
0152 internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
0153
0154
0155 u_col = 0;
0156 for (jj = jcol; jj < jcol + w; jj++)
0157 {
0158 nextl_col = (jj-jcol) * m;
0159 VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m);
0160 VectorBlock<ScalarVector> dense_col(dense, nextl_col, m);
0161
0162 kfnz = repfnz_col(krep);
0163 if ( kfnz == emptyIdxLU )
0164 continue;
0165
0166 segsize = krep - kfnz + 1;
0167 no_zeros = kfnz - fsupc;
0168 Index isub = lptr + no_zeros;
0169
0170 Index off = u_rows-segsize;
0171 for (Index i = 0; i < segsize; i++)
0172 {
0173 Index irow = glu.lsub(isub++);
0174 dense_col(irow) = U.coeff(i+off,u_col);
0175 U.coeffRef(i+off,u_col) = 0;
0176 }
0177
0178
0179 for (Index i = 0; i < nrow; i++)
0180 {
0181 Index irow = glu.lsub(isub++);
0182 dense_col(irow) -= L.coeff(i,u_col);
0183 L.coeffRef(i,u_col) = 0;
0184 }
0185 u_col++;
0186 }
0187 }
0188 else
0189 {
0190
0191 for (jj = jcol; jj < jcol + w; jj++)
0192 {
0193 nextl_col = (jj-jcol) * m;
0194 VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m);
0195 VectorBlock<ScalarVector> dense_col(dense, nextl_col, m);
0196
0197 kfnz = repfnz_col(krep);
0198 if ( kfnz == emptyIdxLU )
0199 continue;
0200
0201 segsize = krep - kfnz + 1;
0202 luptr = glu.xlusup(fsupc);
0203
0204 Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);
0205
0206
0207
0208 no_zeros = kfnz - fsupc;
0209 if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
0210 else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
0211 else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
0212 else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
0213 }
0214 }
0215
0216 }
0217 }
0218
0219 }
0220
0221 }
0222
0223 #endif