200 lines
4.9 KiB
C
200 lines
4.9 KiB
C
/* generated code, do not edit. */
|
|
|
|
#include "ode/matrix.h"
|
|
|
|
/* solve L^T * x=b, with b containing 1 right hand side.
|
|
* L is an n*n lower triangular matrix with ones on the diagonal.
|
|
* L is stored by rows and its leading dimension is lskip.
|
|
* b is an n*1 matrix that contains the right hand side.
|
|
* b is overwritten with x.
|
|
* this processes blocks of 4.
|
|
*/
|
|
|
|
void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1)
|
|
{
|
|
/* declare variables - Z matrix, p and q vectors, etc */
|
|
dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
|
|
const dReal *ell;
|
|
int lskip2,lskip3,i,j;
|
|
/* special handling for L and B because we're solving L1 *transpose* */
|
|
L = L + (n-1)*(lskip1+1);
|
|
B = B + n-1;
|
|
lskip1 = -lskip1;
|
|
/* compute lskip values */
|
|
lskip2 = 2*lskip1;
|
|
lskip3 = 3*lskip1;
|
|
/* compute all 4 x 1 blocks of X */
|
|
for (i=0; i <= n-4; i+=4) {
|
|
/* compute all 4 x 1 block of X, from rows i..i+4-1 */
|
|
/* set the Z matrix to 0 */
|
|
Z11=0;
|
|
Z21=0;
|
|
Z31=0;
|
|
Z41=0;
|
|
ell = L - i;
|
|
ex = B;
|
|
/* the inner loop that computes outer products and adds them to Z */
|
|
for (j=i-4; j >= 0; j -= 4) {
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[0];
|
|
p2=ell[-1];
|
|
p3=ell[-2];
|
|
p4=ell[-3];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
m21 = p2 * q1;
|
|
m31 = p3 * q1;
|
|
m41 = p4 * q1;
|
|
ell += lskip1;
|
|
Z11 += m11;
|
|
Z21 += m21;
|
|
Z31 += m31;
|
|
Z41 += m41;
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[-1];
|
|
p2=ell[-1];
|
|
p3=ell[-2];
|
|
p4=ell[-3];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
m21 = p2 * q1;
|
|
m31 = p3 * q1;
|
|
m41 = p4 * q1;
|
|
ell += lskip1;
|
|
Z11 += m11;
|
|
Z21 += m21;
|
|
Z31 += m31;
|
|
Z41 += m41;
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[-2];
|
|
p2=ell[-1];
|
|
p3=ell[-2];
|
|
p4=ell[-3];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
m21 = p2 * q1;
|
|
m31 = p3 * q1;
|
|
m41 = p4 * q1;
|
|
ell += lskip1;
|
|
Z11 += m11;
|
|
Z21 += m21;
|
|
Z31 += m31;
|
|
Z41 += m41;
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[-3];
|
|
p2=ell[-1];
|
|
p3=ell[-2];
|
|
p4=ell[-3];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
m21 = p2 * q1;
|
|
m31 = p3 * q1;
|
|
m41 = p4 * q1;
|
|
ell += lskip1;
|
|
ex -= 4;
|
|
Z11 += m11;
|
|
Z21 += m21;
|
|
Z31 += m31;
|
|
Z41 += m41;
|
|
/* end of inner loop */
|
|
}
|
|
/* compute left-over iterations */
|
|
j += 4;
|
|
for (; j > 0; j--) {
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[0];
|
|
p2=ell[-1];
|
|
p3=ell[-2];
|
|
p4=ell[-3];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
m21 = p2 * q1;
|
|
m31 = p3 * q1;
|
|
m41 = p4 * q1;
|
|
ell += lskip1;
|
|
ex -= 1;
|
|
Z11 += m11;
|
|
Z21 += m21;
|
|
Z31 += m31;
|
|
Z41 += m41;
|
|
}
|
|
/* finish computing the X(i) block */
|
|
Z11 = ex[0] - Z11;
|
|
ex[0] = Z11;
|
|
p1 = ell[-1];
|
|
Z21 = ex[-1] - Z21 - p1*Z11;
|
|
ex[-1] = Z21;
|
|
p1 = ell[-2];
|
|
p2 = ell[-2+lskip1];
|
|
Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21;
|
|
ex[-2] = Z31;
|
|
p1 = ell[-3];
|
|
p2 = ell[-3+lskip1];
|
|
p3 = ell[-3+lskip2];
|
|
Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
|
|
ex[-3] = Z41;
|
|
/* end of outer loop */
|
|
}
|
|
/* compute rows at end that are not a multiple of block size */
|
|
for (; i < n; i++) {
|
|
/* compute all 1 x 1 block of X, from rows i..i+1-1 */
|
|
/* set the Z matrix to 0 */
|
|
Z11=0;
|
|
ell = L - i;
|
|
ex = B;
|
|
/* the inner loop that computes outer products and adds them to Z */
|
|
for (j=i-4; j >= 0; j -= 4) {
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[0];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
ell += lskip1;
|
|
Z11 += m11;
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[-1];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
ell += lskip1;
|
|
Z11 += m11;
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[-2];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
ell += lskip1;
|
|
Z11 += m11;
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[-3];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
ell += lskip1;
|
|
ex -= 4;
|
|
Z11 += m11;
|
|
/* end of inner loop */
|
|
}
|
|
/* compute left-over iterations */
|
|
j += 4;
|
|
for (; j > 0; j--) {
|
|
/* load p and q values */
|
|
p1=ell[0];
|
|
q1=ex[0];
|
|
/* compute outer product and add it to the Z matrix */
|
|
m11 = p1 * q1;
|
|
ell += lskip1;
|
|
ex -= 1;
|
|
Z11 += m11;
|
|
}
|
|
/* finish computing the X(i) block */
|
|
Z11 = ex[0] - Z11;
|
|
ex[0] = Z11;
|
|
}
|
|
}
|