diff --git a/CUDA/solver_mult_inverse.cu b/CUDA/solver_mult_inverse.cu new file mode 100644 index 0000000000000000000000000000000000000000..5cbae6169333502325908c09f2db666795fa6fc0 --- /dev/null +++ b/CUDA/solver_mult_inverse.cu @@ -0,0 +1,157 @@ +#include "options.h" + +// X = A.i() * B . Used in Levenberg-Marquardt +// MATRIX INVERSE AS NEWMAT LU SOLVER +// implemented in NEWMAT:newmat7.cpp GeneralSolvI. +__device__ void solver( // INPUT + double *A, + double *P, + int length, + //OUTPUT + double *B) +{ + double C[NPARAMS*NPARAMS]; + + for(int i=0;i<length;i++){ + for(int j=0;j<length;j++){ + C[i*length+j]=A[i*length+j]; + } + } + + bool d=true; + int indx[NPARAMS]; + + double* akk = C; + double big = fabs(*akk); + int mu = 0; + double* ai = akk; + int k; + + for (k = 1; k<length; k++){ + ai += length; + const double trybig = fabs(*ai); + if (big < trybig){ + big = trybig; + mu = k; + } + } + + if(length) for (k = 0;;){ + + indx[k] = mu; + if (mu != k){ + double* a1 = C + length*k; + double* a2 = C + length*mu; + d = !d; + int j = length; + while (j--){ + const double temp = *a1; + *a1++ = *a2; + *a2++ = temp; + } + } + + double diag = *akk; + big = 0; + mu = k + 1; + if (diag != 0){ + ai = akk; + int i = length - k - 1; + while (i--){ + ai += length; + double* al = ai; + double mult = *al / diag; + *al = mult; + int l = length - k - 1; + double* aj = akk; + if (l-- != 0){ + + double aux=al[1]-(mult* *(++aj)); + *(++al) = aux; + //*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler + + const double trybig = fabs(*al); + if (big < trybig){ + big = trybig; + mu = length - i - 1; + } + while (l--){ + double aux= al[1]-(mult* *(++aj)); + *(++al) = aux; + //*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler + } + } + } + } + if (++k == length) break; + akk += length + 1; + } + + +////////////////////////////// + + double el[NPARAMS]; + + + for(int e=0;e<length;e++){ + el[e]=P[e]; + } + + int j; + int ii = length; + int ip; + double temp; + int i; + + for (i=0; i<length; i++){ + ip = indx[i]; + temp = el[ip]; + el[ip] = el[i]; + el[i] = temp; + if (temp != 0.0) { ii = i; break; } + } + + double* bi; + double* ai2; + i = ii + 1; + + if (i < length){ + bi = el + ii; + ai2 = C + ii + i * length; + for (;;){ + int ip = indx[i]; + double sum = el[ip]; + el[ip] = el[i]; + double* aij = ai2; + double* bj = bi; + j = i - ii; + while (j--){ + sum -= *aij++* *bj++; + } + el[i] = sum; + if (++i == length) break; + ai2 += length; + } + } + + ai2 = C + length*length; + + for (i = length - 1; i >= 0; i--){ + double* bj = el+i; + ai2 -= length; + double* ajx = ai2+i; + double sum = *bj; + double diag = *ajx; + j = length - i; + while(--j){ + sum -= *(++ajx)* *(++bj); + } + el[i] = sum / diag; + + } + for(int e=0;e<length;e++){ + B[e]=el[e]; + } + +} +//END SOLVER WITH LU MATRIX INVERSE