Skip to content
Snippets Groups Projects
Commit 0e586c95 authored by Moises Fernandez
Browse files

Avoid static declaration of arrays (moved to shared memory) and reduce precision (double to float)

parent 9df520eb
No related branches found
No related tags found
No related merge requests found
...@@ -22,18 +22,18 @@ __device__ inline bool zero_cf_diff_conv(double* cfo,double* cfn,double* cftol){ ...@@ -22,18 +22,18 @@ __device__ inline bool zero_cf_diff_conv(double* cfo,double* cfn,double* cftol){
} }
__device__ void levenberg_marquardt_PVM_single_gpu( //INPUT __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT
const double* mydata, const float* mydata,
const double* bvecs, const float* bvecs,
const double* bvals, const float* bvals,
const int ndirections, const int ndirections,
const int nfib, const int nfib,
const int nparams, const int nparams,
const bool m_include_f0, const bool m_include_f0,
const int idSubVOX, const int idSubVOX,
double* step, //shared memory float* step, //shared memory
double* grad, //shared memory float* grad, //shared memory
double* hess, //shared memory float* hess, //shared memory
double* inverse, //shared memory float* inverse, //shared memory
double* pcf, //shared memory double* pcf, //shared memory
double* ncf, //shared memory double* ncf, //shared memory
double* lambda, //shared memory double* lambda, //shared memory
...@@ -42,16 +42,17 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT ...@@ -42,16 +42,17 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT
double* olambda, //shared memory double* olambda, //shared memory
int* success, //shared memory int* success, //shared memory
int* end, //shared memory int* end, //shared memory
double* reduction, //shared memory float* J, //shared memory
double* fs, //shared memory float* reduction, //shared memory
double* x, //shared memory float* fs, //shared memory
double* _d, //shared memory float* x, //shared memory
double* sumf, //shared memory float* _d, //shared memory
double* C, //shared memory float* sumf, //shared memory
double* el, //shared memory float* C, //shared memory
float* el, //shared memory
int* indx, //shared memory int* indx, //shared memory
//INPUT-OUTPUT //INPUT-OUTPUT
double* myparams) //shared memory float* myparams) //shared memory
{ {
int niter=0; int niter=0;
int maxiter=200; int maxiter=200;
...@@ -72,9 +73,9 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT ...@@ -72,9 +73,9 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT
while (!(*success&&niter++>=maxiter)){ //if success we don't increase niter (first condition is true) while (!(*success&&niter++>=maxiter)){ //if success we don't increase niter (first condition is true)
//function cost has been decreased, we have advanced. //function cost has been decreased, we have advanced.
if(*success){ if(*success){
grad_PVM_single(myparams,mydata,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,reduction,fs,x,_d,sumf,grad); grad_PVM_single(myparams,mydata,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,J,reduction,fs,x,_d,sumf,grad);
__syncthreads(); __syncthreads();
hess_PVM_single(myparams,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,reduction,fs,x,_d,sumf,hess); hess_PVM_single(myparams,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,J,reduction,fs,x,_d,sumf,hess);
} }
if(idSubVOX==0){ if(idSubVOX==0){
...@@ -122,18 +123,18 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT ...@@ -122,18 +123,18 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT
} }
__device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT
const double* mydata, const float* mydata,
const double* bvecs, const float* bvecs,
const double* bvals, const float* bvals,
const int ndirections, const int ndirections,
const int nfib, const int nfib,
const int nparams, const int nparams,
const bool m_include_f0, const bool m_include_f0,
const int idSubVOX, const int idSubVOX,
double* step, //shared memory float* step, //shared memory
double* grad, //shared memory float* grad, //shared memory
double* hess, //shared memory float* hess, //shared memory
double* inverse, //shared memory float* inverse, //shared memory
double* pcf, //shared memory double* pcf, //shared memory
double* ncf, //shared memory double* ncf, //shared memory
double* lambda, //shared memory double* lambda, //shared memory
...@@ -142,17 +143,18 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT ...@@ -142,17 +143,18 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT
double* olambda, //shared memory double* olambda, //shared memory
int* success, //shared memory int* success, //shared memory
int* end, //shared memory int* end, //shared memory
double* reduction, //shared memory float* J, //shared memory
double* fs, //shared memory float* reduction, //shared memory
double* f_deriv, //shared memory float* fs, //shared memory
double* x, //shared memory float* f_deriv, //shared memory
double* _d, //shared memory float* x, //shared memory
double* sumf, //shared memory float* _d, //shared memory
double* C, //shared memory float* sumf, //shared memory
double* el, //shared memory float* C, //shared memory
float* el, //shared memory
int* indx, //shared memory int* indx, //shared memory
//INPUT-OUTPUT //INPUT-OUTPUT
double* myparams) //shared memory float* myparams) //shared memory
{ {
int niter=0; int niter=0;
int maxiter=200; int maxiter=200;
...@@ -173,9 +175,9 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT ...@@ -173,9 +175,9 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT
while (!(*success&&niter++ >= maxiter)){ //if success we don't increase niter (first condition is true) while (!(*success&&niter++ >= maxiter)){ //if success we don't increase niter (first condition is true)
//function cost has been decreased, we have advanced. //function cost has been decreased, we have advanced.
if(*success){ if(*success){
grad_PVM_single_c(myparams,mydata,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,reduction,fs,f_deriv,x,_d,sumf,grad); grad_PVM_single_c(myparams,mydata,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,J,reduction,fs,f_deriv,x,_d,sumf,grad);
__syncthreads(); __syncthreads();
hess_PVM_single_c(myparams,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,reduction,fs,f_deriv,x,_d,sumf,hess); hess_PVM_single_c(myparams,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,J,reduction,fs,f_deriv,x,_d,sumf,hess);
} }
if(idSubVOX==0){ if(idSubVOX==0){
...@@ -224,18 +226,18 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT ...@@ -224,18 +226,18 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT
__device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT __device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT
const double* mydata, const float* mydata,
const double* bvecs, const float* bvecs,
const double* bvals, const float* bvals,
const int ndirections, const int ndirections,
const int nfib, const int nfib,
const int nparams, const int nparams,
const bool m_include_f0, const bool m_include_f0,
const int idSubVOX, const int idSubVOX,
double* step, //shared memory float* step, //shared memory
double* grad, //shared memory float* grad, //shared memory
double* hess, //shared memory float* hess, //shared memory
double* inverse, //shared memory float* inverse, //shared memory
double* pcf, //shared memory double* pcf, //shared memory
double* ncf, //shared memory double* ncf, //shared memory
double* lambda, //shared memory double* lambda, //shared memory
...@@ -244,17 +246,18 @@ __device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT ...@@ -244,17 +246,18 @@ __device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT
double* olambda, //shared memory double* olambda, //shared memory
int* success, //shared memory int* success, //shared memory
int* end, //shared memory int* end, //shared memory
double* reduction, //shared memory float* J, //shared memory
double* fs, //shared memory float* reduction, //shared memory
double* x, //shared memory float* fs, //shared memory
double* _a, //shared memory float* x, //shared memory
double* _b, //shared memory float* _a, //shared memory
double* sumf, //shared memory float* _b, //shared memory
double* C, //shared memory float* sumf, //shared memory
double* el, //shared memory float* C, //shared memory
float* el, //shared memory
int* indx, //shared memory int* indx, //shared memory
//INPUT-OUTPUT //INPUT-OUTPUT
double* myparams) //shared memory float* myparams) //shared memory
{ {
int niter=0; int niter=0;
int maxiter=200; int maxiter=200;
...@@ -275,9 +278,9 @@ __device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT ...@@ -275,9 +278,9 @@ __device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT
while (!(*success&&niter++ >= maxiter)){ //if success we don't increase niter (first condition is true) while (!(*success&&niter++ >= maxiter)){ //if success we don't increase niter (first condition is true)
//function cost has been decreased, we have advanced. //function cost has been decreased, we have advanced.
if(*success){ if(*success){
grad_PVM_multi(myparams,mydata,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,reduction,fs,x,_a,_b,sumf,grad); grad_PVM_multi(myparams,mydata,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,J,reduction,fs,x,_a,_b,sumf,grad);
__syncthreads(); __syncthreads();
hess_PVM_multi(myparams,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,reduction,fs,x,_a,_b,sumf,hess); hess_PVM_multi(myparams,bvecs,bvals,ndirections,nfib,nparams,m_include_f0,idSubVOX,J,reduction,fs,x,_a,_b,sumf,hess);
} }
if(idSubVOX==0){ if(idSubVOX==0){
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment