diff --git a/CUDA/solver_mult_inverse.cu b/CUDA/solver_mult_inverse.cu
index 79f24547ca416e52ef36eb8cdefda38c78e7e747..ad83aa137f36ded4f886852f6f7e2b91d959439b 100644
--- a/CUDA/solver_mult_inverse.cu
+++ b/CUDA/solver_mult_inverse.cu
@@ -12,15 +12,15 @@
 //MATRIX INVERSE AS NEWMAT LU SOLVER
 //implemented in NEWMAT:newmat7.cpp GeneralSolvI.
 __device__ void solver( //INPUT
-        double *A,
-        double *P,
+        float *A,
+        float *P,
         int length,
         //TO USE
-        double *C,
-        double *el,
+        float *C,
+        float *el,
         int *indx,
         //OUTPUT
-        double *B)
+        float *B)
 {

     //double C[NPARAMS*NPARAMS];
@@ -33,15 +33,15 @@ __device__ void solver( //INPUT
     bool d=true;
     //int indx[NPARAMS];
-    double* akk = C;
-    double big = fabs(*akk);
+    float* akk = C;
+    float big = fabs(*akk);
     int mu = 0;
-    double* ai = akk;
+    float* ai = akk;
     int k;
     for (k = 1; k<length; k++){
         ai += length;
-        const double trybig = fabs(*ai);
+        const float trybig = fabs(*ai);
         if (big < trybig){
             big = trybig;
             mu = k;
         }
     }
@@ -52,18 +52,18 @@ __device__ void solver( //INPUT
         indx[k] = mu;

         if (mu != k){
-            double* a1 = C + length*k;
-            double* a2 = C + length*mu;
+            float* a1 = C + length*k;
+            float* a2 = C + length*mu;
             d = !d;
             int j = length;
             while (j--){
-                const double temp = *a1;
+                const float temp = *a1;
                 *a1++ = *a2;
                 *a2++ = temp;
             }
         }

-        double diag = *akk;
+        float diag = *akk;
         big = 0;
         mu = k + 1;
         if (diag != 0){
@@ -71,24 +71,24 @@ __device__ void solver( //INPUT
             int i = length - k - 1;
             while (i--){
                 ai += length;
-                double* al = ai;
-                double mult = *al / diag;
+                float* al = ai;
+                float mult = *al / diag;
                 *al = mult;
                 int l = length - k - 1;
-                double* aj = akk;
+                float* aj = akk;
                 if (l-- != 0){
-                    double aux=al[1]-(mult* *(++aj));
+                    float aux=al[1]-(mult* *(++aj));
                     *(++al) = aux;
                     //*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler

-                    const double trybig = fabs(*al);
+                    const float trybig = fabs(*al);
                     if (big < trybig){
                         big = trybig;
                         mu = length - i - 1;
                     }

                     while (l--){
-                        double aux= al[1]-(mult* *(++aj));
+                        float aux= al[1]-(mult* *(++aj));
                         *(++al) = aux;
                         //*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler
                     }
@@ -111,7 +111,7 @@ __device__ void solver( //INPUT
     int j;
     int ii = length;
     int ip;
-    double temp;
+    float temp;
     int i;

     for (i=0; i<length; i++){
@@ -122,8 +122,8 @@ __device__ void solver( //INPUT
         if (temp != 0.0) { ii = i; break; }
     }

-    double* bi;
-    double* ai2;
+    float* bi;
+    float* ai2;
     i = ii + 1;

     if (i < length){
@@ -131,10 +131,10 @@ __device__ void solver( //INPUT
         ai2 = C + ii + i * length;
         for (;;){
             int ip = indx[i];
-            double sum = el[ip];
+            float sum = el[ip];
             el[ip] = el[i];
-            double* aij = ai2;
-            double* bj = bi;
+            float* aij = ai2;
+            float* bj = bi;
             j = i - ii;
             while (j--){
                 sum -= *aij++* *bj++;
@@ -148,11 +148,11 @@ __device__ void solver( //INPUT
     ai2 = C + length*length;

     for (i = length - 1; i >= 0; i--){
-        double* bj = el+i;
+        float* bj = el+i;
         ai2 -= length;
-        double* ajx = ai2+i;
-        double sum = *bj;
-        double diag = *ajx;
+        float* ajx = ai2+i;
+        float sum = *bj;
+        float diag = *ajx;
         j = length - i;
         while(--j){
             sum -= *(++ajx)* *(++bj);
diff --git a/CUDA/xfibres_gpu.cu b/CUDA/xfibres_gpu.cu
index c5f5bf5916ffc5ef8f0b21ef85a48cf8859bffa8..9805887f3d820a595b403656f10e93fb08d384f5 100644
--- a/CUDA/xfibres_gpu.cu
+++ b/CUDA/xfibres_gpu.cu
@@ -71,7 +71,8 @@ void xfibres_gpu( //INPUT
     bool gradnonlin=opts.grad_file.set();

     if(nvox>0){
-        thrust::host_vector<double> datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host;
+        thrust::host_vector<float> datam_host, bvecs_host, bvals_host, params_host;
+        thrust::host_vector<double> alpha_host, beta_host;
         thrust::host_vector<float> tau_host;
         vector<ColumnVector> datam_vec;
         vector<Matrix> bvecs_vec, bvals_vec;
@@ -79,10 +80,10 @@ void xfibres_gpu( //INPUT
         ///// FIT /////
         prepare_data_gpu_FIT(datam,bvecs,bvals,gradm,datam_vec, bvecs_vec, bvals_vec, datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host, tau_host);

-        thrust::device_vector<double> datam_gpu=datam_host;
-        thrust::device_vector<double> bvecs_gpu=bvecs_host;
-        thrust::device_vector<double> bvals_gpu=bvals_host;
-        thrust::device_vector<double> params_gpu=params_host;
+        thrust::device_vector<float> datam_gpu=datam_host;
+        thrust::device_vector<float> bvecs_gpu=bvecs_host;
+        thrust::device_vector<float> bvals_gpu=bvals_host;
+        thrust::device_vector<float> params_gpu=params_host;
         thrust::host_vector<int> vox_repeat; //contains the id's of voxels repeated
         vox_repeat.resize(nvox);
         int nrepeat=0;
@@ -161,16 +162,16 @@ void fit( //INPUT
         const vector<ColumnVector> datam_vec,
         const vector<Matrix> bvecs_vec,
         const vector<Matrix> bvals_vec,
-        thrust::host_vector<double> datam_host,
-        thrust::host_vector<double> bvecs_host,
-        thrust::host_vector<double> bvals_host,
-        thrust::device_vector<double> datam_gpu,
-        thrust::device_vector<double> bvecs_gpu,
-        thrust::device_vector<double> bvals_gpu,
+        thrust::host_vector<float> datam_host,
+        thrust::host_vector<float> bvecs_host,
+        thrust::host_vector<float> bvals_host,
+        thrust::device_vector<float> datam_gpu,
+        thrust::device_vector<float> bvecs_gpu,
+        thrust::device_vector<float> bvals_gpu,
         int ndirections,
         string output_file,
         //OUTPUT
-        thrust::device_vector<double>& params_gpu,
+        thrust::device_vector<float>& params_gpu,
         thrust::host_vector<int>& vox_repeat, //for get residuals with or withot f0
         int& nrepeat)
 {
@@ -196,7 +197,7 @@ void fit( //INPUT

     if (opts.f0.value()){
         float md,mf,f0;
-        thrust::host_vector<double> params_host;
+        thrust::host_vector<float> params_host;
         params_host.resize(nvox*nparams_fit);
         thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
         for(int vox=0;vox<nvox;vox++){
@@ -213,17 +214,17 @@ void fit( //INPUT
             vector<ColumnVector> datam_repeat_vec;
             vector<Matrix> bvecs_repeat_vec;
             vector<Matrix> bvals_repeat_vec;
-            thrust::host_vector<double> datam_repeat_host;
-            thrust::host_vector<double> bvecs_repeat_host;
-            thrust::host_vector<double> bvals_repeat_host;
-            thrust::host_vector<double> params_repeat_host;
+            thrust::host_vector<float> datam_repeat_host;
+            thrust::host_vector<float> bvecs_repeat_host;
+            thrust::host_vector<float> bvals_repeat_host;
+            thrust::host_vector<float> params_repeat_host;
             prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

-            thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
-            thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
-            thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
-            thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
+            thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
+            thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
+            thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;
+            thrust::device_vector<float> params_repeat_gpu=params_repeat_host;

             fit_PVM_single(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);

             thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());
@@ -236,7 +237,7 @@ void fit( //INPUT

     if (opts.f0.value()){
         float md,mf,f0;
-        thrust::host_vector<double> params_host;
+        thrust::host_vector<float> params_host;
         params_host.resize(nvox*nparams_fit);
         thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
         for(int vox=0;vox<nvox;vox++){
@@ -253,17 +254,17 @@ void fit( //INPUT
             vector<ColumnVector> datam_repeat_vec;
             vector<Matrix> bvecs_repeat_vec;
             vector<Matrix> bvals_repeat_vec;
-            thrust::host_vector<double> datam_repeat_host;
-            thrust::host_vector<double> bvecs_repeat_host;
-            thrust::host_vector<double> bvals_repeat_host;
-            thrust::host_vector<double> params_repeat_host;
+            thrust::host_vector<float> datam_repeat_host;
+            thrust::host_vector<float> bvecs_repeat_host;
+            thrust::host_vector<float> bvals_repeat_host;
+            thrust::host_vector<float> params_repeat_host;
             prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

-            thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
-            thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
-            thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
-            thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
+            thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
+            thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
+            thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;
+            thrust::device_vector<float> params_repeat_gpu=params_repeat_host;

             fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);

             thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());
@@ -281,7 +282,7 @@ void fit( //INPUT

     if (opts.f0.value()){
         float md,mf,f0;
-        thrust::host_vector<double> params_host;
+        thrust::host_vector<float> params_host;
         params_host.resize(nvox*nparams_fit);
         thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
         for(int vox=0;vox<nvox;vox++){
@@ -298,17 +299,17 @@ void fit( //INPUT

             vector<ColumnVector> datam_repeat_vec;
             vector<Matrix> bvecs_repeat_vec;
             vector<Matrix> bvals_repeat_vec;
-            thrust::host_vector<double> datam_repeat_host;
-            thrust::host_vector<double> bvecs_repeat_host;
-            thrust::host_vector<double> bvals_repeat_host;
-            thrust::host_vector<double> params_repeat_host;
+            thrust::host_vector<float> datam_repeat_host;
+            thrust::host_vector<float> bvecs_repeat_host;
+            thrust::host_vector<float> bvals_repeat_host;
+            thrust::host_vector<float> params_repeat_host;
             prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

-            thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
-            thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
-            thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
-            thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
+            thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
+            thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
+            thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;
+            thrust::device_vector<float> params_repeat_gpu=params_repeat_host;

             fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);

@@ -338,12 +339,12 @@ void prepare_data_gpu_FIT( //INPUT
         vector<ColumnVector>& datam_vec,
         vector<Matrix>& bvecs_vec,
         vector<Matrix>& bvals_vec,
-        thrust::host_vector<double>& datam_host, //data prepared for copy to GPU
-        thrust::host_vector<double>& bvecs_host,
-        thrust::host_vector<double>& bvals_host,
+        thrust::host_vector<float>& datam_host, //data prepared for copy to GPU
+        thrust::host_vector<float>& bvecs_host,
+        thrust::host_vector<float>& bvals_host,
         thrust::host_vector<double>& alpha_host,
         thrust::host_vector<double>& beta_host,
-        thrust::host_vector<double>& params_host,
+        thrust::host_vector<float>& params_host,
         thrust::host_vector<float>& tau_host)
 {
     xfibresOptions& opts = xfibresOptions::getInstance();
@@ -421,9 +422,9 @@ void prepare_data_gpu_FIT( //INPUT

 //prepare the structures for copy all neccesary data to FIT in GPU when is repeated because f0. Only some voxels
 void prepare_data_gpu_FIT_repeat( //INPUT
-        thrust::host_vector<double> datam_host,
-        thrust::host_vector<double> bvecs_host,
-        thrust::host_vector<double> bvals_host,
+        thrust::host_vector<float> datam_host,
+        thrust::host_vector<float> bvecs_host,
+        thrust::host_vector<float> bvals_host,
         thrust::host_vector<int> vox_repeat,
         int nrepeat,
         int ndirections,
@@ -431,10 +432,10 @@ void prepare_data_gpu_FIT_repeat( //INPUT
         vector<ColumnVector>& datam_repeat_vec,
         vector<Matrix>& bvecs_repeat_vec,
         vector<Matrix>& bvals_repeat_vec,
-        thrust::host_vector<double>& datam_repeat_host, //data prepared for copy to GPU
-        thrust::host_vector<double>& bvecs_repeat_host,
-        thrust::host_vector<double>& bvals_repeat_host,
-        thrust::host_vector<double>& params_repeat_host)
+        thrust::host_vector<float>& datam_repeat_host, //data prepared for copy to GPU
+        thrust::host_vector<float>& bvecs_repeat_host,
+        thrust::host_vector<float>& bvals_repeat_host,
+        thrust::host_vector<float>& params_repeat_host)
 {
     xfibresOptions& opts = xfibresOptions::getInstance();

@@ -509,19 +510,19 @@ void prepare_data_gpu_FIT_repeat( //INPUT


 void mix_params( //INPUT
-        thrust::host_vector<double> params_repeat_host,
+        thrust::host_vector<float> params_repeat_host,
         thrust::host_vector<int> vox_repeat,
         int nrepeat,
         int nvox,
         //INPUT-OUTPUT
-        thrust::device_vector<double>& params_gpu)
+        thrust::device_vector<float>& params_gpu)
 {
     xfibresOptions& opts = xfibresOptions::getInstance();
     int nfib= opts.nfibres.value();
     int nparams = 2+3*opts.nfibres.value();
     if(opts.modelnum.value()==2) nparams++;

-    thrust::host_vector<double> params_host;
+    thrust::host_vector<float> params_host;
     params_host.resize(nvox*(nparams+1));
     thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());

diff --git a/CUDA/xfibres_gpu.cuh b/CUDA/xfibres_gpu.cuh
index eb88dd1f0ab32e04872bd70706dfe820b00fdb77..8a120089fb7358bdb39e3011ce2e347ef30efeec 100644
--- a/CUDA/xfibres_gpu.cuh
+++ b/CUDA/xfibres_gpu.cuh
@@ -17,16 +17,16 @@ void fit( //INPUT
         const vector<ColumnVector> datam_vec,
         const vector<Matrix> bvecs_vec,
         const vector<Matrix> bvals_vec,
-        thrust::host_vector<double> datam_host,
-        thrust::host_vector<double> bvecs_host,
-        thrust::host_vector<double> bvals_host,
-        thrust::device_vector<double> datam_gpu,
-        thrust::device_vector<double> bvecs_gpu,
-        thrust::device_vector<double> bvals_gpu,
+        thrust::host_vector<float> datam_host,
+        thrust::host_vector<float> bvecs_host,
+        thrust::host_vector<float> bvals_host,
+        thrust::device_vector<float> datam_gpu,
+        thrust::device_vector<float> bvecs_gpu,
+        thrust::device_vector<float> bvals_gpu,
         int ndirections,
         string output_file,
         //OUTPUT
-        thrust::device_vector<double>& params_gpu,
+        thrust::device_vector<float>& params_gpu,
         thrust::host_vector<int>& vox_repeat,
         int& nrepeat);

@@ -40,20 +40,20 @@ void prepare_data_gpu_FIT( //INPUT
         vector<ColumnVector>& datam_vec,
         vector<Matrix>& bvecs_vec,
         vector<Matrix>& bvals_vec,
-        thrust::host_vector<double>& datam_host,
-        thrust::host_vector<double>& bvecs_host,
-        thrust::host_vector<double>& bvals_host,
+        thrust::host_vector<float>& datam_host,
+        thrust::host_vector<float>& bvecs_host,
+        thrust::host_vector<float>& bvals_host,
         thrust::host_vector<double>& alpha_host,
         thrust::host_vector<double>& beta_host,
-        thrust::host_vector<double>& params_host,
+        thrust::host_vector<float>& params_host,
         thrust::host_vector<float>& tau_host);


 //implemented and used in xfibres_gpu.cu
 void prepare_data_gpu_FIT_repeat( //INPUT
-        thrust::host_vector<double> datam_host,
-        thrust::host_vector<double> bvecs_host,
-        thrust::host_vector<double> bvals_host,
+        thrust::host_vector<float> datam_host,
+        thrust::host_vector<float> bvecs_host,
+        thrust::host_vector<float> bvals_host,
         thrust::host_vector<int> vox_repeat,
         int nrepeat,
         int ndirections,
@@ -61,19 +61,19 @@ void prepare_data_gpu_FIT_repeat( //INPUT
         vector<ColumnVector>& datam_repeat_vec,
         vector<Matrix>& bvecs_repeat_vec,
         vector<Matrix>& bvals_repeat_vec,
-        thrust::host_vector<double>& datam_repeat_host,
-        thrust::host_vector<double>& bvecs_repeat_host,
-        thrust::host_vector<double>& bvals_repeat_host,
-        thrust::host_vector<double>& params_repeat_host);
+        thrust::host_vector<float>& datam_repeat_host,
+        thrust::host_vector<float>& bvecs_repeat_host,
+        thrust::host_vector<float>& bvals_repeat_host,
+        thrust::host_vector<float>& params_repeat_host);


 //implemented and used in xfibres_gpu.cu
 void mix_params( //INPUT
-        thrust::host_vector<double> params_repeat_gpu,
+        thrust::host_vector<float> params_repeat_gpu,
         thrust::host_vector<int> vox_repeat,
         int nrepeat,
         int nvox,
         //INPUT-OUTPUT
-        thrust::device_vector<double>& params_gpu);
+        thrust::device_vector<float>& params_gpu);

 //implemented and used in xfibres_gpu.cu
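The pattern that recurs throughout this patch is: fill a thrust::host_vector<float> on the CPU, let Thrust copy it into a thrust::device_vector<float> by plain assignment, and hand device code raw float* buffers, with the element type matching on both sides. The sketch below is a minimal, self-contained illustration of that pattern only; the kernel demo_scale, its toy data, and the launch configuration are hypothetical and are not part of the FSL sources touched above.

// Minimal sketch (hypothetical kernel and data, not from the patch):
// a host_vector<float> is copied into a device_vector<float> by assignment,
// and the kernel receives a raw float* -- the same handoff the patched
// xfibres_gpu.cu relies on after switching its containers to float.
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <cstdio>

__global__ void demo_scale(const float* in, int n, float* out){
    // stand-in for the float-based device solver: one trivial op per element
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) out[i] = 2.0f * in[i];
}

int main(){
    thrust::host_vector<float> h(4);
    for (int i = 0; i < 4; i++) h[i] = float(i);     // toy data

    thrust::device_vector<float> d_in = h;           // host float -> device float copy
    thrust::device_vector<float> d_out(4);

    demo_scale<<<1, 4>>>(thrust::raw_pointer_cast(d_in.data()), 4,
                         thrust::raw_pointer_cast(d_out.data()));
    cudaDeviceSynchronize();

    thrust::host_vector<float> result = d_out;       // device -> host copy back
    for (int i = 0; i < 4; i++) printf("%f ", (float)result[i]);
    printf("\n");
    return 0;
}

Keeping the host and device containers at the same element type is what makes the one-line assignments in the patch straightforward whole-buffer copies; alpha_host and beta_host remain double, so the patch moves them into their own double-typed host_vector rather than folding them into the float containers.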