Reduce precision (double to float)

bd259c2b · Moises Fernandez · c06bbe5d · bd259c2b · bd259c2b · bd259c2b
Commit bd259c2b authored 11 years ago by Moises Fernandez
--- a/CUDA/solver_mult_inverse.cu
+++ b/CUDA/solver_mult_inverse.cu
@@ -12,15 +12,15 @@
 //MATRIX INVERSE AS NEWMAT LU SOLVER
 //implemented in NEWMAT:newmat7.cpp GeneralSolvI.
 __device__ void solver(	//INPUT
-			double *A, 
-			double *P,
+			float *A, 
+			float *P,
 			int length,
 			//TO USE
-			double *C,
-			double *el,
+			float *C,
+			float *el,
 			int *indx,	
 			//OUTPUT
-			double *B)  
+			float *B)  
 {  
 	//double C[NPARAMS*NPARAMS];

@@ -33,15 +33,15 @@ __device__ void solver(	//INPUT
 	bool d=true; 
  	//int indx[NPARAMS];

-   	double* akk = C;   
-	double big = fabs(*akk); 
+   	float* akk = C;   
+	float big = fabs(*akk); 
 	int mu = 0; 
-	double* ai = akk; 
+	float* ai = akk; 
 	int k;

 	for (k = 1; k<length; k++){
      		ai += length; 
-		const double trybig = fabs(*ai);
+		const float trybig = fabs(*ai);
      		if (big < trybig){ 
 			big = trybig; 
 			mu = k; 
@@ -52,18 +52,18 @@ __device__ void solver(	//INPUT

 		indx[k] = mu;
 		if (mu != k){
-         		double* a1 = C + length*k; 
-			double* a2 = C + length*mu; 
+         		float* a1 = C + length*k; 
+			float* a2 = C + length*mu; 
 			d = !d;
         		int j = length;
         		while (j--){ 
-				const double temp = *a1; 
+				const float temp = *a1; 
 				*a1++ = *a2; 
 				*a2++ = temp; 
 			}
      		}

-      		double diag = *akk; 
+      		float diag = *akk; 
 		big = 0; 
 		mu = k + 1;
      		if (diag != 0){
@@ -71,24 +71,24 @@ __device__ void solver(	//INPUT
 			int i = length - k - 1;
         		while (i--){
            			ai += length; 
-				double* al = ai; 
-				double mult = *al / diag; 
+				float* al = ai; 
+				float mult = *al / diag; 
 				*al = mult;
            			int l = length - k - 1; 
-				double* aj = akk;
+				float* aj = akk;
 				if (l-- != 0){
 				
-					double aux=al[1]-(mult* *(++aj));
+					float aux=al[1]-(mult* *(++aj));
 					*(++al) = aux;
 					//*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler
 					
-               				const double trybig = fabs(*al);
+               				const float trybig = fabs(*al);
               				if (big < trybig){ 
 						big = trybig; 
 						mu = length - i - 1; 
 					}
               				while (l--){ 
-						double aux= al[1]-(mult* *(++aj));
+						float aux= al[1]-(mult* *(++aj));
 						*(++al) = aux;
 						//*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler
 					}
@@ -111,7 +111,7 @@ __device__ void solver(	//INPUT
   	int j;
 	int ii = length; 
 	int ip;    
-	double temp;
+	float temp;
 	int i;
     
 	for (i=0; i<length; i++){
@@ -122,8 +122,8 @@ __device__ void solver(	//INPUT
      		if (temp != 0.0) { ii = i; break; }
   	}
 	
-  	double* bi; 
-	double* ai2;
+  	float* bi; 
+	float* ai2;
   	i = ii + 1;

  	if (i < length){
@@ -131,10 +131,10 @@ __device__ void solver(	//INPUT
 		ai2 = C + ii + i * length;
      		for (;;){
         		int ip = indx[i]; 
-			double sum = el[ip]; 
+			float sum = el[ip]; 
 			el[ip] = el[i];
-         		double* aij = ai2; 
-			double* bj = bi; 
+         		float* aij = ai2; 
+			float* bj = bi; 
 			j = i - ii;
         		while (j--){ 
 				sum -=  *aij++* *bj++; 
@@ -148,11 +148,11 @@ __device__ void solver(	//INPUT
   	ai2 = C + length*length;

   	for (i = length - 1; i >= 0; i--){
-      		double* bj = el+i; 
+      		float* bj = el+i; 
 		ai2 -= length; 
-		double* ajx = ai2+i;
-      		double sum = *bj; 
-		double diag = *ajx;
+		float* ajx = ai2+i;
+      		float sum = *bj; 
+		float diag = *ajx;
      		j = length - i; 
 		while(--j){ 
 			sum -= *(++ajx)* *(++bj);  

--- a/CUDA/xfibres_gpu.cu
+++ b/CUDA/xfibres_gpu.cu
@@ -71,7 +71,8 @@ void xfibres_gpu(	//INPUT
 	bool gradnonlin=opts.grad_file.set();

 	if(nvox>0){
-		thrust::host_vector<double> datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host;
+		thrust::host_vector<float> datam_host, bvecs_host, bvals_host, params_host;
+		thrust::host_vector<double> alpha_host, beta_host;
 		thrust::host_vector<float> tau_host;
 		vector<ColumnVector> datam_vec;
 		vector<Matrix> bvecs_vec, bvals_vec;
@@ -79,10 +80,10 @@ void xfibres_gpu(	//INPUT
 		///// FIT /////
 		prepare_data_gpu_FIT(datam,bvecs,bvals,gradm,datam_vec, bvecs_vec, bvals_vec, datam_host, bvecs_host,  bvals_host, alpha_host, beta_host, params_host, tau_host);	

-		thrust::device_vector<double> datam_gpu=datam_host;
-		thrust::device_vector<double> bvecs_gpu=bvecs_host;
-		thrust::device_vector<double> bvals_gpu=bvals_host;	
-		thrust::device_vector<double> params_gpu=params_host;
+		thrust::device_vector<float> datam_gpu=datam_host;
+		thrust::device_vector<float> bvecs_gpu=bvecs_host;
+		thrust::device_vector<float> bvals_gpu=bvals_host;	
+		thrust::device_vector<float> params_gpu=params_host;
 		thrust::host_vector<int> vox_repeat;	//contains the id's of voxels repeated
 		vox_repeat.resize(nvox);
 		int nrepeat=0;
@@ -161,16 +162,16 @@ void fit(	//INPUT
 		const vector<ColumnVector> 	datam_vec, 
 		const vector<Matrix> 		bvecs_vec,
 		const vector<Matrix> 		bvals_vec,
-		thrust::host_vector<double> 	datam_host,
-		thrust::host_vector<double>	bvecs_host, 
-		thrust::host_vector<double>	bvals_host,
-		thrust::device_vector<double> 	datam_gpu, 
-		thrust::device_vector<double>	bvecs_gpu, 
-		thrust::device_vector<double>	bvals_gpu,
+		thrust::host_vector<float> 	datam_host,
+		thrust::host_vector<float>	bvecs_host, 
+		thrust::host_vector<float>	bvals_host,
+		thrust::device_vector<float> 	datam_gpu, 
+		thrust::device_vector<float>	bvecs_gpu, 
+		thrust::device_vector<float>	bvals_gpu,
 		int 				ndirections,
 		string 				output_file,
 		//OUTPUT
-		thrust::device_vector<double>&	params_gpu,
+		thrust::device_vector<float>&	params_gpu,
 		thrust::host_vector<int>&	vox_repeat,	//for get residuals with or withot f0
 		int&				nrepeat)
 {
@@ -196,7 +197,7 @@ void fit(	//INPUT

 			if (opts.f0.value()){
 				float md,mf,f0;	
-				thrust::host_vector<double> params_host;
+				thrust::host_vector<float> params_host;
 				params_host.resize(nvox*nparams_fit);
 				thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());	
 				for(int vox=0;vox<nvox;vox++){			
@@ -213,17 +214,17 @@ void fit(	//INPUT
 					vector<ColumnVector> 	datam_repeat_vec; 
 					vector<Matrix> 		bvecs_repeat_vec;
 					vector<Matrix> 		bvals_repeat_vec;
-					thrust::host_vector<double> 	datam_repeat_host;
-					thrust::host_vector<double> 	bvecs_repeat_host;	
-					thrust::host_vector<double> 	bvals_repeat_host;	
-					thrust::host_vector<double> 	params_repeat_host;	
+					thrust::host_vector<float> 	datam_repeat_host;
+					thrust::host_vector<float> 	bvecs_repeat_host;	
+					thrust::host_vector<float> 	bvals_repeat_host;	
+					thrust::host_vector<float> 	params_repeat_host;	
 								
 					prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

-					thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
-					thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
-					thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;	
-					thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
+					thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
+					thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
+					thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;	
+					thrust::device_vector<float> params_repeat_gpu=params_repeat_host;
 				
 		 			fit_PVM_single(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
 					thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());	
@@ -236,7 +237,7 @@ void fit(	//INPUT

 			if (opts.f0.value()){
 				float md,mf,f0;	
-				thrust::host_vector<double> params_host;
+				thrust::host_vector<float> params_host;
 				params_host.resize(nvox*nparams_fit);
 				thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());	
 				for(int vox=0;vox<nvox;vox++){		
@@ -253,17 +254,17 @@ void fit(	//INPUT
 					vector<ColumnVector> 	datam_repeat_vec; 
 					vector<Matrix> 		bvecs_repeat_vec;
 					vector<Matrix> 		bvals_repeat_vec;
-					thrust::host_vector<double> 	datam_repeat_host;
-					thrust::host_vector<double> 	bvecs_repeat_host;	
-					thrust::host_vector<double> 	bvals_repeat_host;	
-					thrust::host_vector<double> 	params_repeat_host;	
+					thrust::host_vector<float> 	datam_repeat_host;
+					thrust::host_vector<float> 	bvecs_repeat_host;	
+					thrust::host_vector<float> 	bvals_repeat_host;	
+					thrust::host_vector<float> 	params_repeat_host;	
 								
 					prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);

-					thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
-					thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
-					thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;	
-					thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
+					thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
+					thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
+					thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;	
+					thrust::device_vector<float> params_repeat_gpu=params_repeat_host;
 				
 		 			fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
 					thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());	
@@ -281,7 +282,7 @@ void fit(	//INPUT

 		if (opts.f0.value()){
 				float md,mf,f0;	
-				thrust::host_vector<double> params_host;
+				thrust::host_vector<float> params_host;
 				params_host.resize(nvox*nparams_fit);
 				thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());	
 				for(int vox=0;vox<nvox;vox++){			
@@ -298,17 +299,17 @@ void fit(	//INPUT
 					vector<ColumnVector> 	datam_repeat_vec; 
 					vector<Matrix> 		bvecs_repeat_vec;
 					vector<Matrix> 		bvals_repeat_vec;
-					thrust::host_vector<double> 	datam_repeat_host;
-					thrust::host_vector<double> 	bvecs_repeat_host;	
-					thrust::host_vector<double> 	bvals_repeat_host;	
-					thrust::host_vector<double> 	params_repeat_host;		
+					thrust::host_vector<float> 	datam_repeat_host;
+					thrust::host_vector<float> 	bvecs_repeat_host;	
+					thrust::host_vector<float> 	bvals_repeat_host;	
+					thrust::host_vector<float> 	params_repeat_host;		
 								
 					prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host,  bvals_repeat_host, params_repeat_host);

-					thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
-					thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
-					thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;	
-					thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
+					thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
+					thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
+					thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;	
+					thrust::device_vector<float> params_repeat_gpu=params_repeat_host;
 				
 		 			fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);

@@ -338,12 +339,12 @@ void prepare_data_gpu_FIT(	//INPUT
 				vector<ColumnVector>&			datam_vec,
 				vector<Matrix>&				bvecs_vec,
 				vector<Matrix>&				bvals_vec,
-				thrust::host_vector<double>&   		datam_host,	//data prepared for copy to GPU
-				thrust::host_vector<double>&		bvecs_host,				
-				thrust::host_vector<double>&		bvals_host,
+				thrust::host_vector<float>&   		datam_host,	//data prepared for copy to GPU
+				thrust::host_vector<float>&		bvecs_host,				
+				thrust::host_vector<float>&		bvals_host,
 				thrust::host_vector<double>&		alpha_host,
 				thrust::host_vector<double>&		beta_host,
-				thrust::host_vector<double>&		params_host,
+				thrust::host_vector<float>&		params_host,
 				thrust::host_vector<float>&		tau_host)
 {
 	xfibresOptions& opts = xfibresOptions::getInstance();
@@ -421,9 +422,9 @@ void prepare_data_gpu_FIT(	//INPUT

 //prepare the structures for copy all neccesary data to FIT in GPU when is repeated because f0. Only some voxels
 void prepare_data_gpu_FIT_repeat(	//INPUT
-					thrust::host_vector<double>   		datam_host,	
-					thrust::host_vector<double>		bvecs_host,				
-					thrust::host_vector<double>		bvals_host,
+					thrust::host_vector<float>   		datam_host,	
+					thrust::host_vector<float>		bvecs_host,				
+					thrust::host_vector<float>		bvals_host,
 					thrust::host_vector<int>		vox_repeat,
 					int					nrepeat,
 					int					ndirections,
@@ -431,10 +432,10 @@ void prepare_data_gpu_FIT_repeat(	//INPUT
 					vector<ColumnVector>&			datam_repeat_vec,
 					vector<Matrix>&				bvecs_repeat_vec,
 					vector<Matrix>&				bvals_repeat_vec,
-					thrust::host_vector<double>&   		datam_repeat_host,	//data prepared for copy to GPU
-					thrust::host_vector<double>&		bvecs_repeat_host,				
-					thrust::host_vector<double>&		bvals_repeat_host,
-					thrust::host_vector<double>&		params_repeat_host)
+					thrust::host_vector<float>&   		datam_repeat_host,	//data prepared for copy to GPU
+					thrust::host_vector<float>&		bvecs_repeat_host,				
+					thrust::host_vector<float>&		bvals_repeat_host,
+					thrust::host_vector<float>&		params_repeat_host)
 {
 	xfibresOptions& opts = xfibresOptions::getInstance();

@@ -509,19 +510,19 @@ void prepare_data_gpu_FIT_repeat(	//INPUT


 void mix_params(	//INPUT
-			thrust::host_vector<double>   		params_repeat_host,
+			thrust::host_vector<float>   		params_repeat_host,
 			thrust::host_vector<int>		vox_repeat,
 			int					nrepeat,
 			int					nvox,
 			//INPUT-OUTPUT
-			thrust::device_vector<double>&   	params_gpu)
+			thrust::device_vector<float>&   	params_gpu)
 {
 	xfibresOptions& opts = xfibresOptions::getInstance();
 	int nfib= opts.nfibres.value();
 	int nparams = 2+3*opts.nfibres.value();
 	if(opts.modelnum.value()==2) nparams++;

-	thrust::host_vector<double> params_host;
+	thrust::host_vector<float> params_host;
 	params_host.resize(nvox*(nparams+1));
 	thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());	


--- a/CUDA/xfibres_gpu.cuh
+++ b/CUDA/xfibres_gpu.cuh
@@ -17,16 +17,16 @@ void fit(	//INPUT
 		const vector<ColumnVector> 	datam_vec, 
 		const vector<Matrix> 		bvecs_vec,
 		const vector<Matrix> 		bvals_vec,
-		thrust::host_vector<double> 	datam_host,	
-		thrust::host_vector<double>	bvecs_host, 
-		thrust::host_vector<double>	bvals_host,
-		thrust::device_vector<double> 	datam_gpu, 
-		thrust::device_vector<double>	bvecs_gpu, 
-		thrust::device_vector<double>	bvals_gpu,
+		thrust::host_vector<float> 	datam_host,	
+		thrust::host_vector<float>	bvecs_host, 
+		thrust::host_vector<float>	bvals_host,
+		thrust::device_vector<float> 	datam_gpu, 
+		thrust::device_vector<float>	bvecs_gpu, 
+		thrust::device_vector<float>	bvals_gpu,
 		int				ndirections,
 		string 				output_file,
 		//OUTPUT
-		thrust::device_vector<double>&	params_gpu,
+		thrust::device_vector<float>&	params_gpu,
 		thrust::host_vector<int>&	vox_repeat,
 		int&				nrepeat);

@@ -40,20 +40,20 @@ void prepare_data_gpu_FIT(	//INPUT
 				vector<ColumnVector>&			datam_vec,
 				vector<Matrix>&				bvecs_vec,
 				vector<Matrix>&				bvals_vec,
-				thrust::host_vector<double>&   		datam_host,	
-				thrust::host_vector<double>&		bvecs_host,				
-				thrust::host_vector<double>&		bvals_host,
+				thrust::host_vector<float>&   		datam_host,	
+				thrust::host_vector<float>&		bvecs_host,				
+				thrust::host_vector<float>&		bvals_host,
 				thrust::host_vector<double>&		alpha_host,
 				thrust::host_vector<double>&		beta_host,
-				thrust::host_vector<double>&		params_host,
+				thrust::host_vector<float>&		params_host,
 				thrust::host_vector<float>&		tau_host);


 //implemented and used in xfibres_gpu.cu
 void prepare_data_gpu_FIT_repeat(	//INPUT
-					thrust::host_vector<double>   		datam_host,	
-					thrust::host_vector<double>		bvecs_host,				
-					thrust::host_vector<double>		bvals_host,
+					thrust::host_vector<float>   		datam_host,	
+					thrust::host_vector<float>		bvecs_host,				
+					thrust::host_vector<float>		bvals_host,
 					thrust::host_vector<int>		vox_repeat,
 					int					nrepeat,
 					int					ndirections,
@@ -61,19 +61,19 @@ void prepare_data_gpu_FIT_repeat(	//INPUT
 					vector<ColumnVector>&			datam_repeat_vec,
 					vector<Matrix>&				bvecs_repeat_vec,
 					vector<Matrix>&				bvals_repeat_vec,
-					thrust::host_vector<double>&   		datam_repeat_host,
-					thrust::host_vector<double>&		bvecs_repeat_host,				
-					thrust::host_vector<double>&		bvals_repeat_host,
-					thrust::host_vector<double>&		params_repeat_host);
+					thrust::host_vector<float>&   		datam_repeat_host,
+					thrust::host_vector<float>&		bvecs_repeat_host,				
+					thrust::host_vector<float>&		bvals_repeat_host,
+					thrust::host_vector<float>&		params_repeat_host);

 //implemented and used in xfibres_gpu.cu
 void mix_params(	//INPUT
-			thrust::host_vector<double>   		params_repeat_gpu,
+			thrust::host_vector<float>   		params_repeat_gpu,
 			thrust::host_vector<int>			vox_repeat,
 			int						nrepeat,
 			int						nvox,
 			//INPUT-OUTPUT
-			thrust::device_vector<double>&   		params_gpu);
+			thrust::device_vector<float>&   		params_gpu);


 //implemented and used in xfibres_gpu.cu