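Reduce precision (double to float) of the GPU vectors and of the kernel/device-function pointer arguments; the cost-function output cfv stays double, and the grad_*/hess_* device functions gain a float* J argument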

9df520eb · Moises Fernandez · 940fd739 · 9df520eb · 9df520eb
Commit 9df520eb authored 11 years ago by Moises Fernandez
--- a/CUDA/diffmodels.cuh
+++ b/CUDA/diffmodels.cuh
@@ -12,36 +12,36 @@ void fit_PVM_single(	//INPUT
 			const vector<ColumnVector> 	datam_vec, 
 			const vector<Matrix> 		bvecs_vec,
 			const vector<Matrix> 		bvals_vec,
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,
 			int				ndirections,
 			int 				nfib,	
 			bool 				m_include_f0,
 			bool				gradnonlin,
 			string 				output_file,		
 			//OUTPUT
-			thrust::device_vector<double>&	params_gpu);
+			thrust::device_vector<float>&	params_gpu);

 void fit_PVM_single_c(	//INPUT
 			const vector<ColumnVector> 	datam_vec, 
 			const vector<Matrix> 		bvecs_vec,
 			const vector<Matrix> 		bvals_vec,
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,
 			int				ndirections,
 			int 				nfib,		
 			bool 				m_include_f0,
 			bool				gradnonlin,
 			string 				output_file,		
 			//OUTPUT
-			thrust::device_vector<double>&	params_gpu);
+			thrust::device_vector<float>&	params_gpu);

 void fit_PVM_multi(	//INPUT
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,	
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,	
 			int 				nvox,		
 			int				ndirections,	
 			int				nfib,
@@ -49,13 +49,13 @@ void fit_PVM_multi(	//INPUT
 			bool				gradnonlin,
 			string 				output_file,
 			//OUTPUT
-			thrust::device_vector<double>&	params_gpu);
+			thrust::device_vector<float>&	params_gpu);

 void calculate_tau(	//INPUT
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	params_gpu,
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	params_gpu,
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,
 			thrust::host_vector<int>	vox_repeat,
 			int				nrepeat,
 			int				ndirections,
@@ -70,166 +70,172 @@ void calculate_tau(	//INPUT


 __device__ void cf_PVM_single(		//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,	
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,	
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams, 
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double*				_d,
-					double*				sumf,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float*				_d,
+					float*				sumf,
 					//OUTPUT
 					double* 			cfv);

 __device__ void grad_PVM_single(	//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				grad);
+					float*				grad);

 __device__ void hess_PVM_single(	//INPUT
-					const double*			params,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,
-					double* 			fs,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,
+					float* 				fs,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				hess);
+					float*				hess);

 __device__ void cf_PVM_single_c(	//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams, 
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,
-					double* 			fs,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				reduction,
+					float* 				fs,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
 					double* 			cfv);


 __device__ void grad_PVM_single_c(	//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double* 			f_deriv,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float* 				f_deriv,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				grad);
+					float*				grad);

 __device__ void hess_PVM_single_c(	//INPUT
-					const double*			params,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double* 			f_deriv,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float* 				f_deriv,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				hess);
+					float*				hess);

 __device__ void cf_PVM_multi(		//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams, 
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_a,
-					double* 			_b,
-					double* 			sumf,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_a,
+					float* 				_b,
+					float* 				sumf,
 					//OUTPUT
-					double* 			cfv);
+					double*				cfv);

 __device__ void grad_PVM_multi(		//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_a,
-					double* 			_b,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_a,
+					float* 				_b,
+					float* 				sumf,
 					//OUTPUT
-					double*				grad);
+					float*				grad);

 __device__ void hess_PVM_multi(		//INPUT
-					const double*			params,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_a,
-					double*				_b,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_a,
+					float*				_b,
+					float* 				sumf,
 					//OUTPUT
-					double*				hess);
+					float*				hess);
--- a/CUDA/fit_gpu_kernels.h
+++ b/CUDA/fit_gpu_kernels.h
@@ -7,9 +7,9 @@
 /*  CCOPYRIGHT  */

 extern "C" __global__ void fit_PVM_single_kernel(	//INPUT
-							const double* 		data, 
-							const double* 		bvecs, 
-							const double* 		bvals, 
+							const float* 		data, 
+							const float* 		bvecs, 
+							const float* 		bvals, 
 							const int 		nvox, 
 							const int		ndirections,
 							const int 		nfib,
@@ -17,12 +17,12 @@ extern "C" __global__ void fit_PVM_single_kernel(	//INPUT
 							const bool 		m_include_f0, 
 							const bool		gradnonlin,
 							//INPUT-OUTPUT
-							double* 		params);
+							float* 			params);

 extern "C" __global__ void fit_PVM_single_c_kernel(	//INPUT
-							const double* 		data, 
-							const double* 		bvecs, 
-							const double* 		bvals, 
+							const float* 		data, 
+							const float* 		bvecs, 
+							const float* 		bvals, 
 							const int 		nvox, 
 							const int		ndirections,
 							const int 		nfib, 
@@ -32,13 +32,13 @@ extern "C" __global__ void fit_PVM_single_c_kernel(	//INPUT
 							const bool	 	m_return_fanning,
 							const bool		gradnonlin,
 							//INPUT-OUTPUT
-							double* 		params);
+							float* 			params);

 extern "C" __global__ void fit_PVM_multi_kernel(	//INPUT
-							const double* 		data, 
-							const double* 		params_PVM_simple_c,
-							const double* 		bvecs, 
-							const double* 		bvals, 
+							const float* 		data, 
+							const float* 		params_PVM_simple_c,
+							const float* 		bvecs, 
+							const float* 		bvals, 
 							const int 		nvox, 
 							const int		ndirections,
 							const int 		nfib, 	
@@ -46,13 +46,13 @@ extern "C" __global__ void fit_PVM_multi_kernel(	//INPUT
 							const bool 		m_include_f0,
 							const bool		gradnonlin,
 							//OUTPUT
-							double* 		params);
+							float* 			params);

 extern "C" __global__ void get_residuals_PVM_single_kernel(	//INPUT
-								const double* 		data, 
-								const double* 		params,
-								const double* 		bvecs, 
-								const double* 		bvals, 
+								const float* 		data, 
+								const float* 		params,
+								const float* 		bvecs, 
+								const float* 		bvals, 
 								const int 		nvox, 
 								const int		ndirections,
 								const int 		nfib, 
@@ -61,13 +61,13 @@ extern "C" __global__ void get_residuals_PVM_single_kernel(	//INPUT
 								const bool		gradnonlin,
 								const bool* 		includes_f0,								
 								//OUTPUT
-								double*			residuals);
+								float*			residuals);

 extern "C" __global__ void get_residuals_PVM_single_c_kernel(	//INPUT
-								const double* 		data, 
-								const double* 		params,
-								const double* 		bvecs, 
-								const double* 		bvals, 
+								const float* 		data, 
+								const float* 		params,
+								const float* 		bvecs, 
+								const float* 		bvals, 
 								const int 		nvox, 
 								const int		ndirections,
 								const int 		nfib, 
@@ -76,14 +76,14 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel(	//INPUT
 								const bool		gradnonlin,
 								const bool* 		includes_f0,								
 								//OUTPUT
-								double*			residuals);
+								float*			residuals);


 extern "C" __global__ void get_residuals_PVM_multi_kernel(	//INPUT
-								const double* 		data, 
-								const double* 		params,
-								const double* 		bvecs, 
-								const double* 		bvals, 
+								const float* 		data, 
+								const float* 		params,
+								const float* 		bvecs, 
+								const float* 		bvals, 
 								const int 		nvox, 
 								const int		ndirections,
 								const int 		nfib, 
@@ -92,5 +92,5 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel(	//INPUT
 								const bool		gradnonlin,
 								const bool* 		includes_f0,								
 								//OUTPUT
-								double*			residuals);
+								float*			residuals);