From 9df520eb6c7652c043ac2b7cb33d83de69183b0b Mon Sep 17 00:00:00 2001
From: Moises Fernandez <moisesf@fmrib.ox.ac.uk>
Date: Fri, 10 May 2013 13:41:21 +0000
Subject: [PATCH] Reduce precision (double to float)

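---
Reviewer notes (this region is ignored by git am; not part of the commit):

 * Host-side thrust::device_vector arguments, __device__ helper pointer
   arguments and __global__ kernel pointer arguments change from double
   to float.
 * The cfv output of cf_PVM_single, cf_PVM_single_c and cf_PVM_multi is
   left as double*; for cf_PVM_multi only the whitespace of that line
   changes.
 * Beyond the type change, grad_PVM_single, hess_PVM_single,
   grad_PVM_single_c, hess_PVM_single_c, grad_PVM_multi and
   hess_PVM_multi each gain a new "float* J" parameter placed directly
   before "reduction". These six added lines are why insertions exceed
   deletions by 6 in the diffstat below.
 * Only the two headers listed below are modified here; the matching
   definitions and call sites are not part of this patch and must agree
   with the new signatures.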
 CUDA/diffmodels.cuh    | 218 +++++++++++++++++++++--------------------
 CUDA/fit_gpu_kernels.h |  56 +++++------
 2 files changed, 140 insertions(+), 134 deletions(-)

diff --git a/CUDA/diffmodels.cuh b/CUDA/diffmodels.cuh
index 7601885..e9bebc8 100644
--- a/CUDA/diffmodels.cuh
+++ b/CUDA/diffmodels.cuh
@@ -12,36 +12,36 @@ void fit_PVM_single(	//INPUT
 			const vector<ColumnVector> 	datam_vec, 
 			const vector<Matrix> 		bvecs_vec,
 			const vector<Matrix> 		bvals_vec,
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,
 			int				ndirections,
 			int 				nfib,	
 			bool 				m_include_f0,
 			bool				gradnonlin,
 			string 				output_file,		
 			//OUTPUT
-			thrust::device_vector<double>&	params_gpu);
+			thrust::device_vector<float>&	params_gpu);
 
 void fit_PVM_single_c(	//INPUT
 			const vector<ColumnVector> 	datam_vec, 
 			const vector<Matrix> 		bvecs_vec,
 			const vector<Matrix> 		bvals_vec,
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,
 			int				ndirections,
 			int 				nfib,		
 			bool 				m_include_f0,
 			bool				gradnonlin,
 			string 				output_file,		
 			//OUTPUT
-			thrust::device_vector<double>&	params_gpu);
+			thrust::device_vector<float>&	params_gpu);
 
 void fit_PVM_multi(	//INPUT
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,	
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,	
 			int 				nvox,		
 			int				ndirections,	
 			int				nfib,
@@ -49,13 +49,13 @@ void fit_PVM_multi(	//INPUT
 			bool				gradnonlin,
 			string 				output_file,
 			//OUTPUT
-			thrust::device_vector<double>&	params_gpu);
+			thrust::device_vector<float>&	params_gpu);
 
 void calculate_tau(	//INPUT
-			thrust::device_vector<double> 	datam_gpu, 
-			thrust::device_vector<double>	params_gpu,
-			thrust::device_vector<double>	bvecs_gpu, 
-			thrust::device_vector<double>	bvals_gpu,
+			thrust::device_vector<float> 	datam_gpu, 
+			thrust::device_vector<float>	params_gpu,
+			thrust::device_vector<float>	bvecs_gpu, 
+			thrust::device_vector<float>	bvals_gpu,
 			thrust::host_vector<int>	vox_repeat,
 			int				nrepeat,
 			int				ndirections,
@@ -70,166 +70,172 @@ void calculate_tau(	//INPUT
 
 
 __device__ void cf_PVM_single(		//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,	
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,	
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams, 
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double*				_d,
-					double*				sumf,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float*				_d,
+					float*				sumf,
 					//OUTPUT
 					double* 			cfv);
 
 __device__ void grad_PVM_single(	//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				grad);
+					float*				grad);
 
 __device__ void hess_PVM_single(	//INPUT
-					const double*			params,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,
-					double* 			fs,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,
+					float* 				fs,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				hess);
+					float*				hess);
 
 __device__ void cf_PVM_single_c(	//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams, 
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,
-					double* 			fs,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				reduction,
+					float* 				fs,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
 					double* 			cfv);
 
 
 __device__ void grad_PVM_single_c(	//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double* 			f_deriv,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float* 				f_deriv,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				grad);
+					float*				grad);
 
 __device__ void hess_PVM_single_c(	//INPUT
-					const double*			params,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double* 			f_deriv,
-					double*				x,
-					double* 			_d,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float* 				f_deriv,
+					float*				x,
+					float* 				_d,
+					float* 				sumf,
 					//OUTPUT
-					double*				hess);
+					float*				hess);
 
 __device__ void cf_PVM_multi(		//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams, 
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_a,
-					double* 			_b,
-					double* 			sumf,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_a,
+					float* 				_b,
+					float* 				sumf,
 					//OUTPUT
-					double* 			cfv);
+					double*				cfv);
 
 __device__ void grad_PVM_multi(		//INPUT
-					const double*			params,
-					const double*			data,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			data,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_a,
-					double* 			_b,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_a,
+					float* 				_b,
+					float* 				sumf,
 					//OUTPUT
-					double*				grad);
+					float*				grad);
 
 __device__ void hess_PVM_multi(		//INPUT
-					const double*			params,
-					const double*			bvecs, 
-					const double*			bvals,
+					const float*			params,
+					const float*			bvecs, 
+					const float*			bvals,
 					const int			ndirections,
 					const int			nfib,
 					const int 			nparams,
 					const bool 			m_include_f0,
 					const int			idSubVOX,
-					double*				reduction,					
-					double* 			fs,
-					double*				x,
-					double* 			_a,
-					double*				_b,
-					double* 			sumf,
+					float*				J,
+					float*				reduction,					
+					float* 				fs,
+					float*				x,
+					float* 				_a,
+					float*				_b,
+					float* 				sumf,
 					//OUTPUT
-					double*				hess);
+					float*				hess);
diff --git a/CUDA/fit_gpu_kernels.h b/CUDA/fit_gpu_kernels.h
index 8065231..97fccb9 100644
--- a/CUDA/fit_gpu_kernels.h
+++ b/CUDA/fit_gpu_kernels.h
@@ -7,9 +7,9 @@
 /*  CCOPYRIGHT  */
 
 extern "C" __global__ void fit_PVM_single_kernel(	//INPUT
-							const double* 		data, 
-							const double* 		bvecs, 
-							const double* 		bvals, 
+							const float* 		data, 
+							const float* 		bvecs, 
+							const float* 		bvals, 
 							const int 		nvox, 
 							const int		ndirections,
 							const int 		nfib,
@@ -17,12 +17,12 @@ extern "C" __global__ void fit_PVM_single_kernel(	//INPUT
 							const bool 		m_include_f0, 
 							const bool		gradnonlin,
 							//INPUT-OUTPUT
-							double* 		params);
+							float* 			params);
 
 extern "C" __global__ void fit_PVM_single_c_kernel(	//INPUT
-							const double* 		data, 
-							const double* 		bvecs, 
-							const double* 		bvals, 
+							const float* 		data, 
+							const float* 		bvecs, 
+							const float* 		bvals, 
 							const int 		nvox, 
 							const int		ndirections,
 							const int 		nfib, 
@@ -32,13 +32,13 @@ extern "C" __global__ void fit_PVM_single_c_kernel(	//INPUT
 							const bool	 	m_return_fanning,
 							const bool		gradnonlin,
 							//INPUT-OUTPUT
-							double* 		params);
+							float* 			params);
 
 extern "C" __global__ void fit_PVM_multi_kernel(	//INPUT
-							const double* 		data, 
-							const double* 		params_PVM_simple_c,
-							const double* 		bvecs, 
-							const double* 		bvals, 
+							const float* 		data, 
+							const float* 		params_PVM_simple_c,
+							const float* 		bvecs, 
+							const float* 		bvals, 
 							const int 		nvox, 
 							const int		ndirections,
 							const int 		nfib, 	
@@ -46,13 +46,13 @@ extern "C" __global__ void fit_PVM_multi_kernel(	//INPUT
 							const bool 		m_include_f0,
 							const bool		gradnonlin,
 							//OUTPUT
-							double* 		params);
+							float* 			params);
 
 extern "C" __global__ void get_residuals_PVM_single_kernel(	//INPUT
-								const double* 		data, 
-								const double* 		params,
-								const double* 		bvecs, 
-								const double* 		bvals, 
+								const float* 		data, 
+								const float* 		params,
+								const float* 		bvecs, 
+								const float* 		bvals, 
 								const int 		nvox, 
 								const int		ndirections,
 								const int 		nfib, 
@@ -61,13 +61,13 @@ extern "C" __global__ void get_residuals_PVM_single_kernel(	//INPUT
 								const bool		gradnonlin,
 								const bool* 		includes_f0,								
 								//OUTPUT
-								double*			residuals);
+								float*			residuals);
 
 extern "C" __global__ void get_residuals_PVM_single_c_kernel(	//INPUT
-								const double* 		data, 
-								const double* 		params,
-								const double* 		bvecs, 
-								const double* 		bvals, 
+								const float* 		data, 
+								const float* 		params,
+								const float* 		bvecs, 
+								const float* 		bvals, 
 								const int 		nvox, 
 								const int		ndirections,
 								const int 		nfib, 
@@ -76,14 +76,14 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel(	//INPUT
 								const bool		gradnonlin,
 								const bool* 		includes_f0,								
 								//OUTPUT
-								double*			residuals);
+								float*			residuals);
 
 
 extern "C" __global__ void get_residuals_PVM_multi_kernel(	//INPUT
-								const double* 		data, 
-								const double* 		params,
-								const double* 		bvecs, 
-								const double* 		bvals, 
+								const float* 		data, 
+								const float* 		params,
+								const float* 		bvecs, 
+								const float* 		bvals, 
 								const int 		nvox, 
 								const int		ndirections,
 								const int 		nfib, 
@@ -92,5 +92,5 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel(	//INPUT
 								const bool		gradnonlin,
 								const bool* 		includes_f0,								
 								//OUTPUT
-								double*			residuals);
+								float*			residuals);
 
-- 
GitLab