From 9df520eb6c7652c043ac2b7cb33d83de69183b0b Mon Sep 17 00:00:00 2001 From: Moises Fernandez <moisesf@fmrib.ox.ac.uk> Date: Fri, 10 May 2013 13:41:21 +0000 Subject: [PATCH] Reduce precision (double to float) --- CUDA/diffmodels.cuh | 218 +++++++++++++++++++++-------------------- CUDA/fit_gpu_kernels.h | 56 +++++------ 2 files changed, 140 insertions(+), 134 deletions(-) diff --git a/CUDA/diffmodels.cuh b/CUDA/diffmodels.cuh index 7601885..e9bebc8 100644 --- a/CUDA/diffmodels.cuh +++ b/CUDA/diffmodels.cuh @@ -12,36 +12,36 @@ void fit_PVM_single( //INPUT const vector<ColumnVector> datam_vec, const vector<Matrix> bvecs_vec, const vector<Matrix> bvals_vec, - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, int ndirections, int nfib, bool m_include_f0, bool gradnonlin, string output_file, //OUTPUT - thrust::device_vector<double>& params_gpu); + thrust::device_vector<float>& params_gpu); void fit_PVM_single_c( //INPUT const vector<ColumnVector> datam_vec, const vector<Matrix> bvecs_vec, const vector<Matrix> bvals_vec, - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, int ndirections, int nfib, bool m_include_f0, bool gradnonlin, string output_file, //OUTPUT - thrust::device_vector<double>& params_gpu); + thrust::device_vector<float>& params_gpu); void fit_PVM_multi( //INPUT - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, int nvox, int ndirections, int nfib, @@ -49,13 +49,13 @@ void fit_PVM_multi( //INPUT bool gradnonlin, string output_file, //OUTPUT - thrust::device_vector<double>& params_gpu); + thrust::device_vector<float>& params_gpu); void calculate_tau( //INPUT - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> params_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> params_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, thrust::host_vector<int> vox_repeat, int nrepeat, int ndirections, @@ -70,166 +70,172 @@ void calculate_tau( //INPUT __device__ void cf_PVM_single( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT double* cfv); __device__ void grad_PVM_single( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT - double* grad); + float* grad); __device__ void hess_PVM_single( //INPUT - const double* params, - const double* bvecs, - const double* bvals, + const float* params, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT - double* hess); + float* hess); __device__ void cf_PVM_single_c( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT double* cfv); __device__ void grad_PVM_single_c( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* f_deriv, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* f_deriv, + float* x, + float* _d, + float* sumf, //OUTPUT - double* grad); + float* grad); __device__ void hess_PVM_single_c( //INPUT - const double* params, - const double* bvecs, - const double* bvals, + const float* params, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* f_deriv, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* f_deriv, + float* x, + float* _d, + float* sumf, //OUTPUT - double* hess); + float* hess); __device__ void cf_PVM_multi( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _a, - double* _b, - double* sumf, + float* reduction, + float* fs, + float* x, + float* _a, + float* _b, + float* sumf, //OUTPUT - double* cfv); + double* cfv); __device__ void grad_PVM_multi( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _a, - double* _b, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _a, + float* _b, + float* sumf, //OUTPUT - double* grad); + float* grad); __device__ void hess_PVM_multi( //INPUT - const double* params, - const double* bvecs, - const double* bvals, + const float* params, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _a, - double* _b, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _a, + float* _b, + float* sumf, //OUTPUT - double* hess); + float* hess); diff --git a/CUDA/fit_gpu_kernels.h b/CUDA/fit_gpu_kernels.h index 8065231..97fccb9 100644 --- a/CUDA/fit_gpu_kernels.h +++ b/CUDA/fit_gpu_kernels.h @@ -7,9 +7,9 @@ /* CCOPYRIGHT */ extern "C" __global__ void fit_PVM_single_kernel( //INPUT - const double* data, - const double* bvecs, - const double* bvals, + const float* data, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -17,12 +17,12 @@ extern "C" __global__ void fit_PVM_single_kernel( //INPUT const bool m_include_f0, const bool gradnonlin, //INPUT-OUTPUT - double* params); + float* params); extern "C" __global__ void fit_PVM_single_c_kernel( //INPUT - const double* data, - const double* bvecs, - const double* bvals, + const float* data, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -32,13 +32,13 @@ extern "C" __global__ void fit_PVM_single_c_kernel( //INPUT const bool m_return_fanning, const bool gradnonlin, //INPUT-OUTPUT - double* params); + float* params); extern "C" __global__ void fit_PVM_multi_kernel( //INPUT - const double* data, - const double* params_PVM_simple_c, - const double* bvecs, - const double* bvals, + const float* data, + const float* params_PVM_simple_c, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -46,13 +46,13 @@ extern "C" __global__ void fit_PVM_multi_kernel( //INPUT const bool m_include_f0, const bool gradnonlin, //OUTPUT - double* params); + float* params); extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT - const double* data, - const double* params, - const double* bvecs, - const double* bvals, + const float* data, + const float* params, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -61,13 +61,13 @@ extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT const bool gradnonlin, const bool* includes_f0, //OUTPUT - double* residuals); + float* residuals); extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT - const double* data, - const double* params, - const double* bvecs, - const double* bvals, + const float* data, + const float* params, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -76,14 +76,14 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT const bool gradnonlin, const bool* includes_f0, //OUTPUT - double* residuals); + float* residuals); extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT - const double* data, - const double* params, - const double* bvecs, - const double* bvals, + const float* data, + const float* params, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -92,5 +92,5 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT const bool gradnonlin, const bool* includes_f0, //OUTPUT - double* residuals); + float* residuals); -- GitLab