diff --git a/CUDA/diffmodels.cuh b/CUDA/diffmodels.cuh index 7601885d0a6054db9b1ee3bcec50a2d865926d17..e9bebc8768d0daf35459dcb5eb7b36c90b79a2b9 100644 --- a/CUDA/diffmodels.cuh +++ b/CUDA/diffmodels.cuh @@ -12,36 +12,36 @@ void fit_PVM_single( //INPUT const vector<ColumnVector> datam_vec, const vector<Matrix> bvecs_vec, const vector<Matrix> bvals_vec, - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, int ndirections, int nfib, bool m_include_f0, bool gradnonlin, string output_file, //OUTPUT - thrust::device_vector<double>& params_gpu); + thrust::device_vector<float>& params_gpu); void fit_PVM_single_c( //INPUT const vector<ColumnVector> datam_vec, const vector<Matrix> bvecs_vec, const vector<Matrix> bvals_vec, - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, int ndirections, int nfib, bool m_include_f0, bool gradnonlin, string output_file, //OUTPUT - thrust::device_vector<double>& params_gpu); + thrust::device_vector<float>& params_gpu); void fit_PVM_multi( //INPUT - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, int nvox, int ndirections, int nfib, @@ -49,13 +49,13 @@ void fit_PVM_multi( //INPUT bool gradnonlin, string output_file, //OUTPUT - thrust::device_vector<double>& params_gpu); + thrust::device_vector<float>& params_gpu); void calculate_tau( //INPUT - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> params_gpu, - thrust::device_vector<double> bvecs_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> params_gpu, + thrust::device_vector<float> bvecs_gpu, + thrust::device_vector<float> bvals_gpu, thrust::host_vector<int> vox_repeat, int nrepeat, int ndirections, @@ -70,166 +70,172 @@ void calculate_tau( //INPUT __device__ void cf_PVM_single( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT double* cfv); __device__ void grad_PVM_single( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT - double* grad); + float* grad); __device__ void hess_PVM_single( //INPUT - const double* params, - const double* bvecs, - const double* bvals, + const float* params, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT - double* hess); + float* hess); __device__ void cf_PVM_single_c( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _d, - double* sumf, + float* reduction, + float* fs, + float* x, + float* _d, + float* sumf, //OUTPUT double* cfv); __device__ void grad_PVM_single_c( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* f_deriv, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* f_deriv, + float* x, + float* _d, + float* sumf, //OUTPUT - double* grad); + float* grad); __device__ void hess_PVM_single_c( //INPUT - const double* params, - const double* bvecs, - const double* bvals, + const float* params, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* f_deriv, - double* x, - double* _d, - double* sumf, + float* J, + float* reduction, + float* fs, + float* f_deriv, + float* x, + float* _d, + float* sumf, //OUTPUT - double* hess); + float* hess); __device__ void cf_PVM_multi( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _a, - double* _b, - double* sumf, + float* reduction, + float* fs, + float* x, + float* _a, + float* _b, + float* sumf, //OUTPUT - double* cfv); + double* cfv); __device__ void grad_PVM_multi( //INPUT - const double* params, - const double* data, - const double* bvecs, - const double* bvals, + const float* params, + const float* data, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _a, - double* _b, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _a, + float* _b, + float* sumf, //OUTPUT - double* grad); + float* grad); __device__ void hess_PVM_multi( //INPUT - const double* params, - const double* bvecs, - const double* bvals, + const float* params, + const float* bvecs, + const float* bvals, const int ndirections, const int nfib, const int nparams, const bool m_include_f0, const int idSubVOX, - double* reduction, - double* fs, - double* x, - double* _a, - double* _b, - double* sumf, + float* J, + float* reduction, + float* fs, + float* x, + float* _a, + float* _b, + float* sumf, //OUTPUT - double* hess); + float* hess); diff --git a/CUDA/fit_gpu_kernels.h b/CUDA/fit_gpu_kernels.h index 806523149c7ea8f8f66a39fcd892ee4691e53899..97fccb936ec980c379df09dd40d00965d1d00703 100644 --- a/CUDA/fit_gpu_kernels.h +++ b/CUDA/fit_gpu_kernels.h @@ -7,9 +7,9 @@ /* CCOPYRIGHT */ extern "C" __global__ void fit_PVM_single_kernel( //INPUT - const double* data, - const double* bvecs, - const double* bvals, + const float* data, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -17,12 +17,12 @@ extern "C" __global__ void fit_PVM_single_kernel( //INPUT const bool m_include_f0, const bool gradnonlin, //INPUT-OUTPUT - double* params); + float* params); extern "C" __global__ void fit_PVM_single_c_kernel( //INPUT - const double* data, - const double* bvecs, - const double* bvals, + const float* data, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -32,13 +32,13 @@ extern "C" __global__ void fit_PVM_single_c_kernel( //INPUT const bool m_return_fanning, const bool gradnonlin, //INPUT-OUTPUT - double* params); + float* params); extern "C" __global__ void fit_PVM_multi_kernel( //INPUT - const double* data, - const double* params_PVM_simple_c, - const double* bvecs, - const double* bvals, + const float* data, + const float* params_PVM_simple_c, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -46,13 +46,13 @@ extern "C" __global__ void fit_PVM_multi_kernel( //INPUT const bool m_include_f0, const bool gradnonlin, //OUTPUT - double* params); + float* params); extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT - const double* data, - const double* params, - const double* bvecs, - const double* bvals, + const float* data, + const float* params, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -61,13 +61,13 @@ extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT const bool gradnonlin, const bool* includes_f0, //OUTPUT - double* residuals); + float* residuals); extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT - const double* data, - const double* params, - const double* bvecs, - const double* bvals, + const float* data, + const float* params, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -76,14 +76,14 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT const bool gradnonlin, const bool* includes_f0, //OUTPUT - double* residuals); + float* residuals); extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT - const double* data, - const double* params, - const double* bvecs, - const double* bvals, + const float* data, + const float* params, + const float* bvecs, + const float* bvals, const int nvox, const int ndirections, const int nfib, @@ -92,5 +92,5 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT const bool gradnonlin, const bool* includes_f0, //OUTPUT - double* residuals); + float* residuals);