From 98f57485e8412d06e08c28f59f96e19ec3687998 Mon Sep 17 00:00:00 2001 From: Moises Fernandez <moisesf@fmrib.ox.ac.uk> Date: Fri, 19 Apr 2013 15:19:06 +0000 Subject: [PATCH] Avoid extra copies of bvals-bvecs if consider gradient nonlinearities is not activated --- CUDA/PVM_multi.cu | 29 ++++++++++++++++---- CUDA/PVM_single.cu | 29 ++++++++++++++++---- CUDA/PVM_single_c.cu | 29 ++++++++++++++++---- CUDA/diffmodels.cuh | 4 +++ CUDA/fit_gpu_kernels.h | 12 ++++++--- CUDA/runmcmc.cu | 16 +++++++---- CUDA/runmcmc_kernels.cu | 60 ++++++++++++++++++++++++++++++++++------- 7 files changed, 147 insertions(+), 32 deletions(-) diff --git a/CUDA/PVM_multi.cu b/CUDA/PVM_multi.cu index e00c9d3..1e15913 100644 --- a/CUDA/PVM_multi.cu +++ b/CUDA/PVM_multi.cu @@ -400,6 +400,7 @@ extern "C" __global__ void fit_PVM_multi_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, + const bool gradnonlin, //OUTPUT double* params) { @@ -454,8 +455,16 @@ extern "C" __global__ void fit_PVM_multi_kernel( //INPUT __syncthreads(); + int pos_bvals, pos_bvecs; + if(gradnonlin){ + pos_bvals=idVOX*ndirections; + pos_bvecs=idVOX*3*ndirections; + }else{ + pos_bvals=0; + pos_bvecs=0; + } //do the fit - levenberg_marquardt_PVM_multi_gpu(&data[idVOX*ndirections],&bvecs[idVOX*3*ndirections],&bvals[idVOX*ndirections],ndirections,nfib,nparams,m_include_f0,idSubVOX,step,grad,hess,inverse, pcf,ncf,lambda,cftol,ltol,olambda,success,end,reduction,fs,x,_a,_b,sumf,C,el,indx,myparams); + levenberg_marquardt_PVM_multi_gpu(&data[idVOX*ndirections],&bvecs[pos_bvecs],&bvals[pos_bvals],ndirections,nfib,nparams,m_include_f0,idSubVOX,step,grad,hess,inverse, pcf,ncf,lambda,cftol,ltol,olambda,success,end,reduction,fs,x,_a,_b,sumf,C,el,indx,myparams); __syncthreads(); @@ -494,7 +503,8 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, - const bool* includes_f0, + const bool gradnonlin, + const bool* includes_f0, //OUTPUT double* residuals) { @@ -567,6 +577,15 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT __syncthreads(); + int pos_bvals, pos_bvecs; + if(gradnonlin){ + pos_bvals=idVOX*ndirections; + pos_bvecs=idVOX*3*ndirections; + }else{ + pos_bvals=0; + pos_bvecs=0; + } + for(int dir=0;dir<ndir;dir++){ mydata = data[(idVOX*ndirections)+dir_iter]; predicted_signal=0; //pred = 0; @@ -575,13 +594,13 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT x2.x=x[k*3]; x2.y=x[k*3+1]; x2.z=x[k*3+2]; - val += fs[k]*anisoterm_PVM_multi(dir_iter,_a,_b,x2,&bvecs[idVOX*3*ndirections],&bvals[idVOX*ndirections],ndirections); + val += fs[k]*anisoterm_PVM_multi(dir_iter,_a,_b,x2,&bvecs[pos_bvecs],&bvals[pos_bvals],ndirections); } if (*my_include_f0){ double temp_f0=x2f_gpu(myparams[nparams-1]); - predicted_signal = abs(myparams[0])*(temp_f0+(1-*sumf-temp_f0)*isoterm_PVM_multi(dir_iter,_a,_b,&bvals[idVOX*ndirections])+val); + predicted_signal = abs(myparams[0])*(temp_f0+(1-*sumf-temp_f0)*isoterm_PVM_multi(dir_iter,_a,_b,&bvals[pos_bvals])+val); }else{ - predicted_signal = abs(myparams[0])*((1-*sumf)*isoterm_PVM_multi(dir_iter,_a,_b,&bvals[idVOX*ndirections])+val); + predicted_signal = abs(myparams[0])*((1-*sumf)*isoterm_PVM_multi(dir_iter,_a,_b,&bvals[pos_bvals])+val); } //residuals=m_data-predicted_signal; diff --git a/CUDA/PVM_single.cu b/CUDA/PVM_single.cu index ecdf6c9..667e73a 100644 --- a/CUDA/PVM_single.cu +++ b/CUDA/PVM_single.cu @@ -390,7 +390,8 @@ extern "C" __global__ void fit_PVM_single_kernel( //INPUT const int ndirections, const int nfib, const int nparams, - const bool m_include_f0, + const bool m_include_f0, + const bool gradnonlin, //INPUT-OUTPUT double* params) { @@ -432,8 +433,16 @@ extern "C" __global__ void fit_PVM_single_kernel( //INPUT __syncthreads(); + int pos_bvals, pos_bvecs; + if(gradnonlin){ + pos_bvals=idVOX*ndirections; + pos_bvecs=idVOX*3*ndirections; + }else{ + pos_bvals=0; + pos_bvecs=0; + } // do the fit - levenberg_marquardt_PVM_single_gpu(&data[idVOX*ndirections],&bvecs[idVOX*3*ndirections],&bvals[idVOX*ndirections],ndirections,nfib,nparams,m_include_f0,idSubVOX,step,grad,hess,inverse, pcf,ncf,lambda,cftol,ltol,olambda,success,end,reduction,fs,x,_d,sumf,C,el,indx,myparams); + levenberg_marquardt_PVM_single_gpu(&data[idVOX*ndirections],&bvecs[pos_bvecs],&bvals[pos_bvals],ndirections,nfib,nparams,m_include_f0,idSubVOX,step,grad,hess,inverse, pcf,ncf,lambda,cftol,ltol,olambda,success,end,reduction,fs,x,_d,sumf,C,el,indx,myparams); __syncthreads(); @@ -470,6 +479,7 @@ extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, + const bool gradnonlin, const bool* includes_f0, //OUTPUT double* residuals) @@ -537,6 +547,15 @@ extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT __syncthreads(); + int pos_bvals, pos_bvecs; + if(gradnonlin){ + pos_bvals=idVOX*ndirections; + pos_bvecs=idVOX*3*ndirections; + }else{ + pos_bvals=0; + pos_bvecs=0; + } + for(int dir=0;dir<ndir;dir++){ mydata = data[(idVOX*ndirections)+dir_iter]; predicted_signal=0; //pred = 0; @@ -545,13 +564,13 @@ extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT x2.x=x[k*3]; x2.y=x[k*3+1]; x2.z=x[k*3+2]; - val += fs[k]*anisoterm_PVM_single(dir_iter,_d,x2,&bvecs[idVOX*3*ndirections],&bvals[idVOX*ndirections],ndirections); + val += fs[k]*anisoterm_PVM_single(dir_iter,_d,x2,&bvecs[pos_bvecs],&bvals[pos_bvals],ndirections); } if (*my_include_f0){ double temp_f0=x2f_gpu(myparams[nparams-1]); - predicted_signal = myparams[0]*(temp_f0+(1-*sumf-temp_f0)*isoterm_PVM_single(dir_iter,_d,&bvals[idVOX*ndirections])+val); + predicted_signal = myparams[0]*(temp_f0+(1-*sumf-temp_f0)*isoterm_PVM_single(dir_iter,_d,&bvals[pos_bvals])+val); }else{ - predicted_signal = myparams[0]*((1-*sumf)*isoterm_PVM_single(dir_iter,_d,&bvals[idVOX*ndirections])+val); + predicted_signal = myparams[0]*((1-*sumf)*isoterm_PVM_single(dir_iter,_d,&bvals[pos_bvals])+val); } //residuals=m_data-predicted_signal; diff --git a/CUDA/PVM_single_c.cu b/CUDA/PVM_single_c.cu index a23aa79..04864e8 100644 --- a/CUDA/PVM_single_c.cu +++ b/CUDA/PVM_single_c.cu @@ -505,6 +505,7 @@ extern "C" __global__ void fit_PVM_single_c_kernel( //INPUT const bool m_eval_BIC, const bool m_include_f0, const bool m_return_fanning, + const bool gradnonlin, //INPUT - OUTPUT double* params) { @@ -547,8 +548,16 @@ extern "C" __global__ void fit_PVM_single_c_kernel( //INPUT __syncthreads(); + int pos_bvals, pos_bvecs; + if(gradnonlin){ + pos_bvals=idVOX*ndirections; + pos_bvecs=idVOX*3*ndirections; + }else{ + pos_bvals=0; + pos_bvecs=0; + } //do the fit - levenberg_marquardt_PVM_single_c_gpu(&data[idVOX*ndirections],&bvecs[idVOX*3*ndirections],&bvals[idVOX*ndirections],ndirections,nfib,nparams,m_include_f0,idSubVOX,step,grad,hess,inverse, pcf,ncf,lambda,cftol,ltol,olambda,success,end,reduction,fs,f_deriv,x,_d,sumf,C,el,indx,myparams); + levenberg_marquardt_PVM_single_c_gpu(&data[idVOX*ndirections],&bvecs[pos_bvecs],&bvals[pos_bvals],ndirections,nfib,nparams,m_include_f0,idSubVOX,step,grad,hess,inverse, pcf,ncf,lambda,cftol,ltol,olambda,success,end,reduction,fs,f_deriv,x,_d,sumf,C,el,indx,myparams); __syncthreads(); @@ -596,7 +605,8 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, - const bool* includes_f0, + const bool gradnonlin, + const bool* includes_f0, //OUTPUT double* residuals) { @@ -693,6 +703,15 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT __syncthreads(); + int pos_bvals, pos_bvecs; + if(gradnonlin){ + pos_bvals=idVOX*ndirections; + pos_bvecs=idVOX*3*ndirections; + }else{ + pos_bvals=0; + pos_bvecs=0; + } + for(int dir=0;dir<ndir;dir++){ mydata = data[(idVOX*ndirections)+dir_iter]; predicted_signal=0; //pred = 0; @@ -701,7 +720,7 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT x2.x=x[k*3]; x2.y=x[k*3+1]; x2.z=x[k*3+2]; - val += fs[k]*anisoterm_PVM_single_c(dir_iter,_d,x2,&bvecs[idVOX*3*ndirections],&bvals[idVOX*ndirections],ndirections); + val += fs[k]*anisoterm_PVM_single_c(dir_iter,_d,x2,&bvecs[pos_bvecs],&bvals[pos_bvals],ndirections); } if (*my_include_f0){ //partial_fsum /////////// @@ -710,9 +729,9 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT partial_fsum-=fs[j]; ////////////////////////// double temp_f0= beta2f_gpu(myparams[nparams-1])*partial_fsum; - predicted_signal = myparams[0]*(temp_f0+(1-*sumf-temp_f0)*isoterm_PVM_single_c(dir_iter,_d,&bvals[idVOX*ndirections])+val); + predicted_signal = myparams[0]*(temp_f0+(1-*sumf-temp_f0)*isoterm_PVM_single_c(dir_iter,_d,&bvals[pos_bvals])+val); }else{ - predicted_signal = myparams[0]*((1-*sumf)*isoterm_PVM_single_c(dir_iter,_d,&bvals[idVOX*ndirections])+val); + predicted_signal = myparams[0]*((1-*sumf)*isoterm_PVM_single_c(dir_iter,_d,&bvals[pos_bvals])+val); } //residuals=m_data-predicted_signal; diff --git a/CUDA/diffmodels.cuh b/CUDA/diffmodels.cuh index 777543b..7601885 100644 --- a/CUDA/diffmodels.cuh +++ b/CUDA/diffmodels.cuh @@ -18,6 +18,7 @@ void fit_PVM_single( //INPUT int ndirections, int nfib, bool m_include_f0, + bool gradnonlin, string output_file, //OUTPUT thrust::device_vector<double>& params_gpu); @@ -32,6 +33,7 @@ void fit_PVM_single_c( //INPUT int ndirections, int nfib, bool m_include_f0, + bool gradnonlin, string output_file, //OUTPUT thrust::device_vector<double>& params_gpu); @@ -44,6 +46,7 @@ void fit_PVM_multi( //INPUT int ndirections, int nfib, bool m_include_f0, + bool gradnonlin, string output_file, //OUTPUT thrust::device_vector<double>& params_gpu); @@ -60,6 +63,7 @@ void calculate_tau( //INPUT int model, bool m_include_f0, bool nonlin, + bool gradnonlin, string output_file, //OUTPUT thrust::host_vector<float>& tau); diff --git a/CUDA/fit_gpu_kernels.h b/CUDA/fit_gpu_kernels.h index 62e99bc..8065231 100644 --- a/CUDA/fit_gpu_kernels.h +++ b/CUDA/fit_gpu_kernels.h @@ -15,6 +15,7 @@ extern "C" __global__ void fit_PVM_single_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, + const bool gradnonlin, //INPUT-OUTPUT double* params); @@ -29,6 +30,7 @@ extern "C" __global__ void fit_PVM_single_c_kernel( //INPUT const bool m_eval_BIC, const bool m_include_f0, const bool m_return_fanning, + const bool gradnonlin, //INPUT-OUTPUT double* params); @@ -42,6 +44,7 @@ extern "C" __global__ void fit_PVM_multi_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, + const bool gradnonlin, //OUTPUT double* params); @@ -55,7 +58,8 @@ extern "C" __global__ void get_residuals_PVM_single_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, - const bool* includes_f0, + const bool gradnonlin, + const bool* includes_f0, //OUTPUT double* residuals); @@ -69,7 +73,8 @@ extern "C" __global__ void get_residuals_PVM_single_c_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, - const bool* includes_f0, + const bool gradnonlin, + const bool* includes_f0, //OUTPUT double* residuals); @@ -84,7 +89,8 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT const int nfib, const int nparams, const bool m_include_f0, - const bool* includes_f0, + const bool gradnonlin, + const bool* includes_f0, //OUTPUT double* residuals); diff --git a/CUDA/runmcmc.cu b/CUDA/runmcmc.cu index 3fd3360..13af13c 100644 --- a/CUDA/runmcmc.cu +++ b/CUDA/runmcmc.cu @@ -55,6 +55,8 @@ void init_Fibres_Multifibres( //INPUT if(opts.modelnum.value()==2) nparams_fit++; if(opts.f0.value()) nparams_fit++; + bool gradnonlin = opts.grad_file.set(); + int blocks = nvox; dim3 Dim_Grid_MCMC(blocks, 1); dim3 Dim_Block_MCMC(THREADS_BLOCK_MCMC ,1); ///dimensions for MCMC @@ -74,7 +76,7 @@ void init_Fibres_Multifibres( //INPUT myfile << "Shared Memory Used in init_Fibres_Multifibres: " << amount_shared << "\n"; - init_Fibres_Multifibres_kernel<<< Dim_Grid_MCMC, Dim_Block_MCMC, amount_shared>>>(datam_ptr, params_ptr, tau_ptr, bvals_ptr, alpha_ptr, beta_ptr, ndirections, nfib, nparams_fit, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.rician.value(), opts.ardf0.value(), opts.all_ard.value(), opts.no_ard.value(), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + init_Fibres_Multifibres_kernel<<< Dim_Grid_MCMC, Dim_Block_MCMC, amount_shared>>>(datam_ptr, params_ptr, tau_ptr, bvals_ptr, alpha_ptr, beta_ptr, ndirections, nfib, nparams_fit, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.rician.value(), opts.ardf0.value(), opts.all_ard.value(), opts.no_ard.value(), gradnonlin, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); sync_check("init_Fibres_Multifibres_kernel"); gettimeofday(&t2,NULL); @@ -118,6 +120,8 @@ void runmcmc_burnin( //INPUT int nfib= opts.nfibres.value(); int nparams; + bool gradnonlin=opts.grad_file.set(); + if(opts.f0.value()) nparams=3+nfib*3; else nparams=2+nfib*3; if(opts.modelnum.value()==2) nparams++; @@ -220,7 +224,7 @@ void runmcmc_burnin( //INPUT gettimeofday(&t1,NULL); - runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), iters_step, (i*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); sync_check("runmcmc_burnin_kernel"); gettimeofday(&t2,NULL); @@ -253,7 +257,7 @@ void runmcmc_burnin( //INPUT gettimeofday(&t1,NULL); if(nvox!=0){ - runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), last_step, (steps*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); sync_check("runmcmc_burnin_kernel"); } @@ -319,6 +323,8 @@ void runmcmc_record( //INPUT int nfib= opts.nfibres.value(); int nparams; + bool gradnonlin=opts.grad_file.set(); + if(opts.f0.value()) nparams=3+nfib*3; else nparams=2+nfib*3; if(opts.modelnum.value()==2) nparams++; @@ -430,7 +436,7 @@ void runmcmc_record( //INPUT gettimeofday(&t1,NULL); - runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), iters_step, (i*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); + runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); sync_check("runmcmc_record_kernel"); gettimeofday(&t2,NULL); @@ -460,7 +466,7 @@ void runmcmc_record( //INPUT gettimeofday(&t1,NULL); if(nvox!=0){ - runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), last_step, (steps*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords,rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); + runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords,rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); sync_check("runmcmc_record_kernel"); } diff --git a/CUDA/runmcmc_kernels.cu b/CUDA/runmcmc_kernels.cu index dcd4153..5b28537 100644 --- a/CUDA/runmcmc_kernels.cu +++ b/CUDA/runmcmc_kernels.cu @@ -146,6 +146,7 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT const bool m_ardf0, // opts.ardf0.value() const bool ard_value, // opts.all_ard.value() const bool no_ard_value, // opts.no_ard.value() + const bool gradnonlin, //OUTPUT FibreGPU* fibres, MultifibreGPU* multifibres, @@ -297,9 +298,22 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT for(int i=0; i<mydirs; i++){ int pos = (idVOX*ndirections)+idSubVOX+i*threadsBlock; mydata[i] = datam[pos]; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; + } + if(gradnonlin){ + for(int i=0; i<mydirs; i++){ + int pos = (idVOX*ndirections)+idSubVOX+i*threadsBlock; + mybvals[i] = bvals[pos]; + myalpha[i] = alpha[pos]; + mybeta[i] = beta[pos]; + } + }else{ + for(int i=0; i<mydirs; i++){ + int pos = idSubVOX+i*threadsBlock; + mybvals[i] = bvals[pos]; + myalpha[i] = alpha[pos]; + mybeta[i] = beta[pos]; + } + } __syncthreads(); @@ -413,6 +427,7 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT const bool m_ardf0, const bool can_use_ard, const bool rician, + const bool gradnonlin, const int updateproposalevery, //update every this number of iterations const int iterations, //num of iterations to do this time (maybe is a part of the total) const int current_iter, //the number of the current iteration over the total iterations @@ -608,11 +623,24 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; myisosig[i] = isosignals[pos]; mydata[i] = datam[pos]; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; } + if(gradnonlin){ + for(int i=0; i<mydirs; i++){ + int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; + mybvals[i] = bvals[pos]; + myalpha[i] = alpha[pos]; + mybeta[i] = beta[pos]; + } + }else{ + for(int i=0; i<mydirs; i++){ + int pos = idSubVOX+i*threadsBlock; + mybvals[i] = bvals[pos]; + myalpha[i] = alpha[pos]; + mybeta[i] = beta[pos]; + } + } + for(int f=0;f<nfib;f++){ for(int i=0; i<mydirs; i++){ mysig[i*nfib+f]= signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]; @@ -1560,6 +1588,7 @@ extern "C" __global__ void runmcmc_record_kernel( //INPUT const bool m_ardf0, const bool can_use_ard, const bool rician, + const bool gradnonlin, const int updateproposalevery, //update every this number of iterations const int iterations, //num of iterations to do this time (maybe is a part of the total) const int current_iter, //the number of the current iteration over the total iterations @@ -1768,9 +1797,22 @@ extern "C" __global__ void runmcmc_record_kernel( //INPUT int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; myisosig[i] = isosignals[pos]; mydata[i] = datam[pos]; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; + } + if(gradnonlin){ + for(int i=0; i<mydirs; i++){ + int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; + mybvals[i] = bvals[pos]; + myalpha[i] = alpha[pos]; + mybeta[i] = beta[pos]; + } + }else{ + for(int i=0; i<mydirs; i++){ + int pos = idSubVOX+i*threadsBlock; + mybvals[i] = bvals[pos]; + myalpha[i] = alpha[pos]; + mybeta[i] = beta[pos]; + } + } for(int f=0;f<nfib;f++){ -- GitLab