diff --git a/CUDA/runmcmc.cu b/CUDA/runmcmc.cu index 3499c841ae02aa86a6b35e51266dc9f4dd00219e..4b3341b032cfa89f8a484bb5bcf481a5f7a59f43 100644 --- a/CUDA/runmcmc.cu +++ b/CUDA/runmcmc.cu @@ -25,10 +25,10 @@ using namespace Xfibres; ////////////////////////////////////////////////////// void init_Fibres_Multifibres( //INPUT - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> params_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> params_gpu, thrust::device_vector<float> tau_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> bvals_gpu, thrust::device_vector<double> alpha_gpu, thrust::device_vector<double> beta_gpu, const int ndirections, @@ -55,28 +55,33 @@ void init_Fibres_Multifibres( //INPUT if(opts.modelnum.value()==2) nparams_fit++; if(opts.f0.value()) nparams_fit++; + thrust::device_vector<double> angtmp_gpu; + angtmp_gpu.resize(nvox*ndirections*nfib); + + bool gradnonlin = opts.grad_file.set(); int blocks = nvox; dim3 Dim_Grid_MCMC(blocks, 1); dim3 Dim_Block_MCMC(THREADS_BLOCK_MCMC ,1); ///dimensions for MCMC - double *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data()); - double *params_ptr = thrust::raw_pointer_cast(params_gpu.data()); + float *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data()); + float *params_ptr = thrust::raw_pointer_cast(params_gpu.data()); float *tau_ptr = thrust::raw_pointer_cast(tau_gpu.data()); - double *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data()); + float *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data()); double *alpha_ptr = thrust::raw_pointer_cast(alpha_gpu.data()); double *beta_ptr = thrust::raw_pointer_cast(beta_gpu.data()); FibreGPU *fibres_ptr = thrust::raw_pointer_cast(fibres_gpu.data()); MultifibreGPU *multifibres_ptr = thrust::raw_pointer_cast(multifibres_gpu.data()); double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data()); double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data()); + double *angtmp_ptr = thrust::raw_pointer_cast(angtmp_gpu.data()); - int amount_shared = (THREADS_BLOCK_MCMC+1)*sizeof(double) + (3*nfib + 8)*sizeof(float); + int amount_shared = (THREADS_BLOCK_MCMC)*sizeof(double) + (3*nfib + 8)*sizeof(float) + sizeof(int); myfile << "Shared Memory Used in init_Fibres_Multifibres: " << amount_shared << "\n"; - init_Fibres_Multifibres_kernel<<< Dim_Grid_MCMC, Dim_Block_MCMC, amount_shared>>>(datam_ptr, params_ptr, tau_ptr, bvals_ptr, alpha_ptr, beta_ptr, ndirections, nfib, nparams_fit, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.rician.value(), opts.ardf0.value(), opts.all_ard.value(), opts.no_ard.value(), gradnonlin, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + init_Fibres_Multifibres_kernel<<< Dim_Grid_MCMC, Dim_Block_MCMC, amount_shared>>>(datam_ptr, params_ptr, tau_ptr, bvals_ptr, alpha_ptr, beta_ptr, ndirections, nfib, nparams_fit, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.rician.value(), opts.ardf0.value(), opts.all_ard.value(), opts.no_ard.value(), gradnonlin, angtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); sync_check("init_Fibres_Multifibres_kernel"); gettimeofday(&t2,NULL); @@ -87,8 +92,8 @@ void init_Fibres_Multifibres( //INPUT } void runmcmc_burnin( //INPUT - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvals_gpu, thrust::device_vector<double> alpha_gpu, thrust::device_vector<double> beta_gpu, const int ndirections, @@ -126,6 +131,19 @@ void runmcmc_burnin( //INPUT else nparams=2+nfib*3; if(opts.modelnum.value()==2) nparams++; if(opts.rician.value()) nparams++; + + thrust::device_vector<float> recors_null_gpu; + recors_null_gpu.resize(1); + + thrust::device_vector<double> angtmp_gpu; + thrust::device_vector<double> oldangtmp_gpu; + thrust::device_vector<double> oldsignals_gpu; + thrust::device_vector<double> oldisosignals_gpu; + + angtmp_gpu.resize(nvox*ndirections*nfib); + oldangtmp_gpu.resize(nvox*ndirections); + oldsignals_gpu.resize(nvox*ndirections*nfib); + oldisosignals_gpu.resize(nvox*ndirections); unsigned int totalrandoms=(opts.nburn.value() * nvox * nparams); @@ -187,8 +205,8 @@ void runmcmc_burnin( //INPUT curandSetPseudoRandomGeneratorSeed(gen,seed); //get pointers - double *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data()); - double *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data()); + float *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data()); + float *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data()); double *alpha_ptr = thrust::raw_pointer_cast(alpha_gpu.data()); double *beta_ptr = thrust::raw_pointer_cast(beta_gpu.data()); float *randomsN_ptr = thrust::raw_pointer_cast(randomsN_gpu.data()); @@ -198,7 +216,14 @@ void runmcmc_burnin( //INPUT double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data()); double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data()); - int amount_shared = (THREADS_BLOCK_MCMC+1)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 16)*sizeof(int); + double *angtmp_ptr = thrust::raw_pointer_cast(angtmp_gpu.data()); + double *oldangtmp_ptr = thrust::raw_pointer_cast(oldangtmp_gpu.data()); + double *oldsignals_ptr = thrust::raw_pointer_cast(oldsignals_gpu.data()); + double *oldisosignals_ptr = thrust::raw_pointer_cast(oldisosignals_gpu.data()); + + float *records_null = thrust::raw_pointer_cast(recors_null_gpu.data()); + + int amount_shared = (THREADS_BLOCK_MCMC)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 19)*sizeof(int); myfile << "Shared Memory Used in runmcmc_burnin: " << amount_shared << "\n"; @@ -224,7 +249,7 @@ void runmcmc_burnin( //INPUT gettimeofday(&t1,NULL); - runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), 0, 0, 0, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr,records_null,records_null,records_null,records_null,records_null,records_null,records_null, records_null); sync_check("runmcmc_burnin_kernel"); gettimeofday(&t2,NULL); @@ -254,7 +279,7 @@ void runmcmc_burnin( //INPUT gettimeofday(&t1,NULL); if(nvox!=0){ - runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), 0, 0, 0, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr,records_null,records_null,records_null,records_null,records_null,records_null, records_null,records_null); sync_check("runmcmc_burnin_kernel"); } @@ -277,8 +302,8 @@ void runmcmc_burnin( //INPUT void runmcmc_record( //INPUT - thrust::device_vector<double> datam_gpu, - thrust::device_vector<double> bvals_gpu, + thrust::device_vector<float> datam_gpu, + thrust::device_vector<float> bvals_gpu, thrust::device_vector<double> alpha_gpu, thrust::device_vector<double> beta_gpu, thrust::device_vector<FibreGPU> fibres_gpu, @@ -326,6 +351,16 @@ void runmcmc_record( //INPUT else nparams=2+nfib*3; if(opts.modelnum.value()==2) nparams++; if(opts.rician.value()) nparams++; + + thrust::device_vector<double> angtmp_gpu; + thrust::device_vector<double> oldangtmp_gpu; + thrust::device_vector<double> oldsignals_gpu; + thrust::device_vector<double> oldisosignals_gpu; + + angtmp_gpu.resize(nvox*ndirections*nfib); + oldangtmp_gpu.resize(nvox*ndirections); + oldsignals_gpu.resize(nvox*ndirections*nfib); + oldisosignals_gpu.resize(nvox*ndirections); unsigned int totalrandoms=(opts.njumps.value() * nvox * nparams); @@ -387,8 +422,8 @@ void runmcmc_record( //INPUT curandSetPseudoRandomGeneratorSeed(gen,seed); //get pointers - double *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data()); - double *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data()); + float *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data()); + float *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data()); double *alpha_ptr = thrust::raw_pointer_cast(alpha_gpu.data()); double *beta_ptr = thrust::raw_pointer_cast(beta_gpu.data()); float *randomsN_ptr = thrust::raw_pointer_cast(randomsN_gpu.data()); @@ -397,6 +432,11 @@ void runmcmc_record( //INPUT MultifibreGPU *multifibres_ptr = thrust::raw_pointer_cast(multifibres_gpu.data()); double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data()); double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data()); + + double *angtmp_ptr = thrust::raw_pointer_cast(angtmp_gpu.data()); + double *oldangtmp_ptr = thrust::raw_pointer_cast(oldangtmp_gpu.data()); + double *oldsignals_ptr = thrust::raw_pointer_cast(oldsignals_gpu.data()); + double *oldisosignals_ptr = thrust::raw_pointer_cast(oldisosignals_gpu.data()); float *rf0_ptr = thrust::raw_pointer_cast(rf0_gpu.data()); float *rtau_ptr = thrust::raw_pointer_cast(rtau_gpu.data()); @@ -407,7 +447,7 @@ void runmcmc_record( //INPUT float *rph_ptr = thrust::raw_pointer_cast(rph_gpu.data()); float *rf_ptr = thrust::raw_pointer_cast(rf_gpu.data()); - int amount_shared = (THREADS_BLOCK_MCMC+1)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 18)*sizeof(int); + int amount_shared = (THREADS_BLOCK_MCMC)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 19)*sizeof(int); myfile << "Shared Memory Used in runmcmc_record: " << amount_shared << "\n"; @@ -433,7 +473,7 @@ void runmcmc_record( //INPUT gettimeofday(&t1,NULL); - runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); + runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); sync_check("runmcmc_record_kernel"); gettimeofday(&t2,NULL); @@ -463,7 +503,7 @@ void runmcmc_record( //INPUT gettimeofday(&t1,NULL); if(nvox!=0){ - runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords,rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); + runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr,randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr); sync_check("runmcmc_record_kernel"); } diff --git a/CUDA/runmcmc_kernels.cu b/CUDA/runmcmc_kernels.cu index 5b285376644cdecc6c5e982c77fb4d21e1980d4c..287d6d6047ef2cfd64416196dd7b8f0c58dd025f 100644 --- a/CUDA/runmcmc_kernels.cu +++ b/CUDA/runmcmc_kernels.cu @@ -21,36 +21,83 @@ #define maxfloat 1e10 -__device__ -inline void compute_signal(double *signals,double *oldsignals,double mbvals,float* m_d, float* m_dstd,float angtmp, int model){ - - oldsignals[0]=signals[0]; +__device__ inline void propose(float* param, float* old, float prop, float random){ + *old=*param; + *param = *param + random*prop; +} +__device__ inline void reject(float* param, float* prior, float* old, float* m_prior_en, float* m_old_prior_en, float* m_energy, float* m_old_energy, int* rej){ + *param=old[0]; + *prior=old[1]; + *m_prior_en=*m_old_prior_en; + *rej=*rej+1; + //restore_energy() + *m_energy=*m_old_energy; +} +__device__ inline void rejectF(float* param, float* prior, float* old, float* m_prior_en, float* m_old_prior_en, float* fm_prior_en, float* fm_old_prior_en, float* m_energy, float* m_old_energy, int* rej){ + *param=old[0]; + *prior=old[1]; + *fm_prior_en=*fm_old_prior_en; + *m_prior_en=*m_old_prior_en; + *rej=*rej+1; + //restore_energy() + *m_energy=*m_old_energy; +} +__device__ inline void getfsum(float* fsum, float* m_f, float m_f0, int nfib){ + *fsum=m_f0; + for(int f=0;f<nfib;f++){ + *fsum = *fsum + m_f[f]; + } +} +__device__ inline bool compute_test_energy(float *m_energy, float* m_old_energy, float m_prior_en, float m_likelihood_en, float random){ + *m_old_energy=*m_energy; + *m_energy=m_prior_en+m_likelihood_en; - if(model==1 || m_dstd[0]<1e-5){ - signals[0]=exp(double(-m_d[0]*mbvals*angtmp)); + double tmp=exp(double(*m_old_energy-*m_energy)); + return (tmp>random); +} +__device__ inline void compute_signal(double *signals,double *oldsignals,float mbvals,float* m_d, float* m_dstd,float angtmp, int model){ + *oldsignals=*signals; + if(model==1 || *m_dstd<1e-5){ + *signals=exp(double(-*m_d*mbvals*angtmp)); }else{ - float dbeta= m_d[0]/(m_dstd[0]*m_dstd[0]); - float dalpha=m_d[0]*dbeta; - signals[0]=exp(double(log(double(dbeta/(dbeta+mbvals*angtmp)))*dalpha)); + double dbeta= *m_d/(*m_dstd**m_dstd); + double dalpha= *m_d*dbeta; + *signals=exp(double(log(double(dbeta/(dbeta+mbvals*angtmp)))*dalpha)); } } - -__device__ -inline void compute_iso_signal(double *isosignals,double *oldisosignals, double mbvals,float* m_d, float* m_dstd, int model){ - +__device__ inline void compute_iso_signal(double *isosignals,double *oldisosignals, float mbvals,float* m_d, float* m_dstd, int model){ *oldisosignals=*isosignals; - - if(model==1 || m_dstd[0]<1e-5){ - *isosignals=exp(-m_d[0]*mbvals); + if(model==1 || *m_dstd<1e-5){ + *isosignals=exp(double(-m_d[0]*mbvals)); }else{ - float dbeta= m_d[0]/(m_dstd[0]*m_dstd[0]); - float dalpha=m_d[0]*dbeta; + double dbeta= *m_d/(*m_dstd**m_dstd); + double dalpha= *m_d*dbeta; *isosignals=exp(double(log(double(dbeta/(dbeta+mbvals)))*dalpha)); } } - -__device__ -inline void compute_prior(float *m_prior_en, float *m_prior_en_old,float* m_d_prior,float* m_S0_prior,float *m_prior_enf, float* m_f0_prior, float* m_tau_prior, float* m_dstd_prior, int nfib){ +__device__ inline void restore_signals(double* signals, double* oldsignals, int idVOX, int idSubVOX, int mydirs, int nfib, int ndirections, int threadsBlock){ + for(int f=0;f<nfib;f++){ + for(int i=0; i<mydirs; i++){ + int pos = idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock; + signals[pos] = oldsignals[pos]; + } + } +} +__device__ inline void restore_isosignals(double* isosignals, double* oldisosignals, int idVOX, int idSubVOX, int mydirs, int ndirections, int threadsBlock){ + for(int i=0; i<mydirs; i++){ + int pos = idVOX*ndirections + idSubVOX + i*threadsBlock; + isosignals[pos]=oldisosignals[pos]; + } +} +__device__ inline void restore_angtmp_signals(double* signals, double* oldsignals,double* angtmp, double* oldangtmp, int idVOX, int idSubVOX, int mydirs, int nfib, int fibre, int ndirections, int threadsBlock){ + for(int i=0; i<mydirs; i++){ + int pos = idVOX*ndirections*nfib + fibre*ndirections + idSubVOX + i*threadsBlock; + int pos2 = idVOX*ndirections + idSubVOX + i*threadsBlock; + angtmp[pos]=oldangtmp[pos2]; + signals[pos] = oldsignals[pos]; + } +} +__device__ inline void compute_prior(float *m_prior_en, float *m_prior_en_old,float* m_d_prior,float* m_S0_prior,float *m_prior_enf, float* m_f0_prior, float* m_tau_prior, float* m_dstd_prior, int nfib){ *m_prior_en_old=*m_prior_en; *m_prior_en=*m_d_prior+*m_S0_prior+*m_dstd_prior+*m_tau_prior+*m_f0_prior; for(int f=0;f<nfib;f++){ @@ -58,8 +105,7 @@ inline void compute_prior(float *m_prior_en, float *m_prior_en_old,float* m_d_pr } } -__device__ -inline float logIo(const float& x){ +__device__ inline float logIo(const float& x){ float y,b; b= fabs(x); if (b<3.75){ @@ -79,31 +125,27 @@ inline float logIo(const float& x){ return y; } - -__device__ -inline void compute_likelihood(int idSubVOX,float* m_S0,float *m_likelihood_en,float *m_f,double *signals,double *isosignals,double *mdata,float* fsum,double *reduction, float* m_f0, const bool rician, float* m_tau,int mydirs, int threadsBlock, int ndirections, int nfib){ +__device__ inline void compute_likelihood(int idSubVOX,float* m_S0,float *m_likelihood_en,float *m_f,double *signals,double *isosignals,const float *mdata,float* fsum,double *reduction, float* m_f0, const bool rician, float* m_tau,int mydirs, int threadsBlock, int ndirections, int nfib){ - double pred[MAXNDIRS_PER_THREAD]; + double pred; + int pos; + reduction[idSubVOX]=0; for(int i=0; i<mydirs; i++){ - pred[i]=0; + pred=0; for(int f=0;f<nfib;f++){ - pred[i]= pred[i]+m_f[f]*signals[i*nfib+f]; + pos = f*ndirections + idSubVOX + i*threadsBlock; + pred= pred+m_f[f]*signals[pos]; } - } - - for(int i=0; i<mydirs; i++){ - pred[i]= m_S0[0]*(pred[i]+(1-fsum[0])*isosignals[i]+m_f0[0]); //F0 - } - - reduction[idSubVOX]=0; - for(int i=0; i<mydirs; i++){ + pos = idSubVOX + i*threadsBlock; + pred= *m_S0*(pred+(1-*fsum)*isosignals[pos]+*m_f0); //F0 + if(!rician){ - double diff = mdata[i]-pred[i]; + double diff = mdata[pos]-pred; reduction[idSubVOX] = reduction[idSubVOX]+(diff*diff); }else{ - pred[i]= log(mdata[i])+(-0.5*m_tau[0]*(mdata[i]*mdata[i]+pred[i]*pred[i])+logIo(m_tau[0]*pred[i]*mdata[i])); - reduction[idSubVOX] = reduction[idSubVOX]+pred[i]; + pred= log(mdata[pos])+(-0.5**m_tau*(mdata[pos]*mdata[pos]+pred*pred)+logIo(*m_tau*pred*mdata[pos])); + reduction[idSubVOX] = reduction[idSubVOX]+pred; } } @@ -124,16 +166,16 @@ inline void compute_likelihood(int idSubVOX,float* m_S0,float *m_likelihood_en,f if(!rician){ *m_likelihood_en=(ndirections/2.0)*log(sumsquares/2.0); }else{ - *m_likelihood_en= -ndirections*log(m_tau[0])-sumsquares; + *m_likelihood_en= -ndirections*log(*m_tau)-sumsquares; } } } extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT - const double* datam, - const double* params, + const float* datam, + const float* params, const float* tau, - const double* bvals, + const float* bvals, const double* alpha, const double* beta, const int ndirections, @@ -147,6 +189,8 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT const bool ard_value, // opts.all_ard.value() const bool no_ard_value, // opts.no_ard.value() const bool gradnonlin, + //TO USE + double* angtmp, //OUTPUT FibreGPU* fibres, MultifibreGPU* multifibres, @@ -161,9 +205,8 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT ////////// DYNAMIC SHARED MEMORY /////////// extern __shared__ double shared[]; double* reduction = (double*)shared; //threadsBlock - double* tmp = (double*) &reduction[threadsBlock]; //1 - float* m_S0 = (float*) &tmp[1]; //1 + float* m_S0 = (float*) &reduction[threadsBlock]; //1 float* m_d = (float*) &m_S0[1]; //1 float* m_dstd =(float*) &m_d[1]; //1 float* m_f0 = (float*) &m_dstd[1]; //1 @@ -175,22 +218,15 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT float* fsum = (float*) &m_f[nfib]; //1 float* m_likelihood_en = (float*) &fsum[1]; //1 float* m_prior_en = (float*) &m_likelihood_en[1]; //1 - ////////// DYNAMIC SHARED MEMORY /////////// - - double mysig[MAXNDIRS_PER_THREAD*MAXNFIBRES]; - double myisosig[MAXNDIRS_PER_THREAD]; - double mydata[MAXNDIRS_PER_THREAD]; - double mybvals[MAXNDIRS_PER_THREAD]; - double myalpha[MAXNDIRS_PER_THREAD]; - double mybeta[MAXNDIRS_PER_THREAD]; - - float angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES]; //this is to pre compute signal - float cos_alpha_minus_theta; - float cos_alpha_plus_theta; + int* posBV = (int*) &m_prior_en[1]; //1 + ////////// DYNAMIC SHARED MEMORY /////////// // m_s0-params[0] m_d-params[1] m_f-m_th-m_ph-params[2,3,4,5, etc..] m_f0-params[nparams-1] if(leader){ + if(gradnonlin) *posBV = (idVOX*ndirections); + else *posBV = 0; + *m_S0 = params[idVOX*nparams_fit]; multifibres[idVOX].m_S0 = *m_S0; multifibres[idVOX].m_S0_prior = 0; @@ -295,53 +331,33 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT fibres[pos].m_prior_en= m_th_prior + m_ph_prior + m_f_prior; } - for(int i=0; i<mydirs; i++){ - int pos = (idVOX*ndirections)+idSubVOX+i*threadsBlock; - mydata[i] = datam[pos]; - } - if(gradnonlin){ - for(int i=0; i<mydirs; i++){ - int pos = (idVOX*ndirections)+idSubVOX+i*threadsBlock; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; - } - }else{ - for(int i=0; i<mydirs; i++){ - int pos = idSubVOX+i*threadsBlock; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; - } - - } - __syncthreads(); //compute_signal_pre - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[f])); - cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[f])); - angtmp[i*nfib+f]= (cos(double(m_ph[f]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; - angtmp[i*nfib+f]=angtmp[i*nfib+f]*angtmp[i*nfib+f]; + for(int f=0;f<nfib;f++){ + for(int i=0; i<mydirs; i++){ + double myalpha = alpha[*posBV+idSubVOX+i*threadsBlock]; + double cos_alpha_minus_theta=cos(double(myalpha-m_th[f])); + double cos_alpha_plus_theta=cos(double(myalpha+m_th[f])); + int pos = idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock; + double aux = (cos(double(m_ph[f]-beta[*posBV+idSubVOX+i*threadsBlock]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; + aux = aux*aux; + angtmp[pos]= aux; } } //compute_signal() double old; - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - compute_signal(&mysig[i*nfib+f],&old,mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model); + for(int f=0;f<nfib;f++){ + for(int i=0; i<mydirs; i++){ + int pos = idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock; + compute_signal(&signals[pos],&old,bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model); } } if(leader){ - //for likehood - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } + getfsum(fsum,m_f,*m_f0,nfib); + //initialise_energies(); //compute_d_prior(); m_d_prior=0; so i don't do nothing, it is already 0 if(model==2){ @@ -378,13 +394,14 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT //compute_iso_signal() for(int i=0; i<mydirs; i++){ - compute_iso_signal(&myisosig[i],&old, mybvals[i],m_d,m_dstd,model); + int pos = idVOX*ndirections + idSubVOX + i*threadsBlock; + compute_iso_signal(&isosignals[pos],&old,bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,model); } __syncthreads(); //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[idVOX*nfib*ndirections],&isosignals[idVOX*ndirections],&datam[idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); @@ -401,41 +418,50 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel( //INPUT multifibres[idVOX].m_tau_prop=*m_tau/2.0; multifibres[idVOX].m_f0_prop=0.2; } - - for(int i=0; i<mydirs; i++){ - isosignals[(idVOX*ndirections)+idSubVOX+i*threadsBlock] = myisosig[i]; - for(int f=0;f<nfib;f++){ - signals[(idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]=mysig[i*nfib+f]; - } - } } - -extern "C" __global__ void runmcmc_burnin_kernel( //INPUT - const double* datam, - const double* bvals, - const double* alpha, - const double* beta, - float* randomsN, - float* randomsU, - const int ndirections, - const int nfib, - const int nparams, - const int model, - const float fudgevalue, - const bool m_include_f0, - const bool m_ardf0, - const bool can_use_ard, - const bool rician, - const bool gradnonlin, - const int updateproposalevery, //update every this number of iterations - const int iterations, //num of iterations to do this time (maybe is a part of the total) - const int current_iter, //the number of the current iteration over the total iterations - //INPUT-OUTPUT - FibreGPU* fibres, - MultifibreGPU* multifibres, - double* signals, - double* isosignals) +extern "C" __global__ void runmcmc_kernel( //INPUT + const float* datam, + const float* bvals, + const double* alpha, + const double* beta, + float* randomsN, + float* randomsU, + const int ndirections, + const int nfib, + const int nparams, + const int model, + const float fudgevalue, + const bool m_include_f0, + const bool m_ardf0, + const bool can_use_ard, + const bool rician, + const bool gradnonlin, + const int updateproposalevery, //update every this number of iterations + const int iterations, //num of iterations to do this time (maybe is a part of the total) + const int current_iter, //the number of the current iteration over the total iterations + const int iters_burnin, //iters in burin, we need it to continue the updates at the correct time. + const int record_every, //record every this number + const int totalrecords, //total number of records to do + //TO USE + double* oldsignals, + double* oldisosignals, + double* angtmp, + double* oldangtmp, + //INPUT-OUTPUT + FibreGPU* fibres, + MultifibreGPU* multifibres, + double* signals, + double* isosignals, + //OUTPUT + float* rf0, //record of parameters + float* rtau, + float* rs0, + float* rd, + float* rdstd, + float* rth, + float* rph, + float* rf) { int idSubVOX= threadIdx.x; int threadsBlock = blockDim.x; @@ -444,9 +470,8 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT ////////// DYNAMIC SHARED MEMORY /////////// extern __shared__ double shared[]; double* reduction = (double*)shared; //threadsBlock - double* tmp = (double*) &reduction[threadsBlock]; //1 - float* m_S0 = (float*) &tmp[1]; //1 + float* m_S0 = (float*) &reduction[threadsBlock]; //1 float* m_d = (float*) &m_S0[1]; //1 float* m_dstd =(float*) &m_d[1]; //1 float* m_f0 = (float*) &m_dstd[1]; //1 @@ -507,27 +532,21 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT int* rejflag = (int*) &m_f_rej[nfib]; //3 int* m_lam_jump = (int*) &rejflag[3]; //nfib int* idVOX = (int*) &m_lam_jump[nfib]; //1 - int* count_update = (int*) &idVOX[1]; //1 - int* localrand = (int*) &count_update[1]; //1 + int* count_update = (int*) &idVOX[1]; //1 + int* recordcount = (int*) &count_update[1]; //1 + int* sample = (int*) &recordcount[1]; //1 + int* localrand = (int*) &sample[1]; //1 + int* posBV = (int*) &localrand[1]; //1 ////////// DYNAMIC SHARED MEMORY /////////// - - double mysig[MAXNDIRS_PER_THREAD*MAXNFIBRES]; - double mysigold[MAXNDIRS_PER_THREAD*MAXNFIBRES]; - double myisosig[MAXNDIRS_PER_THREAD]; - double myisosigold[MAXNDIRS_PER_THREAD]; - double mydata[MAXNDIRS_PER_THREAD]; - double mybvals[MAXNDIRS_PER_THREAD]; - double myalpha[MAXNDIRS_PER_THREAD]; - double mybeta[MAXNDIRS_PER_THREAD]; - - float angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES]; //this is to pre compute signal - float old_angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES]; - float cos_alpha_minus_theta; - float cos_alpha_plus_theta; if (leader){ *idVOX= blockIdx.x; - *count_update = current_iter; + *count_update = current_iter+iters_burnin; //count for updates + *recordcount = current_iter; + if(record_every) *sample=1+(current_iter/record_every); //the next number of sample.....the index start in 0 + + if(gradnonlin)*posBV = (*idVOX*ndirections); + else *posBV = 0; *m_prior_en=multifibres[*idVOX].m_prior_en; @@ -586,6 +605,7 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT *m_tau=0; } } + __syncthreads(); int mydirs = ndirections/threadsBlock; @@ -616,46 +636,18 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT m_lam_jump[idSubVOX]=fibres[pos].m_lam_jump; } - - __syncthreads(); - - for(int i=0; i<mydirs; i++){ - int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; - myisosig[i] = isosignals[pos]; - mydata[i] = datam[pos]; - } - if(gradnonlin){ - for(int i=0; i<mydirs; i++){ - int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; - } - }else{ - for(int i=0; i<mydirs; i++){ - int pos = idSubVOX+i*threadsBlock; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; - } - - } - - for(int f=0;f<nfib;f++){ - for(int i=0; i<mydirs; i++){ - mysig[i*nfib+f]= signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]; - } - } - __syncthreads(); //compute_signal_pre - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[f])); - cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[f])); - angtmp[i*nfib+f]= (cos(double(m_ph[f]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; - angtmp[i*nfib+f]=angtmp[i*nfib+f]*angtmp[i*nfib+f]; + for(int f=0;f<nfib;f++){ + for(int i=0; i<mydirs; i++){ + double myalpha = alpha[*posBV+idSubVOX+i*threadsBlock]; + double cos_alpha_minus_theta=cos(double(myalpha-m_th[f])); + double cos_alpha_plus_theta=cos(double(myalpha+m_th[f])); + int pos = *idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock; + double aux = (cos(double(m_ph[f]-beta[*posBV+idSubVOX+i*threadsBlock]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; + aux = aux*aux; + angtmp[pos]= aux; } } @@ -665,6 +657,7 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT //prefetching randoms if (leader){ *count_update=*count_update+1; + *recordcount=*recordcount+1; int idrand = *idVOX*iterations*nparams+(niter*nparams); *localrand = 0; if(m_include_f0){ @@ -714,15 +707,10 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT } *localrand = 0; } - ////////////////////////////////////////////////////////////////// F0 - if(m_include_f0){ if (leader){ - //propose_f0 - old[0]=*m_f0; - *m_f0+=prerandN[*localrand]**m_f0_prop; - + propose(m_f0,old,*m_f0_prop,prerandN[*localrand]); //compute_f0_prior() old[1]=*m_f0_prior; if(*m_f0<=0 || *m_f0 >=1){ @@ -735,83 +723,39 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT *m_f0_prior=log(double(*m_f0)); } } - - //for likehood and reject_f_sum - *fsum=*m_f0; - - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - + getfsum(fsum,m_f,*m_f0,nfib); //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - //reject_f_sum() rejflag[1]=(*fsum>1); } - __syncthreads(); - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[2]=(*tmp>prerandU[*localrand]); + rejflag[2]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); *localrand=*localrand+1; - if(!rejflag[0]){ if(!rejflag[1]){ if(rejflag[2]){ - //accept_f0() *m_f0_acc=*m_f0_acc+1; }else{ - //reject_F0() - *m_f0=old[0]; - *m_f0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_f0_rej=*m_f0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; + reject(m_f0,m_f0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_f0_rej); } - }else{ - //reject_F0() - *m_f0=old[0]; - *m_f0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_f0_rej=*m_f0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; + }else{ + reject(m_f0,m_f0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_f0_rej); } }else{ - //reject_F0() - *m_f0=old[0]; - *m_f0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_f0_rej=*m_f0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; + reject(m_f0,m_f0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_f0_rej); } } } - ////////////////////////////////////////////////////////////////// TAU if(rician){ if (leader){ - //propose_tau - old[0]=*m_tau; - *m_tau+=prerandN[*localrand]**m_tau_prop; - + propose(m_tau,old,*m_tau_prop,prerandN[*localrand]); //compute_tau_prior() old[1]=*m_tau_prior; if(*m_tau<=0){ @@ -820,70 +764,34 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT rejflag[0]=false; *m_tau_prior=0; } - - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - + getfsum(fsum,m_f,*m_f0,nfib); //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); } - __syncthreads(); - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); + rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); *localrand=*localrand+1; - if(!rejflag[0]){ if(rejflag[1]){ - //accept_tau() *m_tau_acc=*m_tau_acc+1; }else{ - //reject_tau() - *m_tau=old[0]; - *m_tau_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_tau_rej=*m_tau_rej+1; - //restore_energy() - *m_energy=*m_old_energy; + reject(m_tau,m_tau_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_tau_rej); } }else{ - //reject_tau() - *m_tau=old[0]; - *m_tau_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_tau_rej=*m_tau_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; + reject(m_tau,m_tau_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_tau_rej); } } } - ////////////////////////////////////////////////////////////////// D - if (leader){ - //propose_d() - old[0]=*m_d; - *m_d+=prerandN[*localrand]**m_d_prop; - + propose(m_d,old,*m_d_prop,prerandN[*localrand]); //compute_d_prior_f0() - old[1]=*m_d_prior; - + old[1]=*m_d_prior; if(*m_d<0 || *m_d > 0.008){ rejflag[0]=true; }else{ @@ -891,102 +799,57 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT rejflag[0]=false; } } - - __syncthreads(); - + __syncthreads(); //compute_signal() - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model); + for(int f=0;f<nfib;f++){ + for(int i=0; i<mydirs; i++){ + int pos = *idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock; + compute_signal(&signals[pos],&oldsignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model); } } //compute_iso_signal() for(int i=0; i<mydirs; i++){ - compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model); + int pos = *idVOX*ndirections + idSubVOX + i*threadsBlock; + compute_iso_signal(&isosignals[pos],&oldisosignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,model); } if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - + getfsum(fsum,m_f,*m_f0,nfib); //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); } - - __syncthreads(); - + __syncthreads(); //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); + rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); *localrand=*localrand+1; - } - + } __syncthreads(); if(!rejflag[0]){ if(rejflag[1]){ - //accept_d() if (leader) *m_d_acc=*m_d_acc+1; }else{ if (leader){ - //reject_d() - *m_d=old[0]; - *m_d_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_d_rej=*m_d_rej+1; - //restore_energy() - *m_energy=*m_old_energy; - } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; + reject(m_d,m_d_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_d_rej); } + restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock); + restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock); } }else{ if (leader){ - //reject_d() - *m_d=old[0]; - *m_d_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_d_rej=*m_d_rej+1; - //restore_energy() - *m_energy=*m_old_energy; + reject(m_d,m_d_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_d_rej); } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; - } + restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock); + restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock); } - ////////////////////////////////////////////////////////////////// D_STD - if(model==2){ if (leader){ - //propose_d_std - old[0]=*m_dstd; - *m_dstd+=prerandN[*localrand]**m_dstd_prop; - + propose(m_dstd,old,*m_dstd_prop,prerandN[*localrand]); //compute_d_std_prior() old[1]=*m_dstd_prior; if(*m_dstd<=0 || *m_dstd > 0.01){ @@ -996,105 +859,56 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT *m_dstd_prior=log(double(*m_dstd)); } } - __syncthreads(); - //compute_signal() - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model); + for(int f=0;f<nfib;f++){ + for(int i=0; i<mydirs; i++){ + int pos = *idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock; + compute_signal(&signals[pos],&oldsignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model); } } - //compute_iso_signal() for(int i=0; i<mydirs; i++){ - compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model); + int pos = *idVOX*ndirections + idSubVOX + i*threadsBlock; + compute_iso_signal(&isosignals[pos],&oldisosignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,model); } - - if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - + if (leader){ + getfsum(fsum,m_f,*m_f0,nfib); //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); } - __syncthreads(); - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); + rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); *localrand=*localrand+1; } - __syncthreads(); if(!rejflag[0]){ if(rejflag[1]){ - //accept_dstd() if (leader) *m_dstd_acc=*m_dstd_acc+1; }else{ if (leader){ - //reject_dstd() - *m_dstd=old[0]; - *m_dstd_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_dstd_rej=*m_dstd_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; + reject(m_dstd,m_dstd_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_dstd_rej); } + restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock); + restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock); } }else{ if (leader){ - //reject_dstd() - *m_dstd=old[0]; - *m_dstd_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_dstd_rej=*m_dstd_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; + reject(m_dstd,m_dstd_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_dstd_rej); } + restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock); + restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock); } } - ////////////////////////////////////////////////////////////////// S0 - if (leader){ - //propose_S0() - old[0]=*m_S0; - *m_S0+=prerandN[*localrand]**m_S0_prop; - + propose(m_S0,old,*m_S0_prop,prerandN[*localrand]); //compute_S0_prior() old[1]=*m_S0_prior; if(*m_S0<0) rejflag[0]=true; @@ -1102,68 +916,33 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT *m_S0_prior=0; rejflag[0]=false; } - - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - + getfsum(fsum,m_f,*m_f0,nfib); //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); } - __syncthreads(); - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); + rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); *localrand=*localrand+1; if(!rejflag[0]){ if(rejflag[1]){ - //accept_S0() *m_S0_acc=*m_S0_acc+1; }else{ - //reject_S0() - *m_S0=old[0]; - *m_S0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_S0_rej=*m_S0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; + reject(m_S0,m_S0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_S0_rej); } }else{ - //reject_S0() - *m_S0=old[0]; - *m_S0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_S0_rej=*m_S0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; + reject(m_S0,m_S0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_S0_rej); } } - //////////////////////////////////////////////////////////////////////////// TH - for(int fibre=0;fibre<nfib;fibre++){ if (leader){ - //propose_th() - old[0]=m_th[fibre]; - m_th[fibre]+=prerandN[*localrand]*m_th_prop[fibre]; - + propose(&m_th[fibre],old,m_th_prop[fibre],prerandN[*localrand]); //compute_th_prior() old[1]=m_th_prior[fibre]; if(m_th[fibre]==0){ @@ -1172,177 +951,111 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT m_th_prior[fibre]=-log(double(fabs(sin(double(m_th[fibre]))/2))); } //rejflag[0]=false; /////////////////always false - //compute_prior() *fm_old_prior_en=fm_prior_en[fibre]; fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre]; } - __syncthreads(); - //compute_signal() //compute_signal_pre for(int i=0; i<mydirs; i++){ - cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre])); - cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre])); - old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]; - angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; - angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre]; - - compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model); + double myalpha = alpha[*posBV+idSubVOX+i*threadsBlock]; + double cos_alpha_minus_theta=cos(double(myalpha-m_th[fibre])); + double cos_alpha_plus_theta=cos(double(myalpha+m_th[fibre])); + int pos = *idVOX*ndirections*nfib + fibre*ndirections + idSubVOX + i*threadsBlock; + int pos2 = *idVOX*ndirections + idSubVOX + i*threadsBlock; + oldangtmp[pos2]=angtmp[pos]; + double aux = (cos(double(m_ph[fibre]-beta[*posBV+idSubVOX+i*threadsBlock]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; + aux = aux*aux; + angtmp[pos]= aux; + + compute_signal(&signals[pos],&oldsignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model); } - - if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - + if (leader){ + getfsum(fsum,m_f,*m_f0,nfib); //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); } - __syncthreads(); - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; + rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); + *localrand=*localrand+1; } - __syncthreads(); if(rejflag[1]){ - //accept_th if (leader) m_th_acc[fibre]++; - }else{ - + }else{ if (leader){ - //reject_th() - m_th[fibre]=old[0]; - m_th_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_th_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; + rejectF(&m_th[fibre],&m_th_prior[fibre],old,m_prior_en,m_old_prior_en,&fm_prior_en[fibre],fm_old_prior_en,m_energy,m_old_energy,&m_th_rej[fibre]); } - //compute_signal_pre undo - for(int i=0; i<mydirs; i++){ - angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre]; - mysig[i*nfib+fibre] = mysigold[i*nfib+fibre]; - } + restore_angtmp_signals(signals,oldsignals,angtmp,oldangtmp,*idVOX,idSubVOX,mydirs,nfib,fibre,ndirections,threadsBlock); } __syncthreads(); - /////////////////////////////////////// PH - if (leader){ - //propose_ph() - old[0]=m_ph[fibre]; - m_ph[fibre]+=prerandN[*localrand]*m_ph_prop[fibre]; - + propose(&m_ph[fibre],old,m_ph_prop[fibre],prerandN[*localrand]); //compute_ph_prior() old[1]=m_ph_prior[fibre]; m_ph_prior[fibre]=0; //rejflag[0]=false; - //compute_prior() *fm_old_prior_en=fm_prior_en[fibre]; fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre]; } - __syncthreads(); - //compute_signal() //compute_signal_pre for(int i=0; i<mydirs; i++){ - cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre])); - cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre])); - old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]; - angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; - angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre]; - - compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model); + double myalpha = alpha[*posBV+idSubVOX+i*threadsBlock]; + double cos_alpha_minus_theta=cos(double(myalpha-m_th[fibre])); + double cos_alpha_plus_theta=cos(double(myalpha+m_th[fibre])); + int pos = *idVOX*ndirections*nfib + fibre*ndirections + idSubVOX + i*threadsBlock; + int pos2 = *idVOX*ndirections + idSubVOX + i*threadsBlock; + oldangtmp[pos2]=angtmp[pos]; + double aux = (cos(double(m_ph[fibre]-beta[*posBV+idSubVOX+i*threadsBlock]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; + aux = aux*aux; + angtmp[pos]= aux; + + compute_signal(&signals[pos],&oldsignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model); } if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - + getfsum(fsum,m_f,*m_f0,nfib); //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); } - __syncthreads(); - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); + rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); *localrand=*localrand+1; } - __syncthreads(); //if(!rejflag[0]){ if(rejflag[1]){ - //accept_ph() if (leader) m_ph_acc[fibre]++; }else{ if (leader){ - //reject_ph() - m_ph[fibre]=old[0]; - m_ph_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_ph_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; + rejectF(&m_ph[fibre],&m_ph_prior[fibre],old,m_prior_en,m_old_prior_en,&fm_prior_en[fibre],fm_old_prior_en,m_energy,m_old_energy,&m_ph_rej[fibre]); } //compute_signal_pre undo - for(int i=0; i<mydirs; i++){ - angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre]; - - mysig[i*nfib+fibre] = mysigold[i*nfib+fibre]; - } + restore_angtmp_signals(signals,oldsignals,angtmp,oldangtmp,*idVOX,idSubVOX,mydirs,nfib,fibre,ndirections,threadsBlock); } __syncthreads(); - //////////////////////////////////////////// F - if (leader){ - //propose_f() - old[0]=m_f[fibre]; - m_f[fibre]+=prerandN[*localrand]*m_f_prop[fibre]; + propose(&m_f[fibre],old,m_f_prop[fibre],prerandN[*localrand]); //compute_f_prior() old[1]=m_f_prior[fibre]; @@ -1360,83 +1073,58 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT m_f_prior[fibre]=fudgevalue*m_f_prior[fibre]; rejflag[0]=false; } - //compute_prior() *fm_old_prior_en=fm_prior_en[fibre]; fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre]; - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } + getfsum(fsum,m_f,*m_f0,nfib); //reject_f_sum() rejflag[1]=(*fsum>1); - //compute_prior() compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); } __syncthreads(); - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - + compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); __syncthreads(); if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[2]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; + rejflag[2]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]); + *localrand=*localrand+1; if(!rejflag[0]){ if(!rejflag[1]){ if(rejflag[2]){ - //accept_f() m_f_acc[fibre]++; }else{ - //reject_f() - m_f[fibre]=old[0]; - m_f_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_f_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; + rejectF(&m_f[fibre],&m_f_prior[fibre],old,m_prior_en,m_old_prior_en,&fm_prior_en[fibre],fm_old_prior_en,m_energy,m_old_energy,&m_f_rej[fibre]); } }else{ - //reject_f() - m_f[fibre]=old[0]; - m_f_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_f_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; + rejectF(&m_f[fibre],&m_f_prior[fibre],old,m_prior_en,m_old_prior_en,&fm_prior_en[fibre],fm_old_prior_en,m_energy,m_old_energy,&m_f_rej[fibre]); } }else{ - //reject_f() - m_f[fibre]=old[0]; - m_f_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_f_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; + rejectF(&m_f[fibre],&m_f_prior[fibre],old,m_prior_en,m_old_prior_en,&fm_prior_en[fibre],fm_old_prior_en,m_energy,m_old_energy,&m_f_rej[fibre]); } } __syncthreads(); }//end while nfib + if((record_every)&&((*recordcount%record_every)==0)&&leader){ + rd[(*idVOX*totalrecords)+*sample-1]= *m_d; + if(m_include_f0) rf0[(*idVOX*totalrecords)+*sample-1]= *m_f0; + if(rician) rtau[(*idVOX*totalrecords)+*sample-1]= *m_tau; + if(model==2) rdstd[(*idVOX*totalrecords)+*sample-1]= *m_dstd; + rs0[(*idVOX*totalrecords)+*sample-1]= *m_S0; + for(int j=0;j<nfib;j++){ + rth[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_th[j]; + rph[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_ph[j]; + rf[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_f[j]; + } + *sample=*sample+1; + } + if(((*count_update%updateproposalevery)==0)&&leader){ //m_multifibre.update_proposals(); *m_d_prop*=sqrt(float(*m_d_acc+1)/float(*m_d_rej+1)); @@ -1556,1202 +1244,5 @@ extern "C" __global__ void runmcmc_burnin_kernel( //INPUT fibres[pos].m_lam_jump=m_lam_jump[idSubVOX]; } - - for(int i=0; i<mydirs; i++){ - isosignals[(*idVOX*ndirections)+idSubVOX+i*threadsBlock] = myisosig[i]; - - for(int f=0;f<nfib;f++){ - signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]=mysig[i*nfib+f]; - } - } -} - -extern "C" __global__ void runmcmc_record_kernel( //INPUT - const double* datam, - const double* bvals, - const double* alpha, - const double* beta, - //INPUT-OUTPUT - FibreGPU* fibres, - MultifibreGPU* multifibres, - double* signals, - double* isosignals, - //INPUT - float* randomsN, - float* randomsU, - const int ndirections, - const int nfib, - const int nparams, - const int model, - const float fudgevalue, - const bool m_include_f0, - const bool m_ardf0, - const bool can_use_ard, - const bool rician, - const bool gradnonlin, - const int updateproposalevery, //update every this number of iterations - const int iterations, //num of iterations to do this time (maybe is a part of the total) - const int current_iter, //the number of the current iteration over the total iterations - const int iters_burnin, //iters in burin, we need it to continue the updates at the correct time. - const int record_every, //record every this number - const int totalrecords, //total number of records to do - - //OUTPUT - float* rf0, //records of parameters - float* rtau, - float* rs0, - float* rd, - float* rdstd, - float* rth, - float* rph, - float* rf) - -{ - int idSubVOX= threadIdx.x; - int threadsBlock = blockDim.x; - bool leader = (idSubVOX==0); - - ////////// DYNAMIC SHARED MEMORY /////////// - extern __shared__ double shared[]; - double* reduction = (double*)shared; //threadsBlock - double* tmp = (double*) &reduction[threadsBlock]; //1 - - float* m_S0 = (float*) &tmp[1]; //1 - float* m_d = (float*) &m_S0[1]; //1 - float* m_dstd =(float*) &m_d[1]; //1 - float* m_f0 = (float*) &m_dstd[1]; //1 - float* m_tau = (float*) &m_f0[1]; //1 - float* m_th = (float*) &m_tau[1]; //nfib - float* m_ph = (float*) &m_th[nfib]; //nfib - float* m_f = (float*) &m_ph[nfib]; //nfib - - float* m_S0_prior = (float*) &m_f[nfib]; //1 - float* m_d_prior = (float*) &m_S0_prior[1]; //1 - float* m_dstd_prior = (float*) &m_d_prior[1]; //1 - float* m_f0_prior = (float*) &m_dstd_prior[1]; //1 - float* m_tau_prior = (float*) &m_f0_prior[1]; //1 - float* m_th_prior = (float*) &m_tau_prior[1]; //nfib - float* m_ph_prior = (float*) &m_th_prior[nfib]; //nfib - float* m_f_prior = (float*) &m_ph_prior[nfib]; //nfib - - float* m_S0_prop = (float*) &m_f_prior[nfib]; //1 - float* m_d_prop = (float*) &m_S0_prop[1]; //1 - float* m_dstd_prop = (float*) &m_d_prop[1]; //1 - float* m_f0_prop = (float*) &m_dstd_prop[1]; //1 - float* m_tau_prop = (float*) &m_f0_prop[1]; //1 - float* m_th_prop = (float*) &m_tau_prop[1]; //nfib - float* m_ph_prop = (float*) &m_th_prop[nfib]; //nfib - float* m_f_prop = (float*) &m_ph_prop[nfib]; //nfib - - float* fsum = (float*) &m_f_prop[nfib]; //1 - float* m_likelihood_en = (float*) &fsum[1]; //1 - float* m_prior_en = (float*) &m_likelihood_en[1]; //1 - float* m_old_prior_en = (float*) &m_prior_en[1]; //1 - float* fm_prior_en = (float*) &m_old_prior_en[1]; //nfib - float* fm_old_prior_en = (float*) &fm_prior_en[nfib]; //1 - float* m_energy = (float*) &fm_old_prior_en[1]; //1 - float* m_old_energy = (float*) &m_energy[1]; //1 - float* old = (float*) &m_old_energy[1]; //2 - - float* prerandN = (float*) &old[2]; //nparams - float* prerandU = (float*) &prerandN[nparams]; //nparams - - int* m_S0_acc = (int*) &prerandU[nparams]; //1 - int* m_d_acc = (int*) &m_S0_acc[1]; //1 - int* m_dstd_acc = (int*) &m_d_acc[1]; //1 - int* m_f0_acc = (int*) &m_dstd_acc[1]; //1 - int* m_tau_acc = (int*) &m_f0_acc[1]; //1 - int* m_th_acc = (int*) &m_tau_acc[1]; //nfib - int* m_ph_acc = (int*) &m_th_acc[nfib]; //nfib - int* m_f_acc = (int*) &m_ph_acc[nfib]; //nfib - - int* m_S0_rej = (int*) &m_f_acc[nfib]; //1 - int* m_d_rej = (int*) &m_S0_rej[1]; //1 - int* m_dstd_rej = (int*) &m_d_rej[1]; //1 - int* m_f0_rej = (int*) &m_dstd_rej[1]; //1 - int* m_tau_rej = (int*) &m_f0_rej[1]; //1 - int* m_th_rej = (int*) &m_tau_rej[1]; //nfib - int* m_ph_rej = (int*) &m_th_rej[nfib]; //nfib - int* m_f_rej = (int*) &m_ph_rej[nfib]; //nfib - - int* rejflag = (int*) &m_f_rej[nfib]; //3 - int* m_lam_jump = (int*) &rejflag[3]; //nfib - int* idVOX = (int*) &m_lam_jump[nfib]; //1 - int* count_update = (int*) &idVOX[1]; //1 - int* recordcount = (int*) &count_update[1]; //1 - int* sample = (int*) &recordcount[1]; //1 - int* localrand = (int*) &sample[1]; //1 - ////////// DYNAMIC SHARED MEMORY /////////// - - double mysig[MAXNDIRS_PER_THREAD*MAXNFIBRES]; - double mysigold[MAXNDIRS_PER_THREAD*MAXNFIBRES]; - double myisosig[MAXNDIRS_PER_THREAD]; - double myisosigold[MAXNDIRS_PER_THREAD]; - double mydata[MAXNDIRS_PER_THREAD]; - double mybvals[MAXNDIRS_PER_THREAD]; - double myalpha[MAXNDIRS_PER_THREAD]; - double mybeta[MAXNDIRS_PER_THREAD]; - - float angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES]; //this is to pre compute signal - float old_angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES]; - float cos_alpha_minus_theta; - float cos_alpha_plus_theta; - - if (leader){ - *idVOX= blockIdx.x; - *count_update = current_iter + iters_burnin; //count for updates - *recordcount= current_iter; - *sample=1+(current_iter/record_every); //the next number of sample.....the index start in 0 - - *m_prior_en=multifibres[*idVOX].m_prior_en; - - if(model==2){ - *m_dstd_acc=multifibres[*idVOX].m_dstd_acc; - *m_dstd_rej=multifibres[*idVOX].m_dstd_rej; - *m_dstd_prior=multifibres[*idVOX].m_dstd_prior; - *m_dstd_prop=multifibres[*idVOX].m_dstd_prop; - *m_dstd=multifibres[*idVOX].m_dstd; - }else{ - *m_dstd_acc=0; - *m_dstd_rej=0; - *m_dstd_prior=0; - *m_dstd_prop=0; - *m_dstd=0; - } - - *m_d=multifibres[*idVOX].m_d; - *m_energy=multifibres[*idVOX].m_energy; - *m_d_prop=multifibres[*idVOX].m_d_prop; - *m_d_prior=multifibres[*idVOX].m_d_prior; - *m_S0_prior=multifibres[*idVOX].m_S0_prior; - *m_S0=multifibres[*idVOX].m_S0; - *m_likelihood_en=multifibres[*idVOX].m_likelihood_en; - *m_d_acc=multifibres[*idVOX].m_d_acc; - *m_d_rej=multifibres[*idVOX].m_d_rej; - *m_S0_acc=multifibres[*idVOX].m_S0_acc; - *m_S0_rej=multifibres[*idVOX].m_S0_rej; - *m_S0_prop=multifibres[*idVOX].m_S0_prop; - - if(m_include_f0){ - *m_f0_acc=multifibres[*idVOX].m_f0_acc; - *m_f0_rej=multifibres[*idVOX].m_f0_rej; - *m_f0_prop=multifibres[*idVOX].m_f0_prop; - *m_f0_prior=multifibres[*idVOX].m_f0_prior; - *m_f0=multifibres[*idVOX].m_f0; - }else{ - *m_f0_acc=0; - *m_f0_rej=0; - *m_f0_prop=0; - *m_f0_prior=0; - *m_f0=0; - } - - if(rician){ - *m_tau_acc=multifibres[*idVOX].m_tau_acc; - *m_tau_rej=multifibres[*idVOX].m_tau_rej; - *m_tau_prop=multifibres[*idVOX].m_tau_prop; - *m_tau_prior=multifibres[*idVOX].m_tau_prior; - *m_tau=multifibres[*idVOX].m_tau; - }else{ - *m_tau_acc=0; - *m_tau_rej=0; - *m_tau_prop=0; - *m_tau_prior=0; - *m_tau=0; - } - } - __syncthreads(); - - int mydirs = ndirections/threadsBlock; - int mod = ndirections%threadsBlock; - if(mod&&(idSubVOX<mod)) mydirs++; - - if(idSubVOX<nfib){ - int pos = (*idVOX*nfib)+idSubVOX; - m_th[idSubVOX]=fibres[pos].m_th; - m_ph[idSubVOX]=fibres[pos].m_ph; - m_f[idSubVOX]=fibres[pos].m_f; - - m_th_acc[idSubVOX]=fibres[pos].m_th_acc; - m_th_rej[idSubVOX]=fibres[pos].m_th_rej; - m_ph_acc[idSubVOX]=fibres[pos].m_ph_acc; - m_ph_rej[idSubVOX]=fibres[pos].m_ph_rej; - m_f_acc[idSubVOX]=fibres[pos].m_f_acc; - m_f_rej[idSubVOX]=fibres[pos].m_f_rej; - - fm_prior_en[idSubVOX]=fibres[pos].m_prior_en; - m_th_prior[idSubVOX]=fibres[pos].m_th_prior; - m_ph_prior[idSubVOX]=fibres[pos].m_ph_prior; - m_f_prior[idSubVOX]=fibres[pos].m_f_prior; - - m_th_prop[idSubVOX]=fibres[pos].m_th_prop; - m_ph_prop[idSubVOX]=fibres[pos].m_ph_prop; - m_f_prop[idSubVOX]=fibres[pos].m_f_prop; - - m_lam_jump[idSubVOX]=fibres[pos].m_lam_jump; - } - - __syncthreads(); - - for(int i=0; i<mydirs; i++){ - int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; - myisosig[i] = isosignals[pos]; - mydata[i] = datam[pos]; - } - if(gradnonlin){ - for(int i=0; i<mydirs; i++){ - int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; - } - }else{ - for(int i=0; i<mydirs; i++){ - int pos = idSubVOX+i*threadsBlock; - mybvals[i] = bvals[pos]; - myalpha[i] = alpha[pos]; - mybeta[i] = beta[pos]; - } - - } - - for(int f=0;f<nfib;f++){ - for(int i=0; i<mydirs; i++){ - mysig[i*nfib+f]= signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]; - } - } - - __syncthreads(); - - //compute_signal_pre - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[f])); - cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[f])); - angtmp[i*nfib+f]= (cos(double(m_ph[f]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; - angtmp[i*nfib+f]=angtmp[i*nfib+f]*angtmp[i*nfib+f]; - } - } - - for (int niter=0; niter<iterations; niter++){ - //code jump() - - //prefetching randoms - if (leader){ - *count_update=*count_update+1; - *recordcount=*recordcount+1; - int idrand = *idVOX*iterations*nparams+(niter*nparams); - *localrand = 0; - if(m_include_f0){ - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - } - if(rician){ - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - } - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - - if(model==2){ - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - } - - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - - for(int f=0;f<nfib;f++){ - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - - prerandN[*localrand]=randomsN[idrand]; - prerandU[*localrand]=randomsU[idrand]; - idrand++; - *localrand=*localrand+1; - } - *localrand = 0; - } - -////////////////////////////////////////////////////////////////// F0 - - if(m_include_f0){ - if (leader){ - //propose_f0 - old[0]=*m_f0; - *m_f0+=prerandN[*localrand]**m_f0_prop; - - //compute_f0_prior() - old[1]=*m_f0_prior; - if(*m_f0<=0 || *m_f0 >=1){ - rejflag[0]=true; - }else{ - rejflag[0]=false; - if(!m_ardf0){ - *m_f0_prior=0; - }else{ - *m_f0_prior=log(double(*m_f0)); - } - } - - //for likehood and reject_f_sum - *fsum=*m_f0; - - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - - //reject_f_sum() - rejflag[1]=(*fsum>1); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[2]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - - if(!rejflag[0]){ - if(!rejflag[1]){ - if(rejflag[2]){ - //accept_f0() - *m_f0_acc=*m_f0_acc+1; - }else{ - //reject_F0() - *m_f0=old[0]; - *m_f0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_f0_rej=*m_f0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - }else{ - //reject_F0() - *m_f0=old[0]; - *m_f0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_f0_rej=*m_f0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - }else{ - //reject_F0() - *m_f0=old[0]; - *m_f0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_f0_rej=*m_f0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - } - } - -////////////////////////////////////////////////////////////////// TAU - if(rician){ - if (leader){ - //propose_tau - old[0]=*m_tau; - *m_tau+=prerandN[*localrand]**m_tau_prop; - - //compute_tau_prior() - old[1]=*m_tau_prior; - if(*m_tau<=0){ - rejflag[0]=true; - }else{ - rejflag[0]=false; - *m_tau_prior=0; - } - - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - - if(!rejflag[0]){ - if(rejflag[1]){ - //accept_tau() - *m_tau_acc=*m_tau_acc+1; - }else{ - //reject_tau() - *m_tau=old[0]; - *m_tau_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_tau_rej=*m_tau_rej+1; - //restore_energy() - *m_energy=*m_old_energy; - } - }else{ - //reject_tau() - *m_tau=old[0]; - *m_tau_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_tau_rej=*m_tau_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - } - } - -////////////////////////////////////////////////////////////////// D - - if (leader){ - //propose_d() - old[0]=*m_d; - *m_d+=prerandN[*localrand]**m_d_prop; - - //compute_d_prior_f0() - old[1]=*m_d_prior; - - if(*m_d<0 || *m_d > 0.008){ - rejflag[0]=true; - }else{ - *m_d_prior=0; - rejflag[0]=false; - } - } - - __syncthreads(); - - //compute_signal() - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model); - } - } - //compute_iso_signal() - for(int i=0; i<mydirs; i++){ - compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model); - } - - if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - } - - __syncthreads(); - - if(!rejflag[0]){ - if(rejflag[1]){ - //accept_d() - if (leader) *m_d_acc=*m_d_acc+1; - }else{ - if (leader){ - //reject_d() - *m_d=old[0]; - *m_d_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_d_rej=*m_d_rej+1; - //restore_energy() - *m_energy=*m_old_energy; - } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; - } - } - }else{ - if (leader){ - //reject_d() - *m_d=old[0]; - *m_d_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_d_rej=*m_d_rej+1; - //restore_energy() - *m_energy=*m_old_energy; - } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; - } - } - -////////////////////////////////////////////////////////////////// D_STD - - if(model==2){ - if (leader){ - //propose_d_std - old[0]=*m_dstd; - *m_dstd+=prerandN[*localrand]**m_dstd_prop; - - //compute_d_std_prior() - old[1]=*m_dstd_prior; - if(*m_dstd<=0 || *m_dstd > 0.01){ - rejflag[0]=true; - }else{ - rejflag[0]=false; - *m_dstd_prior=log(double(*m_dstd)); - } - } - - __syncthreads(); - - //compute_signal() - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model); - } - } - - //compute_iso_signal() - for(int i=0; i<mydirs; i++){ - compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model); - } - - if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - } - - __syncthreads(); - - if(!rejflag[0]){ - if(rejflag[1]){ - //accept_dstd() - if (leader) *m_dstd_acc=*m_dstd_acc+1; - }else{ - if (leader){ - //reject_dstd() - *m_dstd=old[0]; - *m_dstd_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_dstd_rej=*m_dstd_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; - } - } - }else{ - if (leader){ - //reject_dstd() - *m_dstd=old[0]; - *m_dstd_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_dstd_rej=*m_dstd_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - - for(int i=0; i<mydirs; i++){ - for(int f=0;f<nfib;f++){ - mysig[i*nfib+f] = mysigold[i*nfib+f]; - } - - myisosig[i]=myisosigold[i]; - } - } - } - -////////////////////////////////////////////////////////////////// S0 - - if (leader){ - //propose_S0() - old[0]=*m_S0; - *m_S0+=prerandN[*localrand]**m_S0_prop; - - //compute_S0_prior() - old[1]=*m_S0_prior; - if(*m_S0<0) rejflag[0]=true; - else{ - *m_S0_prior=0; - rejflag[0]=false; - } - - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - - if(!rejflag[0]){ - if(rejflag[1]){ - //accept_S0() - *m_S0_acc=*m_S0_acc+1; - }else{ - //reject_S0() - *m_S0=old[0]; - *m_S0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_S0_rej=*m_S0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - }else{ - //reject_S0() - *m_S0=old[0]; - *m_S0_prior=old[1]; - *m_prior_en=*m_old_prior_en; - *m_S0_rej=*m_S0_rej+1; - - //restore_energy() - *m_energy=*m_old_energy; - } - } - -//////////////////////////////////////////////////////////////////////////// TH - - for(int fibre=0;fibre<nfib;fibre++){ - if (leader){ - //propose_th() - old[0]=m_th[fibre]; - m_th[fibre]+=prerandN[*localrand]*m_th_prop[fibre]; - - //compute_th_prior() - old[1]=m_th_prior[fibre]; - if(m_th[fibre]==0){ - m_th_prior[fibre]=0; - }else{ - m_th_prior[fibre]=-log(double(fabs(sin(double(m_th[fibre]))/2))); - } - //rejflag[0]=false; /////////////////always false - - //compute_prior() - *fm_old_prior_en=fm_prior_en[fibre]; - fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre]; - } - - __syncthreads(); - - //compute_signal() - //compute_signal_pre - for(int i=0; i<mydirs; i++){ - cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre])); - cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre])); - old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]; - angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; - angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre]; - - compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model); - } - - if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - } - - __syncthreads(); - - if(rejflag[1]){ - //accept_th - if (leader) m_th_acc[fibre]++; - }else{ - - if (leader){ - //reject_th() - m_th[fibre]=old[0]; - m_th_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_th_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; - } - - //compute_signal_pre undo - for(int i=0; i<mydirs; i++){ - angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre]; - mysig[i*nfib+fibre] = mysigold[i*nfib+fibre]; - } - } - __syncthreads(); - -/////////////////////////////////////// PH - - if (leader){ - //propose_ph() - old[0]=m_ph[fibre]; - m_ph[fibre]+=prerandN[*localrand]*m_ph_prop[fibre]; - - //compute_ph_prior() - old[1]=m_ph_prior[fibre]; - m_ph_prior[fibre]=0; - //rejflag[0]=false; - - //compute_prior() - *fm_old_prior_en=fm_prior_en[fibre]; - fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre]; - } - - __syncthreads(); - - //compute_signal() - //compute_signal_pre - for(int i=0; i<mydirs; i++){ - cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre])); - cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre])); - old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]; - angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2; - angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre]; - - compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model); - } - - if (leader){ - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[1]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - } - - __syncthreads(); - - //if(!rejflag[0]){ - if(rejflag[1]){ - //accept_ph() - if (leader) m_ph_acc[fibre]++; - }else{ - if (leader){ - //reject_ph() - m_ph[fibre]=old[0]; - m_ph_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_ph_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; - } - //compute_signal_pre undo - for(int i=0; i<mydirs; i++){ - angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre]; - - mysig[i*nfib+fibre] = mysigold[i*nfib+fibre]; - } - } - - __syncthreads(); - -//////////////////////////////////////////// F - - if (leader){ - //propose_f() - old[0]=m_f[fibre]; - m_f[fibre]+=prerandN[*localrand]*m_f_prop[fibre]; - - //compute_f_prior() - old[1]=m_f_prior[fibre]; - if (m_f[fibre]<=0 || m_f[fibre]>=1) rejflag[0]=true; - else{ - if(!can_use_ard ){ - m_f_prior[fibre]=0; - }else{ - if(m_lam_jump[fibre]){ - m_f_prior[fibre]=log(double(m_f[fibre])); - }else{ - m_f_prior[fibre]=0; - } - } - m_f_prior[fibre]=fudgevalue*m_f_prior[fibre]; - rejflag[0]=false; - } - - //compute_prior() - *fm_old_prior_en=fm_prior_en[fibre]; - fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre]; - - //for likehood and reject_f_sum - *fsum=*m_f0; - for(int g=0;g<nfib;g++){ - *fsum+=m_f[g]; - } - //reject_f_sum() - rejflag[1]=(*fsum>1); - - //compute_prior() - compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib); - } - - __syncthreads(); - - //compute_likelihood() - compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib); - - __syncthreads(); - - if (leader){ - //compute_energy() - *m_old_energy=*m_energy; - *m_energy=*m_prior_en+*m_likelihood_en; - - //test_energy - *tmp=exp(double(*m_old_energy-*m_energy)); - rejflag[2]=(*tmp>prerandU[*localrand]); - *localrand=*localrand+1; - - if(!rejflag[0]){ - if(!rejflag[1]){ - if(rejflag[2]){ - //accept_f() - m_f_acc[fibre]++; - }else{ - //reject_f() - m_f[fibre]=old[0]; - m_f_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_f_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; - } - }else{ - //reject_f() - m_f[fibre]=old[0]; - m_f_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_f_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; - } - }else{ - //reject_f() - m_f[fibre]=old[0]; - m_f_prior[fibre]=old[1]; - fm_prior_en[fibre]=*fm_old_prior_en; - m_f_rej[fibre]++; - - //restore_energy() - *m_prior_en=*m_old_prior_en; - *m_energy=*m_old_energy; - } - } - __syncthreads(); - - }//end while nfib - - if(((*recordcount%record_every)==0)&&leader){ - rd[(*idVOX*totalrecords)+*sample-1]= *m_d; - if(m_include_f0) rf0[(*idVOX*totalrecords)+*sample-1]= *m_f0; - if(rician) rtau[(*idVOX*totalrecords)+*sample-1]= *m_tau; - if(model==2) rdstd[(*idVOX*totalrecords)+*sample-1]= *m_dstd; - rs0[(*idVOX*totalrecords)+*sample-1]= *m_S0; - for(int j=0;j<nfib;j++){ - rth[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_th[j]; - rph[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_ph[j]; - rf[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_f[j]; - } - *sample=*sample+1; - } - - if(((*count_update%updateproposalevery)==0)&&leader){ - //m_multifibre.update_proposals(); - *m_d_prop*=sqrt(float(*m_d_acc+1)/float(*m_d_rej+1)); - *m_d_prop=min(*m_d_prop,maxfloat); - - if(rician){ - *m_tau_prop*=sqrt(float(*m_tau_acc+1)/float(*m_tau_rej+1)); - *m_tau_prop=min(*m_tau_prop,maxfloat); - *m_tau_acc=0; - *m_tau_rej=0; - } - - if(m_include_f0){ - *m_f0_prop*=sqrt(float(*m_f0_acc+1)/float(*m_f0_rej+1)); - *m_f0_prop=min(*m_f0_prop,maxfloat); - *m_f0_acc=0; - *m_f0_rej=0; - } - - if(model==2){ - *m_dstd_prop*=sqrt(float(*m_dstd_acc+1)/float(*m_dstd_rej+1)); - *m_dstd_prop=min(*m_dstd_prop,maxfloat); - *m_dstd_acc=0; - *m_dstd_rej=0; - } - - *m_S0_prop*=sqrt(float(*m_S0_acc+1)/float(*m_S0_rej+1)); - *m_S0_prop=min(*m_S0_prop,maxfloat); - *m_d_acc=0; - *m_d_rej=0; - *m_S0_acc=0; - *m_S0_rej=0; - for(int f=0; f<nfib;f++){ - //m_fibres[f].update_proposals(); - m_th_prop[f]*=sqrt(float(m_th_acc[f]+1)/float(m_th_rej[f]+1)); - m_th_prop[f]=min(m_th_prop[f],maxfloat); - m_ph_prop[f]*=sqrt(float(m_ph_acc[f]+1)/float(m_ph_rej[f]+1)); - m_ph_prop[f]=min(m_ph_prop[f],maxfloat); - m_f_prop[f]*=sqrt(float(m_f_acc[f]+1)/float(m_f_rej[f]+1)); - m_f_prop[f]=min(m_f_prop[f],maxfloat); - - m_th_acc[f]=0; - m_th_rej[f]=0; - m_ph_acc[f]=0; - m_ph_rej[f]=0; - m_f_acc[f]=0; - m_f_rej[f]=0; - } - } - - __syncthreads(); - - } //end while iterations - - if(leader){ - multifibres[*idVOX].m_S0=*m_S0; - multifibres[*idVOX].m_S0_prior=*m_S0_prior; - multifibres[*idVOX].m_S0_prop=*m_S0_prop; - multifibres[*idVOX].m_S0_acc=*m_S0_acc; - multifibres[*idVOX].m_S0_rej=*m_S0_rej; - - multifibres[*idVOX].m_d=*m_d; - multifibres[*idVOX].m_d_prior=*m_d_prior; - multifibres[*idVOX].m_d_prop=*m_d_prop; - multifibres[*idVOX].m_d_acc=*m_d_acc; - multifibres[*idVOX].m_d_rej=*m_d_rej; - - multifibres[*idVOX].m_prior_en=*m_prior_en; - multifibres[*idVOX].m_energy=*m_energy; - multifibres[*idVOX].m_likelihood_en=*m_likelihood_en; - - if(m_include_f0){ - multifibres[*idVOX].m_f0_prior=*m_f0_prior; - multifibres[*idVOX].m_f0=*m_f0; - multifibres[*idVOX].m_f0_acc=*m_f0_acc; - multifibres[*idVOX].m_f0_rej=*m_f0_rej; - multifibres[*idVOX].m_f0_prop=*m_f0_prop; - } - if(rician){ - multifibres[*idVOX].m_tau_prior=*m_tau_prior; - multifibres[*idVOX].m_tau=*m_tau; - multifibres[*idVOX].m_tau_acc=*m_tau_acc; - multifibres[*idVOX].m_tau_rej=*m_tau_rej; - multifibres[*idVOX].m_tau_prop=*m_tau_prop; - } - if(model==2){ - multifibres[*idVOX].m_dstd_prior=*m_dstd_prior; - multifibres[*idVOX].m_dstd=*m_dstd; - multifibres[*idVOX].m_dstd_acc=*m_dstd_acc; - multifibres[*idVOX].m_dstd_rej=*m_dstd_rej; - multifibres[*idVOX].m_dstd_prop=*m_dstd_prop; - } - } - - if(idSubVOX<nfib){ - int pos = (*idVOX*nfib)+idSubVOX; - - fibres[pos].m_th=m_th[idSubVOX]; - fibres[pos].m_ph=m_ph[idSubVOX]; - fibres[pos].m_f=m_f[idSubVOX]; - - fibres[pos].m_th_acc=m_th_acc[idSubVOX]; - fibres[pos].m_th_rej=m_th_rej[idSubVOX]; - fibres[pos].m_ph_acc=m_ph_acc[idSubVOX]; - fibres[pos].m_ph_rej=m_ph_rej[idSubVOX]; - fibres[pos].m_f_acc=m_f_acc[idSubVOX]; - fibres[pos].m_f_rej=m_f_rej[idSubVOX]; - - fibres[pos].m_prior_en=fm_prior_en[idSubVOX]; - fibres[pos].m_th_prior=m_th_prior[idSubVOX]; - fibres[pos].m_ph_prior=m_ph_prior[idSubVOX]; - fibres[pos].m_f_prior=m_f_prior[idSubVOX]; - - fibres[pos].m_th_prop=m_th_prop[idSubVOX]; - fibres[pos].m_ph_prop=m_ph_prop[idSubVOX]; - fibres[pos].m_f_prop=m_f_prop[idSubVOX]; - - fibres[pos].m_lam_jump=m_lam_jump[idSubVOX]; - } - - for(int i=0; i<mydirs; i++){ - isosignals[(*idVOX*ndirections)+idSubVOX+i*threadsBlock] = myisosig[i]; - - for(int f=0;f<nfib;f++){ - signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]=mysig[i*nfib+f]; - } - } }