Skip to content
Snippets Groups Projects
runmcmc_kernels.cu 42.5 KiB
Newer Older
Moises Fernandez's avatar
Moises Fernandez committed
/*  runmcmc_kernels.cu

    Tim Behrens, Saad Jbabdi, Stam Sotiropoulos, Moises Hernandez  - FMRIB Image Analysis Group

    Copyright (C) 2005 University of Oxford  */

/*  CCOPYRIGHT  */

#include <iostream>
#include <fstream>
#include <stdio.h>
#include "fibre_gpu.h"
#include <math.h>
#include <string.h>
#include <string>
#include <cuda_runtime.h>
#include <cuda.h>
#include <curand.h>

#include <options.h>

#define maxfloat 1e10
__device__ inline void propose(float* param, float* old, float prop, float random){
	*old=*param;
	*param = *param + random*prop;
}
__device__ inline void reject(float* param, float* prior, float* old, float* m_prior_en, float* m_old_prior_en, float* m_energy, float* m_old_energy, int* rej){
	*param=old[0];
	*prior=old[1];
	*m_prior_en=*m_old_prior_en;
	*rej=*rej+1;
	//restore_energy()
      	*m_energy=*m_old_energy;
}
__device__ inline void rejectF(float* param, float* prior, float* old, float* m_prior_en, float* m_old_prior_en, float* fm_prior_en, float* fm_old_prior_en, float* m_energy, float* m_old_energy, int* rej){
	*param=old[0];
	*prior=old[1];
	*fm_prior_en=*fm_old_prior_en;
	*m_prior_en=*m_old_prior_en;
	*rej=*rej+1;
	//restore_energy()
      	*m_energy=*m_old_energy;
}				
__device__ inline void getfsum(float* fsum, float* m_f, float m_f0, int nfib){
	*fsum=m_f0;
	for(int f=0;f<nfib;f++){  
		*fsum = *fsum + m_f[f];
	}
}
__device__ inline bool compute_test_energy(float *m_energy, float* m_old_energy, float m_prior_en, float m_likelihood_en, float random){
	*m_old_energy=*m_energy;
      	*m_energy=m_prior_en+m_likelihood_en;
	double tmp=exp(double(*m_old_energy-*m_energy));
	return (tmp>random);
}
__device__ inline void compute_signal(double *signals,double *oldsignals,float mbvals,float* m_d, float* m_dstd,float angtmp, int model){
	*oldsignals=*signals;
	if(model==1 || *m_dstd<1e-5){	
		*signals=exp(double(-*m_d*mbvals*angtmp));
		double dbeta= *m_d/(*m_dstd**m_dstd);
	 	double dalpha= *m_d*dbeta;         
           	*signals=exp(double(log(double(dbeta/(dbeta+mbvals*angtmp)))*dalpha)); 
__device__ inline void compute_iso_signal(double *isosignals,double *oldisosignals, float mbvals,float* m_d, float* m_dstd, int model){
	*oldisosignals=*isosignals;
	if(model==1 || *m_dstd<1e-5){
	 	*isosignals=exp(double(-m_d[0]*mbvals));	
		double dbeta= *m_d/(*m_dstd**m_dstd);
	  	double dalpha= *m_d*dbeta;
		*isosignals=exp(double(log(double(dbeta/(dbeta+mbvals)))*dalpha));
__device__ inline void restore_signals(double* signals, double* oldsignals, int idVOX, int idSubVOX, int mydirs, int nfib, int ndirections, int threadsBlock){
	for(int f=0;f<nfib;f++){
		for(int i=0; i<mydirs; i++){
			int pos = idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock;
			signals[pos] = oldsignals[pos];
		}	
	}
}
__device__ inline void restore_isosignals(double* isosignals, double* oldisosignals, int idVOX, int idSubVOX, int mydirs, int ndirections, int threadsBlock){
	for(int i=0; i<mydirs; i++){
		int pos = idVOX*ndirections + idSubVOX + i*threadsBlock;
		isosignals[pos]=oldisosignals[pos];
	}
}
__device__ inline void restore_angtmp_signals(double* signals, double* oldsignals,double* angtmp, double* oldangtmp, int idVOX, int idSubVOX, int mydirs, int nfib, int fibre, int ndirections, int threadsBlock){
	for(int i=0; i<mydirs; i++){
		int pos = idVOX*ndirections*nfib + fibre*ndirections + idSubVOX + i*threadsBlock;
		int pos2 = idVOX*ndirections + idSubVOX + i*threadsBlock;
		angtmp[pos]=oldangtmp[pos2];
		signals[pos] = oldsignals[pos];	
	}
}
__device__  inline void compute_prior(float *m_prior_en, float *m_prior_en_old,float* m_d_prior,float* m_S0_prior,float *m_prior_enf, float* m_f0_prior, float* m_tau_prior, float* m_dstd_prior, int nfib){			
        *m_prior_en_old=*m_prior_en;
	*m_prior_en=*m_d_prior+*m_S0_prior+*m_dstd_prior+*m_tau_prior+*m_f0_prior;
	for(int f=0;f<nfib;f++){
		*m_prior_en=*m_prior_en+m_prior_enf[f];
	}	
}

__device__ inline float logIo(const float& x){
    	float y,b;
    	if (b<3.75){
      		float a=x/3.75;
      		a*=a;
      		//Bessel function evaluation
		y=1.0+a*(3.5156229+a*(3.0899424+a*(1.2067492+a*(0.2659732+a*(0.0360768+a*0.0045813)))));
      		y=log(double(y));
    	}else{
      		float a=3.75/b; 
      		//Bessel function evaluation
      		//y=(exp(b)/sqrt(b))*(0.39894228+a*(0.01328592+a*(0.00225319+a*(-0.00157565+a*(0.00916281+a*(-0.02057706+a*(0.02635537+a*(-0.01647633+a*0.00392377))))))));
      		//Logarithm of Bessel function

		y=b+log(double((0.39894228+a*(0.01328592+a*(0.00225319+a*(-0.00157565+a*(0.00916281+a*(-0.02057706+a*(0.02635537+a*(-0.01647633+a*0.00392377))))))))/sqrt(b)));
    	}

    	return y;
}
__device__ inline void compute_likelihood(int idSubVOX,float* m_S0,float *m_likelihood_en,float *m_f,double *signals,double *isosignals,const float *mdata,float* fsum,double *reduction, float* m_f0, const bool rician, float* m_tau,int mydirs, int threadsBlock, int ndirections, int nfib){
	for(int i=0; i<mydirs; i++){
	      	for(int f=0;f<nfib;f++){
			pos = f*ndirections + idSubVOX + i*threadsBlock;
			pred= pred+m_f[f]*signals[pos];
		pos = idSubVOX + i*threadsBlock;
		pred= *m_S0*(pred+(1-*fsum)*isosignals[pos]+*m_f0); //F0
	
			reduction[idSubVOX] = reduction[idSubVOX]+(diff*diff);
		}else{
			pred= log(mdata[pos])+(-0.5**m_tau*(mdata[pos]*mdata[pos]+pred*pred)+logIo(*m_tau*pred*mdata[pos]));  
			reduction[idSubVOX] = reduction[idSubVOX]+pred;
	unsigned int s2=threadsBlock;
	for(unsigned int s=threadsBlock>>1; s>0; s>>=1) {
		if((s2%2)&&(idSubVOX==(s-1))) reduction[idSubVOX]= reduction[idSubVOX] + reduction[idSubVOX + s +1]; 
        	if (idSubVOX < s){
            		reduction[idSubVOX] = reduction[idSubVOX] + reduction[idSubVOX + s];
       	 	}
		s2=s;
        	__syncthreads();
    	}
	if(idSubVOX==0){
		double sumsquares=0;
		sumsquares+=reduction[0];
		if(!rician){ 
		 	*m_likelihood_en=(ndirections/2.0)*log(sumsquares/2.0); 
			*m_likelihood_en= -ndirections*log(*m_tau)-sumsquares;
extern "C" __global__ void init_Fibres_Multifibres_kernel(	//INPUT
								const float*			tau,
								const double*			alpha,
								const double*			beta,
								const int			ndirections,
								const int 			nfib,
								const int 			nparams_fit,
								const int 			model,
								const float 			fudgevalue,
								const bool			m_includef0,
								const bool			rician,
								const bool 			m_ardf0,	// opts.ardf0.value()
								const bool 			ard_value,	// opts.all_ard.value()
								const bool 			no_ard_value,	// opts.no_ard.value()
								//OUTPUT
								FibreGPU*			fibres,
								MultifibreGPU*			multifibres,
								double*				signals,
								double*				isosignals)
{
	int idSubVOX= threadIdx.x;
	int threadsBlock = blockDim.x;
	int idVOX= blockIdx.x;
	bool leader = (idSubVOX==0);

	////////// DYNAMIC SHARED MEMORY ///////////
	extern __shared__ double shared[];
	double* reduction = (double*)shared;				//threadsBlock

	float* m_S0 = (float*) &reduction[threadsBlock];		//1
	float* m_d = (float*) &m_S0[1];					//1
	float* m_dstd =(float*) &m_d[1];				//1
	float* m_f0 = (float*) &m_dstd[1];				//1
	float* m_tau = (float*) &m_f0[1];				//1
	float* m_th = (float*) &m_tau[1];				//nfib
	float* m_ph = (float*) &m_th[nfib];				//nfib
	float* m_f = (float*) &m_ph[nfib];				//nfib

	float* fsum = (float*) &m_f[nfib];				//1
	float* m_likelihood_en = (float*) &fsum[1];			//1
	float* m_prior_en = (float*) &m_likelihood_en[1];		//1
	int* posBV = (int*) &m_prior_en[1];				//1
	////////// DYNAMIC SHARED MEMORY ///////////
	// m_s0-params[0] 	m_d-params[1] 	m_f-m_th-m_ph-params[2,3,4,5, etc..]   	m_f0-params[nparams-1]
	if(leader){
		if(gradnonlin) *posBV = (idVOX*ndirections);
		else *posBV = 0;

		*m_S0 = params[idVOX*nparams_fit];
		multifibres[idVOX].m_S0 = *m_S0;
		multifibres[idVOX].m_S0_prior = 0;
		multifibres[idVOX].m_S0_acc = 0;
		multifibres[idVOX].m_S0_rej = 0;
		*m_d=params[idVOX*nparams_fit+1];
		if(*m_d<0 || *m_d>0.008) *m_d=2e-3;			//this is in xfibres...after fit
		multifibres[idVOX].m_d = *m_d;
		multifibres[idVOX].m_d_prior = 0;
		multifibres[idVOX].m_d_acc = 0;
		multifibres[idVOX].m_d_rej = 0;
			*m_dstd=params[idVOX*nparams_fit+2];
			if(*m_dstd<0 || *m_dstd>0.01) *m_dstd=*m_d/10;	//this is in xfibres...after fit
		else *m_dstd = 0;
		multifibres[idVOX].m_dstd = *m_dstd;
		multifibres[idVOX].m_dstd_prior = 0;
		multifibres[idVOX].m_dstd_acc = 0;
		multifibres[idVOX].m_dstd_rej = 0;

		if (m_includef0) *m_f0=params[idVOX*nparams_fit+nparams_fit-1];
		else *m_f0=0;
		multifibres[idVOX].m_f0 = *m_f0;
		multifibres[idVOX].m_f0_prior = 0;
		multifibres[idVOX].m_f0_acc = 0;
		multifibres[idVOX].m_f0_rej = 0;

		*m_tau = tau[idVOX];
		multifibres[idVOX].m_tau = *m_tau;
		multifibres[idVOX].m_tau_prior = 0;
		multifibres[idVOX].m_tau_acc = 0;
		multifibres[idVOX].m_tau_rej = 0;
	}
	__syncthreads();

	int mydirs = ndirections/threadsBlock;
	int mod = ndirections%threadsBlock;
	if(mod&&(idSubVOX<mod)) mydirs++;

	if(idSubVOX<nfib){
		int add=0;
		if(model==2) add=1;		// if model 2 we have d_std and then 1 more parameter in position 2
		int pos = (idVOX*nfib)+idSubVOX;

		m_th[idSubVOX]=params[idVOX*nparams_fit+2+3*idSubVOX+1+add];
		fibres[pos].m_th = m_th[idSubVOX];
		fibres[pos].m_th_prop = 0.2;
		float m_th_prior = 0;
		fibres[pos].m_th_acc = 0;
		fibres[pos].m_th_rej = 0;
		
		//compute_th_prior();
	      	if(m_th==0){
			m_th_prior=0;
		}else{
			m_th_prior=-log(double(fabs(sin(double(m_th[idSubVOX]))/2)));
	      	}
		fibres[pos].m_th_prior = m_th_prior;
		float m_ph_prior=0;	//compute_ph_prior();
		m_ph[idSubVOX]=params[idVOX*nparams_fit+2+3*idSubVOX+2+add];
		fibres[pos].m_ph = m_ph[idSubVOX];
		fibres[pos].m_ph_prop = 0.2;
		fibres[pos].m_ph_prior = 0;	//compute_ph_prior();
		fibres[pos].m_ph_acc = 0;
		fibres[pos].m_ph_rej = 0;

		m_f[idSubVOX] = params[idVOX*nparams_fit+2+3*idSubVOX+add]; 
		fibres[pos].m_f=m_f[idSubVOX];
		fibres[pos].m_f_prop = 0.2;
		float m_f_prior = 0;
		fibres[pos].m_f_acc = 0;
		fibres[pos].m_f_rej = 0;
		if(idSubVOX==0){
			fibres[pos].m_lam_jump = ard_value;
		}else{
			fibres[pos].m_lam_jump = !no_ard_value;
		}
		//compute_f_prior();
      		if (m_f[idSubVOX]<=0 | m_f[idSubVOX]>=1 ){
      		}else{
	  		if(fibres[pos].m_lam_jump){              
	    			m_f_prior=log(double(m_f[idSubVOX]));
	  		}else{
	    			m_f_prior=0;
			}
			m_f_prior= fudgevalue* m_f_prior;
      		}
		fibres[pos].m_f_prior = m_f_prior;
		//fibres[vox].m_lam = m_lam; ??
		//fibres[vox].m_lam_prop = 1;
		//fibres[vox].m_lam_prior = 0;
		//compute_lam_prior();
		//compute_prior();
		fibres[pos].m_prior_en= m_th_prior + m_ph_prior + m_f_prior;
	__syncthreads();

	//compute_signal_pre
	for(int f=0;f<nfib;f++){	
		for(int i=0; i<mydirs; i++){
			double myalpha = alpha[*posBV+idSubVOX+i*threadsBlock];
			double cos_alpha_minus_theta=cos(double(myalpha-m_th[f]));   
			double cos_alpha_plus_theta=cos(double(myalpha+m_th[f]));
			int pos = idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock;
			double aux = (cos(double(m_ph[f]-beta[*posBV+idSubVOX+i*threadsBlock]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
		     	aux =  aux*aux;
		 	angtmp[pos]= aux;
	for(int f=0;f<nfib;f++){
		for(int i=0; i<mydirs; i++){
			int pos = idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock;
			compute_signal(&signals[pos],&old,bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model);
		//initialise_energies();
	      	//compute_d_prior(); m_d_prior=0; so i don't do nothing, it is already 0
	      	if(model==2){
			//compute_d_std_prior();
			if(*m_dstd<=0 || *m_dstd>0.01){
				multifibres[idVOX].m_dstd_prior=log(double(*m_dstd));
	      	//compute_tau_prior(); m_tau_prior=0; so it doesn't do nothing, it is already 0
	      	if (m_includef0){
			//compute_f0_prior();
			if (*m_f0<=0 || *m_f0>=1){
	      		}else{
				if(!m_ardf0){}     	//Without ARD
				else              	//With ARD
		  			multifibres[idVOX].m_f0_prior= log(double(*m_f0));
	      		}
		}
	      	//compute_S0_prior(); m_S0_prior=0; so i don't do nothing, it is already 0
		*m_prior_en = 0;
	      	//compute_prior();
	      	//m_prior_en=m_d_prior+m_S0_prior; is 0
	      	if(model==2)
			*m_prior_en= *m_prior_en+multifibres[idVOX].m_dstd_prior;
	      	//if(m_rician) m_prior_en=m_prior_en+m_tau_prior; is 0
	      	if (m_includef0)
			*m_prior_en=*m_prior_en+multifibres[idVOX].m_f0_prior;
	      	for(int fib=0;fib<nfib;fib++){
			*m_prior_en=*m_prior_en+ fibres[idVOX*nfib+fib].m_prior_en;
		multifibres[idVOX].m_prior_en = *m_prior_en;

	//compute_iso_signal()
	for(int i=0; i<mydirs; i++){
		int pos = idVOX*ndirections + idSubVOX + i*threadsBlock;	
		compute_iso_signal(&isosignals[pos],&old,bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,model);
	__syncthreads();

	//compute_likelihood()
	compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[idVOX*nfib*ndirections],&isosignals[idVOX*ndirections],&datam[idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
	__syncthreads();

	if(leader){
		multifibres[idVOX].m_likelihood_en = *m_likelihood_en;
	      	
		//compute_energy();	
		multifibres[idVOX].m_energy = *m_prior_en+*m_likelihood_en;

	    	//initialise_props();
	      	multifibres[idVOX].m_S0_prop=multifibres[idVOX].m_S0/10.0; 
	      	multifibres[idVOX].m_d_prop=*m_d/10.0;
	      	multifibres[idVOX].m_dstd_prop=*m_dstd/10.0;
	      	multifibres[idVOX].m_tau_prop=*m_tau/2.0;
	      	multifibres[idVOX].m_f0_prop=0.2;
extern "C" __global__ void runmcmc_kernel(	//INPUT 
						const float*			datam,
						const float*			bvals,
						const double*			alpha,
						const double*			beta,
						float*				randomsN,
						float*				randomsU,
						const int			ndirections,
						const int			nfib,
						const int			nparams,
						const int 			model,
						const float 			fudgevalue,
						const bool 			m_include_f0,
						const bool 			m_ardf0,
						const bool 			can_use_ard, 
						const bool 			rician,
						const bool			gradnonlin,
						const int 			updateproposalevery, 	//update every this number of iterations	
						const int 			iterations,		//num of iterations to do this time (maybe is a part of the total)
						const int 			current_iter,		//the number of the current iteration over the total iterations
						const int 			iters_burnin,		//iters in burin, we need it to continue the updates at the correct time. 
						const int 			record_every, 		//record every this number
						const int 			totalrecords,		//total number of records to do
						//TO USE
						double*				oldsignals,
						double*				oldisosignals,
						double*				angtmp,
						double*				oldangtmp,
						//INPUT-OUTPUT
						FibreGPU*			fibres,
						MultifibreGPU*			multifibres,
						double*				signals,
						double*				isosignals,
						//OUTPUT
						float*				rf0,			//record of parameters
						float*				rtau,
						float*				rs0,
						float*				rd,
						float*				rdstd,
						float*				rth,
						float*				rph, 
						float*				rf)
	int idSubVOX= threadIdx.x;
	int threadsBlock = blockDim.x;
	bool leader = (idSubVOX==0);

	////////// DYNAMIC SHARED MEMORY ///////////
	extern __shared__ double shared[];
	double* reduction = (double*)shared;				//threadsBlock

	float* m_S0 = (float*) &reduction[threadsBlock];		//1
	float* m_d = (float*) &m_S0[1];					//1
	float* m_dstd =(float*) &m_d[1];				//1
	float* m_f0 = (float*) &m_dstd[1];				//1
	float* m_tau = (float*) &m_f0[1];				//1
	float* m_th = (float*) &m_tau[1];				//nfib
	float* m_ph = (float*) &m_th[nfib];				//nfib
	float* m_f = (float*) &m_ph[nfib];				//nfib

	float* m_S0_prior = (float*) &m_f[nfib];			//1
	float* m_d_prior = (float*) &m_S0_prior[1];			//1
	float* m_dstd_prior = (float*) &m_d_prior[1];			//1
	float* m_f0_prior = (float*) &m_dstd_prior[1];			//1
	float* m_tau_prior = (float*) &m_f0_prior[1];			//1
	float* m_th_prior = (float*) &m_tau_prior[1];			//nfib
	float* m_ph_prior = (float*) &m_th_prior[nfib];			//nfib
	float* m_f_prior = (float*) &m_ph_prior[nfib];			//nfib

	float* m_S0_prop = (float*) &m_f_prior[nfib];			//1
	float* m_d_prop = (float*) &m_S0_prop[1];			//1
	float* m_dstd_prop = (float*) &m_d_prop[1];			//1
	float* m_f0_prop = (float*) &m_dstd_prop[1];			//1
	float* m_tau_prop = (float*) &m_f0_prop[1];			//1
	float* m_th_prop = (float*) &m_tau_prop[1];			//nfib
	float* m_ph_prop = (float*) &m_th_prop[nfib];			//nfib
	float* m_f_prop = (float*) &m_ph_prop[nfib];			//nfib

	float* fsum = (float*) &m_f_prop[nfib];				//1
	float* m_likelihood_en = (float*) &fsum[1];			//1
	float* m_prior_en = (float*) &m_likelihood_en[1];		//1
	float* m_old_prior_en = (float*) &m_prior_en[1];		//1
	float* fm_prior_en = (float*) &m_old_prior_en[1];		//nfib		
	float* fm_old_prior_en = (float*) &fm_prior_en[nfib];		//1		
	float* m_energy = (float*) &fm_old_prior_en[1];			//1
	float* m_old_energy = (float*) &m_energy[1];			//1
	float* old = (float*) &m_old_energy[1];				//2

	float* prerandN = (float*) &old[2];				//nparams
	float* prerandU = (float*) &prerandN[nparams];			//nparams

	int* m_S0_acc = (int*) &prerandU[nparams];			//1
	int* m_d_acc = (int*) &m_S0_acc[1];				//1
	int* m_dstd_acc = (int*) &m_d_acc[1];				//1
	int* m_f0_acc = (int*) &m_dstd_acc[1];				//1
	int* m_tau_acc = (int*) &m_f0_acc[1];				//1
	int* m_th_acc = (int*) &m_tau_acc[1];				//nfib
	int* m_ph_acc = (int*) &m_th_acc[nfib];				//nfib
	int* m_f_acc = (int*) &m_ph_acc[nfib];				//nfib

	int* m_S0_rej = (int*) &m_f_acc[nfib];				//1
	int* m_d_rej = (int*) &m_S0_rej[1];				//1
	int* m_dstd_rej = (int*) &m_d_rej[1];				//1	
	int* m_f0_rej = (int*) &m_dstd_rej[1];				//1
	int* m_tau_rej = (int*) &m_f0_rej[1];				//1	
	int* m_th_rej = (int*) &m_tau_rej[1];				//nfib
	int* m_ph_rej = (int*) &m_th_rej[nfib];				//nfib
	int* m_f_rej = (int*) &m_ph_rej[nfib];				//nfib
	
	int* rejflag = (int*) &m_f_rej[nfib];				//3					
	int* m_lam_jump = (int*) &rejflag[3];				//nfib	
	int* idVOX = (int*) &m_lam_jump[nfib];				//1		
	int* count_update = (int*) &idVOX[1];				//1	
	int* recordcount = (int*) &count_update[1];			//1
	int* sample = (int*) &recordcount[1];				//1		
	int* localrand = (int*) &sample[1];				//1
	int* posBV = (int*) &localrand[1];				//1
	////////// DYNAMIC SHARED MEMORY ///////////
		*idVOX= blockIdx.x;
		*count_update = current_iter+iters_burnin;	//count for updates
		*recordcount = current_iter;	
		if(record_every) *sample=1+(current_iter/record_every);		//the next number of sample.....the index start in 0

		if(gradnonlin)*posBV = (*idVOX*ndirections);
		else *posBV = 0;

		*m_prior_en=multifibres[*idVOX].m_prior_en;
			*m_dstd_acc=multifibres[*idVOX].m_dstd_acc;
			*m_dstd_rej=multifibres[*idVOX].m_dstd_rej;
			*m_dstd_prior=multifibres[*idVOX].m_dstd_prior;
			*m_dstd_prop=multifibres[*idVOX].m_dstd_prop;
			*m_dstd=multifibres[*idVOX].m_dstd;
			*m_dstd_acc=0;
			*m_dstd_rej=0;
			*m_dstd_prior=0;
			*m_dstd_prop=0;
			*m_dstd=0;
		*m_d=multifibres[*idVOX].m_d;
		*m_energy=multifibres[*idVOX].m_energy;
		*m_d_prop=multifibres[*idVOX].m_d_prop;
		*m_d_prior=multifibres[*idVOX].m_d_prior;
		*m_S0_prior=multifibres[*idVOX].m_S0_prior;
		*m_S0=multifibres[*idVOX].m_S0;
		*m_likelihood_en=multifibres[*idVOX].m_likelihood_en;
		*m_d_acc=multifibres[*idVOX].m_d_acc;
		*m_d_rej=multifibres[*idVOX].m_d_rej;
		*m_S0_acc=multifibres[*idVOX].m_S0_acc;
		*m_S0_rej=multifibres[*idVOX].m_S0_rej;
		*m_S0_prop=multifibres[*idVOX].m_S0_prop;

		if(m_include_f0){
			*m_f0_acc=multifibres[*idVOX].m_f0_acc;
			*m_f0_rej=multifibres[*idVOX].m_f0_rej;
			*m_f0_prop=multifibres[*idVOX].m_f0_prop;
			*m_f0_prior=multifibres[*idVOX].m_f0_prior;
			*m_f0=multifibres[*idVOX].m_f0;
			*m_f0_acc=0;
			*m_f0_rej=0;
			*m_f0_prop=0;
			*m_f0_prior=0;
			*m_f0=0;
			*m_tau_acc=multifibres[*idVOX].m_tau_acc;
			*m_tau_rej=multifibres[*idVOX].m_tau_rej;
			*m_tau_prop=multifibres[*idVOX].m_tau_prop;
			*m_tau_prior=multifibres[*idVOX].m_tau_prior;
			*m_tau=multifibres[*idVOX].m_tau;	
			*m_tau_acc=0;
			*m_tau_rej=0;
			*m_tau_prop=0;
			*m_tau_prior=0;
			*m_tau=0;
	__syncthreads();
	int mydirs = ndirections/threadsBlock;
	int mod = ndirections%threadsBlock;
	if(mod&&(idSubVOX<mod)) mydirs++;
	if(idSubVOX<nfib){
		int pos = (*idVOX*nfib)+idSubVOX;
		m_th[idSubVOX]=fibres[pos].m_th;
		m_ph[idSubVOX]=fibres[pos].m_ph;
		m_f[idSubVOX]=fibres[pos].m_f;
		m_th_acc[idSubVOX]=fibres[pos].m_th_acc;
		m_th_rej[idSubVOX]=fibres[pos].m_th_rej;
		m_ph_acc[idSubVOX]=fibres[pos].m_ph_acc;
		m_ph_rej[idSubVOX]=fibres[pos].m_ph_rej;
		m_f_acc[idSubVOX]=fibres[pos].m_f_acc;
		m_f_rej[idSubVOX]=fibres[pos].m_f_rej;

		fm_prior_en[idSubVOX]=fibres[pos].m_prior_en;
		m_th_prior[idSubVOX]=fibres[pos].m_th_prior;
		m_ph_prior[idSubVOX]=fibres[pos].m_ph_prior;
		m_f_prior[idSubVOX]=fibres[pos].m_f_prior;

		m_th_prop[idSubVOX]=fibres[pos].m_th_prop;
		m_ph_prop[idSubVOX]=fibres[pos].m_ph_prop;
		m_f_prop[idSubVOX]=fibres[pos].m_f_prop;

		m_lam_jump[idSubVOX]=fibres[pos].m_lam_jump;		
	}
	__syncthreads();
		
	//compute_signal_pre
	for(int f=0;f<nfib;f++){
		for(int i=0; i<mydirs; i++){	
			double myalpha = alpha[*posBV+idSubVOX+i*threadsBlock];		
			double cos_alpha_minus_theta=cos(double(myalpha-m_th[f]));   
			double cos_alpha_plus_theta=cos(double(myalpha+m_th[f]));
			int pos = *idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock;
			double aux = (cos(double(m_ph[f]-beta[*posBV+idSubVOX+i*threadsBlock]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
		     	aux =  aux*aux;
		 	angtmp[pos]= aux;
	for (int niter=0; niter<iterations; niter++){
		//prefetching randoms
		if (leader){
			*count_update=*count_update+1;
			int idrand = *idVOX*iterations*nparams+(niter*nparams);
			*localrand = 0;
			if(m_include_f0){
				prerandN[*localrand]=randomsN[idrand];
				prerandU[*localrand]=randomsU[idrand];
				idrand++;
				*localrand=*localrand+1;
			}
			if(rician){
				prerandN[*localrand]=randomsN[idrand];
				prerandU[*localrand]=randomsU[idrand];
				idrand++;
				*localrand=*localrand+1;
			}
			prerandN[*localrand]=randomsN[idrand];
			prerandU[*localrand]=randomsU[idrand];
			idrand++;
			*localrand=*localrand+1;
			if(model==2){
				prerandN[*localrand]=randomsN[idrand];
				prerandU[*localrand]=randomsU[idrand];
				idrand++;
				*localrand=*localrand+1;
			}
			prerandN[*localrand]=randomsN[idrand];
			prerandU[*localrand]=randomsU[idrand];
			idrand++;
			*localrand=*localrand+1;

			for(int f=0;f<nfib;f++){
				prerandN[*localrand]=randomsN[idrand];
				prerandU[*localrand]=randomsU[idrand];
				idrand++;
				*localrand=*localrand+1;

				prerandN[*localrand]=randomsN[idrand];
				prerandU[*localrand]=randomsU[idrand];
				idrand++;
				*localrand=*localrand+1;

				prerandN[*localrand]=randomsN[idrand];					
				prerandU[*localrand]=randomsU[idrand];	
				idrand++;	
				*localrand=*localrand+1;	
			*localrand = 0;
		}
////////////////////////////////////////////////////////////////// F0
		if(m_include_f0){
			if (leader){
				propose(m_f0,old,*m_f0_prop,prerandN[*localrand]);
				//compute_f0_prior()     
				old[1]=*m_f0_prior;
	      			if(*m_f0<=0 || *m_f0 >=1){ 
					rejflag[0]=true;
				}else{ 	
					rejflag[0]=false;
					if(!m_ardf0){
						*m_f0_prior=0;
	      				}else{
						*m_f0_prior=log(double(*m_f0));
				//compute_prior()
				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
				//reject_f_sum()
				rejflag[1]=(*fsum>1);
			}
			__syncthreads();
			//compute_likelihood()
			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
			__syncthreads();
				rejflag[2]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]);
				*localrand=*localrand+1;	
				if(!rejflag[0]){
					if(!rejflag[1]){
						if(rejflag[2]){
		  					*m_f0_acc=*m_f0_acc+1;   
						}else{
							reject(m_f0,m_f0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_f0_rej);
					}else{
						reject(m_f0,m_f0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_f0_rej);
					reject(m_f0,m_f0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_f0_rej);
////////////////////////////////////////////////////////////////// TAU
		if(rician){
			if (leader){
				propose(m_tau,old,*m_tau_prop,prerandN[*localrand]);
				//compute_tau_prior()     
				old[1]=*m_tau_prior;
	      			if(*m_tau<=0){ 
					rejflag[0]=true;
				}else{ 	
					rejflag[0]=false;
					*m_tau_prior=0;
				}
				//compute_prior()
				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
			}
			__syncthreads();
			//compute_likelihood()
			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
			__syncthreads();
				rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]);
				*localrand=*localrand+1;
				if(!rejflag[0]){
					if(rejflag[1]){
		  				*m_tau_acc=*m_tau_acc+1;   
						reject(m_tau,m_tau_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_tau_rej);
					reject(m_tau,m_tau_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_tau_rej);
////////////////////////////////////////////////////////////////// D
			propose(m_d,old,*m_d_prop,prerandN[*localrand]);
			//compute_d_prior_f0()      
      			if(*m_d<0 || *m_d > 0.008){
				rejflag[0]=true;
			}else{ 
				*m_d_prior=0;
				rejflag[0]=false;
      			}
		}
		//compute_signal()
		for(int f=0;f<nfib;f++){
			for(int i=0; i<mydirs; i++){
				int pos = *idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock;
				compute_signal(&signals[pos],&oldsignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model);
			}
		}
		//compute_iso_signal()
		for(int i=0; i<mydirs; i++){
			int pos = *idVOX*ndirections + idSubVOX + i*threadsBlock;
			compute_iso_signal(&isosignals[pos],&oldisosignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,model);
			//compute_prior()
			compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
		}
		//compute_likelihood()
		compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);		
		__syncthreads();
			rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]);
			*localrand=*localrand+1;
		__syncthreads();
       		if(!rejflag[0]){
			if(rejflag[1]){
	  			if (leader) *m_d_acc=*m_d_acc+1;   
			}else{
				if (leader){
					reject(m_d,m_d_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_d_rej);
				restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock);
				restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock);
			}
        	}else{ 
			if (leader){
				reject(m_d,m_d_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_d_rej);
      			restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock);
			restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock);
////////////////////////////////////////////////////////////////// D_STD
		if(model==2){
			if (leader){	
				propose(m_dstd,old,*m_dstd_prop,prerandN[*localrand]);
				//compute_d_std_prior()     
				old[1]=*m_dstd_prior;
	      			if(*m_dstd<=0 || *m_dstd > 0.01){ 
					rejflag[0]=true;
				}else{ 	
					rejflag[0]=false;
					*m_dstd_prior=log(double(*m_dstd));
			}
			__syncthreads();
			//compute_signal()
			for(int f=0;f<nfib;f++){
				for(int i=0; i<mydirs; i++){				
					int pos = *idVOX*ndirections*nfib + f*ndirections + idSubVOX + i*threadsBlock;
					compute_signal(&signals[pos],&oldsignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model);
			}
			//compute_iso_signal()
			for(int i=0; i<mydirs; i++){
				int pos = *idVOX*ndirections + idSubVOX + i*threadsBlock;
				compute_iso_signal(&isosignals[pos],&oldisosignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,model);
				//compute_prior()
				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
			}
			__syncthreads();
			//compute_likelihood()
			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
			__syncthreads();
				rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]);
				*localrand=*localrand+1;					
			}
			__syncthreads();
			if(!rejflag[0]){
				if(rejflag[1]){
		  			if (leader) *m_dstd_acc=*m_dstd_acc+1;   
				}else{ 
					if (leader){
						reject(m_dstd,m_dstd_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_dstd_rej);
					restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock);
					restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock);
			}else{ 
				if (leader){
					reject(m_dstd,m_dstd_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_dstd_rej);
				restore_signals(signals,oldsignals,*idVOX,idSubVOX,mydirs,nfib,ndirections,threadsBlock);
				restore_isosignals(isosignals,oldisosignals,*idVOX,idSubVOX,mydirs,ndirections,threadsBlock);
////////////////////////////////////////////////////////////////// S0
			propose(m_S0,old,*m_S0_prop,prerandN[*localrand]);
			//compute_S0_prior()
			old[1]=*m_S0_prior;
        		if(*m_S0<0) rejflag[0]=true;
        		else{    
				*m_S0_prior=0;
	  			rejflag[0]=false;
        		}
			//compute_prior()
			compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
		}
		__syncthreads();
		//compute_likelihood()
		compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
		__syncthreads();
			rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]);
			*localrand=*localrand+1;
        		if(!rejflag[0]){
				if(rejflag[1]){
	  				*m_S0_acc=*m_S0_acc+1;   
				}else{
					reject(m_S0,m_S0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_S0_rej);
				reject(m_S0,m_S0_prior,old,m_prior_en,m_old_prior_en,m_energy,m_old_energy,m_S0_rej);
			}
        	}
////////////////////////////////////////////////////////////////////////////     TH
     		for(int fibre=0;fibre<nfib;fibre++){  
			if (leader){ 
				propose(&m_th[fibre],old,m_th_prop[fibre],prerandN[*localrand]);
				//compute_th_prior()
				old[1]=m_th_prior[fibre];
      	   			if(m_th[fibre]==0){
					m_th_prior[fibre]=0;
		   		}else{
					m_th_prior[fibre]=-log(double(fabs(sin(double(m_th[fibre]))/2)));
	      	   		}
		  		//rejflag[0]=false; /////////////////always false
				//compute_prior()
				*fm_old_prior_en=fm_prior_en[fibre];
	      	   		fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];	
			}
			__syncthreads();
			//compute_signal()
			//compute_signal_pre	
			for(int i=0; i<mydirs; i++){
				double myalpha = alpha[*posBV+idSubVOX+i*threadsBlock];
				double cos_alpha_minus_theta=cos(double(myalpha-m_th[fibre]));   
				double cos_alpha_plus_theta=cos(double(myalpha+m_th[fibre]));
				int pos = *idVOX*ndirections*nfib + fibre*ndirections + idSubVOX + i*threadsBlock;
				int pos2 = *idVOX*ndirections + idSubVOX + i*threadsBlock;
				oldangtmp[pos2]=angtmp[pos];
				double aux = (cos(double(m_ph[fibre]-beta[*posBV+idSubVOX+i*threadsBlock]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
		     		aux =  aux*aux;
		 		angtmp[pos]= aux;

				compute_signal(&signals[pos],&oldsignals[pos],bvals[*posBV+idSubVOX+i*threadsBlock],m_d,m_dstd,angtmp[pos],model);
				//compute_prior()
				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);	
			}
			__syncthreads();
			//compute_likelihood()
			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,&signals[*idVOX*nfib*ndirections],&isosignals[*idVOX*ndirections],&datam[*idVOX*ndirections],fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
			__syncthreads();
				rejflag[1]=compute_test_energy(m_energy,m_old_energy,*m_prior_en,*m_likelihood_en,prerandU[*localrand]);
				*localrand=*localrand+1;	
			}
			__syncthreads();
			if(rejflag[1]){
		  		if (leader) m_th_acc[fibre]++;   
					rejectF(&m_th[fibre],&m_th_prior[fibre],old,m_prior_en,m_old_prior_en,&fm_prior_en[fibre],fm_old_prior_en,m_energy,m_old_energy,&m_th_rej[fibre]);
				}
				//compute_signal_pre undo
				restore_angtmp_signals(signals,oldsignals,angtmp,oldangtmp,*idVOX,idSubVOX,mydirs,nfib,fibre,ndirections,threadsBlock);
			}
			__syncthreads();
///////////////////////////////////////     PH