diff --git a/CUDA/runmcmc_kernels.cu b/CUDA/runmcmc_kernels.cu
index 8d6461a614bdd033ef006ac361a6171e2ad759f9..de5ee51c1eb7e2f2d6a65da71b05fc4d462534f3 100644
--- a/CUDA/runmcmc_kernels.cu
+++ b/CUDA/runmcmc_kernels.cu
@@ -19,45 +19,41 @@
 
 #include <options.h>
 
-__device__ 
-static const float maxfloat=1e10;
+#define maxfloat 1e10
 
 __device__
-inline void compute_signal(double *signals,double *oldsignals,double mbvals,float m_d,int model, float m_dstd,float angtmp){
+inline void compute_signal(double *signals,double *oldsignals,double mbvals,float* m_d, float* m_dstd,float angtmp, int model){
 
 	oldsignals[0]=signals[0];
 
-	if(model==1 || m_dstd<1e-5){	
-		signals[0]=exp(double(-m_d*mbvals*angtmp));
+	if(model==1 || m_dstd[0]<1e-5){	
+		signals[0]=exp(double(-m_d[0]*mbvals*angtmp));
 	}else{
-		float dbeta= m_d/(m_dstd*m_dstd);
-	 	float dalpha=m_d*dbeta;         
+		float dbeta= m_d[0]/(m_dstd[0]*m_dstd[0]);
+	 	float dalpha=m_d[0]*dbeta;         
            	signals[0]=exp(double(log(double(dbeta/(dbeta+mbvals*angtmp)))*dalpha)); 
 	}
 }
 
 __device__ 
-inline void compute_iso_signal(double *isosignals,double *oldisosignals, double mbvals,float m_d, float m_dstd, int model){
+inline void compute_iso_signal(double *isosignals,double *oldisosignals, double mbvals,float* m_d, float* m_dstd, int model){
 
 	*oldisosignals=*isosignals;
 
-	 if(model==1 || m_dstd<1e-5){
-	 	*isosignals=exp(-m_d*mbvals);	
+	if(model==1 || m_dstd[0]<1e-5){
+	 	*isosignals=exp(-m_d[0]*mbvals);	
 	}else{
-		float dbeta= m_d/(m_dstd*m_dstd);
-	  	float dalpha=m_d*dbeta;
+		float dbeta= m_d[0]/(m_dstd[0]*m_dstd[0]);
+	  	float dalpha=m_d[0]*dbeta;
 		*isosignals=exp(double(log(double(dbeta/(dbeta+mbvals)))*dalpha));
 	}
 }
 
 __device__ 
-inline void compute_prior(float *m_prior_en, float *m_prior_en_old,float m_d_prior,float m_S0_prior,float *m_prior_enf, float m_f0_prior, float m_tau_prior, float m_dstd_prior){			
+inline void compute_prior(float *m_prior_en, float *m_prior_en_old,float* m_d_prior,float* m_S0_prior,float *m_prior_enf, float* m_f0_prior, float* m_tau_prior, float* m_dstd_prior, int nfib){			
         *m_prior_en_old=*m_prior_en;
-	*m_prior_en=m_d_prior+m_S0_prior;
-	*m_prior_en=*m_prior_en+m_dstd_prior;
-	*m_prior_en=*m_prior_en+m_tau_prior;
-	*m_prior_en=*m_prior_en+m_f0_prior;	
-	for(int f=0;f<NFIBRES;f++){
+	*m_prior_en=*m_d_prior+*m_S0_prior+*m_dstd_prior+*m_tau_prior+*m_f0_prior;
+	for(int f=0;f<nfib;f++){
 		*m_prior_en=*m_prior_en+m_prior_enf[f];
 	}	
 }
@@ -85,83 +81,68 @@ inline float logIo(const float& x){
 }
 
 __device__ 
-inline void compute_likelihood(bool leader,int idrest,float m_d,float m_S0,float *m_likelihood_en,float *m_f,double *signals,double *isosignals,double *mdata,float fsum,double *reduction, double m_f0, const bool rician, double m_tau,int ndirs){
+inline void compute_likelihood(int idSubVOX,float* m_S0,float *m_likelihood_en,float *m_f,double *signals,double *isosignals,double *mdata,float* fsum,double *reduction, float* m_f0, const bool rician, float* m_tau,int mydirs, int threadsBlock, int ndirections, int nfib){
 	
-
-	double sumsquares=0;
 	double pred[MAXNDIRS_PER_THREAD];
 
-	for(int i=0; i<ndirs; i++){
+	for(int i=0; i<mydirs; i++){
 		pred[i]=0;
-	      	for(int f=0;f<NFIBRES;f++){
-			pred[i]= pred[i]+m_f[f]*signals[i*NFIBRES+f];
+	      	for(int f=0;f<nfib;f++){
+			pred[i]= pred[i]+m_f[f]*signals[i*nfib+f];
 	     	}
 	}
 
-	for(int i=0; i<ndirs; i++){
-		pred[i]= m_S0*(pred[i]+(1-fsum)*isosignals[i]+m_f0); //F0
+	for(int i=0; i<mydirs; i++){
+		pred[i]= m_S0[0]*(pred[i]+(1-fsum[0])*isosignals[i]+m_f0[0]); //F0
 	}
 
-	reduction[idrest]=0;
-	for(int i=0; i<ndirs; i++){	
-		if(!rician) 
-			reduction[idrest] = reduction[idrest]+((mdata[i]-pred[i])*(mdata[i]-pred[i]));
-		else{
-			pred[i]= log(mdata[i])+(-0.5*m_tau*(mdata[i]*mdata[i]+pred[i]*pred[i])+logIo(m_tau*pred[i]*mdata[i]));  
-			reduction[idrest] = reduction[idrest]+pred[i];
+	reduction[idSubVOX]=0;
+	for(int i=0; i<mydirs; i++){	
+		if(!rician){
+			double diff = mdata[i]-pred[i];
+			reduction[idSubVOX] = reduction[idSubVOX]+(diff*diff);
+		}else{
+			pred[i]= log(mdata[i])+(-0.5*m_tau[0]*(mdata[i]*mdata[i]+pred[i]*pred[i])+logIo(m_tau[0]*pred[i]*mdata[i]));  
+			reduction[idSubVOX] = reduction[idSubVOX]+pred[i];
 		}
 	}
 
 	__syncthreads();
 
-	unsigned int s2=THREADS_BLOCK;
-	for(unsigned int s=THREADS_BLOCK/2; s>0; s>>=1) {
-		if((s2%2)&&(idrest==(s-1))) reduction[idrest]= reduction[idrest] + reduction[idrest + s +1]; 
-        	if (idrest < s){
-            		reduction[idrest] = reduction[idrest] + reduction[idrest + s];
+	unsigned int s2=threadsBlock;
+	for(unsigned int s=threadsBlock>>1; s>0; s>>=1) {
+		if((s2%2)&&(idSubVOX==(s-1))) reduction[idSubVOX]= reduction[idSubVOX] + reduction[idSubVOX + s +1]; 
+        	if (idSubVOX < s){
+            		reduction[idSubVOX] = reduction[idSubVOX] + reduction[idSubVOX + s];
        	 	}
 		s2=s;
         	__syncthreads();
     	}
-	if(leader){
+	if(idSubVOX==0){
+		double sumsquares=0;
 		sumsquares+=reduction[0];
 		if(!rician){ 
-		 	*m_likelihood_en=(NDIRECTIONS/2.0)*log(sumsquares/2.0); 
+		 	*m_likelihood_en=(ndirections/2.0)*log(sumsquares/2.0); 
 		}else{
-			*m_likelihood_en= -NDIRECTIONS*log(m_tau)-sumsquares;
+			*m_likelihood_en= -ndirections*log(m_tau[0])-sumsquares;
 		}
 	}
-
-	//REDUCTION WITH ONLY ONE THREAD. THE LEADER
-	/*if(leader){
-		if(!rician){ 
-			for(int i=0;i<THREADS_BLOCK;i++){
-				sumsquares+=reduction[i]; //normal sum
-			}
-      			*m_likelihood_en=(NDIRECTIONS/2.0)*log(double(sumsquares/2.0)); 
-		}else{
-			for(int i=0;i<THREADS_BLOCK;i++){
-				sumsquares+=reduction[i]; //normal sum
-			}
-			*m_likelihood_en= -NDIRECTIONS*log(m_tau)-sumsquares;
-		}
-	}*/
 }
-						  
+			  
 extern "C" __global__ void init_Fibres_Multifibres_kernel(	//INPUT
-								const double*			data,
+								const double*			datam,
 								const double*			params,
 								const float*			tau,
 								const double*			bvals,
 								const double*			alpha,
 								const double*			beta,
-								const int 			nvox,	
+								const int			ndirections,
 								const int 			nfib,
 								const int 			nparams_fit,
 								const int 			model,
 								const float 			fudgevalue,
 								const bool			m_includef0,
-								const bool			m_rician,
+								const bool			rician,
 								const bool 			m_ardf0,	// opts.ardf0.value()
 								const bool 			ard_value,	// opts.all_ard.value()
 								const bool 			no_ard_value,	// opts.no_ard.value()
@@ -171,230 +152,246 @@ extern "C" __global__ void init_Fibres_Multifibres_kernel(	//INPUT
 								double*				signals,
 								double*				isosignals)
 {
-	int add=0;
-	if(model==2) add=1;		// if model 2 we have d_std and then 1 more parameter in position 2
-
-	int idmult= blockIdx.x;		// this is the voxel number
-	int idrest= threadIdx.x;	// and the number of thread inside each voxel
-	bool leader = (idrest==0);	// is the first one in the voxel?
-
-	int ndirs = 1;
-	if(idrest<(NDIRECTIONS%THREADS_BLOCK)) ndirs=MAXNDIRS_PER_THREAD;
-	else if((NDIRECTIONS%THREADS_BLOCK)==0) ndirs=MAXNDIRS_PER_THREAD;
-	else ndirs=(MAXNDIRS_PER_THREAD-1);
-
-	if(idrest>=NDIRECTIONS)  { return; }
-
-	__shared__ float fsum;					
-	__shared__ double reduction[THREADS_BLOCK];		
-	__shared__ float m_d;					
-	__shared__ float m_S0;					
-	__shared__ float m_f0;					
-	__shared__ float m_dstd;
-	__shared__ float m_tau;
-	__shared__ float m_th[NFIBRES];
-	__shared__ float m_ph[NFIBRES];
-	__shared__ float m_f[NFIBRES];
-	__shared__ float m_likelihood_en;
-	__shared__ double mydata[MAXNDIRS_PER_THREAD*THREADS_BLOCK];
-
-	__shared__ float m_prior_en;
-
-	double mysig[MAXNDIRS_PER_THREAD*NFIBRES];			
-	double myisosig[MAXNDIRS_PER_THREAD];	
+	int idSubVOX= threadIdx.x;
+	int threadsBlock = blockDim.x;
+	int idVOX= blockIdx.x;
+	bool leader = (idSubVOX==0);
+
+	////////// DYNAMIC SHARED MEMORY ///////////
+	extern __shared__ double shared[];
+	double* reduction = (double*)shared;				//threadsBlock
+	double* tmp = (double*) &reduction[threadsBlock];		//1
+
+	float* m_S0 = (float*) &tmp[1];					//1
+	float* m_d = (float*) &m_S0[1];					//1
+	float* m_dstd =(float*) &m_d[1];				//1
+	float* m_f0 = (float*) &m_dstd[1];				//1
+	float* m_tau = (float*) &m_f0[1];				//1
+	float* m_th = (float*) &m_tau[1];				//nfib
+	float* m_ph = (float*) &m_th[nfib];				//nfib
+	float* m_f = (float*) &m_ph[nfib];				//nfib
+
+	float* fsum = (float*) &m_f[nfib];				//1
+	float* m_likelihood_en = (float*) &fsum[1];			//1
+	float* m_prior_en = (float*) &m_likelihood_en[1];		//1
+	////////// DYNAMIC SHARED MEMORY ///////////
 	
-	for(int i=0; i<ndirs; i++){	
-		mydata[idrest*MAXNDIRS_PER_THREAD+i] = data[(idmult*NDIRECTIONS)+idrest+i*THREADS_BLOCK];
-	}	
 
+	double mysig[MAXNDIRS_PER_THREAD*MAXNFIBRES];				
+	double myisosig[MAXNDIRS_PER_THREAD];				
+	double mydata[MAXNDIRS_PER_THREAD];
+	double mybvals[MAXNDIRS_PER_THREAD];	
+	double myalpha[MAXNDIRS_PER_THREAD];	
+	double mybeta[MAXNDIRS_PER_THREAD];	
+ 		
+	float angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES];			//this is to pre compute signal
+	float cos_alpha_minus_theta;
+	float cos_alpha_plus_theta;
+	
 	// m_s0-params[0] 	m_d-params[1] 	m_f-m_th-m_ph-params[2,3,4,5, etc..]   	m_f0-params[nparams-1]
-
 	if(leader){
-		m_S0 = params[idmult*nparams_fit];
-		multifibres[idmult].m_S0 = m_S0;
-		multifibres[idmult].m_S0_prior = 0;
-		multifibres[idmult].m_S0_acc = 0;
-		multifibres[idmult].m_S0_rej = 0;
+		*m_S0 = params[idVOX*nparams_fit];
+		multifibres[idVOX].m_S0 = *m_S0;
+		multifibres[idVOX].m_S0_prior = 0;
+		multifibres[idVOX].m_S0_acc = 0;
+		multifibres[idVOX].m_S0_rej = 0;
 	
-		m_d=params[idmult*nparams_fit+1];
-		if(m_d<0 || m_d>0.008) m_d=2e-3;			//this is in xfibres...after fit
-		multifibres[idmult].m_d = m_d;
-		multifibres[idmult].m_d_prior = 0;
-		multifibres[idmult].m_d_acc = 0;
-		multifibres[idmult].m_d_rej = 0;
+		*m_d=params[idVOX*nparams_fit+1];
+		if(*m_d<0 || *m_d>0.008) *m_d=2e-3;			//this is in xfibres...after fit
+		multifibres[idVOX].m_d = *m_d;
+		multifibres[idVOX].m_d_prior = 0;
+		multifibres[idVOX].m_d_acc = 0;
+		multifibres[idVOX].m_d_rej = 0;
 
 		if(model==2){ 
-			m_dstd=params[idmult*nparams_fit+2];
-			if(m_dstd<0 || m_dstd>0.01) m_dstd=m_d/10;	//this is in xfibres...after fit
+			*m_dstd=params[idVOX*nparams_fit+2];
+			if(*m_dstd<0 || *m_dstd>0.01) *m_dstd=*m_d/10;	//this is in xfibres...after fit
 		}
-		else m_dstd = 0;
-		multifibres[idmult].m_dstd = m_dstd;
-		multifibres[idmult].m_dstd_prior = 0;
-		multifibres[idmult].m_dstd_acc = 0;
-		multifibres[idmult].m_dstd_rej = 0;
-
-		if (m_includef0) m_f0=params[idmult*nparams_fit+nparams_fit-1];
-		else m_f0=0;
-		multifibres[idmult].m_f0 = m_f0;
-		multifibres[idmult].m_f0_prior = 0;
-		multifibres[idmult].m_f0_acc = 0;
-		multifibres[idmult].m_f0_rej = 0;
-
-		m_tau = tau[idmult];
-		multifibres[idmult].m_tau = m_tau;
-		multifibres[idmult].m_tau_prior = 0;
-		multifibres[idmult].m_tau_acc = 0;
-		multifibres[idmult].m_tau_rej = 0;
-           
-		for(int fib=0;fib<nfib;fib++){
-			m_th[fib]=params[idmult*nparams_fit+2+3*fib+1+add];
-			fibres[idmult*nfib+fib].m_th = m_th[fib];
-			fibres[idmult*nfib+fib].m_th_prop = 0.2;
-			float m_th_prior = 0;
-			fibres[idmult*nfib+fib].m_th_acc = 0;
-			fibres[idmult*nfib+fib].m_th_rej = 0;
-			//compute_th_prior();
-	      		if(m_th==0){
-				m_th_prior=0;
-			}else{
-				m_th_prior=-log(double(fabs(sin(double(m_th[fib]))/2)));
-	      		}
-			fibres[idmult*nfib+fib].m_th_prior = m_th_prior;
+		else *m_dstd = 0;
+		multifibres[idVOX].m_dstd = *m_dstd;
+		multifibres[idVOX].m_dstd_prior = 0;
+		multifibres[idVOX].m_dstd_acc = 0;
+		multifibres[idVOX].m_dstd_rej = 0;
+
+		if (m_includef0) *m_f0=params[idVOX*nparams_fit+nparams_fit-1];
+		else *m_f0=0;
+		multifibres[idVOX].m_f0 = *m_f0;
+		multifibres[idVOX].m_f0_prior = 0;
+		multifibres[idVOX].m_f0_acc = 0;
+		multifibres[idVOX].m_f0_rej = 0;
+
+		*m_tau = tau[idVOX];
+		multifibres[idVOX].m_tau = *m_tau;
+		multifibres[idVOX].m_tau_prior = 0;
+		multifibres[idVOX].m_tau_acc = 0;
+		multifibres[idVOX].m_tau_rej = 0;
+	}
+	__syncthreads();
+
+	int mydirs = ndirections/threadsBlock;
+	int mod = ndirections%threadsBlock;
+	if(mod&&(idSubVOX<mod)) mydirs++;
+
+	if(idSubVOX<nfib){
+		int add=0;
+		if(model==2) add=1;		// if model 2 we have d_std and then 1 more parameter in position 2
+		int pos = (idVOX*nfib)+idSubVOX;
+
+		m_th[idSubVOX]=params[idVOX*nparams_fit+2+3*idSubVOX+1+add];
+		fibres[pos].m_th = m_th[idSubVOX];
+		fibres[pos].m_th_prop = 0.2;
+		float m_th_prior = 0;
+		fibres[pos].m_th_acc = 0;
+		fibres[pos].m_th_rej = 0;
+		
+		//compute_th_prior();
+	      	if(m_th==0){
+			m_th_prior=0;
+		}else{
+			m_th_prior=-log(double(fabs(sin(double(m_th[idSubVOX]))/2)));
+	      	}
+		fibres[pos].m_th_prior = m_th_prior;
 		
-			float m_ph_prior=0;	//compute_ph_prior();
-			m_ph[fib]=params[idmult*nparams_fit+2+3*fib+2+add];
-			fibres[idmult*nfib+fib].m_ph = m_ph[fib];
-			fibres[idmult*nfib+fib].m_ph_prop = 0.2;
-			fibres[idmult*nfib+fib].m_ph_prior = 0;	//compute_ph_prior();
-			fibres[idmult*nfib+fib].m_ph_acc = 0;
-			fibres[idmult*nfib+fib].m_ph_rej = 0;
-
-			m_f[fib] = params[idmult*nparams_fit+2+3*fib+add]; 
-			fibres[idmult*nfib+fib].m_f=m_f[fib];
-			fibres[idmult*nfib+fib].m_f_prop = 0.2;
-			float m_f_prior = 0;
-			fibres[idmult*nfib+fib].m_f_acc = 0;
-			fibres[idmult*nfib+fib].m_f_rej = 0;
+		float m_ph_prior=0;	//compute_ph_prior();
+		m_ph[idSubVOX]=params[idVOX*nparams_fit+2+3*idSubVOX+2+add];
+		fibres[pos].m_ph = m_ph[idSubVOX];
+		fibres[pos].m_ph_prop = 0.2;
+		fibres[pos].m_ph_prior = 0;	//compute_ph_prior();
+		fibres[pos].m_ph_acc = 0;
+		fibres[pos].m_ph_rej = 0;
+
+		m_f[idSubVOX] = params[idVOX*nparams_fit+2+3*idSubVOX+add]; 
+		fibres[pos].m_f=m_f[idSubVOX];
+		fibres[pos].m_f_prop = 0.2;
+		float m_f_prior = 0;
+		fibres[pos].m_f_acc = 0;
+		fibres[pos].m_f_rej = 0;
 			
-			if(fib==0){
-				fibres[idmult*nfib+fib].m_lam_jump = ard_value;
-			}else{
-				fibres[idmult*nfib+fib].m_lam_jump = !no_ard_value;
-			}
-
-			//compute_f_prior();
-      			if (m_f[fib]<=0 | m_f[fib]>=1 ){
-      			}else{
-	  			if(fibres[idmult*nfib+fib].m_lam_jump){              
-	    				m_f_prior=log(double(m_f[fib]));
-	  			}else{
-	    				m_f_prior=0;
-				}
-				m_f_prior= fudgevalue* m_f_prior;
-      			}
-			fibres[idmult*nfib+fib].m_f_prior = m_f_prior;
+		if(idSubVOX==0){
+			fibres[pos].m_lam_jump = ard_value;
+		}else{
+			fibres[pos].m_lam_jump = !no_ard_value;
+		}
 
-			//fibres[vox].m_lam = m_lam; ??
-			//fibres[vox].m_lam_prop = 1;
-			//fibres[vox].m_lam_prior = 0;
-			//compute_lam_prior();
+		//compute_f_prior();
+      		if (m_f[idSubVOX]<=0 | m_f[idSubVOX]>=1 ){
+      		}else{
+	  		if(fibres[pos].m_lam_jump){              
+	    			m_f_prior=log(double(m_f[idSubVOX]));
+	  		}else{
+	    			m_f_prior=0;
+			}
+			m_f_prior= fudgevalue* m_f_prior;
+      		}
+		fibres[pos].m_f_prior = m_f_prior;
 
-			//compute_prior();
-			fibres[idmult*nfib+fib].m_prior_en= m_th_prior + m_ph_prior + m_f_prior;
-		}
-		//for likehood 
-		fsum=m_f0;
-		for(int g=0;g<NFIBRES;g++){  
-			fsum+=m_f[g];
-		}
+		//fibres[vox].m_lam = m_lam; ??
+		//fibres[vox].m_lam_prop = 1;
+		//fibres[vox].m_lam_prior = 0;
+		//compute_lam_prior();
 
+		//compute_prior();
+		fibres[pos].m_prior_en= m_th_prior + m_ph_prior + m_f_prior;
 	}
-	__syncthreads();
 
-	float angtmp[MAXNDIRS_PER_THREAD*NFIBRES];
-	float cos_alpha_minus_theta;
-	float cos_alpha_plus_theta;
+	for(int i=0; i<mydirs; i++){	
+		int pos = (idVOX*ndirections)+idSubVOX+i*threadsBlock;
+		mydata[i] = datam[pos];
+		mybvals[i] = bvals[pos];
+		myalpha[i] = alpha[pos];
+		mybeta[i] = beta[pos];
+	}
 
-	double old;
+	__syncthreads();
 
 	//compute_signal_pre
-	for(int i=0; i<ndirs; i++){
-		for(int f=0;f<NFIBRES;f++){				
-			cos_alpha_minus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]-m_th[f]));   
-			cos_alpha_plus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]+m_th[f]));
-		     	angtmp[i*NFIBRES+f]= (cos(double(m_ph[f]-beta[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
-		 	angtmp[i*NFIBRES+f]=angtmp[i*NFIBRES+f]*angtmp[i*NFIBRES+f];
+	for(int i=0; i<mydirs; i++){
+		for(int f=0;f<nfib;f++){	
+			cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[f]));   
+			cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[f]));
+		     	angtmp[i*nfib+f]= (cos(double(m_ph[f]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
+		 	angtmp[i*nfib+f]=angtmp[i*nfib+f]*angtmp[i*nfib+f];
 		}
 	}
 
 	//compute_signal()
-	for(int i=0; i<ndirs; i++){
-		for(int f=0;f<NFIBRES;f++){
-			compute_signal(&mysig[i*NFIBRES+f],&old,bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+f]);
+	double old;
+	for(int i=0; i<mydirs; i++){
+		for(int f=0;f<nfib;f++){
+			compute_signal(&mysig[i*nfib+f],&old,mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model);
 		}
 	}
 
 	if(leader){
+		//for likehood 
+		*fsum=*m_f0;
+		for(int g=0;g<nfib;g++){  
+			*fsum+=m_f[g];
+		}
 		//initialise_energies();
 	      	//compute_d_prior(); m_d_prior=0; so i don't do nothing, it is already 0
 	      	if(model==2){
 			//compute_d_std_prior();
-			if(m_dstd<=0 || m_dstd>0.01){
+			if(*m_dstd<=0 || *m_dstd>0.01){
 	      		}else{
-				multifibres[idmult].m_dstd_prior=log(double(m_dstd));
+				multifibres[idVOX].m_dstd_prior=log(double(*m_dstd));
 			}
 		}
-	      	//compute_tau_prior(); m_tau_prior=0; so i don't do nothing, it is already 0
+	      	//compute_tau_prior(); m_tau_prior=0; so it doesn't do nothing, it is already 0
 	      	if (m_includef0){
 			//compute_f0_prior();
-			if (m_f0<=0 || m_f0>=1){
+			if (*m_f0<=0 || *m_f0>=1){
 	      		}else{
 				if(!m_ardf0){}     	//Without ARD
 				else              	//With ARD
-		  			multifibres[idmult].m_f0_prior= log(double(m_f0));
+		  			multifibres[idVOX].m_f0_prior= log(double(*m_f0));
 	      		}
 		}
 	      	//compute_S0_prior(); m_S0_prior=0; so i don't do nothing, it is already 0
-		m_prior_en = 0;
+		*m_prior_en = 0;
 	      	//compute_prior();
 	      	//m_prior_en=m_d_prior+m_S0_prior; is 0
 	      	if(model==2)
-			m_prior_en= m_prior_en+multifibres[idmult].m_dstd_prior;
+			*m_prior_en= *m_prior_en+multifibres[idVOX].m_dstd_prior;
 	      	//if(m_rician) m_prior_en=m_prior_en+m_tau_prior; is 0
 	      	if (m_includef0)
-			m_prior_en=m_prior_en+multifibres[idmult].m_f0_prior;
+			*m_prior_en=*m_prior_en+multifibres[idVOX].m_f0_prior;
 	      	for(int fib=0;fib<nfib;fib++){
-			m_prior_en=m_prior_en+ fibres[idmult*nfib+fib].m_prior_en;
+			*m_prior_en=*m_prior_en+ fibres[idVOX*nfib+fib].m_prior_en;
 	      	} 
-		multifibres[idmult].m_prior_en = m_prior_en;
+		multifibres[idVOX].m_prior_en = *m_prior_en;
 	}
-      	//compute_iso_signal()
-	for(int i=0; i<ndirs; i++){
-		compute_iso_signal(&myisosig[i],&old, bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,m_dstd,model);
+
+	//compute_iso_signal()
+	for(int i=0; i<mydirs; i++){
+		compute_iso_signal(&myisosig[i],&old, mybvals[i],m_d,m_dstd,model);
 	}		
+ 
 	__syncthreads();
-      	//compute_likelihood()
-	compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,m_rician,m_tau,ndirs);
+
+	//compute_likelihood()
+	compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
+
 	__syncthreads();
 
 	if(leader){
-		multifibres[idmult].m_likelihood_en = m_likelihood_en;
+		multifibres[idVOX].m_likelihood_en = *m_likelihood_en;
 	      	
 		//compute_energy();	
-		multifibres[idmult].m_energy = m_prior_en+m_likelihood_en;
+		multifibres[idVOX].m_energy = *m_prior_en+*m_likelihood_en;
 
 	    	//initialise_props();
-	      	multifibres[idmult].m_S0_prop=multifibres[idmult].m_S0/10.0; 
-	      	multifibres[idmult].m_d_prop=m_d/10.0;
-	      	multifibres[idmult].m_dstd_prop=m_dstd/10.0;
-	      	multifibres[idmult].m_tau_prop=m_tau/2.0;
-	      	multifibres[idmult].m_f0_prop=0.2;
+	      	multifibres[idVOX].m_S0_prop=multifibres[idVOX].m_S0/10.0; 
+	      	multifibres[idVOX].m_d_prop=*m_d/10.0;
+	      	multifibres[idVOX].m_dstd_prop=*m_dstd/10.0;
+	      	multifibres[idVOX].m_tau_prop=*m_tau/2.0;
+	      	multifibres[idVOX].m_f0_prop=0.2;
 	}
 
-	for(int i=0; i<ndirs; i++){	
-		isosignals[(idmult*NDIRECTIONS)+idrest+i*THREADS_BLOCK] = myisosig[i];
-		for(int f=0;f<NFIBRES;f++){
-			signals[(idmult*NDIRECTIONS*NFIBRES)+(f*NDIRECTIONS)+idrest+i*THREADS_BLOCK]=mysig[i*NFIBRES+f];
+	for(int i=0; i<mydirs; i++){	
+		isosignals[(idVOX*ndirections)+idSubVOX+i*threadsBlock] = myisosig[i];
+		for(int f=0;f<nfib;f++){
+			signals[(idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]=mysig[i*nfib+f];
 		}
 	}
 }
@@ -407,7 +404,9 @@ extern "C" __global__ void runmcmc_burnin_kernel(	//INPUT
 							const double*			beta,
 							float*				randomsN,
 							float*				randomsU,
-							const int 			nvox,
+							const int			ndirections,
+							const int			nfib,
+							const int			nparams,
 							const int 			model,
 							const float 			fudgevalue,
 							const bool 			m_include_f0,
@@ -415,1126 +414,1128 @@ extern "C" __global__ void runmcmc_burnin_kernel(	//INPUT
 							const bool 			can_use_ard, 
 							const bool 			rician,
 							const int 			updateproposalevery, 	//update every this number of iterations	
-							const int 			iterations,		//num of iterations to do this time (maybe are a part of the total)
+							const int 			iterations,		//num of iterations to do this time (maybe is a part of the total)
 							const int 			current_iter,		//the number of the current iteration over the total iterations
 							//INPUT-OUTPUT
 							FibreGPU*			fibres,
-							MultifibreGPU*		multifibres,
+							MultifibreGPU*			multifibres,
 							double*				signals,
 							double*				isosignals)
 {	
-	int add=0;
-	int add2=0;
-	int add3=0;
-
-	if(m_include_f0) add=1;
-	if(rician) add2=1;
-	if(model==2) add3=1;
-
-	int idmult= blockIdx.x;
-	int idrest= threadIdx.x;
-	bool leader = (idrest==0);
-
-	int ndirs = 1;
-	if(idrest<(NDIRECTIONS%THREADS_BLOCK)) ndirs=MAXNDIRS_PER_THREAD;
-	else if((NDIRECTIONS%THREADS_BLOCK)==0) ndirs=MAXNDIRS_PER_THREAD;
-	else ndirs=(MAXNDIRS_PER_THREAD-1);
-
-	if(idrest>=NDIRECTIONS)  { return; }
-
-	__shared__ bool rejflag;				//max fibres 4
-	__shared__ bool rejflag2;				//max 1024 directions / 64 = 16 MAXDIR
-	__shared__ bool rejflag3;				//max shared used as base: 8KB of mydata + 
-	__shared__ float fsum;					//64 doubles = 0.5 KB
-	__shared__ double reduction[THREADS_BLOCK];		//23 floats + NFIBRES(4)*10 floats + 
-	__shared__ float m_d;					//NPARAMS(16)*2 floats = 95 floats, 0.37 KB
-	__shared__ float m_S0;					//3 bool + NFIBRES bool 
-	__shared__ float m_f0;					//10 int + NFIBRES*6 int = 0.16KB 
-	__shared__ float m_tau;					// MAX TOTAL 9.03 KB
-	__shared__ float m_dstd;
-	__shared__ float m_th[NFIBRES];
-	__shared__ float m_ph[NFIBRES];
-	__shared__ float m_f[NFIBRES];
-	__shared__ float m_likelihood_en;
-
-	__shared__ double tmp;
-
-	double mysig[MAXNDIRS_PER_THREAD*NFIBRES];			
-	double mysigold[MAXNDIRS_PER_THREAD*NFIBRES];			
+	int idSubVOX= threadIdx.x;
+	int threadsBlock = blockDim.x;
+	bool leader = (idSubVOX==0);
+
+	////////// DYNAMIC SHARED MEMORY ///////////
+	extern __shared__ double shared[];
+	double* reduction = (double*)shared;				//threadsBlock
+	double* tmp = (double*) &reduction[threadsBlock];		//1
+
+	float* m_S0 = (float*) &tmp[1];					//1
+	float* m_d = (float*) &m_S0[1];					//1
+	float* m_dstd =(float*) &m_d[1];				//1
+	float* m_f0 = (float*) &m_dstd[1];				//1
+	float* m_tau = (float*) &m_f0[1];				//1
+	float* m_th = (float*) &m_tau[1];				//nfib
+	float* m_ph = (float*) &m_th[nfib];				//nfib
+	float* m_f = (float*) &m_ph[nfib];				//nfib
+
+	float* m_S0_prior = (float*) &m_f[nfib];			//1
+	float* m_d_prior = (float*) &m_S0_prior[1];			//1
+	float* m_dstd_prior = (float*) &m_d_prior[1];			//1
+	float* m_f0_prior = (float*) &m_dstd_prior[1];			//1
+	float* m_tau_prior = (float*) &m_f0_prior[1];			//1
+	float* m_th_prior = (float*) &m_tau_prior[1];			//nfib
+	float* m_ph_prior = (float*) &m_th_prior[nfib];			//nfib
+	float* m_f_prior = (float*) &m_ph_prior[nfib];			//nfib
+
+	float* m_S0_prop = (float*) &m_f_prior[nfib];			//1
+	float* m_d_prop = (float*) &m_S0_prop[1];			//1
+	float* m_dstd_prop = (float*) &m_d_prop[1];			//1
+	float* m_f0_prop = (float*) &m_dstd_prop[1];			//1
+	float* m_tau_prop = (float*) &m_f0_prop[1];			//1
+	float* m_th_prop = (float*) &m_tau_prop[1];			//nfib
+	float* m_ph_prop = (float*) &m_th_prop[nfib];			//nfib
+	float* m_f_prop = (float*) &m_ph_prop[nfib];			//nfib
+
+	float* fsum = (float*) &m_f_prop[nfib];				//1
+	float* m_likelihood_en = (float*) &fsum[1];			//1
+	float* m_prior_en = (float*) &m_likelihood_en[1];		//1
+	float* m_old_prior_en = (float*) &m_prior_en[1];		//1
+	float* fm_prior_en = (float*) &m_old_prior_en[1];		//nfib		
+	float* fm_old_prior_en = (float*) &fm_prior_en[nfib];		//1		
+	float* m_energy = (float*) &fm_old_prior_en[1];			//1
+	float* m_old_energy = (float*) &m_energy[1];			//1
+	float* old = (float*) &m_old_energy[1];				//2
+
+	float* prerandN = (float*) &old[2];				//nparams
+	float* prerandU = (float*) &prerandN[nparams];			//nparams
+
+	int* m_S0_acc = (int*) &prerandU[nparams];			//1
+	int* m_d_acc = (int*) &m_S0_acc[1];				//1
+	int* m_dstd_acc = (int*) &m_d_acc[1];				//1
+	int* m_f0_acc = (int*) &m_dstd_acc[1];				//1
+	int* m_tau_acc = (int*) &m_f0_acc[1];				//1
+	int* m_th_acc = (int*) &m_tau_acc[1];				//nfib
+	int* m_ph_acc = (int*) &m_th_acc[nfib];				//nfib
+	int* m_f_acc = (int*) &m_ph_acc[nfib];				//nfib
+
+	int* m_S0_rej = (int*) &m_f_acc[nfib];				//1
+	int* m_d_rej = (int*) &m_S0_rej[1];				//1
+	int* m_dstd_rej = (int*) &m_d_rej[1];				//1	
+	int* m_f0_rej = (int*) &m_dstd_rej[1];				//1
+	int* m_tau_rej = (int*) &m_f0_rej[1];				//1	
+	int* m_th_rej = (int*) &m_tau_rej[1];				//nfib
+	int* m_ph_rej = (int*) &m_th_rej[nfib];				//nfib
+	int* m_f_rej = (int*) &m_ph_rej[nfib];				//nfib
+	
+	int* rejflag = (int*) &m_f_rej[nfib];				//3					
+	int* m_lam_jump = (int*) &rejflag[3];				//nfib	
+	int* idVOX = (int*) &m_lam_jump[nfib];				//1		
+	int* count_update = (int*) &idVOX[1];				//1		
+	int* localrand = (int*) &count_update[1];			//1
+	////////// DYNAMIC SHARED MEMORY ///////////
+
+	double mysig[MAXNDIRS_PER_THREAD*MAXNFIBRES];			
+	double mysigold[MAXNDIRS_PER_THREAD*MAXNFIBRES];			
 	double myisosig[MAXNDIRS_PER_THREAD];			
 	double myisosigold[MAXNDIRS_PER_THREAD];				
-	__shared__ double mydata[MAXNDIRS_PER_THREAD*THREADS_BLOCK];	//save local memory, array max 1024 * 8 bytes = 8KB 
-
-	__shared__ float old1;
-	__shared__ float old2;
+	double mydata[MAXNDIRS_PER_THREAD];
+	double mybvals[MAXNDIRS_PER_THREAD];	
+	double myalpha[MAXNDIRS_PER_THREAD];	
+	double mybeta[MAXNDIRS_PER_THREAD];	
  		
-	int idrand;
-	int count_update;
-
-	__shared__ float m_prior_en;
-	__shared__ float m_old_prior_en;
-
-	__shared__ float m_d_prior;
-	__shared__ float m_S0_prior;
-	__shared__ float m_f0_prior;
-	__shared__ float m_tau_prior;
-	__shared__ float m_dstd_prior;
-	__shared__ float fm_prior_en[NFIBRES];		//m_prior_en for each fibre
-	__shared__ float fm_old_prior_en;		//and old
-	__shared__ float m_th_prior[NFIBRES];
-	__shared__ float m_ph_prior[NFIBRES];
-	__shared__ float m_f_prior[NFIBRES];
-
-	__shared__ float m_energy;
-	__shared__ float m_old_energy;
-
-	__shared__ int m_d_acc;
-	__shared__ int m_d_rej;
-	__shared__ int m_S0_acc;
-	__shared__ int m_S0_rej;
-	__shared__ int m_f0_acc;
-	__shared__ int m_f0_rej;
-	__shared__ int m_tau_acc;
-	__shared__ int m_tau_rej;	
-	__shared__ int m_dstd_acc;
-	__shared__ int m_dstd_rej;	
-
-	__shared__ int m_th_acc[NFIBRES]; 
-	__shared__ int m_th_rej[NFIBRES]; 
-	__shared__ int m_ph_acc[NFIBRES]; 
-	__shared__ int m_ph_rej[NFIBRES]; 
-	__shared__ int m_f_acc[NFIBRES]; 
-	__shared__ int m_f_rej[NFIBRES]; 
-			
-	__shared__ float m_d_prop;
-	__shared__ float m_S0_prop;
-	__shared__ float m_f0_prop;
-	__shared__ float m_tau_prop;
-	__shared__ float m_dstd_prop;
-	__shared__ float m_th_prop[NFIBRES];
-	__shared__ float m_ph_prop[NFIBRES];	
-	__shared__ float m_f_prop[NFIBRES];	
-
-	__shared__ bool m_lam_jump[NFIBRES];
-
-	__shared__ float prerandN[NPARAMS];
-	__shared__ float prerandU[NPARAMS];
-
-	float angtmp[MAXNDIRS_PER_THREAD*NFIBRES];			//this is to pre compute signal
-	float old_angtmp[MAXNDIRS_PER_THREAD*NFIBRES];
+	float angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES];			//this is to pre compute signal
+	float old_angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES];
 	float cos_alpha_minus_theta;
 	float cos_alpha_plus_theta;
 	
 	if (leader){
-		idrand = idmult*iterations*NPARAMS;
-		count_update = current_iter;
-		m_prior_en=multifibres[idmult].m_prior_en;
+		*idVOX= blockIdx.x;
+		*count_update = current_iter;
+
+		*m_prior_en=multifibres[*idVOX].m_prior_en;
 
 		if(model==2){
-			m_dstd_acc=multifibres[idmult].m_dstd_acc;
-			m_dstd_rej=multifibres[idmult].m_dstd_rej;
-			m_dstd_prior=multifibres[idmult].m_dstd_prior;
-			m_dstd_prop=multifibres[idmult].m_dstd_prop;
-			m_dstd=multifibres[idmult].m_dstd;
+			*m_dstd_acc=multifibres[*idVOX].m_dstd_acc;
+			*m_dstd_rej=multifibres[*idVOX].m_dstd_rej;
+			*m_dstd_prior=multifibres[*idVOX].m_dstd_prior;
+			*m_dstd_prop=multifibres[*idVOX].m_dstd_prop;
+			*m_dstd=multifibres[*idVOX].m_dstd;
 		}else{
-			m_dstd_acc=0;
-			m_dstd_rej=0;
-			m_dstd_prior=0;
-			m_dstd_prop=0;
-			m_dstd=0;
+			*m_dstd_acc=0;
+			*m_dstd_rej=0;
+			*m_dstd_prior=0;
+			*m_dstd_prop=0;
+			*m_dstd=0;
 		}
 	
-		m_d=multifibres[idmult].m_d;
-		m_energy=multifibres[idmult].m_energy;
-		m_d_prop=multifibres[idmult].m_d_prop;
-		m_d_prior=multifibres[idmult].m_d_prior;
-		m_S0_prior=multifibres[idmult].m_S0_prior;
-		m_S0=multifibres[idmult].m_S0;
-		m_likelihood_en=multifibres[idmult].m_likelihood_en;
-		m_d_acc=multifibres[idmult].m_d_acc;
-		m_d_rej=multifibres[idmult].m_d_rej;
-		m_S0_acc=multifibres[idmult].m_S0_acc;
-		m_S0_rej=multifibres[idmult].m_S0_rej;
-		m_S0_prop=multifibres[idmult].m_S0_prop;
+		*m_d=multifibres[*idVOX].m_d;
+		*m_energy=multifibres[*idVOX].m_energy;
+		*m_d_prop=multifibres[*idVOX].m_d_prop;
+		*m_d_prior=multifibres[*idVOX].m_d_prior;
+		*m_S0_prior=multifibres[*idVOX].m_S0_prior;
+		*m_S0=multifibres[*idVOX].m_S0;
+		*m_likelihood_en=multifibres[*idVOX].m_likelihood_en;
+		*m_d_acc=multifibres[*idVOX].m_d_acc;
+		*m_d_rej=multifibres[*idVOX].m_d_rej;
+		*m_S0_acc=multifibres[*idVOX].m_S0_acc;
+		*m_S0_rej=multifibres[*idVOX].m_S0_rej;
+		*m_S0_prop=multifibres[*idVOX].m_S0_prop;
 
 		if(m_include_f0){
-			m_f0_acc=multifibres[idmult].m_f0_acc;
-			m_f0_rej=multifibres[idmult].m_f0_rej;
-			m_f0_prop=multifibres[idmult].m_f0_prop;
-			m_f0_prior=multifibres[idmult].m_f0_prior;
-			m_f0=multifibres[idmult].m_f0;
+			*m_f0_acc=multifibres[*idVOX].m_f0_acc;
+			*m_f0_rej=multifibres[*idVOX].m_f0_rej;
+			*m_f0_prop=multifibres[*idVOX].m_f0_prop;
+			*m_f0_prior=multifibres[*idVOX].m_f0_prior;
+			*m_f0=multifibres[*idVOX].m_f0;
 		}else{ 
-			m_f0_acc=0;
-			m_f0_rej=0;
-			m_f0_prop=0;
-			m_f0_prior=0;
-			m_f0=0;
+			*m_f0_acc=0;
+			*m_f0_rej=0;
+			*m_f0_prop=0;
+			*m_f0_prior=0;
+			*m_f0=0;
 		}
 				
-		
 		if(rician){
-			m_tau_acc=multifibres[idmult].m_tau_acc;
-			m_tau_rej=multifibres[idmult].m_tau_rej;
-			m_tau_prop=multifibres[idmult].m_tau_prop;
-			m_tau_prior=multifibres[idmult].m_tau_prior;
-			m_tau=multifibres[idmult].m_tau;	
+			*m_tau_acc=multifibres[*idVOX].m_tau_acc;
+			*m_tau_rej=multifibres[*idVOX].m_tau_rej;
+			*m_tau_prop=multifibres[*idVOX].m_tau_prop;
+			*m_tau_prior=multifibres[*idVOX].m_tau_prior;
+			*m_tau=multifibres[*idVOX].m_tau;	
 		}else{ 
-			m_tau_acc=0;
-			m_tau_rej=0;
-			m_tau_prop=0;
-			m_tau_prior=0;
-			m_tau=0;
+			*m_tau_acc=0;
+			*m_tau_rej=0;
+			*m_tau_prop=0;
+			*m_tau_prior=0;
+			*m_tau=0;
 		}
+	}
+	__syncthreads();
 
+	int mydirs = ndirections/threadsBlock;
+	int mod = ndirections%threadsBlock;
+	if(mod&&(idSubVOX<mod)) mydirs++;
 
-		for(int f=0;f<NFIBRES;f++){
-			m_th[f]=fibres[(idmult*NFIBRES)+f].m_th;
-			m_ph[f]=fibres[(idmult*NFIBRES)+f].m_ph;
-			m_f[f]=fibres[(idmult*NFIBRES)+f].m_f;
+	if(idSubVOX<nfib){
+		int pos = (*idVOX*nfib)+idSubVOX;
+		m_th[idSubVOX]=fibres[pos].m_th;
+		m_ph[idSubVOX]=fibres[pos].m_ph;
+		m_f[idSubVOX]=fibres[pos].m_f;
 	
-			m_th_acc[f]=fibres[(idmult*NFIBRES)+f].m_th_acc;
-			m_th_rej[f]=fibres[(idmult*NFIBRES)+f].m_th_rej;
-			m_ph_acc[f]=fibres[(idmult*NFIBRES)+f].m_ph_acc;
-			m_ph_rej[f]=fibres[(idmult*NFIBRES)+f].m_ph_rej;
-			m_f_acc[f]=fibres[(idmult*NFIBRES)+f].m_f_acc;
-			m_f_rej[f]=fibres[(idmult*NFIBRES)+f].m_f_rej;
-
-			fm_prior_en[f]=fibres[(idmult*NFIBRES)+f].m_prior_en;
-			m_th_prior[f]=fibres[(idmult*NFIBRES)+f].m_th_prior;
-			m_ph_prior[f]=fibres[(idmult*NFIBRES)+f].m_ph_prior;
-			m_f_prior[f]=fibres[(idmult*NFIBRES)+f].m_f_prior;
-
-			m_th_prop[f]=fibres[(idmult*NFIBRES)+f].m_th_prop;
-			m_ph_prop[f]=fibres[(idmult*NFIBRES)+f].m_ph_prop;
-			m_f_prop[f]=fibres[(idmult*NFIBRES)+f].m_f_prop;
-
-			m_lam_jump[f]=fibres[(idmult*NFIBRES)+f].m_lam_jump;		
-		}
-
+		m_th_acc[idSubVOX]=fibres[pos].m_th_acc;
+		m_th_rej[idSubVOX]=fibres[pos].m_th_rej;
+		m_ph_acc[idSubVOX]=fibres[pos].m_ph_acc;
+		m_ph_rej[idSubVOX]=fibres[pos].m_ph_rej;
+		m_f_acc[idSubVOX]=fibres[pos].m_f_acc;
+		m_f_rej[idSubVOX]=fibres[pos].m_f_rej;
+
+		fm_prior_en[idSubVOX]=fibres[pos].m_prior_en;
+		m_th_prior[idSubVOX]=fibres[pos].m_th_prior;
+		m_ph_prior[idSubVOX]=fibres[pos].m_ph_prior;
+		m_f_prior[idSubVOX]=fibres[pos].m_f_prior;
+
+		m_th_prop[idSubVOX]=fibres[pos].m_th_prop;
+		m_ph_prop[idSubVOX]=fibres[pos].m_ph_prop;
+		m_f_prop[idSubVOX]=fibres[pos].m_f_prop;
+
+		m_lam_jump[idSubVOX]=fibres[pos].m_lam_jump;		
 	}
 
-	for(int i=0; i<ndirs; i++){	
-		myisosig[i] = isosignals[(idmult*NDIRECTIONS)+idrest+i*THREADS_BLOCK];
-		mydata[idrest*MAXNDIRS_PER_THREAD+i] = datam[(idmult*NDIRECTIONS)+idrest+i*THREADS_BLOCK];
+	__syncthreads();
+
+	for(int i=0; i<mydirs; i++){	
+		int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock;
+		myisosig[i] = isosignals[pos];
+		mydata[i] = datam[pos];
+		mybvals[i] = bvals[pos];
+		myalpha[i] = alpha[pos];
+		mybeta[i] = beta[pos];
 	}
 
-	for(int f=0;f<NFIBRES;f++){
-		for(int i=0; i<ndirs; i++){	
-			mysig[i*NFIBRES+f]= signals[(idmult*NDIRECTIONS*NFIBRES)+(f*NDIRECTIONS)+idrest+i*THREADS_BLOCK];	
+	for(int f=0;f<nfib;f++){
+		for(int i=0; i<mydirs; i++){	
+			mysig[i*nfib+f]= signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock];	
 		}
 	}
 
 	__syncthreads();
 		
-		//compute_signal_pre
-		for(int i=0; i<ndirs; i++){
-			for(int f=0;f<NFIBRES;f++){				
-				cos_alpha_minus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]-m_th[f]));   
-			  	cos_alpha_plus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]+m_th[f]));
-		     	   	angtmp[i*NFIBRES+f]= (cos(double(m_ph[f]-beta[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
-		 	   	angtmp[i*NFIBRES+f]=angtmp[i*NFIBRES+f]*angtmp[i*NFIBRES+f];
-			}
+	//compute_signal_pre
+	for(int i=0; i<mydirs; i++){
+		for(int f=0;f<nfib;f++){	
+			cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[f]));   
+			cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[f]));
+		     	angtmp[i*nfib+f]= (cos(double(m_ph[f]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
+		 	angtmp[i*nfib+f]=angtmp[i*nfib+f]*angtmp[i*nfib+f];
 		}
+	}
 
-		for (int niter=0; niter<iterations; niter++){
+	for (int niter=0; niter<iterations; niter++){
 		//code jump()
 
-			//prefetching randoms
-			if (leader){
-				if(m_include_f0){
-					prerandN[0]=randomsN[idrand+(niter*NPARAMS)];
-					prerandU[0]=randomsU[idrand+(niter*NPARAMS)];
-				}
-				if(rician){
-					prerandN[add]=randomsN[idrand+(niter*NPARAMS)+add];
-					prerandU[add]=randomsU[idrand+(niter*NPARAMS)+add];
-				}
-				prerandN[add+add2]=randomsN[idrand+(niter*NPARAMS)+add+add2];
-				prerandU[add+add2]=randomsU[idrand+(niter*NPARAMS)+add+add2];
-
-				if(model==2){
-					prerandN[1+add+add2]=randomsN[idrand+(niter*NPARAMS)+1+add+add2];
-					prerandU[1+add+add2]=randomsU[idrand+(niter*NPARAMS)+1+add+add2];
-				}
-
-				prerandN[1+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+1+add+add2+add3];
-				prerandU[1+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+1+add+add2+add3];
+		//prefetching randoms
+		if (leader){
+			*count_update=*count_update+1;
+			int idrand = *idVOX*iterations*nparams+(niter*nparams);
+			*localrand = 0;
+			if(m_include_f0){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+			}
+			if(rician){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+			}
+			prerandN[*localrand]=randomsN[idrand];
+			prerandU[*localrand]=randomsU[idrand];
+			idrand++;
+			*localrand=*localrand+1;
 
-				for(int f=0;f<NFIBRES;f++){
-					prerandN[2+(f*3)+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+2+(f*3)+add+add2+add3];
-					prerandN[3+(f*3)+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+3+(f*3)+add+add2+add3];
-					prerandN[4+(f*3)+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+4+(f*3)+add+add2+add3];
+			if(model==2){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+			}
 
-					prerandU[2+(f*3)+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+2+(f*3)+add+add2+add3];
-					prerandU[3+(f*3)+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+3+(f*3)+add+add2+add3];
-					prerandU[4+(f*3)+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+4+(f*3)+add+add2+add3];		
-				}
+			prerandN[*localrand]=randomsN[idrand];
+			prerandU[*localrand]=randomsU[idrand];
+			idrand++;
+			*localrand=*localrand+1;
+
+			for(int f=0;f<nfib;f++){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+
+				prerandN[*localrand]=randomsN[idrand];					
+				prerandU[*localrand]=randomsU[idrand];	
+				idrand++;	
+				*localrand=*localrand+1;	
 			}
+			*localrand = 0;
+		}
 
 ////////////////////////////////////////////////////////////////// F0
 
-			if(m_include_f0){
-				if (leader){
-					//propose_f0
-					old1=m_f0;
-					m_f0+=prerandN[0]*m_f0_prop;
+		if(m_include_f0){
+			if (leader){
+				//propose_f0
+				old[0]=*m_f0;
+				*m_f0+=prerandN[*localrand]**m_f0_prop;
 				
-					//compute_f0_prior()     
-					old2=m_f0_prior;
-	      				if(m_f0<=0 || m_f0 >=1){ 
-						rejflag=true;
-					}else{ 	
-						rejflag=false;
-						if(!m_ardf0){
-							m_f0_prior=0;
-	      					}else{
-							m_f0_prior=log(double(m_f0));
-						}
+				//compute_f0_prior()     
+				old[1]=*m_f0_prior;
+	      			if(*m_f0<=0 || *m_f0 >=1){ 
+					rejflag[0]=true;
+				}else{ 	
+					rejflag[0]=false;
+					if(!m_ardf0){
+						*m_f0_prior=0;
+	      				}else{
+						*m_f0_prior=log(double(*m_f0));
 					}
+				}
 				
-					//for likehood and reject_f_sum
-					fsum=m_f0;
+				//for likehood and reject_f_sum
+				*fsum=*m_f0;
 
-					for(int g=0;g<NFIBRES;g++){  
-						fsum+=m_f[g];
-					}
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
+				}
 
-					//compute_prior()
-					compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+				//compute_prior()
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
 				
-					//reject_f_sum()
-					rejflag2=(fsum>1);
-				}
+				//reject_f_sum()
+				rejflag[1]=(*fsum>1);
+			}
+
+			__syncthreads();
 
-				__syncthreads();
+			//compute_likelihood()
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
-				//compute_likelihood()
-				compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			__syncthreads();
 
-				__syncthreads();
+			if (leader){
+				//compute_energy()
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
-				if (leader){
-					//compute_energy()
-					m_old_energy=m_energy;
-	      				m_energy=m_prior_en+m_likelihood_en;
-
-					//test_energy
-					tmp=exp(double(m_old_energy-m_energy));
-					rejflag3=(tmp>prerandU[0]);	
-
-					if(!rejflag){
-						if(!rejflag2){
-							if(rejflag3){
-								//accept_f0()
-		  						m_f0_acc++;   
-							}else{
-								//reject_F0()
-								m_f0=old1;
-								m_f0_prior=old2;
-		      						m_prior_en=m_old_prior_en;
-		      						m_f0_rej++;
-
-
-								//restore_energy()
-	      							m_energy=m_old_energy;
-							}
-						}else{ 
+				//test_energy
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[2]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;	
+
+				if(!rejflag[0]){
+					if(!rejflag[1]){
+						if(rejflag[2]){
+							//accept_f0()
+		  					*m_f0_acc=*m_f0_acc+1;   
+						}else{
 							//reject_F0()
-							m_f0=old1;
-							m_f0_prior=old2;
-		      					m_prior_en=m_old_prior_en;
-		      					m_f0_rej++;
+							*m_f0=old[0];
+							*m_f0_prior=old[1];
+		      					*m_prior_en=*m_old_prior_en;
+		      					*m_f0_rej=*m_f0_rej+1;
 
 							//restore_energy()
-		      					m_energy=m_old_energy;
+      							*m_energy=*m_old_energy;
 						}
 					}else{ 
 						//reject_F0()
-						m_f0=old1;
-						m_f0_prior=old2;
-		      				m_prior_en=m_old_prior_en;
-		      				m_f0_rej++;
+						*m_f0=old[0];
+						*m_f0_prior=old[1];
+		      				*m_prior_en=*m_old_prior_en;
+		      				*m_f0_rej=*m_f0_rej+1;
 
 						//restore_energy()
-		      				m_energy=m_old_energy;
+		      				*m_energy=*m_old_energy;
 					}
+				}else{ 
+					//reject_F0()
+					*m_f0=old[0];
+					*m_f0_prior=old[1];
+		      			*m_prior_en=*m_old_prior_en;
+		      			*m_f0_rej=*m_f0_rej+1;
+
+					//restore_energy()
+		      			*m_energy=*m_old_energy;
 				}
 			}
-
+		}
+			
 ////////////////////////////////////////////////////////////////// TAU
-			if(rician){
-				if (leader){
-					//propose_tau
-					old1=m_tau;
-					m_tau+=prerandN[add]*m_tau_prop;
+		if(rician){
+			if (leader){
+				//propose_tau
+				old[0]=*m_tau;
+				*m_tau+=prerandN[*localrand]**m_tau_prop;
 			
-					//compute_tau_prior()     
-					old2=m_tau_prior;
-	      				if(m_tau<=0){ rejflag=true;
-					}else{ 	
-						rejflag=false;
-						m_tau_prior=0;
-					}
-
-					//for likehood and reject_f_sum
-					fsum=m_f0;
-					for(int g=0;g<NFIBRES;g++){  
-						fsum+=m_f[g];
-					}
+				//compute_tau_prior()     
+				old[1]=*m_tau_prior;
+	      			if(*m_tau<=0){ 
+					rejflag[0]=true;
+				}else{ 	
+					rejflag[0]=false;
+					*m_tau_prior=0;
+				}
 
-					//compute_prior()
-					compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+				//for likehood and reject_f_sum
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
-				__syncthreads();
+				//compute_prior()
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
+			}
 
-				//compute_likelihood()
-				compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			__syncthreads();
 
-				__syncthreads();
+			//compute_likelihood()
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
-				if (leader){
-					//compute_energy()
-					m_old_energy=m_energy;
-	      				m_energy=m_prior_en+m_likelihood_en;
+			__syncthreads();
 
-					//test_energy
-					tmp=exp(double(m_old_energy-m_energy));
-					rejflag2=(tmp>prerandU[add]);	
+			if (leader){
+				//compute_energy()
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
+
+				//test_energy
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);	
+				*localrand=*localrand+1;
 				
-					if(!rejflag){
-						if(rejflag2){
-							//accept_tau()
-		  					m_tau_acc++;   
-						}else{ 
-							//reject_tau()
-							m_tau=old1;
-							m_tau_prior=old2;
-		      					m_prior_en=m_old_prior_en;
-		      					m_tau_rej++;
-							//restore_energy()
-		      					m_energy=m_old_energy;
-						}
+				if(!rejflag[0]){
+					if(rejflag[1]){
+						//accept_tau()
+		  				*m_tau_acc=*m_tau_acc+1;   
 					}else{ 
-							//reject_tau()
-							m_tau=old1;
-							m_tau_prior=old2;
-		      					m_prior_en=m_old_prior_en;
-		      					m_tau_rej++;
-
-							//restore_energy()
-		      					m_energy=m_old_energy;
+						//reject_tau()
+						*m_tau=old[0];
+						*m_tau_prior=old[1];
+		      				*m_prior_en=*m_old_prior_en;
+		      				*m_tau_rej=*m_tau_rej+1;
+						//restore_energy()
+		      				*m_energy=*m_old_energy;
 					}
+				}else{ 
+					//reject_tau()
+					*m_tau=old[0];
+					*m_tau_prior=old[1];
+		      			*m_prior_en=*m_old_prior_en;
+		      			*m_tau_rej=*m_tau_rej+1;
+
+					//restore_energy()
+		      			*m_energy=*m_old_energy;
 				}
-			}
+			}	
+		}
 
 ////////////////////////////////////////////////////////////////// D
 
-			if (leader){
-				//propose_d()
-				old1=m_d;	
-				m_d+=prerandN[add+add2]*m_d_prop;
+		if (leader){
+			//propose_d()
+			old[0]=*m_d;	
+			*m_d+=prerandN[*localrand]**m_d_prop;
 				
-				//compute_d_prior_f0()      
-				old2=m_d_prior;
+			//compute_d_prior_f0()      
+			old[1]=*m_d_prior;
 					
-      				if(m_d<0 || m_d > 0.008){
-					rejflag=true;
-				}else{ 
-					m_d_prior=0;
-					rejflag=false;
-      				}
-			}
+      			if(*m_d<0 || *m_d > 0.008){
+				rejflag[0]=true;
+			}else{ 
+				*m_d_prior=0;
+				rejflag[0]=false;
+      			}
+		}
 
-			__syncthreads();
+		__syncthreads();
 				
-				//compute_signal()
-				for(int i=0; i<ndirs; i++){
-					for(int f=0;f<NFIBRES;f++){
-						compute_signal(&mysig[i*NFIBRES+f],&mysigold[i*NFIBRES+f],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+f]);
-					
-					}
-				}
-				//compute_iso_signal()
-				for(int i=0; i<ndirs; i++){
-					compute_iso_signal(&myisosig[i],&myisosigold[i], bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,m_dstd,model);
-				}				
-
-				if (leader){
+		//compute_signal()
+		for(int i=0; i<mydirs; i++){
+			for(int f=0;f<nfib;f++){
+				compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model);
+			}
+		}
+		//compute_iso_signal()
+		for(int i=0; i<mydirs; i++){
+			compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model);
+		}				
 
-					//for likehood and reject_f_sum
-					fsum=m_f0;
+		if (leader){
+			//for likehood and reject_f_sum
+			*fsum=*m_f0;
 
-					for(int g=0;g<NFIBRES;g++){  
-						fsum+=m_f[g];
-					}
+			for(int g=0;g<nfib;g++){  
+				*fsum+=m_f[g];
+			}
 
-					//compute_prior()
-					compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
-				}
+			//compute_prior()
+			compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
+		}
 
-				__syncthreads();
-				
-				//compute_likelihood()
-				compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);	
+		__syncthreads();
 				
-				__syncthreads();
+		//compute_likelihood()
+		compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);	
 				
-				if (leader){
-					//compute_energy()
-					m_old_energy=m_energy;
-      					m_energy=m_prior_en+m_likelihood_en;
-					//test_energy
-					tmp=exp(double(m_old_energy-m_energy));
-					rejflag2=(tmp>prerandU[add+add2]);
+		__syncthreads();
 				
-				}
+		if (leader){
+			//compute_energy()
+			*m_old_energy=*m_energy;
+      			*m_energy=*m_prior_en+*m_likelihood_en;
+			//test_energy
+			*tmp=exp(double(*m_old_energy-*m_energy));
+			rejflag[1]=(*tmp>prerandU[*localrand]);
+			*localrand=*localrand+1;
+		}
 			
-				__syncthreads();
+		__syncthreads();
 
-       			if(!rejflag){
-				if(rejflag2){
-					//accept_d()
-	  				if (leader) m_d_acc++;   
-				}else{
-					if (leader){
-						//reject_d()
-						m_d=old1;
-						m_d_prior=old2;
-      						m_prior_en=m_old_prior_en;
-						m_d_rej++;
-						//restore_energy()
-      						m_energy=m_old_energy;
-					}
-      						
-					for(int i=0; i<ndirs; i++){
-						for(int f=0;f<NFIBRES;f++){
-							mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
-						}	
-				
-						myisosig[i]=myisosigold[i];
-					}
-				}
-        		}else{ 
+       		if(!rejflag[0]){
+			if(rejflag[1]){
+				//accept_d()
+	  			if (leader) *m_d_acc=*m_d_acc+1;   
+			}else{
 				if (leader){
-				//reject_d()
-					m_d=old1;
-					m_d_prior=old2;
-      					m_prior_en=m_old_prior_en;
-					m_d_rej++;
+					//reject_d()
+					*m_d=old[0];
+					*m_d_prior=old[1];
+      					*m_prior_en=*m_old_prior_en;
+					*m_d_rej=*m_d_rej+1;
 					//restore_energy()
-      					m_energy=m_old_energy;
+      					*m_energy=*m_old_energy;
 				}
-
-      				for(int i=0; i<ndirs; i++){
-					for(int f=0;f<NFIBRES;f++){
-						mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
+      						
+				for(int i=0; i<mydirs; i++){
+					for(int f=0;f<nfib;f++){
+						mysig[i*nfib+f] = mysigold[i*nfib+f];
 					}	
 				
 					myisosig[i]=myisosigold[i];
 				}
-      				
-        		}
+			}
+        	}else{ 
+			if (leader){
+				//reject_d()
+				*m_d=old[0];
+				*m_d_prior=old[1];
+      				*m_prior_en=*m_old_prior_en;
+				*m_d_rej=*m_d_rej+1;
+				//restore_energy()
+      				*m_energy=*m_old_energy;
+			}
+
+      			for(int i=0; i<mydirs; i++){
+				for(int f=0;f<nfib;f++){
+					mysig[i*nfib+f] = mysigold[i*nfib+f];
+				}	
+				
+				myisosig[i]=myisosigold[i];
+			}	
+        	}
 
 ////////////////////////////////////////////////////////////////// D_STD
 
-			if(model==2){
-				if (leader){	
-					//propose_d_std
-					old1=m_dstd;
-					m_dstd+=prerandN[1+add+add2]*m_dstd_prop;
-
-					//compute_d_std_prior()     
-					old2=m_dstd_prior;
-	      				if(m_dstd<=0 || m_dstd > 0.01){ 
-						rejflag=true;
-					}else{ 	
-						rejflag=false;
-						m_dstd_prior=log(double(m_dstd));
-					}
+		if(model==2){
+			if (leader){	
+				//propose_d_std
+				old[0]=*m_dstd;
+				*m_dstd+=prerandN[*localrand]**m_dstd_prop;
 
+				//compute_d_std_prior()     
+				old[1]=*m_dstd_prior;
+	      			if(*m_dstd<=0 || *m_dstd > 0.01){ 
+					rejflag[0]=true;
+				}else{ 	
+					rejflag[0]=false;
+					*m_dstd_prior=log(double(*m_dstd));
 				}
+			}
 
-				__syncthreads();
-
-				//compute_signal()
-				for(int i=0; i<ndirs; i++){
-					for(int f=0;f<NFIBRES;f++){
-						compute_signal(&mysig[i*NFIBRES+f],&mysigold[i*NFIBRES+f],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+f]);
-					}
-				}
+			__syncthreads();
 
-				//compute_iso_signal()
-				for(int i=0; i<ndirs; i++){
-					compute_iso_signal(&myisosig[i],&myisosigold[i], bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,m_dstd,model);
+			//compute_signal()
+			for(int i=0; i<mydirs; i++){
+				for(int f=0;f<nfib;f++){
+					compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model);
 				}
+			}
 
-				if (leader){
-				
-					//for likehood and reject_f_sum
-					fsum=m_f0;
-					for(int g=0;g<NFIBRES;g++){  
-						fsum+=m_f[g];
-					}
-
-					//compute_prior()
-					compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+			//compute_iso_signal()
+			for(int i=0; i<mydirs; i++){
+				compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model);
+			}
 
+			if (leader){	
+				//for likehood and reject_f_sum
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
-				__syncthreads();
-
-				//compute_likelihood()
-				compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+				//compute_prior()
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
+			}
 
-				__syncthreads();
+			__syncthreads();
 
-				if (leader){
-					//compute_energy()
-					m_old_energy=m_energy;
-	      				m_energy=m_prior_en+m_likelihood_en;
+			//compute_likelihood()
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
-					//test_energy
-					tmp=exp(double(m_old_energy-m_energy));
-					rejflag2=(tmp>prerandU[1+add+add2]);					
-				}
+			__syncthreads();
 
-				__syncthreads();
-				
-				if(!rejflag){
-					if(rejflag2){
-						//accept_dstd()
-		  				if (leader) m_dstd_acc++;   
-					}else{ 
-						if (leader){
-							//reject_dstd()
-							m_dstd=old1;
-							m_dstd_prior=old2;
-		      					m_prior_en=m_old_prior_en;
-		      					m_dstd_rej++;
+			if (leader){
+				//compute_energy()
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
-							//restore_energy()
-		      					m_energy=m_old_energy;
-						}
+				//test_energy
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;					
+			}
 
-						for(int i=0; i<ndirs; i++){
-							for(int f=0;f<NFIBRES;f++){
-								mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
-							}	
+			__syncthreads();
 				
-							myisosig[i]=myisosigold[i];
-						}
-					}
+			if(!rejflag[0]){
+				if(rejflag[1]){
+					//accept_dstd()
+		  			if (leader) *m_dstd_acc=*m_dstd_acc+1;   
 				}else{ 
 					if (leader){
 						//reject_dstd()
-						m_dstd=old1;
-						m_dstd_prior=old2;
-		      				m_prior_en=m_old_prior_en;
-		      				m_dstd_rej++;
+						*m_dstd=old[0];
+						*m_dstd_prior=old[1];
+		      				*m_prior_en=*m_old_prior_en;
+		      				*m_dstd_rej=*m_dstd_rej+1;
 
 						//restore_energy()
-		      				m_energy=m_old_energy;
+		      				*m_energy=*m_old_energy;
 					}
 
-					for(int i=0; i<ndirs; i++){
-						for(int f=0;f<NFIBRES;f++){
-							mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
-						}	
+					for(int i=0; i<mydirs; i++){
+						for(int f=0;f<nfib;f++){
+							mysig[i*nfib+f] = mysigold[i*nfib+f];
+						}
 				
 						myisosig[i]=myisosigold[i];
 					}
 				}
+			}else{ 
+				if (leader){
+					//reject_dstd()
+					*m_dstd=old[0];
+					*m_dstd_prior=old[1];
+		      			*m_prior_en=*m_old_prior_en;
+		      			*m_dstd_rej=*m_dstd_rej+1;
+
+					//restore_energy()
+		      			*m_energy=*m_old_energy;
+				}
+
+				for(int i=0; i<mydirs; i++){
+					for(int f=0;f<nfib;f++){
+						mysig[i*nfib+f] = mysigold[i*nfib+f];
+					}	
+				
+					myisosig[i]=myisosigold[i];
+				}
 			}
+		}
 
 ////////////////////////////////////////////////////////////////// S0
 
-			if (leader){
-			
-				//propose_S0()
-				old1=m_S0;
-				m_S0+=prerandN[1+add+add2+add3]*m_S0_prop;
+		if (leader){
+			//propose_S0()
+			old[0]=*m_S0;
+			*m_S0+=prerandN[*localrand]**m_S0_prop;
 				
-				//compute_S0_prior()
-				old2=m_S0_prior;
-        			if(m_S0<0) rejflag=true;
-        			else{    
-					m_S0_prior=0;
-	  				rejflag=false;
-        			}
+			//compute_S0_prior()
+			old[1]=*m_S0_prior;
+        		if(*m_S0<0) rejflag[0]=true;
+        		else{    
+				*m_S0_prior=0;
+	  			rejflag[0]=false;
+        		}
 				
-				//for likehood and reject_f_sum
-				fsum=m_f0;
-				for(int g=0;g<NFIBRES;g++){  
-					fsum+=m_f[g];
-				}
-
-				//compute_prior()
-				compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+			//for likehood and reject_f_sum
+			*fsum=*m_f0;
+			for(int g=0;g<nfib;g++){  
+				*fsum+=m_f[g];
 			}
 
-			__syncthreads();
+			//compute_prior()
+			compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
+		}
 
-			//compute_likelihood()
-			compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+		__syncthreads();
 
-			__syncthreads();
+		//compute_likelihood()
+		compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
-			if (leader){
-				//compute_energy()
-				m_old_energy=m_energy;
-      				m_energy=m_prior_en+m_likelihood_en;
+		__syncthreads();
 
-				//test_energy
-				tmp=exp(double(m_old_energy-m_energy));
-				rejflag2=(tmp>prerandU[1+add+add2+add3]);
+		if (leader){
+			//compute_energy()
+			*m_old_energy=*m_energy;
+      			*m_energy=*m_prior_en+*m_likelihood_en;
 
-        			if(!rejflag){
-					if(rejflag2){
-						//accept_S0()
-	  					m_S0_acc++;   
-					}else{
-						//reject_S0()
-						m_S0=old1;
-						m_S0_prior=old2;
-      						m_prior_en=m_old_prior_en;
-      						m_S0_rej++;
+			//test_energy
+			*tmp=exp(double(*m_old_energy-*m_energy));
+			rejflag[1]=(*tmp>prerandU[*localrand]);
+			*localrand=*localrand+1;
 
-						//restore_energy()
-      						m_energy=m_old_energy;
-					}
-        			}else{ 
+        		if(!rejflag[0]){
+				if(rejflag[1]){
+					//accept_S0()
+	  				*m_S0_acc=*m_S0_acc+1;   
+				}else{
 					//reject_S0()
-					m_S0=old1;
-					m_S0_prior=old2;
-	      				m_prior_en=m_old_prior_en;
-	      				m_S0_rej++;
+					*m_S0=old[0];
+					*m_S0_prior=old[1];
+      					*m_prior_en=*m_old_prior_en;
+      					*m_S0_rej=*m_S0_rej+1;
 
 					//restore_energy()
-      					m_energy=m_old_energy;
+      					*m_energy=*m_old_energy;
 				}
-        		}
+        		}else{ 
+				//reject_S0()
+				*m_S0=old[0];
+				*m_S0_prior=old[1];
+	      			*m_prior_en=*m_old_prior_en;
+	      			*m_S0_rej=*m_S0_rej+1;
 
-////////////////////////////////////////////////////////////////////////////     TH
+				//restore_energy()
+      				*m_energy=*m_old_energy;
+			}
+        	}
 
-     			for(int fibre=0;fibre<NFIBRES;fibre++){  
-				if (leader){ 
+////////////////////////////////////////////////////////////////////////////     TH
 
-					//propose_th()
-					old1=m_th[fibre];
-					m_th[fibre]+=prerandN[2+(fibre*3)+add+add2+add3]*m_th_prop[fibre];
+     		for(int fibre=0;fibre<nfib;fibre++){  
+			if (leader){ 
+				//propose_th()
+				old[0]=m_th[fibre];
+				m_th[fibre]+=prerandN[*localrand]*m_th_prop[fibre];
 					
-					//compute_th_prior()
-					old2=m_th_prior[fibre];
-      	   				if(m_th[fibre]==0){
-						m_th_prior[fibre]=0;
-		   			}else{
-						m_th_prior[fibre]=-log(double(fabs(sin(double(m_th[fibre]))/2)));
-	      	   			}
-		  			//rejflag=false; /////////////////always false
+				//compute_th_prior()
+				old[1]=m_th_prior[fibre];
+      	   			if(m_th[fibre]==0){
+					m_th_prior[fibre]=0;
+		   		}else{
+					m_th_prior[fibre]=-log(double(fabs(sin(double(m_th[fibre]))/2)));
+	      	   		}
+		  		//rejflag[0]=false; /////////////////always false
 		
-					//compute_prior()
-					fm_old_prior_en=fm_prior_en[fibre];
-	      	   			fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
-					
-				}
+				//compute_prior()
+				*fm_old_prior_en=fm_prior_en[fibre];
+	      	   		fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];	
+			}
 
-				__syncthreads();
+			__syncthreads();
 
-				//compute_signal()
-				//compute_signal_pre	
-				for(int i=0; i<ndirs; i++){
-					cos_alpha_minus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]-m_th[fibre]));   
-			  		cos_alpha_plus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]+m_th[fibre]));
-					old_angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre];
-					angtmp[i*NFIBRES+fibre]= (cos(double(m_ph[fibre]-beta[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
-		 	   		angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre]*angtmp[i*NFIBRES+fibre];
+			//compute_signal()
+			//compute_signal_pre	
+			for(int i=0; i<mydirs; i++){
+				cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre]));   
+				cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre]));
+				old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre];
+				angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
+		 	   	angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre];
+
+				compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model);
+			}
 
-					compute_signal(&mysig[i*NFIBRES+fibre],&mysigold[i*NFIBRES+fibre],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+fibre]);
+			if (leader){	
+				//for likehood and reject_f_sum
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
-				if (leader){
-					
-					//for likehood and reject_f_sum
-					fsum=m_f0;
-					for(int g=0;g<NFIBRES;g++){  
-						fsum+=m_f[g];
-					}
-
-					//compute_prior()
-					compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
-				
-				}
+				//compute_prior()
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);	
+			}
 
-				__syncthreads();
+			__syncthreads();
 
-				//compute_likelihood()
-				compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			//compute_likelihood()
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
-				__syncthreads();
+			__syncthreads();
 
-				if (leader){ 
-					//compute_energy()
-					m_old_energy=m_energy;
-	      				m_energy=m_prior_en+m_likelihood_en;
+			if (leader){ 
+				//compute_energy()
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
-					//test_energy
-					tmp=exp(double(m_old_energy-m_energy));
-					rejflag2=(tmp>prerandU[2+(fibre*3)+add+add2+add3]);
-					
-				}
+				//test_energy
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;					
+			}
 
-				__syncthreads();
+			__syncthreads();
 			
-					if(rejflag2){
-						//accept_th
-		  				if (leader) m_th_acc[fibre]++;   
-					}else{
+			if(rejflag[1]){
+				//accept_th
+		  		if (leader) m_th_acc[fibre]++;   
+				}else{
 
-						if (leader){
-						//reject_th()
-							m_th[fibre]=old1;
-							m_th_prior[fibre]=old2;
-      							fm_prior_en[fibre]=fm_old_prior_en;
-							m_th_rej[fibre]++;						
+				if (leader){
+					//reject_th()
+					m_th[fibre]=old[0];
+					m_th_prior[fibre]=old[1];
+      					fm_prior_en[fibre]=*fm_old_prior_en;
+					m_th_rej[fibre]++;						
 						
-							//restore_energy()
-							m_prior_en=m_old_prior_en;
-	      						m_energy=m_old_energy;
-
-						}
+					//restore_energy()
+					*m_prior_en=*m_old_prior_en;
+	      				*m_energy=*m_old_energy;
+				}
 
-						//compute_signal_pre undo
-						for(int i=0; i<ndirs; i++){
-							angtmp[i*NFIBRES+fibre]=old_angtmp[i*NFIBRES+fibre];
-							mysig[i*NFIBRES+fibre] = mysigold[i*NFIBRES+fibre];						
-      						}
-					}
-				__syncthreads();
+				//compute_signal_pre undo
+				for(int i=0; i<mydirs; i++){
+					angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre];
+					mysig[i*nfib+fibre] = mysigold[i*nfib+fibre];						
+      				}
+			}
+			__syncthreads();
 		
 ///////////////////////////////////////     PH
 
-				if (leader){
-
-					//propose_ph()
-					old1=m_ph[fibre];
-					m_ph[fibre]+=prerandN[3+(fibre*3)+add+add2+add3]*m_ph_prop[fibre];
-
-					//compute_ph_prior()
-					old2=m_ph_prior[fibre];
-      					m_ph_prior[fibre]=0;
-      					//rejflag=false;
+			if (leader){
+				//propose_ph()
+				old[0]=m_ph[fibre];
+				m_ph[fibre]+=prerandN[*localrand]*m_ph_prop[fibre];
 
-					//compute_prior()
-					fm_old_prior_en=fm_prior_en[fibre];
-      	   				fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
+				//compute_ph_prior()
+				old[1]=m_ph_prior[fibre];
+      				m_ph_prior[fibre]=0;
+      				//rejflag[0]=false;
 
-				}
+				//compute_prior()
+				*fm_old_prior_en=fm_prior_en[fibre];
+      	   			fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
+			}
 
-				__syncthreads();
+			__syncthreads();
 				
-				//compute_signal()
-				//compute_signal_pre
-				for(int i=0; i<ndirs; i++){
-					cos_alpha_minus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]-m_th[fibre]));   
-			  		cos_alpha_plus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]+m_th[fibre]));
-					old_angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre];
-		     	   		angtmp[i*NFIBRES+fibre]= (cos(double(m_ph[fibre]-beta[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
-		 	   		angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre]*angtmp[i*NFIBRES+fibre];
+			//compute_signal()
+			//compute_signal_pre
+			for(int i=0; i<mydirs; i++){
+				cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre]));   
+			  	cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre]));
+				old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre];
+		     	   	angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
+		 	   	angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre];
 	
-					compute_signal(&mysig[i*NFIBRES+fibre],&mysigold[i*NFIBRES+fibre],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+fibre]);
-				}
-
-				if (leader){
-					//for likehood and reject_f_sum
-					fsum=m_f0;
-					for(int g=0;g<NFIBRES;g++){  
-						fsum+=m_f[g];
-					}
+				compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model);
+			}
 
-					//compute_prior()
-					compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+			if (leader){
+				//for likehood and reject_f_sum
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
-				__syncthreads();
+				//compute_prior()
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
+			}
 
-				//compute_likelihood()
-				compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			__syncthreads();
 
+			//compute_likelihood()
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
-				__syncthreads();
 
-				if (leader){
-					//compute_energy()
-					m_old_energy=m_energy;
-	      				m_energy=m_prior_en+m_likelihood_en;
+			__syncthreads();
 
-					//test_energy
-					tmp=exp(double(m_old_energy-m_energy));
-					rejflag2=(tmp>prerandU[3+(fibre*3)+add+add2+add3]);
-				}
+			if (leader){
+				//compute_energy()
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
-					__syncthreads();
+				//test_energy
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;
+			}
 
-				//if(!rejflag){
-		        
-					if(rejflag2){
-						//accept_ph()
-		  				if (leader) m_ph_acc[fibre]++;   
-					}else{
-						if (leader){
-						//reject_ph()
-							m_ph[fibre]=old1;
-							m_ph_prior[fibre]=old2;
-      							fm_prior_en[fibre]=fm_old_prior_en;
-							m_ph_rej[fibre]++;						
+			__syncthreads();
+
+			//if(!rejflag[0]){
+			if(rejflag[1]){
+				//accept_ph()
+		  		if (leader) m_ph_acc[fibre]++;   
+			}else{
+				if (leader){
+					//reject_ph()
+					m_ph[fibre]=old[0];
+					m_ph_prior[fibre]=old[1];
+      					fm_prior_en[fibre]=*fm_old_prior_en;
+					m_ph_rej[fibre]++;						
 						
-							//restore_energy()
-							m_prior_en=m_old_prior_en;
-	      						m_energy=m_old_energy;
-						}
-						//compute_signal_pre undo
-						for(int i=0; i<ndirs; i++){
-							angtmp[i*NFIBRES+fibre]=old_angtmp[i*NFIBRES+fibre];
+					//restore_energy()
+					*m_prior_en=*m_old_prior_en;
+	      				*m_energy=*m_old_energy;
+				}
+				//compute_signal_pre undo
+				for(int i=0; i<mydirs; i++){
+					angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre];
 
-							mysig[i*NFIBRES+fibre] = mysigold[i*NFIBRES+fibre];	
-						}
-					}
+					mysig[i*nfib+fibre] = mysigold[i*nfib+fibre];	
+				}
+			}
 
-				__syncthreads();
-			
+			__syncthreads();
 
 ////////////////////////////////////////////             F
 
-				if (leader){
-					//propose_f()
-					old1=m_f[fibre];
-					m_f[fibre]+=prerandN[4+(fibre*3)+add+add2+add3]*m_f_prop[fibre];
-
-	     				//compute_f_prior()
-	        			old2=m_f_prior[fibre];
-					if (m_f[fibre]<=0 || m_f[fibre]>=1) rejflag=true;
-	        			else{
-		      				if(!can_use_ard ){
-		  					m_f_prior[fibre]=0;
-						}else{
-		  					if(m_lam_jump[fibre]){
-								m_f_prior[fibre]=log(double(m_f[fibre]));
-							}else{
-		    						m_f_prior[fibre]=0;
-		  					}
-						}
-						m_f_prior[fibre]=fudgevalue*m_f_prior[fibre];
-						rejflag=false;
-	      				}
+			if (leader){
+				//propose_f()
+				old[0]=m_f[fibre];
+				m_f[fibre]+=prerandN[*localrand]*m_f_prop[fibre];
 
-					//compute_prior()
-					fm_old_prior_en=fm_prior_en[fibre];
-      	   				fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
-					
-					
-					//for likehood and reject_f_sum
-					fsum=m_f0;
-					for(int g=0;g<NFIBRES;g++){  
-						fsum+=m_f[g];
+	     			//compute_f_prior()
+	        		old[1]=m_f_prior[fibre];
+				if (m_f[fibre]<=0 || m_f[fibre]>=1) rejflag[0]=true;
+	        		else{
+		      			if(!can_use_ard ){
+		  				m_f_prior[fibre]=0;
+					}else{
+		  				if(m_lam_jump[fibre]){
+							m_f_prior[fibre]=log(double(m_f[fibre]));
+						}else{
+		    					m_f_prior[fibre]=0;
+		  				}
 					}
-					//reject_f_sum()
-					rejflag2=(fsum>1);
+					m_f_prior[fibre]=fudgevalue*m_f_prior[fibre];
+					rejflag[0]=false;
+	      			}
 
-					//compute_prior()
-					compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);	
+				//compute_prior()
+				*fm_old_prior_en=fm_prior_en[fibre];
+      	   			fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
+						
+				//for likehood and reject_f_sum
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
+				//reject_f_sum()
+				rejflag[1]=(*fsum>1);
+
+				//compute_prior()
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);	
+			}
 
-				__syncthreads();
+			__syncthreads();
 
-				//compute_likelihood()
-				compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);	
+			//compute_likelihood()
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);	
 
-				__syncthreads();
+			__syncthreads();
 
+			if (leader){
+				//compute_energy()
+				*m_old_energy=*m_energy;
+		      		*m_energy=*m_prior_en+*m_likelihood_en;
 
-				if (leader){
-					//compute_energy()
-					m_old_energy=m_energy;
-		      			m_energy=m_prior_en+m_likelihood_en;
-
-					//test_energy
-					tmp=exp(double(m_old_energy-m_energy));
-					rejflag3=(tmp>prerandU[4+(fibre*3)+add+add2+add3]);	
-
-		      			if(!rejflag){
-						if(!rejflag2){
-							if(rejflag3){
-								//accept_f()
-			  					m_f_acc[fibre]++;   
-							}else{
-								//reject_f()
-								m_f[fibre]=old1;
-								m_f_prior[fibre]=old2;
-	      							fm_prior_en[fibre]=fm_old_prior_en;
-	      							m_f_rej[fibre]++;				
-						
-								//restore_energy()
-								m_prior_en=m_old_prior_en;
-		      						m_energy=m_old_energy;
-							}
-						}else{ 
-							//reject_f()
-							m_f[fibre]=old1;
-							m_f_prior[fibre]=old2;
-	      						fm_prior_en[fibre]=fm_old_prior_en;
-	      						m_f_rej[fibre]++;
+				//test_energy
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[2]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;			
 
+		      		if(!rejflag[0]){
+					if(!rejflag[1]){
+						if(rejflag[2]){
+							//accept_f()
+			  				m_f_acc[fibre]++;   
+						}else{
+							//reject_f()
+							m_f[fibre]=old[0];
+							m_f_prior[fibre]=old[1];
+	      						fm_prior_en[fibre]=*fm_old_prior_en;
+	      						m_f_rej[fibre]++;				
+					
 							//restore_energy()
-							m_prior_en=m_old_prior_en;
-		      					m_energy=m_old_energy;	
+							*m_prior_en=*m_old_prior_en;
+		      					*m_energy=*m_old_energy;
 						}
-					}else{
+					}else{ 
 						//reject_f()
-						m_f[fibre]=old1;
-						m_f_prior[fibre]=old2;
-		      				fm_prior_en[fibre]=fm_old_prior_en;
-		      				m_f_rej[fibre]++;
+						m_f[fibre]=old[0];
+						m_f_prior[fibre]=old[1];
+	      					fm_prior_en[fibre]=*fm_old_prior_en;
+	      					m_f_rej[fibre]++;
 
 						//restore_energy()
-						m_prior_en=m_old_prior_en;
-		      				m_energy=m_old_energy;
+						*m_prior_en=*m_old_prior_en;
+		      				*m_energy=*m_old_energy;	
 					}
-				}
-				__syncthreads();
-			
-
-        		}//end while NFIBRES
-
-        		count_update++;
-
-        		if(((count_update%updateproposalevery)==0)&&leader){
-				//m_multifibre.update_proposals();
-				m_d_prop*=sqrt(float(m_d_acc+1)/float(m_d_rej+1));
-				m_d_prop=min(m_d_prop,maxfloat);
-
-				if(rician){
-					m_tau_prop*=sqrt(float(m_tau_acc+1)/float(m_tau_rej+1));
-					m_tau_prop=min(m_tau_prop,maxfloat);
-					m_tau_acc=0; 
-					m_tau_rej=0;	
-				}
-
-				if(m_include_f0){
-					m_f0_prop*=sqrt(float(m_f0_acc+1)/float(m_f0_rej+1));
-					m_f0_prop=min(m_f0_prop,maxfloat);
-					m_f0_acc=0; 
-					m_f0_rej=0;	
-				}	
-
-				if(model==2){
-					m_dstd_prop*=sqrt(float(m_dstd_acc+1)/float(m_dstd_rej+1));
-					m_dstd_prop=min(m_dstd_prop,maxfloat);
-					m_dstd_acc=0; 
-					m_dstd_rej=0;	
-				}
+				}else{
+					//reject_f()
+					m_f[fibre]=old[0];
+					m_f_prior[fibre]=old[1];
+		      			fm_prior_en[fibre]=*fm_old_prior_en;
+		      			m_f_rej[fibre]++;
 
-				m_S0_prop*=sqrt(float(m_S0_acc+1)/float(m_S0_rej+1));
-				m_S0_prop=min(m_S0_prop,maxfloat);
-				m_d_acc=0; 
-				m_d_rej=0;
-				m_S0_acc=0; 
-				m_S0_rej=0;
-				for(int f=0; f<NFIBRES;f++){
-					//m_fibres[f].update_proposals();
-					m_th_prop[f]*=sqrt(float(m_th_acc[f]+1)/float(m_th_rej[f]+1));
-			      		m_th_prop[f]=min(m_th_prop[f],maxfloat);
-			      		m_ph_prop[f]*=sqrt(float(m_ph_acc[f]+1)/float(m_ph_rej[f]+1));
-			      		m_ph_prop[f]=min(m_ph_prop[f],maxfloat);
-			      		m_f_prop[f]*=sqrt(float(m_f_acc[f]+1)/float(m_f_rej[f]+1));
-			      		m_f_prop[f]=min(m_f_prop[f],maxfloat);
-			      
-			      		m_th_acc[f]=0; 
-			      		m_th_rej[f]=0;
-			      		m_ph_acc[f]=0; 
-			      		m_ph_rej[f]=0;
-			      		m_f_acc[f]=0; 
-			      		m_f_rej[f]=0;
+					//restore_energy()
+					*m_prior_en=*m_old_prior_en;
+		      			*m_energy=*m_old_energy;
 				}
 			}
-
 			__syncthreads();	
 
-        	} //end while iterations
+        	}//end while nfib
 
-		if(leader){
-			multifibres[idmult].m_prior_en=m_prior_en;
-			multifibres[idmult].m_d=m_d;
-			multifibres[idmult].m_energy=m_energy;		
-			multifibres[idmult].m_d_prop=m_d_prop;
-			multifibres[idmult].m_d_prior=m_d_prior;
+        	if(((*count_update%updateproposalevery)==0)&&leader){
+			//m_multifibre.update_proposals();
+			*m_d_prop*=sqrt(float(*m_d_acc+1)/float(*m_d_rej+1));
+			*m_d_prop=min(*m_d_prop,maxfloat);
 
-			for(int f=0;f<NFIBRES;f++){
-				fibres[(idmult*NFIBRES)+f].m_th=m_th[f];
-				fibres[(idmult*NFIBRES)+f].m_ph=m_ph[f];
-				fibres[(idmult*NFIBRES)+f].m_f=m_f[f];
+			if(rician){
+				*m_tau_prop*=sqrt(float(*m_tau_acc+1)/float(*m_tau_rej+1));
+				*m_tau_prop=min(*m_tau_prop,maxfloat);
+				*m_tau_acc=0; 
+				*m_tau_rej=0;	
 			}
 
-			multifibres[idmult].m_S0_prior=m_S0_prior;
-			multifibres[idmult].m_S0=m_S0;
-			multifibres[idmult].m_likelihood_en=m_likelihood_en;
-			multifibres[idmult].m_d_acc=m_d_acc;
-			multifibres[idmult].m_d_rej=m_d_rej;
-			multifibres[idmult].m_S0_acc=m_S0_acc;
-			multifibres[idmult].m_S0_rej=m_S0_rej;
-			multifibres[idmult].m_S0_prop=m_S0_prop;
-
 			if(m_include_f0){
-				multifibres[idmult].m_f0_prior=m_f0_prior;
-				multifibres[idmult].m_f0=m_f0;
-				multifibres[idmult].m_f0_acc=m_f0_acc;
-				multifibres[idmult].m_f0_rej=m_f0_rej;
-				multifibres[idmult].m_f0_prop=m_f0_prop;
-			}
-			if(rician){
-				multifibres[idmult].m_tau_prior=m_tau_prior;
-				multifibres[idmult].m_tau=m_tau;
-				multifibres[idmult].m_tau_acc=m_tau_acc;
-				multifibres[idmult].m_tau_rej=m_tau_rej;
-				multifibres[idmult].m_tau_prop=m_tau_prop;
-			}
+				*m_f0_prop*=sqrt(float(*m_f0_acc+1)/float(*m_f0_rej+1));
+				*m_f0_prop=min(*m_f0_prop,maxfloat);
+				*m_f0_acc=0; 
+				*m_f0_rej=0;	
+			}	
+
 			if(model==2){
-				multifibres[idmult].m_dstd_prior=m_dstd_prior;
-				multifibres[idmult].m_dstd=m_dstd;
-				multifibres[idmult].m_dstd_acc=m_dstd_acc;
-				multifibres[idmult].m_dstd_rej=m_dstd_rej;
-				multifibres[idmult].m_dstd_prop=m_dstd_prop;
+				*m_dstd_prop*=sqrt(float(*m_dstd_acc+1)/float(*m_dstd_rej+1));
+				*m_dstd_prop=min(*m_dstd_prop,maxfloat);
+				*m_dstd_acc=0; 
+				*m_dstd_rej=0;	
 			}
-	
-			for(int f=0;f<NFIBRES;f++){
-				fibres[(idmult*NFIBRES)+f].m_th_acc=m_th_acc[f];
-				fibres[(idmult*NFIBRES)+f].m_th_rej=m_th_rej[f];
-				fibres[(idmult*NFIBRES)+f].m_ph_acc=m_ph_acc[f];
-				fibres[(idmult*NFIBRES)+f].m_ph_rej=m_ph_rej[f];
-				fibres[(idmult*NFIBRES)+f].m_f_acc=m_f_acc[f];
-				fibres[(idmult*NFIBRES)+f].m_f_rej=m_f_rej[f];
-
-				fibres[(idmult*NFIBRES)+f].m_prior_en=fm_prior_en[f];
-				fibres[(idmult*NFIBRES)+f].m_th_prior=m_th_prior[f];
-				fibres[(idmult*NFIBRES)+f].m_ph_prior=m_ph_prior[f];
-				fibres[(idmult*NFIBRES)+f].m_f_prior=m_f_prior[f];
 
-				fibres[(idmult*NFIBRES)+f].m_th_prop=m_th_prop[f];
-				fibres[(idmult*NFIBRES)+f].m_ph_prop=m_ph_prop[f];
-				fibres[(idmult*NFIBRES)+f].m_f_prop=m_f_prop[f];
-
-				fibres[(idmult*NFIBRES)+f].m_lam_jump=m_lam_jump[f];		
+			*m_S0_prop*=sqrt(float(*m_S0_acc+1)/float(*m_S0_rej+1));
+			*m_S0_prop=min(*m_S0_prop,maxfloat);
+			*m_d_acc=0; 
+			*m_d_rej=0;
+			*m_S0_acc=0; 
+			*m_S0_rej=0;
+			for(int f=0; f<nfib;f++){
+				//m_fibres[f].update_proposals();
+				m_th_prop[f]*=sqrt(float(m_th_acc[f]+1)/float(m_th_rej[f]+1));
+				m_th_prop[f]=min(m_th_prop[f],maxfloat);
+		      		m_ph_prop[f]*=sqrt(float(m_ph_acc[f]+1)/float(m_ph_rej[f]+1));
+		      		m_ph_prop[f]=min(m_ph_prop[f],maxfloat);
+		      		m_f_prop[f]*=sqrt(float(m_f_acc[f]+1)/float(m_f_rej[f]+1));
+		      		m_f_prop[f]=min(m_f_prop[f],maxfloat);
+			      
+		      		m_th_acc[f]=0; 
+		      		m_th_rej[f]=0;
+		      		m_ph_acc[f]=0; 
+		      		m_ph_rej[f]=0;
+		      		m_f_acc[f]=0; 
+		      		m_f_rej[f]=0;
 			}
 		}
 
-		for(int i=0; i<ndirs; i++){	
-			isosignals[(idmult*NDIRECTIONS)+idrest+i*THREADS_BLOCK] = myisosig[i];
+		__syncthreads();	
 
-			for(int f=0;f<NFIBRES;f++){
-				signals[(idmult*NDIRECTIONS*NFIBRES)+(f*NDIRECTIONS)+idrest+i*THREADS_BLOCK]=mysig[i*NFIBRES+f];
-			}
+        } //end while iterations
+
+	if(leader){
+		multifibres[*idVOX].m_S0=*m_S0;
+		multifibres[*idVOX].m_S0_prior=*m_S0_prior;
+		multifibres[*idVOX].m_S0_prop=*m_S0_prop;
+		multifibres[*idVOX].m_S0_acc=*m_S0_acc;
+		multifibres[*idVOX].m_S0_rej=*m_S0_rej;
+
+		multifibres[*idVOX].m_d=*m_d;
+		multifibres[*idVOX].m_d_prior=*m_d_prior;
+		multifibres[*idVOX].m_d_prop=*m_d_prop;
+		multifibres[*idVOX].m_d_acc=*m_d_acc;
+		multifibres[*idVOX].m_d_rej=*m_d_rej;
+	
+		multifibres[*idVOX].m_prior_en=*m_prior_en;
+		multifibres[*idVOX].m_energy=*m_energy;		if(*idVOX==2){ if(*idVOX==2){ printf("BURIN FIN ENERGY: %.20f\n",*m_energy);} printf("GUARDO BURIN ENERGY: %.20f\n",multifibres[*idVOX].m_energy);}
+		multifibres[*idVOX].m_likelihood_en=*m_likelihood_en;
+
+		if(m_include_f0){
+			multifibres[*idVOX].m_f0_prior=*m_f0_prior;
+			multifibres[*idVOX].m_f0=*m_f0;
+			multifibres[*idVOX].m_f0_acc=*m_f0_acc;
+			multifibres[*idVOX].m_f0_rej=*m_f0_rej;
+			multifibres[*idVOX].m_f0_prop=*m_f0_prop;
+		}
+		if(rician){
+			multifibres[*idVOX].m_tau_prior=*m_tau_prior;
+			multifibres[*idVOX].m_tau=*m_tau;
+			multifibres[*idVOX].m_tau_acc=*m_tau_acc;
+			multifibres[*idVOX].m_tau_rej=*m_tau_rej;
+			multifibres[*idVOX].m_tau_prop=*m_tau_prop;
+		}
+		if(model==2){
+			multifibres[*idVOX].m_dstd_prior=*m_dstd_prior;
+			multifibres[*idVOX].m_dstd=*m_dstd;
+			multifibres[*idVOX].m_dstd_acc=*m_dstd_acc;
+			multifibres[*idVOX].m_dstd_rej=*m_dstd_rej;
+			multifibres[*idVOX].m_dstd_prop=*m_dstd_prop;
 		}
+	}
 	
+	if(idSubVOX<nfib){
+		int pos = (*idVOX*nfib)+idSubVOX;
+	
+		fibres[pos].m_th=m_th[idSubVOX];
+		fibres[pos].m_ph=m_ph[idSubVOX];
+		fibres[pos].m_f=m_f[idSubVOX];
+
+		fibres[pos].m_th_acc=m_th_acc[idSubVOX];
+		fibres[pos].m_th_rej=m_th_rej[idSubVOX];
+		fibres[pos].m_ph_acc=m_ph_acc[idSubVOX];
+		fibres[pos].m_ph_rej=m_ph_rej[idSubVOX];
+		fibres[pos].m_f_acc=m_f_acc[idSubVOX];
+		fibres[pos].m_f_rej=m_f_rej[idSubVOX];
+
+		fibres[pos].m_prior_en=fm_prior_en[idSubVOX];
+		fibres[pos].m_th_prior=m_th_prior[idSubVOX];
+		fibres[pos].m_ph_prior=m_ph_prior[idSubVOX];
+		fibres[pos].m_f_prior=m_f_prior[idSubVOX];
+
+		fibres[pos].m_th_prop=m_th_prop[idSubVOX];
+		fibres[pos].m_ph_prop=m_ph_prop[idSubVOX];
+		fibres[pos].m_f_prop=m_f_prop[idSubVOX];
+
+		fibres[pos].m_lam_jump=m_lam_jump[idSubVOX];		
+	}
+
+	for(int i=0; i<mydirs; i++){	
+		isosignals[(*idVOX*ndirections)+idSubVOX+i*threadsBlock] = myisosig[i];
+
+		for(int f=0;f<nfib;f++){
+			signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock]=mysig[i*nfib+f];
+		}
+	}
 }
 
 extern "C" __global__ void runmcmc_record_kernel(	//INPUT 
@@ -1548,7 +1549,9 @@ extern "C" __global__ void runmcmc_record_kernel(	//INPUT
 							double*				isosignals,
 							float*				randomsN,
 							float*				randomsU,
-							const int 			nvox,
+							const int			ndirections,
+							const int			nfib,
+							const int			nparams,
 							const int 			model,
 							const float 			fudgevalue,
 							const bool 			m_include_f0,
@@ -1556,14 +1559,14 @@ extern "C" __global__ void runmcmc_record_kernel(	//INPUT
 							const bool 			can_use_ard, 
 							const bool 			rician,
 							const int 			updateproposalevery, 	//update every this number of iterations	
-							const int 			iterations,		//num of iterations to do this time (maybe are a part of the total)	
+							const int 			iterations,		//num of iterations to do this time (maybe is a part of the total)	
 							const int 			current_iter,		//the number of the current iteration over the total iterations
 							const int 			iters_burnin,		//iters in burin, we need it to continue the updates at the correct time. 
 							const int 			record_every, 		//record every this number
 							const int 			totalrecords,		//total number of records to do
 							
 							//OUTPUT
-							int*			multirecords,		//id for each sample in each voxel
+							int*				multirecords,		//id for each sample in each voxel
 							float*				rf0,			//records od parameters
 							float*				rtau,
 							float*				rs0,
@@ -1575,513 +1578,536 @@ extern "C" __global__ void runmcmc_record_kernel(	//INPUT
 							float*				rlikelihood_energy)
 
 {
-	int add=0;
-	int add2=0;
-	int add3=0;
-
-	if(m_include_f0) add=1;
-	if(rician) add2=1;
-	if(model==2) add3=1;
-
-	int idmult= blockIdx.x;
-	int idrest= threadIdx.x;
-	bool leader = (idrest==0);	
-
-	int ndirs = 1;
-	if(idrest<(NDIRECTIONS%THREADS_BLOCK)) ndirs=MAXNDIRS_PER_THREAD;
-	else if((NDIRECTIONS%THREADS_BLOCK)==0) ndirs=MAXNDIRS_PER_THREAD;
-	else ndirs=(MAXNDIRS_PER_THREAD-1);
-
-	if(idrest>=NDIRECTIONS) { return; }
-
-	__shared__ bool rejflag;
-	__shared__ bool rejflag2;
-	__shared__ bool rejflag3;
-	__shared__ float fsum;
-	__shared__ double reduction[THREADS_BLOCK];
-	__shared__ float m_d;
-	__shared__ float m_S0;	
-	__shared__ float m_f0;
-	__shared__ float m_tau;
-	__shared__ float m_dstd;
-	__shared__ float m_th[NFIBRES];
-	__shared__ float m_ph[NFIBRES];
-	__shared__ float m_f[NFIBRES];
-	__shared__ float m_likelihood_en;
-
-	__shared__ double tmp;
-
-	double mysig[MAXNDIRS_PER_THREAD*NFIBRES];			
-	double mysigold[MAXNDIRS_PER_THREAD*NFIBRES];			
+	int idSubVOX= threadIdx.x;
+	int threadsBlock = blockDim.x;
+	bool leader = (idSubVOX==0);
+
+	////////// DYNAMIC SHARED MEMORY ///////////
+	extern __shared__ double shared[];
+	double* reduction = (double*)shared;				//threadsBlock
+	double* tmp = (double*) &reduction[threadsBlock];		//1
+
+	float* m_S0 = (float*) &tmp[1];					//1
+	float* m_d = (float*) &m_S0[1];					//1
+	float* m_dstd =(float*) &m_d[1];				//1
+	float* m_f0 = (float*) &m_dstd[1];				//1
+	float* m_tau = (float*) &m_f0[1];				//1
+	float* m_th = (float*) &m_tau[1];				//nfib
+	float* m_ph = (float*) &m_th[nfib];				//nfib
+	float* m_f = (float*) &m_ph[nfib];				//nfib
+
+	float* m_S0_prior = (float*) &m_f[nfib];			//1
+	float* m_d_prior = (float*) &m_S0_prior[1];			//1
+	float* m_dstd_prior = (float*) &m_d_prior[1];			//1
+	float* m_f0_prior = (float*) &m_dstd_prior[1];			//1
+	float* m_tau_prior = (float*) &m_f0_prior[1];			//1
+	float* m_th_prior = (float*) &m_tau_prior[1];			//nfib
+	float* m_ph_prior = (float*) &m_th_prior[nfib];			//nfib
+	float* m_f_prior = (float*) &m_ph_prior[nfib];			//nfib
+
+	float* m_S0_prop = (float*) &m_f_prior[nfib];			//1
+	float* m_d_prop = (float*) &m_S0_prop[1];			//1
+	float* m_dstd_prop = (float*) &m_d_prop[1];			//1
+	float* m_f0_prop = (float*) &m_dstd_prop[1];			//1
+	float* m_tau_prop = (float*) &m_f0_prop[1];			//1
+	float* m_th_prop = (float*) &m_tau_prop[1];			//nfib
+	float* m_ph_prop = (float*) &m_th_prop[nfib];			//nfib
+	float* m_f_prop = (float*) &m_ph_prop[nfib];			//nfib
+
+	float* fsum = (float*) &m_f_prop[nfib];				//1
+	float* m_likelihood_en = (float*) &fsum[1];			//1
+	float* m_prior_en = (float*) &m_likelihood_en[1];		//1
+	float* m_old_prior_en = (float*) &m_prior_en[1];		//1
+	float* fm_prior_en = (float*) &m_old_prior_en[1];		//nfib		
+	float* fm_old_prior_en = (float*) &fm_prior_en[nfib];		//1		
+	float* m_energy = (float*) &fm_old_prior_en[1];			//1
+	float* m_old_energy = (float*) &m_energy[1];			//1
+	float* old = (float*) &m_old_energy[1];				//2
+
+	float* prerandN = (float*) &old[2];				//nparams
+	float* prerandU = (float*) &prerandN[nparams];			//nparams
+
+	int* m_S0_acc = (int*) &prerandU[nparams];			//1
+	int* m_d_acc = (int*) &m_S0_acc[1];				//1
+	int* m_dstd_acc = (int*) &m_d_acc[1];				//1
+	int* m_f0_acc = (int*) &m_dstd_acc[1];				//1
+	int* m_tau_acc = (int*) &m_f0_acc[1];				//1
+	int* m_th_acc = (int*) &m_tau_acc[1];				//nfib
+	int* m_ph_acc = (int*) &m_th_acc[nfib];				//nfib
+	int* m_f_acc = (int*) &m_ph_acc[nfib];				//nfib
+
+	int* m_S0_rej = (int*) &m_f_acc[nfib];				//1
+	int* m_d_rej = (int*) &m_S0_rej[1];				//1
+	int* m_dstd_rej = (int*) &m_d_rej[1];				//1	
+	int* m_f0_rej = (int*) &m_dstd_rej[1];				//1
+	int* m_tau_rej = (int*) &m_f0_rej[1];				//1	
+	int* m_th_rej = (int*) &m_tau_rej[1];				//nfib
+	int* m_ph_rej = (int*) &m_th_rej[nfib];				//nfib
+	int* m_f_rej = (int*) &m_ph_rej[nfib];				//nfib
+	
+	int* rejflag = (int*) &m_f_rej[nfib];				//3					
+	int* m_lam_jump = (int*) &rejflag[3];				//nfib	
+	int* idVOX = (int*) &m_lam_jump[nfib];				//1		
+	int* count_update = (int*) &idVOX[1];				//1	
+	int* recordcount = (int*) &count_update[1];			//1
+	int* sample = (int*) &recordcount[1];				//1
+	int* localrand = (int*) &sample[1];				//1
+	////////// DYNAMIC SHARED MEMORY ///////////
+
+	double mysig[MAXNDIRS_PER_THREAD*MAXNFIBRES];			
+	double mysigold[MAXNDIRS_PER_THREAD*MAXNFIBRES];			
 	double myisosig[MAXNDIRS_PER_THREAD];			
-	double myisosigold[MAXNDIRS_PER_THREAD];		
-	__shared__ double mydata[MAXNDIRS_PER_THREAD*THREADS_BLOCK];				
-
-	__shared__ float old1;
-	__shared__ float old2;
+	double myisosigold[MAXNDIRS_PER_THREAD];				
+	double mydata[MAXNDIRS_PER_THREAD];
+	double mybvals[MAXNDIRS_PER_THREAD];	
+	double myalpha[MAXNDIRS_PER_THREAD];	
+	double mybeta[MAXNDIRS_PER_THREAD];	
  		
-	int idrand;
-	int count_update;
-	int recordcount;
-	int sample;
-
-	__shared__ float m_prior_en;
-	__shared__ float m_old_prior_en;
-
-	__shared__ float m_d_prior;	
-	__shared__ float m_S0_prior;
-	__shared__ float m_f0_prior;
-	__shared__ float m_tau_prior;
-	__shared__ float m_dstd_prior;
-
-	__shared__ float fm_prior_en[NFIBRES];
-	__shared__ float fm_old_prior_en;
-
-	__shared__ float m_th_prior[NFIBRES];
-	__shared__ float m_ph_prior[NFIBRES];
-	__shared__ float m_f_prior[NFIBRES];
-
-	__shared__ float m_energy;
-	__shared__ float m_old_energy;
-
-	__shared__ int m_d_acc;
-	__shared__ int m_d_rej;
-	
-	__shared__ int m_S0_acc;
-	__shared__ int m_S0_rej;
-
-	__shared__ int m_f0_acc;
-	__shared__ int m_f0_rej;
-	__shared__ int m_tau_acc;
-	__shared__ int m_tau_rej;
-	__shared__ int m_dstd_acc;
-	__shared__ int m_dstd_rej;
-
-	__shared__ int m_th_acc[NFIBRES]; 
-	__shared__ int m_th_rej[NFIBRES]; 
-	__shared__ int m_ph_acc[NFIBRES]; 
-	__shared__ int m_ph_rej[NFIBRES]; 
-	__shared__ int m_f_acc[NFIBRES]; 
-	__shared__ int m_f_rej[NFIBRES]; 
-		
-	__shared__ float m_d_prop;
-
-	__shared__ float m_S0_prop;
-	__shared__ float m_f0_prop;
-	__shared__ float m_tau_prop;
-	__shared__ float m_dstd_prop;
-
-	__shared__ float m_th_prop[NFIBRES];
-	__shared__ float m_ph_prop[NFIBRES];	
-	__shared__ float m_f_prop[NFIBRES];	
-
-	__shared__ bool m_lam_jump[NFIBRES];
-
-	__shared__ float prerandN[NPARAMS];
-	__shared__ float prerandU[NPARAMS];
-
-	float angtmp[MAXNDIRS_PER_THREAD*NFIBRES];	//for pre compute sigmal
-	float old_angtmp[MAXNDIRS_PER_THREAD*NFIBRES];
+	float angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES];			//this is to pre compute signal
+	float old_angtmp[MAXNDIRS_PER_THREAD*MAXNFIBRES];
 	float cos_alpha_minus_theta;
 	float cos_alpha_plus_theta;
 
 	if (leader){
-		idrand = idmult*iterations*NPARAMS;
-
-		count_update = current_iter + iters_burnin;	//count for updates
-		recordcount= current_iter;	
-		sample=1+(current_iter/record_every);		//the next number of sample.....the index start in 0
-
-		m_prior_en=multifibres[idmult].m_prior_en;
-		m_d=multifibres[idmult].m_d;
-		m_energy=multifibres[idmult].m_energy;
-		m_d_prop=multifibres[idmult].m_d_prop;
-		m_d_prior=multifibres[idmult].m_d_prior;
-		m_S0_prior=multifibres[idmult].m_S0_prior;
-		m_S0=multifibres[idmult].m_S0;
-		m_likelihood_en=multifibres[idmult].m_likelihood_en;
-		m_d_acc=multifibres[idmult].m_d_acc;
-		m_d_rej=multifibres[idmult].m_d_rej;
-		m_S0_acc=multifibres[idmult].m_S0_acc;
-		m_S0_rej=multifibres[idmult].m_S0_rej;
-		m_S0_prop=multifibres[idmult].m_S0_prop;
+		*idVOX= blockIdx.x;
+		*count_update = current_iter + iters_burnin;	//count for updates
+		*recordcount= current_iter;	
+		*sample=1+(current_iter/record_every);		//the next number of sample.....the index start in 0
+
+		*m_prior_en=multifibres[*idVOX].m_prior_en;
 
 		if(model==2){
-			m_dstd_acc=multifibres[idmult].m_dstd_acc;
-			m_dstd_rej=multifibres[idmult].m_dstd_rej;
-			m_dstd_prior=multifibres[idmult].m_dstd_prior;
-			m_dstd_prop=multifibres[idmult].m_dstd_prop;
-			m_dstd=multifibres[idmult].m_dstd;
+			*m_dstd_acc=multifibres[*idVOX].m_dstd_acc;
+			*m_dstd_rej=multifibres[*idVOX].m_dstd_rej;
+			*m_dstd_prior=multifibres[*idVOX].m_dstd_prior;
+			*m_dstd_prop=multifibres[*idVOX].m_dstd_prop;
+			*m_dstd=multifibres[*idVOX].m_dstd;
 		}else{
-			m_dstd_acc=0;
-			m_dstd_rej=0;
-			m_dstd_prior=0;
-			m_dstd_prop=0;
-			m_dstd=0;
+			*m_dstd_acc=0;
+			*m_dstd_rej=0;
+			*m_dstd_prior=0;
+			*m_dstd_prop=0;
+			*m_dstd=0;
 		}
-		
+	
+		*m_d=multifibres[*idVOX].m_d;
+		*m_energy=multifibres[*idVOX].m_energy;
+		*m_d_prop=multifibres[*idVOX].m_d_prop;
+		*m_d_prior=multifibres[*idVOX].m_d_prior;
+		*m_S0_prior=multifibres[*idVOX].m_S0_prior;
+		*m_S0=multifibres[*idVOX].m_S0;
+		*m_likelihood_en=multifibres[*idVOX].m_likelihood_en;
+		*m_d_acc=multifibres[*idVOX].m_d_acc;
+		*m_d_rej=multifibres[*idVOX].m_d_rej;
+		*m_S0_acc=multifibres[*idVOX].m_S0_acc;
+		*m_S0_rej=multifibres[*idVOX].m_S0_rej;
+		*m_S0_prop=multifibres[*idVOX].m_S0_prop;
+
 		if(m_include_f0){
-			m_f0_acc=multifibres[idmult].m_f0_acc;
-			m_f0_rej=multifibres[idmult].m_f0_rej;
-			m_f0_prop=multifibres[idmult].m_f0_prop;
-			m_f0_prior=multifibres[idmult].m_f0_prior;
-			m_f0=multifibres[idmult].m_f0;
-		}else{
-			m_f0_acc=0;
-			m_f0_rej=0;
-			m_f0_prop=0;
-			m_f0_prior=0;	
-			m_f0=0;
-		}		
+			*m_f0_acc=multifibres[*idVOX].m_f0_acc;
+			*m_f0_rej=multifibres[*idVOX].m_f0_rej;
+			*m_f0_prop=multifibres[*idVOX].m_f0_prop;
+			*m_f0_prior=multifibres[*idVOX].m_f0_prior;
+			*m_f0=multifibres[*idVOX].m_f0;
+		}else{ 
+			*m_f0_acc=0;
+			*m_f0_rej=0;
+			*m_f0_prop=0;
+			*m_f0_prior=0;
+			*m_f0=0;
+		}
+				
 		if(rician){
-			m_tau_acc=multifibres[idmult].m_tau_acc;
-			m_tau_rej=multifibres[idmult].m_tau_rej;
-			m_tau_prop=multifibres[idmult].m_tau_prop;
-			m_tau_prior=multifibres[idmult].m_tau_prior;
-			m_tau=multifibres[idmult].m_tau;
+			*m_tau_acc=multifibres[*idVOX].m_tau_acc;
+			*m_tau_rej=multifibres[*idVOX].m_tau_rej;
+			*m_tau_prop=multifibres[*idVOX].m_tau_prop;
+			*m_tau_prior=multifibres[*idVOX].m_tau_prior;
+			*m_tau=multifibres[*idVOX].m_tau;	
 		}else{ 
-			m_tau_acc=0;
-			m_tau_rej=0;
-			m_tau_prop=0;
-			m_tau_prior=0;
-			m_tau=0;
+			*m_tau_acc=0;
+			*m_tau_rej=0;
+			*m_tau_prop=0;
+			*m_tau_prior=0;
+			*m_tau=0;
 		}
+	}
+	__syncthreads();
 
-		for(int f=0;f<NFIBRES;f++){
-			m_th[f]=fibres[(idmult*NFIBRES)+f].m_th;
-			m_ph[f]=fibres[(idmult*NFIBRES)+f].m_ph;
-			m_f[f]=fibres[(idmult*NFIBRES)+f].m_f;
-			m_th_acc[f]=fibres[(idmult*NFIBRES)+f].m_th_acc;
-			m_th_rej[f]=fibres[(idmult*NFIBRES)+f].m_th_rej;
-			m_ph_acc[f]=fibres[(idmult*NFIBRES)+f].m_ph_acc;
-			m_ph_rej[f]=fibres[(idmult*NFIBRES)+f].m_ph_rej;
-			m_f_acc[f]=fibres[(idmult*NFIBRES)+f].m_f_acc;
-			m_f_rej[f]=fibres[(idmult*NFIBRES)+f].m_f_rej;
-
-			fm_prior_en[f]=fibres[(idmult*NFIBRES)+f].m_prior_en;
-			m_th_prior[f]=fibres[(idmult*NFIBRES)+f].m_th_prior;
-			m_ph_prior[f]=fibres[(idmult*NFIBRES)+f].m_ph_prior;
-			m_f_prior[f]=fibres[(idmult*NFIBRES)+f].m_f_prior;
-
-			m_th_prop[f]=fibres[(idmult*NFIBRES)+f].m_th_prop;
-			m_ph_prop[f]=fibres[(idmult*NFIBRES)+f].m_ph_prop;
-			m_f_prop[f]=fibres[(idmult*NFIBRES)+f].m_f_prop;
-
-			m_lam_jump[f]=fibres[(idmult*NFIBRES)+f].m_lam_jump;		
-		}
+	int mydirs = ndirections/threadsBlock;
+	int mod = ndirections%threadsBlock;
+	if(mod&&(idSubVOX<mod)) mydirs++;
+
+	if(idSubVOX<nfib){
+		int pos = (*idVOX*nfib)+idSubVOX;
+		m_th[idSubVOX]=fibres[pos].m_th;
+		m_ph[idSubVOX]=fibres[pos].m_ph;
+		m_f[idSubVOX]=fibres[pos].m_f;
+	
+		m_th_acc[idSubVOX]=fibres[pos].m_th_acc;
+		m_th_rej[idSubVOX]=fibres[pos].m_th_rej;
+		m_ph_acc[idSubVOX]=fibres[pos].m_ph_acc;
+		m_ph_rej[idSubVOX]=fibres[pos].m_ph_rej;
+		m_f_acc[idSubVOX]=fibres[pos].m_f_acc;
+		m_f_rej[idSubVOX]=fibres[pos].m_f_rej;
+
+		fm_prior_en[idSubVOX]=fibres[pos].m_prior_en;
+		m_th_prior[idSubVOX]=fibres[pos].m_th_prior;
+		m_ph_prior[idSubVOX]=fibres[pos].m_ph_prior;
+		m_f_prior[idSubVOX]=fibres[pos].m_f_prior;
+
+		m_th_prop[idSubVOX]=fibres[pos].m_th_prop;
+		m_ph_prop[idSubVOX]=fibres[pos].m_ph_prop;
+		m_f_prop[idSubVOX]=fibres[pos].m_f_prop;
+
+		m_lam_jump[idSubVOX]=fibres[pos].m_lam_jump;		
 	}
-			
-	for(int i=0; i<ndirs; i++){	
-		myisosig[i] = isosignals[(idmult*NDIRECTIONS)+idrest+i*THREADS_BLOCK];
-		mydata[idrest*MAXNDIRS_PER_THREAD+i] = datam[(idmult*NDIRECTIONS)+idrest+i*THREADS_BLOCK];
+
+	__syncthreads();
+
+	for(int i=0; i<mydirs; i++){	
+		int pos = (*idVOX*ndirections)+idSubVOX+i*threadsBlock;
+		myisosig[i] = isosignals[pos];
+		mydata[i] = datam[pos];
+		mybvals[i] = bvals[pos];
+		myalpha[i] = alpha[pos];
+		mybeta[i] = beta[pos];
 	}
 
-	for(int f=0;f<NFIBRES;f++){
-		for(int i=0; i<ndirs; i++){	
-			mysig[i*NFIBRES+f]= signals[(idmult*NDIRECTIONS*NFIBRES)+(f*NDIRECTIONS)+idrest+i*THREADS_BLOCK];
+	for(int f=0;f<nfib;f++){
+		for(int i=0; i<mydirs; i++){	
+			mysig[i*nfib+f]= signals[(*idVOX*ndirections*nfib)+(f*ndirections)+idSubVOX+i*threadsBlock];	
 		}
 	}
 
 	__syncthreads();
-
+	
 	//compute_signal_pre
-	for(int i=0; i<ndirs; i++){
-		for(int f=0;f<NFIBRES;f++){				
-			cos_alpha_minus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]-m_th[f]));   
-		  	cos_alpha_plus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]+m_th[f]));
-		   	angtmp[i*NFIBRES+f]= (cos(double(m_ph[f]-beta[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
-		 	angtmp[i*NFIBRES+f]=angtmp[i*NFIBRES+f]*angtmp[i*NFIBRES+f];
+	for(int i=0; i<mydirs; i++){
+		for(int f=0;f<nfib;f++){	
+			cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[f]));   
+			cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[f]));
+		     	angtmp[i*nfib+f]= (cos(double(m_ph[f]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
+		 	angtmp[i*nfib+f]=angtmp[i*nfib+f]*angtmp[i*nfib+f];
 		}
 	}
 
 	for (int niter=0; niter<iterations; niter++){
+		//code jump()
 
-	//code jump()
-
-	//prefetching randoms
-	if (leader){
-		if(m_include_f0){
-			prerandN[0]=randomsN[idrand+(niter*NPARAMS)];
-			prerandU[0]=randomsU[idrand+(niter*NPARAMS)];
-		}
-		if(rician){
-			prerandN[add]=randomsN[idrand+(niter*NPARAMS)+add];
-			prerandU[add]=randomsU[idrand+(niter*NPARAMS)+add];
-		}
-		prerandN[add+add2]=randomsN[idrand+(niter*NPARAMS)+add+add2];
-		prerandU[add+add2]=randomsU[idrand+(niter*NPARAMS)+add+add2];
-
-		if(model==2){
-			prerandN[1+add+add2]=randomsN[idrand+(niter*NPARAMS)+1+add+add2];
-			prerandU[1+add+add2]=randomsU[idrand+(niter*NPARAMS)+1+add+add2];
-		}
-
-		prerandN[1+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+1+add+add2+add3];
-		prerandU[1+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+1+add+add2+add3];
+		//prefetching randoms
+		if (leader){
+			*count_update=*count_update+1;
+			*recordcount=*recordcount+1;
+			int idrand = *idVOX*iterations*nparams+(niter*nparams);
+			*localrand = 0;
+			if(m_include_f0){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+			}
+			if(rician){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+			}
+			prerandN[*localrand]=randomsN[idrand];
+			prerandU[*localrand]=randomsU[idrand];
+			idrand++;
+			*localrand=*localrand+1;
 
-		for(int f=0;f<NFIBRES;f++){
-			prerandN[2+(f*3)+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+2+(f*3)+add+add2+add3];
-			prerandN[3+(f*3)+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+3+(f*3)+add+add2+add3];
-			prerandN[4+(f*3)+add+add2+add3]=randomsN[idrand+(niter*NPARAMS)+4+(f*3)+add+add2+add3];
+			if(model==2){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+			}
 
-			prerandU[2+(f*3)+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+2+(f*3)+add+add2+add3];
-			prerandU[3+(f*3)+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+3+(f*3)+add+add2+add3];
-			prerandU[4+(f*3)+add+add2+add3]=randomsU[idrand+(niter*NPARAMS)+4+(f*3)+add+add2+add3];		
+			prerandN[*localrand]=randomsN[idrand];
+			prerandU[*localrand]=randomsU[idrand];
+			idrand++;
+			*localrand=*localrand+1;
+
+			for(int f=0;f<nfib;f++){
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+
+				prerandN[*localrand]=randomsN[idrand];
+				prerandU[*localrand]=randomsU[idrand];
+				idrand++;
+				*localrand=*localrand+1;
+
+				prerandN[*localrand]=randomsN[idrand];					
+				prerandU[*localrand]=randomsU[idrand];	
+				idrand++;	
+				*localrand=*localrand+1;	
+			}
+			*localrand = 0;
 		}
-	}
 
 ////////////////////////////////////////////////////////////////// F0
 
 		if(m_include_f0){
 			if (leader){
 				//propose_f0
-				old1=m_f0;
-				m_f0+=prerandN[0]*m_f0_prop;
-					
+				old[0]=*m_f0;
+				*m_f0+=prerandN[*localrand]**m_f0_prop;
+				
 				//compute_f0_prior()     
-				old2=m_f0_prior;
-	      			if(m_f0<=0 || m_f0 >=1){ rejflag=true;
+				old[1]=*m_f0_prior;
+	      			if(*m_f0<=0 || *m_f0 >=1){ 
+					rejflag[0]=true;
 				}else{ 	
-					rejflag=false;
+					rejflag[0]=false;
 					if(!m_ardf0){
-						m_f0_prior=0;
+						*m_f0_prior=0;
 	      				}else{
-						m_f0_prior=log(double(m_f0));
+						*m_f0_prior=log(double(*m_f0));
 					}
 				}
-
+				
 				//for likehood and reject_f_sum
-				fsum=m_f0;
-				for(int g=0;g<NFIBRES;g++){  
-					fsum+=m_f[g];
+				*fsum=*m_f0;
+
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
 				//compute_prior()
-				compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
-
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
+				
 				//reject_f_sum()
-				rejflag2=(fsum>1);	
-			
+				rejflag[1]=(*fsum>1);
 			}
 
 			__syncthreads();
 
 			//compute_likelihood()
-			compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig, &mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
 			__syncthreads();
 
 			if (leader){
 				//compute_energy()
-				m_old_energy=m_energy;
-	      			m_energy=m_prior_en+m_likelihood_en;
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
 				//test_energy
-				tmp=exp(double(m_old_energy-m_energy));
-				rejflag3=(tmp>prerandU[0]);
-				
-				if(!rejflag){
-					if(!rejflag2){
-						if(rejflag3){
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[2]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;	
+
+				if(!rejflag[0]){
+					if(!rejflag[1]){
+						if(rejflag[2]){
 							//accept_f0()
-		  					m_f0_acc++;   
+		  					*m_f0_acc=*m_f0_acc+1;   
 						}else{
 							//reject_F0()
-							m_f0=old1;
-							m_f0_prior=old2;
-		      					m_prior_en=m_old_prior_en;
-		      					m_f0_rej++;
+							*m_f0=old[0];
+							*m_f0_prior=old[1];
+		      					*m_prior_en=*m_old_prior_en;
+		      					*m_f0_rej=*m_f0_rej+1;
 
 							//restore_energy()
-	      						m_energy=m_old_energy;
+      							*m_energy=*m_old_energy;
 						}
 					}else{ 
 						//reject_F0()
-						m_f0=old1;
-						m_f0_prior=old2;
-		      				m_prior_en=m_old_prior_en;
-		      				m_f0_rej++;
+						*m_f0=old[0];
+						*m_f0_prior=old[1];
+		      				*m_prior_en=*m_old_prior_en;
+		      				*m_f0_rej=*m_f0_rej+1;
 
 						//restore_energy()
-		      				m_energy=m_old_energy;
+		      				*m_energy=*m_old_energy;
 					}
 				}else{ 
 					//reject_F0()
-					m_f0=old1;
-					m_f0_prior=old2;
-		      			m_prior_en=m_old_prior_en;
-		      			m_f0_rej++;
+					*m_f0=old[0];
+					*m_f0_prior=old[1];
+		      			*m_prior_en=*m_old_prior_en;
+		      			*m_f0_rej=*m_f0_rej+1;
 
 					//restore_energy()
-		      			m_energy=m_old_energy;
+		      			*m_energy=*m_old_energy;
 				}
 			}
 		}
-
+			
 ////////////////////////////////////////////////////////////////// TAU
-
 		if(rician){
 			if (leader){
 				//propose_tau
-				old1=m_tau;
-				m_tau+=prerandN[add]*m_tau_prop;
+				old[0]=*m_tau; 
+				*m_tau+=prerandN[*localrand]**m_tau_prop;
 			
 				//compute_tau_prior()     
-				old2=m_tau_prior;
-	      			if(m_tau<=0){ rejflag=true;
+				old[1]=*m_tau_prior;
+	      			if(*m_tau<=0){ 
+					rejflag[0]=true;
 				}else{ 	
-					rejflag=false;
-					m_tau_prior=0;
+					rejflag[0]=false;
+					*m_tau_prior=0;
 				}
-				
+
 				//for likehood and reject_f_sum
-				fsum=m_f0;
-				for(int g=0;g<NFIBRES;g++){  
-					fsum+=m_f[g];
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
 				//compute_prior()
-				compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
 			}
+
 			__syncthreads();
 
 			//compute_likelihood()
-			compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig, &mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
 			__syncthreads();
 
-			if (leader){
+			if (leader){ 
 				//compute_energy()
-				m_old_energy=m_energy;
-	      			m_energy=m_prior_en+m_likelihood_en;
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
 				//test_energy
-				tmp=exp(double(m_old_energy-m_energy));
-				rejflag2=(tmp>prerandU[add]);	
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);	
+				*localrand=*localrand+1;
 				
-				if(!rejflag){
-					if(rejflag2){
+				if(!rejflag[0]){
+					if(rejflag[1]){
 						//accept_tau()
-		  				m_tau_acc++;   
+		  				*m_tau_acc=*m_tau_acc+1;   
 					}else{ 
 						//reject_tau()
-						m_tau=old1;
-						m_tau_prior=old2;
-		      				m_prior_en=m_old_prior_en;
-		      				m_tau_rej++;
-
+						*m_tau=old[0];
+						*m_tau_prior=old[1];
+		      				*m_prior_en=*m_old_prior_en;
+		      				*m_tau_rej=*m_tau_rej+1;
 						//restore_energy()
-		      				m_energy=m_old_energy;
+		      				*m_energy=*m_old_energy;
 					}
 				}else{ 
 					//reject_tau()
-					m_tau=old1;
-					m_tau_prior=old2;
-		      			m_prior_en=m_old_prior_en;
-		      			m_tau_rej++;
+					*m_tau=old[0];
+					*m_tau_prior=old[1];
+		      			*m_prior_en=*m_old_prior_en;
+		      			*m_tau_rej=*m_tau_rej+1;
 
 					//restore_energy()
-		      			m_energy=m_old_energy;
+		      			*m_energy=*m_old_energy;
 				}
-			}
+			}	
 		}
 
 ////////////////////////////////////////////////////////////////// D
 
 		if (leader){
-			//propose_d()	
-			old1=m_d;	
-			m_d+=prerandN[add+add2]*m_d_prop;
-
+			//propose_d()
+			old[0]=*m_d;	
+			*m_d+=prerandN[*localrand]**m_d_prop;
+				
 			//compute_d_prior_f0()      
-			old2=m_d_prior;
-      			if(m_d<0 || m_d > 0.008 ){ 
-				rejflag=true;
+			old[1]=*m_d_prior;
+					
+      			if(*m_d<0 || *m_d > 0.008){
+				rejflag[0]=true;
 			}else{ 
-				m_d_prior=0;
-				rejflag=false;
+				*m_d_prior=0;
+				rejflag[0]=false;
       			}
 		}
 
 		__syncthreads();
-
+				
 		//compute_signal()
-		for(int i=0; i<ndirs; i++){
-			for(int f=0;f<NFIBRES;f++){
-				compute_signal(&mysig[i*NFIBRES+f],&mysigold[i*NFIBRES+f],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+f]);
+		for(int i=0; i<mydirs; i++){
+			for(int f=0;f<nfib;f++){
+				compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model);
 			}
 		}
-						
 		//compute_iso_signal()
-		for(int i=0; i<ndirs; i++){
-			compute_iso_signal(&myisosig[i],&myisosigold[i], bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,m_dstd,model);
-		}
+		for(int i=0; i<mydirs; i++){
+			compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model);
+		}				
 
 		if (leader){
 			//for likehood and reject_f_sum
-			fsum=m_f0;
-			for(int g=0;g<NFIBRES;g++){  
-				fsum+=m_f[g];
+			*fsum=*m_f0;
+
+			for(int g=0;g<nfib;g++){  
+				*fsum+=m_f[g];
 			}
 
 			//compute_prior()
-			compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+			compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
 		}
 
 		__syncthreads();
-
+				
 		//compute_likelihood()
-		compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+		compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);	
 				
 		__syncthreads();
-
+				
 		if (leader){
 			//compute_energy()
-			m_old_energy=m_energy;
-      			m_energy=m_prior_en+m_likelihood_en;
-
+			*m_old_energy=*m_energy;
+      			*m_energy=*m_prior_en+*m_likelihood_en;
 			//test_energy
-			tmp=exp(double(m_old_energy-m_energy));
-			rejflag2=(tmp>prerandU[add+add2]);
+			*tmp=exp(double(*m_old_energy-*m_energy));
+			rejflag[1]=(*tmp>prerandU[*localrand]);
+			*localrand=*localrand+1;
 		}
+			
 		__syncthreads();
 
-       		if(!rejflag){
-			if(rejflag2){
+       		if(!rejflag[0]){
+			if(rejflag[1]){
 				//accept_d()
-	  			if (leader) m_d_acc++;   
+	  			if (leader) *m_d_acc=*m_d_acc+1;   
 			}else{
 				if (leader){
 					//reject_d()
-					m_d=old1;
-					m_d_prior=old2;
-      					m_prior_en=m_old_prior_en;
-					m_d_rej++;
+					*m_d=old[0];
+					*m_d_prior=old[1];
+      					*m_prior_en=*m_old_prior_en;
+					*m_d_rej=*m_d_rej+1;
 					//restore_energy()
-      					m_energy=m_old_energy;
+      					*m_energy=*m_old_energy;
 				}
       						
-				for(int i=0; i<ndirs; i++){
-					for(int f=0;f<NFIBRES;f++){
-						mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
+				for(int i=0; i<mydirs; i++){
+					for(int f=0;f<nfib;f++){
+						mysig[i*nfib+f] = mysigold[i*nfib+f];
 					}	
+				
 					myisosig[i]=myisosigold[i];
 				}
 			}
         	}else{ 
 			if (leader){
 				//reject_d()
-				m_d=old1;
-				m_d_prior=old2;
-      				m_prior_en=m_old_prior_en;
-				m_d_rej++;
+				*m_d=old[0];
+				*m_d_prior=old[1];
+      				*m_prior_en=*m_old_prior_en;
+				*m_d_rej=*m_d_rej+1;
 				//restore_energy()
-      				m_energy=m_old_energy;
+      				*m_energy=*m_old_energy;
 			}
-      			for(int i=0; i<ndirs; i++){
-				for(int f=0;f<NFIBRES;f++){
-					mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
+
+      			for(int i=0; i<mydirs; i++){
+				for(int f=0;f<nfib;f++){
+					mysig[i*nfib+f] = mysigold[i*nfib+f];
 				}	
+				
 				myisosig[i]=myisosigold[i];
 			}	
         	}
@@ -2089,100 +2115,107 @@ extern "C" __global__ void runmcmc_record_kernel(	//INPUT
 ////////////////////////////////////////////////////////////////// D_STD
 
 		if(model==2){
-			if (leader){
+			if (leader){	
 				//propose_d_std
-				old1=m_dstd;
-				m_dstd+=prerandN[1+add+add2]*m_dstd_prop;
-				
+				old[0]=*m_dstd;
+				*m_dstd+=prerandN[*localrand]**m_dstd_prop;
+
 				//compute_d_std_prior()     
-				old2=m_dstd_prior;
-	      			if(m_dstd<=0 || m_dstd > 0.01){ 
-					rejflag=true;
+				old[1]=*m_dstd_prior;
+	      			if(*m_dstd<=0 || *m_dstd > 0.01){ 
+					rejflag[0]=true;
 				}else{ 	
-					rejflag=false;
-					m_dstd_prior=log(double(m_dstd));
+					rejflag[0]=false;
+					*m_dstd_prior=log(double(*m_dstd));
 				}
 			}
 
 			__syncthreads();
 
 			//compute_signal()
-			for(int i=0; i<ndirs; i++){
-				for(int f=0;f<NFIBRES;f++){
-					compute_signal(&mysig[i*NFIBRES+f],&mysigold[i*NFIBRES+f],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+f]);
+			for(int i=0; i<mydirs; i++){
+				for(int f=0;f<nfib;f++){
+					compute_signal(&mysig[i*nfib+f],&mysigold[i*nfib+f],mybvals[i],m_d,m_dstd,angtmp[i*nfib+f],model);
 				}
 			}
 
 			//compute_iso_signal()
-			for(int i=0; i<ndirs; i++){
-				compute_iso_signal(&myisosig[i],&myisosigold[i], bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,m_dstd,model);
+			for(int i=0; i<mydirs; i++){
+				compute_iso_signal(&myisosig[i],&myisosigold[i], mybvals[i],m_d,m_dstd,model);
 			}
 
-			if (leader){
+			if (leader){	
 				//for likehood and reject_f_sum
-				fsum=m_f0;
-				for(int g=0;g<NFIBRES;g++){  
-					fsum+=m_f[g];
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
 				//compute_prior()
-				compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
 			}
 
 			__syncthreads();
 
 			//compute_likelihood()
-			compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig, &mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
 			__syncthreads();
 
 			if (leader){
 				//compute_energy()
-				m_old_energy=m_energy;
-	      			m_energy=m_prior_en+m_likelihood_en;
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
 				//test_energy
-				tmp=exp(double(m_old_energy-m_energy));
-				rejflag2=(tmp>prerandU[1+add+add2]);
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;					
 			}
+
+			__syncthreads();
 				
-			if(!rejflag){
-				if(rejflag2){
+			if(!rejflag[0]){
+				if(rejflag[1]){
 					//accept_dstd()
-		  			if (leader) m_dstd_acc++;   
+		  			if (leader) *m_dstd_acc=*m_dstd_acc+1;   
 				}else{ 
 					if (leader){
 						//reject_dstd()
-						m_dstd=old1;
-						m_dstd_prior=old2;
-		      				m_prior_en=m_old_prior_en;
-		      				m_dstd_rej++;
+						*m_dstd=old[0];
+						*m_dstd_prior=old[1];
+		      				*m_prior_en=*m_old_prior_en;
+		      				*m_dstd_rej=*m_dstd_rej+1;
 
 						//restore_energy()
-		      				m_energy=m_old_energy;
+		      				*m_energy=*m_old_energy;
 					}
-					for(int i=0; i<ndirs; i++){
-						for(int f=0;f<NFIBRES;f++){
-							mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
-						}	
+
+					for(int i=0; i<mydirs; i++){
+						for(int f=0;f<nfib;f++){
+							mysig[i*nfib+f] = mysigold[i*nfib+f];
+						}
+				
 						myisosig[i]=myisosigold[i];
 					}
 				}
 			}else{ 
 				if (leader){
 					//reject_dstd()
-					m_dstd=old1;
-					m_dstd_prior=old2;
-		      			m_prior_en=m_old_prior_en;
-		      			m_dstd_rej++;
+					*m_dstd=old[0];
+					*m_dstd_prior=old[1];
+		      			*m_prior_en=*m_old_prior_en;
+		      			*m_dstd_rej=*m_dstd_rej+1;
 
 					//restore_energy()
-		      			m_energy=m_old_energy;
+		      			*m_energy=*m_old_energy;
 				}
-				for(int i=0; i<ndirs; i++){
-					for(int f=0;f<NFIBRES;f++){
-						mysig[i*NFIBRES+f] = mysigold[i*NFIBRES+f];
+
+				for(int i=0; i<mydirs; i++){
+					for(int f=0;f<nfib;f++){
+						mysig[i*nfib+f] = mysigold[i*nfib+f];
 					}	
+				
 					myisosig[i]=myisosigold[i];
 				}
 			}
@@ -2192,173 +2225,176 @@ extern "C" __global__ void runmcmc_record_kernel(	//INPUT
 
 		if (leader){
 			//propose_S0()
-			old1=m_S0;
-			m_S0+=prerandN[1+add+add2+add3]*m_S0_prop;
+			old[0]=*m_S0;
+			*m_S0+=prerandN[*localrand]**m_S0_prop;
 				
 			//compute_S0_prior()
-			old2=m_S0_prior;
-        		if(m_S0<0) rejflag=true;
+			old[1]=*m_S0_prior;
+        		if(*m_S0<0) rejflag[0]=true;
         		else{    
-				m_S0_prior=0;
-	  			rejflag=false;
+				*m_S0_prior=0;
+	  			rejflag[0]=false;
         		}
+				
 			//for likehood and reject_f_sum
-			fsum=m_f0;
-			for(int g=0;g<NFIBRES;g++){  
-				fsum+=m_f[g];
+			*fsum=*m_f0;
+			for(int g=0;g<nfib;g++){  
+				*fsum+=m_f[g];
 			}
 
 			//compute_prior()
-			compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+			compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
 		}
 
 		__syncthreads();
 
 		//compute_likelihood()
-		compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+		compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
 		__syncthreads();
 
 		if (leader){
 			//compute_energy()
-			m_old_energy=m_energy;
-      			m_energy=m_prior_en+m_likelihood_en;
+			*m_old_energy=*m_energy;
+      			*m_energy=*m_prior_en+*m_likelihood_en;
 
 			//test_energy
-			tmp=exp(double(m_old_energy-m_energy));
-			rejflag2=(tmp>prerandU[1+add+add2+add3]);
+			*tmp=exp(double(*m_old_energy-*m_energy));
+			rejflag[1]=(*tmp>prerandU[*localrand]);
+			*localrand=*localrand+1;
 
-        		if(!rejflag){
-				if(rejflag2){
+        		if(!rejflag[0]){
+				if(rejflag[1]){
 					//accept_S0()
-	  				m_S0_acc++;   
+	  				*m_S0_acc=*m_S0_acc+1;   
 				}else{
 					//reject_S0()
-					m_S0=old1;
-					m_S0_prior=old2;
-      					m_prior_en=m_old_prior_en;
-      					m_S0_rej++;
+					*m_S0=old[0];
+					*m_S0_prior=old[1];
+      					*m_prior_en=*m_old_prior_en;
+      					*m_S0_rej=*m_S0_rej+1;
 
 					//restore_energy()
-      					m_energy=m_old_energy;
+      					*m_energy=*m_old_energy;
 				}
         		}else{ 
 				//reject_S0()
-				m_S0=old1;
-				m_S0_prior=old2;
-	      			m_prior_en=m_old_prior_en;
-	      			m_S0_rej++;
+				*m_S0=old[0];
+				*m_S0_prior=old[1];
+	      			*m_prior_en=*m_old_prior_en;
+	      			*m_S0_rej=*m_S0_rej+1;
 
 				//restore_energy()
-      				m_energy=m_old_energy;
+      				*m_energy=*m_old_energy;
 			}
         	}
 
 ////////////////////////////////////////////////////////////////////////////     TH
 
-     		for(int fibre=0;fibre<NFIBRES;fibre++){  
+     		for(int fibre=0;fibre<nfib;fibre++){  
 			if (leader){ 
 				//propose_th()
-				old1=m_th[fibre];	
-				m_th[fibre]+=prerandN[2+(fibre*3)+add+add2+add3]*m_th_prop[fibre];
-		
+				old[0]=m_th[fibre];
+				m_th[fibre]+=prerandN[*localrand]*m_th_prop[fibre];
+					
 				//compute_th_prior()
-				old2=m_th_prior[fibre];
+				old[1]=m_th_prior[fibre];
       	   			if(m_th[fibre]==0){
 					m_th_prior[fibre]=0;
 		   		}else{
 					m_th_prior[fibre]=-log(double(fabs(sin(double(m_th[fibre]))/2)));
 	      	   		}
-		  		//rejflag=false; /////////////////always false
+		  		//rejflag[0]=false; /////////////////always false
 		
 				//compute_prior()
-				fm_old_prior_en=fm_prior_en[fibre];
-	      	   		fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
+				*fm_old_prior_en=fm_prior_en[fibre];
+	      	   		fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];	
 			}
 
 			__syncthreads();
 
 			//compute_signal()
 			//compute_signal_pre	
-			for(int i=0; i<ndirs; i++){
-				cos_alpha_minus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]-m_th[fibre]));   
-			  	cos_alpha_plus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]+m_th[fibre]));
-				old_angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre];
-		     	   	angtmp[i*NFIBRES+fibre]= (cos(double(m_ph[fibre]-beta[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
-		 	   	angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre]*angtmp[i*NFIBRES+fibre];
-
-				compute_signal(&mysig[i*NFIBRES+fibre],&mysigold[i*NFIBRES+fibre],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+fibre]);
+			for(int i=0; i<mydirs; i++){
+				cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre]));   
+				cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre]));
+				old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre];
+				angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
+		 	   	angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre];
+
+				compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model);
 			}
 
-			if (leader){
+			if (leader){	
 				//for likehood and reject_f_sum
-				fsum=m_f0;
-				for(int g=0;g<NFIBRES;g++){  
-					fsum+=m_f[g];
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
-	
+
 				//compute_prior()
-				compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);	
 			}
 
 			__syncthreads();
 
 			//compute_likelihood()
-			compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
 
 			__syncthreads();
-					
+
 			if (leader){ 
 				//compute_energy()
-				m_old_energy=m_energy;
-	      			m_energy=m_prior_en+m_likelihood_en;
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
 				//test_energy
-				tmp=exp(double(m_old_energy-m_energy));
-				rejflag2=(tmp>prerandU[2+(fibre*3)+add+add2+add3]);
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;					
 			}
 
 			__syncthreads();
 			
-          		//if(!rejflag){
-				if(rejflag2){
-					//accept_th
-		  			if (leader) m_th_acc[fibre]++;   
+			if(rejflag[1]){
+				//accept_th
+		  		if (leader) m_th_acc[fibre]++;   
 				}else{
-					if (leader){
-						//reject_th()
-						m_th[fibre]=old1;
-						m_th_prior[fibre]=old2;
-      						fm_prior_en[fibre]=fm_old_prior_en;
-						m_th_rej[fibre]++;						
+
+				if (leader){
+					//reject_th()
+					m_th[fibre]=old[0];
+					m_th_prior[fibre]=old[1];
+      					fm_prior_en[fibre]=*fm_old_prior_en;
+					m_th_rej[fibre]++;						
 						
-						//restore_energy()
-						m_prior_en=m_old_prior_en;
-	      					m_energy=m_old_energy;
-					}
-					//compute_signal_pre undo
-					for(int i=0; i<ndirs; i++){
-						angtmp[i*NFIBRES+fibre]=old_angtmp[i*NFIBRES+fibre];
-						mysig[i*NFIBRES+fibre] = mysigold[i*NFIBRES+fibre];						
-      					}
+					//restore_energy()
+					*m_prior_en=*m_old_prior_en;
+	      				*m_energy=*m_old_energy;
 				}
-		
-			__syncthreads();
 
+				//compute_signal_pre undo
+				for(int i=0; i<mydirs; i++){
+					angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre];
+					mysig[i*nfib+fibre] = mysigold[i*nfib+fibre];						
+      				}
+			}
+			__syncthreads();
+		
 ///////////////////////////////////////     PH
 
 			if (leader){
 				//propose_ph()
-				old1=m_ph[fibre];
-				m_ph[fibre]+=prerandN[3+(fibre*3)+add+add2+add3]*m_ph_prop[fibre];
-					
+				old[0]=m_ph[fibre];
+				m_ph[fibre]+=prerandN[*localrand]*m_ph_prop[fibre];
+
 				//compute_ph_prior()
-				old2=m_ph_prior[fibre];
+				old[1]=m_ph_prior[fibre];
       				m_ph_prior[fibre]=0;
-      				//rejflag=false;
+      				//rejflag[0]=false;
 
 				//compute_prior()
-				fm_old_prior_en=fm_prior_en[fibre];
+				*fm_old_prior_en=fm_prior_en[fibre];
       	   			fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
 			}
 
@@ -2366,81 +2402,84 @@ extern "C" __global__ void runmcmc_record_kernel(	//INPUT
 				
 			//compute_signal()
 			//compute_signal_pre
-			for(int i=0; i<ndirs; i++){
-				cos_alpha_minus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]-m_th[fibre]));   
-			  	cos_alpha_plus_theta=cos(double(alpha[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]+m_th[fibre]));
-				old_angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre];
-		     	   	angtmp[i*NFIBRES+fibre]= (cos(double(m_ph[fibre]-beta[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
-		 	   	angtmp[i*NFIBRES+fibre]=angtmp[i*NFIBRES+fibre]*angtmp[i*NFIBRES+fibre];
-
-				compute_signal(&mysig[i*NFIBRES+fibre],&mysigold[i*NFIBRES+fibre],bvals[idmult*NDIRECTIONS+idrest+i*THREADS_BLOCK],m_d,model,m_dstd,angtmp[i*NFIBRES+fibre]);
+			for(int i=0; i<mydirs; i++){
+				cos_alpha_minus_theta=cos(double(myalpha[i]-m_th[fibre]));   
+			  	cos_alpha_plus_theta=cos(double(myalpha[i]+m_th[fibre]));
+				old_angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre];
+		     	   	angtmp[i*nfib+fibre]= (cos(double(m_ph[fibre]-mybeta[i]))*(cos_alpha_minus_theta-cos_alpha_plus_theta)/2)+(cos_alpha_minus_theta+cos_alpha_plus_theta)/2;
+		 	   	angtmp[i*nfib+fibre]=angtmp[i*nfib+fibre]*angtmp[i*nfib+fibre];
+	
+				compute_signal(&mysig[i*nfib+fibre],&mysigold[i*nfib+fibre],mybvals[i],m_d,m_dstd,angtmp[i*nfib+fibre],model);
 			}
 
 			if (leader){
 				//for likehood and reject_f_sum
-				fsum=m_f0;
-				for(int g=0;g<NFIBRES;g++){  
-					fsum+=m_f[g];
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 
 				//compute_prior()
-				compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);
 			}
 
 			__syncthreads();
 
 			//compute_likelihood()
-			compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);
+
 
 			__syncthreads();
 
 			if (leader){
 				//compute_energy()
-				m_old_energy=m_energy;
-	      			m_energy=m_prior_en+m_likelihood_en;
+				*m_old_energy=*m_energy;
+	      			*m_energy=*m_prior_en+*m_likelihood_en;
 
 				//test_energy
-				tmp=exp(double(m_old_energy-m_energy));
-				rejflag2=(tmp>prerandU[3+(fibre*3)+add+add2+add3]);	
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[1]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;
 			}
 
 			__syncthreads();
-		        
-			//if(!rejflag){
-				if(rejflag2){
-					//accept_ph()
-		  			if (leader) m_ph_acc[fibre]++;   
-				}else{
-					if (leader){
-						//reject_ph()
-						m_ph[fibre]=old1;
-						m_ph_prior[fibre]=old2;
-      						fm_prior_en[fibre]=fm_old_prior_en;
-						m_ph_rej[fibre]++;						
+
+			//if(!rejflag[0]){
+			if(rejflag[1]){
+				//accept_ph()
+		  		if (leader) m_ph_acc[fibre]++;   
+			}else{
+				if (leader){
+					//reject_ph()
+					m_ph[fibre]=old[0];
+					m_ph_prior[fibre]=old[1];
+      					fm_prior_en[fibre]=*fm_old_prior_en;
+					m_ph_rej[fibre]++;						
 						
-						//restore_energy()
-						m_prior_en=m_old_prior_en;
-	      					m_energy=m_old_energy;
-					}
-					//compute_signal_pre undo
-					for(int i=0; i<ndirs; i++){
-						angtmp[i*NFIBRES+fibre]=old_angtmp[i*NFIBRES+fibre];
-						mysig[i*NFIBRES+fibre] = mysigold[i*NFIBRES+fibre];	
-					}
+					//restore_energy()
+					*m_prior_en=*m_old_prior_en;
+	      				*m_energy=*m_old_energy;
 				}
-				
+				//compute_signal_pre undo
+				for(int i=0; i<mydirs; i++){
+					angtmp[i*nfib+fibre]=old_angtmp[i*nfib+fibre];
+
+					mysig[i*nfib+fibre] = mysigold[i*nfib+fibre];	
+				}
+			}
+
 			__syncthreads();
 
 ////////////////////////////////////////////             F
 
 			if (leader){
 				//propose_f()
-				old1=m_f[fibre];
-				m_f[fibre]+=prerandN[4+(fibre*3)+add+add2+add3]*m_f_prop[fibre];	
-					
+				old[0]=m_f[fibre];
+				m_f[fibre]+=prerandN[*localrand]*m_f_prop[fibre];
+
 	     			//compute_f_prior()
-	        		old2=m_f_prior[fibre];
-				if (m_f[fibre]<=0 | m_f[fibre]>=1) rejflag=true;
+	        		old[1]=m_f_prior[fibre];
+				if (m_f[fibre]<=0 || m_f[fibre]>=1) rejflag[0]=true;
 	        		else{
 		      			if(!can_use_ard ){
 		  				m_f_prior[fibre]=0;
@@ -2452,152 +2491,152 @@ extern "C" __global__ void runmcmc_record_kernel(	//INPUT
 		  				}
 					}
 					m_f_prior[fibre]=fudgevalue*m_f_prior[fibre];
-					rejflag=false;
+					rejflag[0]=false;
 	      			}
 
 				//compute_prior()
-				fm_old_prior_en=fm_prior_en[fibre];
+				*fm_old_prior_en=fm_prior_en[fibre];
       	   			fm_prior_en[fibre]=m_th_prior[fibre]+m_ph_prior[fibre]+m_f_prior[fibre];
-				
+						
 				//for likehood and reject_f_sum
-				fsum=m_f0;
-				for(int g=0;g<NFIBRES;g++){  
-					fsum+=m_f[g];
+				*fsum=*m_f0;
+				for(int g=0;g<nfib;g++){  
+					*fsum+=m_f[g];
 				}
 				//reject_f_sum()
-				rejflag2=(fsum>1);
+				rejflag[1]=(*fsum>1);
 
 				//compute_prior()
-				compute_prior(&m_prior_en,&m_old_prior_en,m_d_prior,m_S0_prior,&fm_prior_en[0],m_f0_prior,m_tau_prior,m_dstd_prior);
+				compute_prior(m_prior_en,m_old_prior_en,m_d_prior,m_S0_prior,fm_prior_en,m_f0_prior,m_tau_prior,m_dstd_prior,nfib);	
 			}
 
 			__syncthreads();
 
 			//compute_likelihood()
-			compute_likelihood(leader,idrest,m_d,m_S0,&m_likelihood_en,&m_f[0],mysig,myisosig,&mydata[idrest*MAXNDIRS_PER_THREAD],fsum,&reduction[0],m_f0,rician,m_tau,ndirs);
+			compute_likelihood(idSubVOX,m_S0,m_likelihood_en,m_f,mysig,myisosig,mydata,fsum,reduction,m_f0,rician,m_tau,mydirs,threadsBlock,ndirections,nfib);	
 
 			__syncthreads();
-						
+
 			if (leader){
-			//compute_energy()
-			m_old_energy=m_energy;
-		      	m_energy=m_prior_en+m_likelihood_en;
+				//compute_energy()
+				*m_old_energy=*m_energy;
+		      		*m_energy=*m_prior_en+*m_likelihood_en;
 
-			//test_energy
-			tmp=exp(double(m_old_energy-m_energy));
-			rejflag3=(tmp>prerandU[4+(fibre*3)+add+add2+add3]);
+				//test_energy
+				*tmp=exp(double(*m_old_energy-*m_energy));
+				rejflag[2]=(*tmp>prerandU[*localrand]);
+				*localrand=*localrand+1;			
 
-		      		if(!rejflag){
-					if(!rejflag2){
-						if(rejflag3){
+		      		if(!rejflag[0]){
+					if(!rejflag[1]){
+						if(rejflag[2]){
 							//accept_f()
 			  				m_f_acc[fibre]++;   
 						}else{
 							//reject_f()
-							m_f[fibre]=old1;
-							m_f_prior[fibre]=old2;
-	      						fm_prior_en[fibre]=fm_old_prior_en;
-	      						m_f_rej[fibre]++;	
-						
+							m_f[fibre]=old[0];
+							m_f_prior[fibre]=old[1];
+	      						fm_prior_en[fibre]=*fm_old_prior_en;
+	      						m_f_rej[fibre]++;				
+					
 							//restore_energy()
-							m_prior_en=m_old_prior_en;
-		      					m_energy=m_old_energy;	
+							*m_prior_en=*m_old_prior_en;
+		      					*m_energy=*m_old_energy;
 						}
 					}else{ 
-						//reject_f()	
-						m_f[fibre]=old1;
-						m_f_prior[fibre]=old2;
-	      					fm_prior_en[fibre]=fm_old_prior_en;
-	      					m_f_rej[fibre]++;		
+						//reject_f()
+						m_f[fibre]=old[0];
+						m_f_prior[fibre]=old[1];
+	      					fm_prior_en[fibre]=*fm_old_prior_en;
+	      					m_f_rej[fibre]++;
 
 						//restore_energy()
-						m_prior_en=m_old_prior_en;
-		      				m_energy=m_old_energy;	
+						*m_prior_en=*m_old_prior_en;
+		      				*m_energy=*m_old_energy;	
 					}
 				}else{
 					//reject_f()
-					m_f[fibre]=old1;
-					m_f_prior[fibre]=old2;
-	      				fm_prior_en[fibre]=fm_old_prior_en;
-	      				m_f_rej[fibre]++;
+					m_f[fibre]=old[0];
+					m_f_prior[fibre]=old[1];
+		      			fm_prior_en[fibre]=*fm_old_prior_en;
+		      			m_f_rej[fibre]++;
 
 					//restore_energy()
-					m_prior_en=m_old_prior_en;
-		      			m_energy=m_old_energy;		
+					*m_prior_en=*m_old_prior_en;
+		      			*m_energy=*m_old_energy;
 				}
 			}
-				
-			__syncthreads();
+			__syncthreads();	
 
-        	}//end while NFIBRES
-
-        	count_update++;
-		recordcount++;
-
-		if(((recordcount%record_every)==0)&&leader){
-			rd[(idmult*totalrecords)+sample-1]= m_d;
-			if(m_include_f0) rf0[(idmult*totalrecords)+sample-1]= m_f0;
-			if(rician) rtau[(idmult*totalrecords)+sample-1]= m_tau;
-			if(model==2) rdstd[(idmult*totalrecords)+sample-1]= m_dstd;
-			rs0[(idmult*totalrecords)+sample-1]=m_S0;
-			rlikelihood_energy[(idmult*totalrecords)+sample-1]=m_likelihood_en;
-			for(int j=0;j<NFIBRES;j++){
-				rth[(idmult*totalrecords*NFIBRES)+(j*totalrecords)+sample-1]=m_th[j];
-				rph[(idmult*totalrecords*NFIBRES)+(j*totalrecords)+sample-1]=m_ph[j];
-				rf[(idmult*totalrecords*NFIBRES)+(j*totalrecords)+sample-1]=m_f[j];
-			}
-			multirecords[(idmult*totalrecords)+sample-1]=sample;
-			sample++;
+        	}//end while nfib
+
+		if(((*recordcount%record_every)==0)&&leader){
+			rd[(*idVOX*totalrecords)+*sample-1]= *m_d;
+			if(m_include_f0) rf0[(*idVOX*totalrecords)+*sample-1]= *m_f0;
+			if(rician) rtau[(*idVOX*totalrecords)+*sample-1]= *m_tau;
+			if(model==2) rdstd[(*idVOX*totalrecords)+*sample-1]= *m_dstd;
+			rs0[(*idVOX*totalrecords)+*sample-1]= *m_S0;
+			rlikelihood_energy[(*idVOX*totalrecords)+*sample-1]= *m_likelihood_en;
+			for(int j=0;j<nfib;j++){
+				rth[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_th[j];
+				rph[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_ph[j];
+				rf[(*idVOX*totalrecords*nfib)+(j*totalrecords)+*sample-1]=m_f[j];
+			}
+			multirecords[(*idVOX*totalrecords)+*sample-1]=*sample;
+			*sample=*sample+1;
         	}
 
-        	if(((count_update%updateproposalevery)==0)&&leader){
+		if(((*count_update%updateproposalevery)==0)&&leader){
 			//m_multifibre.update_proposals();
-			m_d_prop*=sqrt(float(m_d_acc+1)/float(m_d_rej+1));
-			m_d_prop=min(m_d_prop,maxfloat);
+			*m_d_prop*=sqrt(float(*m_d_acc+1)/float(*m_d_rej+1));
+			*m_d_prop=min(*m_d_prop,maxfloat);
 
 			if(rician){
-				m_tau_prop*=sqrt(float(m_tau_acc+1)/float(m_tau_rej+1));
-				m_tau_prop=min(m_tau_prop,maxfloat);
-				m_tau_acc=0; 
-				m_tau_rej=0;	
+				*m_tau_prop*=sqrt(float(*m_tau_acc+1)/float(*m_tau_rej+1));
+				*m_tau_prop=min(*m_tau_prop,maxfloat);
+				*m_tau_acc=0; 
+				*m_tau_rej=0;	
 			}
+
 			if(m_include_f0){
-				m_f0_prop*=sqrt(float(m_f0_acc+1)/float(m_f0_rej+1));
-				m_f0_prop=min(m_f0_prop,maxfloat);
-				m_f0_acc=0; 
-				m_f0_rej=0;
-			}
+				*m_f0_prop*=sqrt(float(*m_f0_acc+1)/float(*m_f0_rej+1));
+				*m_f0_prop=min(*m_f0_prop,maxfloat);
+				*m_f0_acc=0; 
+				*m_f0_rej=0;	
+			}	
+
 			if(model==2){
-				m_dstd_prop*=sqrt(float(m_dstd_acc+1)/float(m_dstd_rej+1));
-				m_dstd_prop=min(m_dstd_prop,maxfloat);
-				m_dstd_acc=0; 
-				m_dstd_rej=0;	
-			}
-			m_S0_prop*=sqrt(float(m_S0_acc+1)/float(m_S0_rej+1));
-			m_S0_prop=min(m_S0_prop,maxfloat);
-			m_d_acc=0; 
-			m_d_rej=0;
-			m_S0_acc=0; 
-			m_S0_rej=0;
-			for(int f=0; f<NFIBRES;f++){
+				*m_dstd_prop*=sqrt(float(*m_dstd_acc+1)/float(*m_dstd_rej+1));
+				*m_dstd_prop=min(*m_dstd_prop,maxfloat);
+				*m_dstd_acc=0; 
+				*m_dstd_rej=0;	
+			}
+
+			*m_S0_prop*=sqrt(float(*m_S0_acc+1)/float(*m_S0_rej+1));
+			*m_S0_prop=min(*m_S0_prop,maxfloat);
+			*m_d_acc=0; 
+			*m_d_rej=0;
+			*m_S0_acc=0; 
+			*m_S0_rej=0;
+			for(int f=0; f<nfib;f++){
 				//m_fibres[f].update_proposals();
 				m_th_prop[f]*=sqrt(float(m_th_acc[f]+1)/float(m_th_rej[f]+1));
-			    	m_th_prop[f]=min(m_th_prop[f],maxfloat);
-			      	m_ph_prop[f]*=sqrt(float(m_ph_acc[f]+1)/float(m_ph_rej[f]+1));
-			      	m_ph_prop[f]=min(m_ph_prop[f],maxfloat);
-			      	m_f_prop[f]*=sqrt(float(m_f_acc[f]+1)/float(m_f_rej[f]+1));
-			      	m_f_prop[f]=min(m_f_prop[f],maxfloat);
+				m_th_prop[f]=min(m_th_prop[f],maxfloat);
+		      		m_ph_prop[f]*=sqrt(float(m_ph_acc[f]+1)/float(m_ph_rej[f]+1));
+		      		m_ph_prop[f]=min(m_ph_prop[f],maxfloat);
+		      		m_f_prop[f]*=sqrt(float(m_f_acc[f]+1)/float(m_f_rej[f]+1));
+		      		m_f_prop[f]=min(m_f_prop[f],maxfloat);
 			      
-			      	m_th_acc[f]=0; 
-			      	m_th_rej[f]=0;
-			      	m_ph_acc[f]=0; 
-			      	m_ph_rej[f]=0;
-			      	m_f_acc[f]=0; 
-			      	m_f_rej[f]=0;
+		      		m_th_acc[f]=0; 
+		      		m_th_rej[f]=0;
+		      		m_ph_acc[f]=0; 
+		      		m_ph_rej[f]=0;
+		      		m_f_acc[f]=0; 
+		      		m_f_rej[f]=0;
 			}
 		}
-			
-		__syncthreads();
+
+		__syncthreads();	
 
         } //end while iterations
 }