Skip to content
Snippets Groups Projects
runmcmc.cu 20.1 KiB
Newer Older
Moises Fernandez's avatar
Moises Fernandez committed
/*  runmcmc.cu

    Tim Behrens, Saad Jbabdi, Stam Sotiropoulos, Moises Hernandez  - FMRIB Image Analysis Group

    Copyright (C) 2005 University of Oxford  */

/*  CCOPYRIGHT  */

#include "xfibresoptions.h"
#include <curand.h>
#include "runmcmc_kernels.cu"
#include "sync_check.h"

#include <host_vector.h>
#include <device_vector.h> 

#include <time.h>
#include <sys/time.h>
#include "init_gpu.h"

using namespace Xfibres;

////////////////////////////////////////////////////// 
//   MCMC IN GPU
////////////////////////////////////////////////////// 

void init_Fibres_Multifibres(	//INPUT
				thrust::device_vector<float> 			datam_gpu,
				thrust::device_vector<float> 			params_gpu,
				thrust::device_vector<float> 			tau_gpu,
				thrust::device_vector<double> 			alpha_gpu,
				thrust::device_vector<double> 			beta_gpu,
				string 						output_file, 
				//OUTPUT
				thrust::device_vector<FibreGPU>& 		fibres_gpu,
				thrust::device_vector<MultifibreGPU>& 		multifibres_gpu,
				thrust::device_vector<double>&			signals_gpu,
				thrust::device_vector<double>&			isosignals_gpu)
{
	std::ofstream myfile;
	myfile.open (output_file.data(), ios::out | ios::app );
   	myfile << "----- MCMC ALGORITHM PART INITIALITATION IN GPU ----- " << "\n";  	

   	struct timeval t1,t2;
   	double time;
   	gettimeofday(&t1,NULL);

	int nvox = multifibres_gpu.size();

	xfibresOptions& opts = xfibresOptions::getInstance();
	int nfib= opts.nfibres.value();
	int nparams_fit = 2+3*opts.nfibres.value();
	if(opts.modelnum.value()==2) nparams_fit++;
	if(opts.f0.value()) nparams_fit++;

	thrust::device_vector<double> angtmp_gpu;
	angtmp_gpu.resize(nvox*ndirections*nfib);
	

	int blocks = nvox; 
  	dim3 Dim_Grid_MCMC(blocks, 1);
  	dim3 Dim_Block_MCMC(THREADS_BLOCK_MCMC ,1);	///dimensions for MCMC
	float *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data());
	float *params_ptr = thrust::raw_pointer_cast(params_gpu.data());	
	float *tau_ptr = thrust::raw_pointer_cast(tau_gpu.data());	
	float *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data());
	double *alpha_ptr = thrust::raw_pointer_cast(alpha_gpu.data());
	double *beta_ptr = thrust::raw_pointer_cast(beta_gpu.data());
	FibreGPU *fibres_ptr =  thrust::raw_pointer_cast(fibres_gpu.data());
	MultifibreGPU *multifibres_ptr = thrust::raw_pointer_cast(multifibres_gpu.data());
	double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data());
	double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data());
	double *angtmp_ptr = thrust::raw_pointer_cast(angtmp_gpu.data());
	int amount_shared = (THREADS_BLOCK_MCMC)*sizeof(double) + (3*nfib + 8)*sizeof(float) + sizeof(int);
	myfile << "Shared Memory Used in init_Fibres_Multifibres: " << amount_shared << "\n";
	init_Fibres_Multifibres_kernel<<< Dim_Grid_MCMC, Dim_Block_MCMC, amount_shared>>>(datam_ptr, params_ptr, tau_ptr, bvals_ptr, alpha_ptr, beta_ptr, ndirections, nfib, nparams_fit, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.rician.value(), opts.ardf0.value(), opts.all_ard.value(), opts.no_ard.value(), gradnonlin, angtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr);
	sync_check("init_Fibres_Multifibres_kernel");

	gettimeofday(&t2,NULL);
    	time=timeval_diff(&t2,&t1);
   	myfile << "TIME TOTAL: " << time << " seconds\n"; 
	myfile << "-----------------------------------------------------" << "\n\n" ; 
	myfile.close();
			thrust::device_vector<float> 			datam_gpu,
			thrust::device_vector<float> 			bvals_gpu,
			thrust::device_vector<double> 			alpha_gpu,
			thrust::device_vector<double> 			beta_gpu,
			string 						output_file, 
			//INPUT-OUTPUT
			thrust::device_vector<FibreGPU>& 		fibres_gpu,
			thrust::device_vector<MultifibreGPU>& 		multifibres_gpu,
			thrust::device_vector<double>&			signals_gpu,
			thrust::device_vector<double>&			isosignals_gpu)
{
	xfibresOptions& opts = xfibresOptions::getInstance();
	
	std::ofstream myfile;
	myfile.open (output_file.data(), ios::out | ios::app ); 
   	myfile << "--------- MCMC ALGORITHM PART BURNIN IN GPU --------- " << "\n";  	

   	struct timeval t1,t2,t_tot1,t_tot2;
   	double time,timecurand,timemcmc;
   	time=0;
   	timecurand=0;
   	timemcmc=0;

   	gettimeofday(&t_tot1,NULL);

   	size_t free,total;
	
	int nvox = multifibres_gpu.size();
   	int nfib= opts.nfibres.value();
	int nparams;

	if(opts.f0.value()) nparams=3+nfib*3;
	else nparams=2+nfib*3;	
	if(opts.modelnum.value()==2) nparams++;
	if(opts.rician.value()) nparams++;

	thrust::device_vector<float> recors_null_gpu;
	recors_null_gpu.resize(1);

	thrust::device_vector<double> angtmp_gpu;
	thrust::device_vector<double> oldangtmp_gpu;
	thrust::device_vector<double> oldsignals_gpu;
	thrust::device_vector<double> oldisosignals_gpu;
	
	angtmp_gpu.resize(nvox*ndirections*nfib);
	oldangtmp_gpu.resize(nvox*ndirections);
	oldsignals_gpu.resize(nvox*ndirections*nfib);
	oldisosignals_gpu.resize(nvox*ndirections);
   
   	unsigned int totalrandoms=(opts.nburn.value() * nvox * nparams);

   	cuMemGetInfo(&free,&total);
   	myfile << "Free memory Before Randoms: "<< free <<  " ---- Total memory: " << total << "\n";
   	//4 bytes each float, 2 random arrays, and 80% of total memory at this moment 
   	unsigned int maxrandoms=((free*0.8)/(4*2)); 
   	myfile << "Total randoms: " << totalrandoms << "\n"; 
   	myfile << "Max randoms: " << maxrandoms << "\n"; 
   
   	int steps; //num iter if not enough memory
   	int minrandoms; //min num of randoms ensamble
   	minrandoms= nvox * nparams;

   	int iters_step=0;
	int nrandoms=0;	

   	if(totalrandoms>maxrandoms){ 
		iters_step = maxrandoms / minrandoms; 			//iterations in each step
		nrandoms = iters_step*minrandoms;			//nrandoms for each step
		steps =  (opts.nburn.value()/iters_step);  		//repeat process steps times, no enough memory for all randoms 			
   	}else{ 
		nrandoms = totalrandoms;
		iters_step= opts.nburn.value();
		steps = 0;
  	}
	if(nrandoms%2){							//CURAND must generates multiples of 2 randoms
		nrandoms++;
   	myfile << "Process " << opts.nburn.value() << " iterations divided in "<< steps << " steps with "<< iters_step << " iterations in each one" << "\n";    

   	int last_step = opts.nburn.value() - (iters_step*steps);
   	int last_randoms = (last_step*minrandoms);
	if(last_randoms%2){						//CURAND must generates multiples of 2 randoms
   	myfile << "Last step with " << last_step << " iterations" << "\n"; 
	
	thrust::device_vector<float> randomsN_gpu;
	thrust::device_vector<float> randomsU_gpu;	
	randomsN_gpu.resize(nrandoms);
	randomsU_gpu.resize(nrandoms);

   	cuMemGetInfo(&free,&total);
   	myfile << "Free memory after Malloc Randoms: "<< free <<  " ---- Total memory: " << total << "\n";
   
  	int blocks = nvox;        
  	dim3 Dim_Grid(blocks, 1);
  	dim3 Dim_Block(THREADS_BLOCK_MCMC,1);	//dimensions for MCMC   
   	myfile << "\n" << "NUM BLOCKS: " << blocks << "\n"; 
   	myfile << "THREADS PER BLOCK : " << THREADS_BLOCK_MCMC << "\n\n"; 	

   	curandGenerator_t gen;
   	curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT);
   	curandSetPseudoRandomGeneratorSeed(gen,seed);

	//get pointers
	float *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data());
	float *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data());
	double *alpha_ptr = thrust::raw_pointer_cast(alpha_gpu.data());
	double *beta_ptr = thrust::raw_pointer_cast(beta_gpu.data());
	float *randomsN_ptr = thrust::raw_pointer_cast(randomsN_gpu.data());
	float *randomsU_ptr = thrust::raw_pointer_cast(randomsU_gpu.data());
	FibreGPU *fibres_ptr =  thrust::raw_pointer_cast(fibres_gpu.data());
	MultifibreGPU *multifibres_ptr = thrust::raw_pointer_cast(multifibres_gpu.data());
	double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data());
	double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data());
	double *angtmp_ptr = thrust::raw_pointer_cast(angtmp_gpu.data());
	double *oldangtmp_ptr = thrust::raw_pointer_cast(oldangtmp_gpu.data());
	double *oldsignals_ptr = thrust::raw_pointer_cast(oldsignals_gpu.data());
	double *oldisosignals_ptr = thrust::raw_pointer_cast(oldisosignals_gpu.data());

	float *records_null = thrust::raw_pointer_cast(recors_null_gpu.data());

	int amount_shared = (THREADS_BLOCK_MCMC)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 19)*sizeof(int);
	myfile << "Shared Memory Used in runmcmc_burnin: " << amount_shared << "\n";
	
   	for(int i=0;i<steps;i++){

   		gettimeofday(&t1,NULL);

	   	curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,nrandoms,0,1); 
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}
	   	status = curandGenerateUniform(gen,randomsU_ptr,nrandoms);	//generate randoms
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}

 	   	gettimeofday(&t2,NULL);
    	   	timecurand+=timeval_diff(&t2,&t1);

	   	gettimeofday(&t1,NULL);

	   	runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), 0, 0, 0, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr,records_null,records_null,records_null,records_null,records_null,records_null,records_null, records_null);   
	   	sync_check("runmcmc_burnin_kernel");

 	   	gettimeofday(&t2,NULL);
    	   	timemcmc+=timeval_diff(&t2,&t1);
   	}

   		curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,last_randoms,0,1);
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}
   		status = curandGenerateUniform(gen,randomsU_ptr,last_randoms); 	//generate randoms
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}
   	}
	
   	gettimeofday(&t2,NULL);
   	timecurand+=timeval_diff(&t2,&t1);

   	gettimeofday(&t1,NULL);

   	if(nvox!=0){
		runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), 0, 0, 0, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr,records_null,records_null,records_null,records_null,records_null,records_null, records_null,records_null); 
   		sync_check("runmcmc_burnin_kernel");
   	}

   	gettimeofday(&t2,NULL);
   	timemcmc+=timeval_diff(&t2,&t1);

    	myfile << "TIME CURAND: " << timecurand << " seconds\n"; 
    	myfile << "TIME RUNMCMC: " << timemcmc << " seconds\n"; 
   
   	curandDestroyGenerator(gen);

	gettimeofday(&t_tot2,NULL);
    	time=timeval_diff(&t_tot2,&t_tot1);
   	myfile << "TIME TOTAL: " << time << " seconds\n"; 
	myfile << "-----------------------------------------------------" << "\n\n" ; 
	myfile.close();

   	sync_check("runmcmc_burnin");
}


void runmcmc_record(	//INPUT
			thrust::device_vector<float> 			datam_gpu,
			thrust::device_vector<float> 			bvals_gpu,
			thrust::device_vector<double> 			alpha_gpu,
			thrust::device_vector<double> 			beta_gpu,
			thrust::device_vector<FibreGPU> 		fibres_gpu,
			thrust::device_vector<MultifibreGPU> 		multifibres_gpu,
			thrust::device_vector<double>			signals_gpu,
			thrust::device_vector<double>			isosignals_gpu,
			string 						output_file, 
			//OUTPUT
			thrust::device_vector<float>&			rf0_gpu,
			thrust::device_vector<float>&			rtau_gpu,
			thrust::device_vector<float>&			rs0_gpu,
			thrust::device_vector<float>&			rd_gpu,
			thrust::device_vector<float>&			rdstd_gpu,
			thrust::device_vector<float>&			rth_gpu,
			thrust::device_vector<float>&			rph_gpu,
			thrust::device_vector<float>&			rf_gpu)
{
	xfibresOptions& opts = xfibresOptions::getInstance();
	
	std::ofstream myfile;
	myfile.open (output_file.data(), ios::out | ios::app );
   	myfile << "--------- MCMC ALGORITHM PART RECORD IN GPU --------- " << "\n"; 	

   	struct timeval t1,t2,t_tot1,t_tot2;
   	double time,timecurand,timemcmc;
   	time=0;
   	timecurand=0;
   	timemcmc=0;

   	gettimeofday(&t_tot1,NULL);

   	size_t free,total;

	int totalrecords = (opts.njumps.value()/opts.sampleevery.value()); 
	
	int nvox = multifibres_gpu.size();
   	int nfib= opts.nfibres.value();
	int nparams;

	if(opts.f0.value()) nparams=3+nfib*3;
	else nparams=2+nfib*3;	
	if(opts.modelnum.value()==2) nparams++;
	if(opts.rician.value()) nparams++;

	thrust::device_vector<double> angtmp_gpu;
	thrust::device_vector<double> oldangtmp_gpu;
	thrust::device_vector<double> oldsignals_gpu;
	thrust::device_vector<double> oldisosignals_gpu;
	
	angtmp_gpu.resize(nvox*ndirections*nfib);
	oldangtmp_gpu.resize(nvox*ndirections);
	oldsignals_gpu.resize(nvox*ndirections*nfib);
	oldisosignals_gpu.resize(nvox*ndirections);
   
   	unsigned int totalrandoms=(opts.njumps.value() * nvox * nparams);

   	cuMemGetInfo(&free,&total);
   	myfile << "Free memory Before Randoms: "<< free <<  " ---- Total memory: " << total << "\n";
   	//4 bytes each float, 2 random arrays, and 80% of total memory at this moment 
   	unsigned int maxrandoms=((free*0.8)/(4*2)); 
   	myfile << "Total randoms: " << totalrandoms << "\n"; 
   	myfile << "Max randoms: " << maxrandoms << "\n"; 
   
   	int steps; //num iter if not enough memory
   	int minrandoms; //min num of randoms ensamble
   	minrandoms= nvox * nparams;

   	int iters_step=0;
	int nrandoms=0;	

   	if(totalrandoms>maxrandoms){ 
		iters_step = maxrandoms / minrandoms; 		//iterations in each step
		nrandoms = iters_step*minrandoms;		//nrandoms for each step
		steps =  (opts.njumps.value()/iters_step);  	//repeat process steps times, no enough memory for all randoms 			
   	}else{ 
		nrandoms = totalrandoms;
		iters_step= opts.njumps.value();
		steps = 0;
	if(nrandoms%2){						//CURAND must generates multiples of 2 randoms
		nrandoms++;
   	myfile << "Process " << opts.njumps.value() << " iterations divided in "<< steps << " steps with "<< iters_step << " iterations in each one" << "\n";    

   	int last_step = opts.njumps.value() - (iters_step*steps);
	if(last_randoms%2){					//CURAND must generates multiples of 2 randoms
		last_randoms++;
	}

   	myfile << "Last step with " << last_step << " iterations" << "\n"; 
	
	thrust::device_vector<float> randomsN_gpu;
	thrust::device_vector<float> randomsU_gpu;	
	randomsN_gpu.resize(nrandoms);
	randomsU_gpu.resize(nrandoms);

   	cuMemGetInfo(&free,&total);
   	myfile << "Free memory after Malloc Randoms: "<< free <<  " ---- Total memory: " << total << "\n";
   
  	int blocks = nvox;        
  	dim3 Dim_Grid(blocks, 1);
  	dim3 Dim_Block(THREADS_BLOCK_MCMC,1);	//dimensions for MCMC   
   	myfile << "\n" << "NUM BLOCKS: " << blocks << "\n"; 
   	myfile << "THREADS PER BLOCK : " << THREADS_BLOCK_MCMC << "\n\n"; 	

   	curandGenerator_t gen;
   	curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT);
   	curandSetPseudoRandomGeneratorSeed(gen,seed);

	//get pointers
	float *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data());
	float *bvals_ptr = thrust::raw_pointer_cast(bvals_gpu.data());
	double *alpha_ptr = thrust::raw_pointer_cast(alpha_gpu.data());
	double *beta_ptr = thrust::raw_pointer_cast(beta_gpu.data());
	float *randomsN_ptr = thrust::raw_pointer_cast(randomsN_gpu.data());
	float *randomsU_ptr = thrust::raw_pointer_cast(randomsU_gpu.data());
	FibreGPU *fibres_ptr =  thrust::raw_pointer_cast(fibres_gpu.data());
	MultifibreGPU *multifibres_ptr = thrust::raw_pointer_cast(multifibres_gpu.data());
	double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data());
	double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data());

	double *angtmp_ptr = thrust::raw_pointer_cast(angtmp_gpu.data());
	double *oldangtmp_ptr = thrust::raw_pointer_cast(oldangtmp_gpu.data());
	double *oldsignals_ptr = thrust::raw_pointer_cast(oldsignals_gpu.data());
	double *oldisosignals_ptr = thrust::raw_pointer_cast(oldisosignals_gpu.data());
	
	float *rf0_ptr = thrust::raw_pointer_cast(rf0_gpu.data());
	float *rtau_ptr = thrust::raw_pointer_cast(rtau_gpu.data());
	float *rs0_ptr = thrust::raw_pointer_cast(rs0_gpu.data());
	float *rd_ptr = thrust::raw_pointer_cast(rd_gpu.data());
	float *rdstd_ptr = thrust::raw_pointer_cast(rdstd_gpu.data());
	float *rth_ptr = thrust::raw_pointer_cast(rth_gpu.data());
	float *rph_ptr = thrust::raw_pointer_cast(rph_gpu.data());
	float *rf_ptr = thrust::raw_pointer_cast(rf_gpu.data());

	int amount_shared = (THREADS_BLOCK_MCMC)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 19)*sizeof(int);
	myfile << "Shared Memory Used in runmcmc_record: " << amount_shared << "\n";
   	for(int i=0;i<steps;i++){

   		gettimeofday(&t1,NULL);

	   	curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,nrandoms,0,1);
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}
	   	status = curandGenerateUniform(gen,randomsU_ptr,nrandoms);	//generate randoms
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}

 	   	gettimeofday(&t2,NULL);
    	   	timecurand+=timeval_diff(&t2,&t1);

	   	gettimeofday(&t1,NULL);

	   	runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), iters_step, (i*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr);
	   	sync_check("runmcmc_record_kernel");

 	   	gettimeofday(&t2,NULL);
    	   	timemcmc+=timeval_diff(&t2,&t1);
   	}

   	gettimeofday(&t1,NULL);

   	if(nvox!=0){
   		curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,last_randoms,0,1);
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}
   		status = curandGenerateUniform(gen,randomsU_ptr,last_randoms); 	//generate randoms
		if (status != CURAND_STATUS_SUCCESS)
		{
			printf("Failure generating cuda random numbers: %d\n",status);
			exit(1);
		}
   	}
	
   	gettimeofday(&t2,NULL);
   	timecurand+=timeval_diff(&t2,&t1);

   	gettimeofday(&t1,NULL);

   	if(nvox!=0){
		runmcmc_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr,randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), gradnonlin, opts.updateproposalevery.value(), last_step, (steps*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, oldsignals_ptr, oldisosignals_ptr, angtmp_ptr, oldangtmp_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr);   
   		sync_check("runmcmc_record_kernel");
   	}

   	gettimeofday(&t2,NULL);
   	timemcmc+=timeval_diff(&t2,&t1);


    	myfile << "TIME CURAND: " << timecurand << " seconds\n"; 
    	myfile << "TIME RUNMCMC: " << timemcmc << " seconds\n"; 
   
   	curandDestroyGenerator(gen);

	gettimeofday(&t_tot2,NULL);
    	time=timeval_diff(&t_tot2,&t_tot1);
   	myfile << "TIME TOTAL: " << time << " seconds\n"; 
	myfile << "-----------------------------------------------------" << "\n" ;
	myfile.close();