From 17d8d4bc314195e8b2e833b93afe124fb32e6eb6 Mon Sep 17 00:00:00 2001 From: Moises Fernandez <moisesf@fmrib.ox.ac.uk> Date: Thu, 11 Apr 2013 15:42:31 +0000 Subject: [PATCH] Changed to use Dynamic Shared Memory and fix curand with a number of randoms not multiplo of 2 --- CUDA/runmcmc.cu | 210 +++++++++++++++++++++++++++++------------------- 1 file changed, 127 insertions(+), 83 deletions(-) diff --git a/CUDA/runmcmc.cu b/CUDA/runmcmc.cu index f0d9354..a4c04a5 100644 --- a/CUDA/runmcmc.cu +++ b/CUDA/runmcmc.cu @@ -31,18 +31,16 @@ void init_Fibres_Multifibres( //INPUT thrust::device_vector<double> bvals_gpu, thrust::device_vector<double> alpha_gpu, thrust::device_vector<double> beta_gpu, - string output_file, + const int ndirections, //OUTPUT thrust::device_vector<FibreGPU>& fibres_gpu, thrust::device_vector<MultifibreGPU>& multifibres_gpu, thrust::device_vector<double>& signals_gpu, thrust::device_vector<double>& isosignals_gpu) { - std::ofstream myfile; - myfile.open (output_file.data(), ios::out | ios::app ); - myfile << "----------------------------------------------------- " << "\n"; - myfile << "------MCMC ALGORITHM PART INITIALITATION IN GPU ----- " << "\n"; - myfile << "----------------------------------------------------- " << "\n"; + cout << "----------------------------------------------------- " << "\n"; + cout << "------MCMC ALGORITHM PART INITIALITATION IN GPU ----- " << "\n"; + cout << "----------------------------------------------------- " << "\n"; struct timeval t1,t2; double time; @@ -58,7 +56,7 @@ void init_Fibres_Multifibres( //INPUT int blocks = nvox; dim3 Dim_Grid_MCMC(blocks, 1); - dim3 Dim_Block_MCMC(THREADS_BLOCK ,1); ///dimensions for MCMC + dim3 Dim_Block_MCMC(THREADS_BLOCK_MCMC ,1); ///dimensions for MCMC double *datam_ptr = thrust::raw_pointer_cast(datam_gpu.data()); double *params_ptr = thrust::raw_pointer_cast(params_gpu.data()); @@ -71,13 +69,17 @@ void init_Fibres_Multifibres( //INPUT double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data()); double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data()); - init_Fibres_Multifibres_kernel<<< Dim_Grid_MCMC, Dim_Block_MCMC>>>(datam_ptr, params_ptr, tau_ptr, bvals_ptr, alpha_ptr, beta_ptr, nvox, nfib, nparams_fit, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.rician.value(), opts.ardf0.value(), opts.all_ard.value(), opts.no_ard.value(), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + int amount_shared = (THREADS_BLOCK_MCMC+1)*sizeof(double) + (3*nfib + 8)*sizeof(float); + + printf("Shared Memory Used in init_Fibres_Multifibres: %i\n",amount_shared); + + init_Fibres_Multifibres_kernel<<< Dim_Grid_MCMC, Dim_Block_MCMC, amount_shared>>>(datam_ptr, params_ptr, tau_ptr, bvals_ptr, alpha_ptr, beta_ptr, ndirections, nfib, nparams_fit, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.rician.value(), opts.ardf0.value(), opts.all_ard.value(), opts.no_ard.value(), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); sync_check("init_Fibres_Multifibres_kernel"); gettimeofday(&t2,NULL); time=timeval_diff(&t2,&t1); - myfile << "TIME TOTAL: " << time << " seconds\n"; - myfile << "--------------------------------------------" << "\n\n" ; + cout << "TIME TOTAL: " << time << " seconds\n"; + cout << "--------------------------------------------" << "\n\n" ; } void runmcmc_burnin( //INPUT @@ -85,8 +87,8 @@ void runmcmc_burnin( //INPUT thrust::device_vector<double> bvals_gpu, thrust::device_vector<double> alpha_gpu, thrust::device_vector<double> beta_gpu, + const int ndirections, double seed, - string output_file, //INPUT-OUTPUT thrust::device_vector<FibreGPU>& fibres_gpu, thrust::device_vector<MultifibreGPU>& multifibres_gpu, @@ -95,11 +97,9 @@ void runmcmc_burnin( //INPUT { xfibresOptions& opts = xfibresOptions::getInstance(); - std::ofstream myfile; - myfile.open (output_file.data(), ios::out | ios::app ); - myfile << "----------------------------------------------------- " << "\n"; - myfile << "--------- MCMC ALGORITHM PART BURNIN IN GPU --------- " << "\n"; - myfile << "----------------------------------------------------- " << "\n"; + cout << "----------------------------------------------------- " << "\n"; + cout << "--------- MCMC ALGORITHM PART BURNIN IN GPU --------- " << "\n"; + cout << "----------------------------------------------------- " << "\n"; struct timeval t1,t2,t_tot1,t_tot2; double time,timecurand,timemcmc; @@ -123,12 +123,12 @@ void runmcmc_burnin( //INPUT unsigned int totalrandoms=(opts.nburn.value() * nvox * nparams); cuMemGetInfo(&free,&total); - myfile << "Free memory Before Randoms: "<< free << " ---- Total memory: " << total << "\n"; - //4 bytes each float, 2 randoms arrays, and 80% of total memory at this moment - unsigned int maxrandoms=(free/(4*2))*0.8; + cout << "Free memory Before Randoms: "<< free << " ---- Total memory: " << total << "\n"; + //4 bytes each float, 2 random arrays, and 80% of total memory at this moment + unsigned int maxrandoms=((free*0.8)/(4*2)); - myfile << "Total randoms: " << totalrandoms << "\n"; - myfile << "Max randoms: " << maxrandoms << "\n"; + cout << "Total randoms: " << totalrandoms << "\n"; + cout << "Max randoms: " << maxrandoms << "\n"; int steps; //num iter if not enough memory int minrandoms; //min num of randoms ensamble @@ -138,9 +138,9 @@ void runmcmc_burnin( //INPUT int nrandoms=0; if(totalrandoms>maxrandoms){ - iters_step = maxrandoms / minrandoms; - nrandoms = iters_step*minrandoms; //nrandoms for each step - steps = (opts.nburn.value()/iters_step); //repeat process iterations times, no enough memory for all randoms + iters_step = maxrandoms / minrandoms; //iterations in each step + nrandoms = iters_step*minrandoms; //nrandoms for each step + steps = (opts.nburn.value()/iters_step); //repeat process steps times, no enough memory for all randoms }else{ nrandoms = totalrandoms; iters_step= opts.nburn.value(); @@ -148,18 +148,17 @@ void runmcmc_burnin( //INPUT } if(nrandoms%2){ //CURAND must generates multiples of 2 randoms nrandoms++; - } - - myfile << "Process " << opts.nburn.value() << " iterations divided in "<< steps << " steps with "<< iters_step << " iterations in each one" << "\n"; + } + + cout << "Process " << opts.nburn.value() << " iterations divided in "<< steps << " steps with "<< iters_step << " iterations in each one" << "\n"; int last_step = opts.nburn.value() - (iters_step*steps); - int last_randoms= (last_step*minrandoms); - if(last_randoms%2){ //CURAND must generates multiples of 2 randoms + int last_randoms = (last_step*minrandoms); + if(last_randoms%2){ //CURAND must generates multiples of 2 randoms last_randoms++; } - - myfile << "Last step with " << last_step << " iterations" << "\n"; + cout << "Last step with " << last_step << " iterations" << "\n"; thrust::device_vector<float> randomsN_gpu; thrust::device_vector<float> randomsU_gpu; @@ -167,14 +166,14 @@ void runmcmc_burnin( //INPUT randomsU_gpu.resize(nrandoms); cuMemGetInfo(&free,&total); - myfile << "Free memory after Malloc Randoms: "<< free << " ---- Total memory: " << total << "\n"; + cout << "Free memory after Malloc Randoms: "<< free << " ---- Total memory: " << total << "\n"; int blocks = nvox; dim3 Dim_Grid(blocks, 1); - dim3 Dim_Block(THREADS_BLOCK,1); //dimensions for MCMC + dim3 Dim_Block(THREADS_BLOCK_MCMC,1); //dimensions for MCMC - myfile << "\n" << "NUM BLOCKS: " << blocks << "\n"; - myfile << "THREADS PER BLOCK : " << THREADS_BLOCK << "\n\n"; + cout << "\n" << "NUM BLOCKS: " << blocks << "\n"; + cout << "THREADS PER BLOCK : " << THREADS_BLOCK_MCMC << "\n\n"; curandGenerator_t gen; curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT); @@ -191,31 +190,58 @@ void runmcmc_burnin( //INPUT MultifibreGPU *multifibres_ptr = thrust::raw_pointer_cast(multifibres_gpu.data()); double *signals_ptr = thrust::raw_pointer_cast(signals_gpu.data()); double *isosignals_ptr = thrust::raw_pointer_cast(isosignals_gpu.data()); + + int amount_shared = (THREADS_BLOCK_MCMC+1)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 16)*sizeof(int); + + printf("Shared Memory Used in runmcmc_burnin: %i\n",amount_shared); for(int i=0;i<steps;i++){ gettimeofday(&t1,NULL); - curandGenerateNormal(gen,randomsN_ptr,nrandoms,0,1); - curandGenerateUniform(gen,randomsU_ptr,nrandoms); //generate randoms + curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,nrandoms,0,1); + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } + status = curandGenerateUniform(gen,randomsU_ptr,nrandoms); //generate randoms + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } gettimeofday(&t2,NULL); timecurand+=timeval_diff(&t2,&t1); gettimeofday(&t1,NULL); - runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, nvox, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), iters_step, (i*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), iters_step, (i*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); sync_check("runmcmc_burnin_kernel"); gettimeofday(&t2,NULL); timemcmc+=timeval_diff(&t2,&t1); } - gettimeofday(&t1,NULL); + MultifibreGPU mult = multifibres_gpu[0]; + FibreGPU fib = fibres_gpu[0]; + + gettimeofday(&t1,NULL); if(nvox!=0){ - curandGenerateNormal(gen,randomsN_ptr,last_randoms,0,1); - curandGenerateUniform(gen,randomsU_ptr,last_randoms); //generate randoms + curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,last_randoms,0,1); + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } + status = curandGenerateUniform(gen,randomsU_ptr,last_randoms); //generate randoms + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } } gettimeofday(&t2,NULL); @@ -224,24 +250,22 @@ void runmcmc_burnin( //INPUT gettimeofday(&t1,NULL); if(nvox!=0){ - runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, nvox, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), last_step, (steps*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); + runmcmc_burnin_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), last_step, (steps*iters_step), fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr); sync_check("runmcmc_burnin_kernel"); } gettimeofday(&t2,NULL); timemcmc+=timeval_diff(&t2,&t1); - - myfile << "TIME CURAND: " << timecurand << " seconds\n"; - myfile << "TIME RUNMCMC: " << timemcmc << " seconds\n"; + cout << "TIME CURAND: " << timecurand << " seconds\n"; + cout << "TIME RUNMCMC: " << timemcmc << " seconds\n"; curandDestroyGenerator(gen); gettimeofday(&t_tot2,NULL); time=timeval_diff(&t_tot2,&t_tot1); - myfile << "TIME TOTAL: " << time << " seconds\n"; - myfile << "--------------------------------------------" << "\n\n" ; - myfile.close(); + cout << "TIME TOTAL: " << time << " seconds\n"; + cout << "--------------------------------------------" << "\n\n" ; sync_check("runmcmc_burnin"); } @@ -256,8 +280,8 @@ void runmcmc_record( //INPUT thrust::device_vector<MultifibreGPU> multifibres_gpu, thrust::device_vector<double> signals_gpu, thrust::device_vector<double> isosignals_gpu, + const int ndirections, double seed, - string output_file, //OUTPUT thrust::device_vector<int>& multirecords_gpu, thrust::device_vector<float>& rf0_gpu, @@ -272,11 +296,9 @@ void runmcmc_record( //INPUT { xfibresOptions& opts = xfibresOptions::getInstance(); - std::ofstream myfile; - myfile.open (output_file.data(), ios::out | ios::app ); - myfile << "----------------------------------------------------- " << "\n"; - myfile << "--------- MCMC ALGORITHM PART RECORD IN GPU --------- " << "\n"; - myfile << "----------------------------------------------------- " << "\n"; + cout << "----------------------------------------------------- " << "\n"; + cout << "--------- MCMC ALGORITHM PART RECORD IN GPU --------- " << "\n"; + cout << "----------------------------------------------------- " << "\n"; struct timeval t1,t2,t_tot1,t_tot2; double time,timecurand,timemcmc; @@ -302,12 +324,12 @@ void runmcmc_record( //INPUT unsigned int totalrandoms=(opts.njumps.value() * nvox * nparams); cuMemGetInfo(&free,&total); - myfile << "Free memory Before Randoms: "<< free << " ---- Total memory: " << total << "\n"; - //4 bytes each float, 2 randoms arrays, and 80% of total memory at this moment - unsigned int maxrandoms=(free/(4*2))*0.8; + cout << "Free memory Before Randoms: "<< free << " ---- Total memory: " << total << "\n"; + //4 bytes each float, 2 random arrays, and 80% of total memory at this moment + unsigned int maxrandoms=((free*0.8)/(4*2)); - myfile << "Total randoms: " << totalrandoms << "\n"; - myfile << "Max randoms: " << maxrandoms << "\n"; + cout << "Total randoms: " << totalrandoms << "\n"; + cout << "Max randoms: " << maxrandoms << "\n"; int steps; //num iter if not enough memory int minrandoms; //min num of randoms ensamble @@ -317,28 +339,27 @@ void runmcmc_record( //INPUT int nrandoms=0; if(totalrandoms>maxrandoms){ - iters_step = maxrandoms / minrandoms; + iters_step = maxrandoms / minrandoms; //iterations in each step nrandoms = iters_step*minrandoms; //nrandoms for each step - steps = (opts.njumps.value()/iters_step); //repeat process iterations times, no enough memory for all randoms + steps = (opts.njumps.value()/iters_step); //repeat process steps times, no enough memory for all randoms }else{ nrandoms = totalrandoms; iters_step= opts.njumps.value(); steps = 0; - } + } if(nrandoms%2){ //CURAND must generates multiples of 2 randoms nrandoms++; - } + } - myfile << "Process " << opts.njumps.value() << " iterations divided in "<< steps << " steps with "<< iters_step << " iterations in each one" << "\n"; + cout << "Process " << opts.njumps.value() << " iterations divided in "<< steps << " steps with "<< iters_step << " iterations in each one" << "\n"; int last_step = opts.njumps.value() - (iters_step*steps); - int last_randoms= (last_step*minrandoms); + int last_randoms = (last_step*minrandoms); if(last_randoms%2){ //CURAND must generates multiples of 2 randoms last_randoms++; } - - myfile << "Last step with " << last_step << " iterations" << "\n"; + cout << "Last step with " << last_step << " iterations" << "\n"; thrust::device_vector<float> randomsN_gpu; thrust::device_vector<float> randomsU_gpu; @@ -346,14 +367,14 @@ void runmcmc_record( //INPUT randomsU_gpu.resize(nrandoms); cuMemGetInfo(&free,&total); - myfile << "Free memory after Malloc Randoms: "<< free << " ---- Total memory: " << total << "\n"; + cout << "Free memory after Malloc Randoms: "<< free << " ---- Total memory: " << total << "\n"; int blocks = nvox; dim3 Dim_Grid(blocks, 1); - dim3 Dim_Block(THREADS_BLOCK,1); //dimensions for MCMC + dim3 Dim_Block(THREADS_BLOCK_MCMC,1); //dimensions for MCMC - myfile << "\n" << "NUM BLOCKS: " << blocks << "\n"; - myfile << "THREADS PER BLOCK : " << THREADS_BLOCK << "\n\n"; + cout << "\n" << "NUM BLOCKS: " << blocks << "\n"; + cout << "THREADS PER BLOCK : " << THREADS_BLOCK_MCMC << "\n\n"; curandGenerator_t gen; curandCreateGenerator(&gen,CURAND_RNG_PSEUDO_DEFAULT); @@ -382,19 +403,33 @@ void runmcmc_record( //INPUT float *rf_ptr = thrust::raw_pointer_cast(rf_gpu.data()); float *rlikelihood_energy_ptr = thrust::raw_pointer_cast(rlikelihood_energy_gpu.data()); + int amount_shared = (THREADS_BLOCK_MCMC+1)*sizeof(double) + (10*nfib + 2*nparams + 24)*sizeof(float) + (7*nfib + 18)*sizeof(int); + + printf("Shared Memory Used in runmcmc_record: %i\n",amount_shared); + for(int i=0;i<steps;i++){ gettimeofday(&t1,NULL); - curandGenerateNormal(gen,randomsN_ptr,nrandoms,0,1); - curandGenerateUniform(gen,randomsU_ptr,nrandoms); //generate randoms + curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,nrandoms,0,1); + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } + status = curandGenerateUniform(gen,randomsU_ptr,nrandoms); //generate randoms + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } gettimeofday(&t2,NULL); timecurand+=timeval_diff(&t2,&t1); gettimeofday(&t1,NULL); - runmcmc_record_kernel<<< Dim_Grid, Dim_Block >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, nvox, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), iters_step, (i*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, multirecords_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr, rlikelihood_energy_ptr); + runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), iters_step, (i*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, multirecords_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr, rlikelihood_energy_ptr); sync_check("runmcmc_record_kernel"); gettimeofday(&t2,NULL); @@ -404,8 +439,18 @@ void runmcmc_record( //INPUT gettimeofday(&t1,NULL); if(nvox!=0){ - curandGenerateNormal(gen,randomsN_ptr,last_randoms,0,1); - curandGenerateUniform(gen,randomsU_ptr,last_randoms); //generate randoms + curandStatus_t status = curandGenerateNormal(gen,randomsN_ptr,last_randoms,0,1); + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } + status = curandGenerateUniform(gen,randomsU_ptr,last_randoms); //generate randoms + if (status != CURAND_STATUS_SUCCESS) + { + printf("Failure generating cuda random numbers: %d\n",status); + exit(1); + } } gettimeofday(&t2,NULL); @@ -414,7 +459,7 @@ void runmcmc_record( //INPUT gettimeofday(&t1,NULL); if(nvox!=0){ - runmcmc_record_kernel<<< Dim_Grid, Dim_Block >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, nvox, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), last_step, (steps*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, multirecords_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr, rlikelihood_energy_ptr); + runmcmc_record_kernel<<< Dim_Grid, Dim_Block, amount_shared >>>(datam_ptr, bvals_ptr, alpha_ptr, beta_ptr, fibres_ptr, multifibres_ptr, signals_ptr, isosignals_ptr, randomsN_ptr, randomsU_ptr, ndirections, nfib, nparams, opts.modelnum.value(), opts.fudge.value(), opts.f0.value(), opts.ardf0.value(), !opts.no_ard.value(), opts.rician.value(), opts.updateproposalevery.value(), last_step, (steps*iters_step), opts.nburn.value(), opts.sampleevery.value(), totalrecords, multirecords_ptr, rf0_ptr, rtau_ptr, rs0_ptr, rd_ptr, rdstd_ptr, rth_ptr, rph_ptr, rf_ptr, rlikelihood_energy_ptr); sync_check("runmcmc_record_kernel"); } @@ -422,16 +467,15 @@ void runmcmc_record( //INPUT timemcmc+=timeval_diff(&t2,&t1); - myfile << "TIME CURAND: " << timecurand << " seconds\n"; - myfile << "TIME RUNMCMC: " << timemcmc << " seconds\n"; + cout << "TIME CURAND: " << timecurand << " seconds\n"; + cout << "TIME RUNMCMC: " << timemcmc << " seconds\n"; curandDestroyGenerator(gen); gettimeofday(&t_tot2,NULL); time=timeval_diff(&t_tot2,&t_tot1); - myfile << "TIME TOTAL: " << time << " seconds\n"; - myfile << "--------------------------------------------" << "\n\n" ; - myfile.close(); - + cout << "TIME TOTAL: " << time << " seconds\n"; + cout << "--------------------------------------------" << "\n\n" ; + sync_check("runmcmc_record"); } -- GitLab