diff --git a/CUDA/xfibres_gpu.cu b/CUDA/xfibres_gpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..ebcc365d3140593855044f4a9447e07533c897b7 --- /dev/null +++ b/CUDA/xfibres_gpu.cu @@ -0,0 +1,612 @@ +#include "newmat.h" +#include "newimage/newimageall.h" +#include "xfibresoptions.h" + +#include "xfibres_gpu.cuh" +#include "diffmodels.cuh" +#include "runmcmc.h" +#include "samples.h" +#include "options.h" + +#include <host_vector.h> +#include <device_vector.h> + +#include <time.h> +#include <sys/time.h> +#include "init_gpu.h" +#include <fstream> + +using namespace Xfibres; + +void xfibres_gpu( //INPUT + const Matrix datam, + const Matrix bvecs, + const Matrix bvals, + const Matrix gradm, + const Matrix Qform, + const Matrix Qform_inv, + const NEWIMAGE::volume<int> vol2matrixkey, + const NEWMAT::Matrix matrix2volkey, + const NEWIMAGE::volume<float> mask, + const int slice, + const char* subjdir) +{ + //write num of slice in a string for log file + char slice_str[8]; + char aux[8]; + sprintf(slice_str,"%d",slice); + while(strlen(slice_str)<4){ + strcpy(aux,"0"); + strcat(aux,slice_str); + strcpy(slice_str,aux); + } + string gpu_log(subjdir); //logfile + gpu_log += ".bedpostX/logs/times_gpu_"; + gpu_log += slice_str; + + xfibresOptions& opts = xfibresOptions::getInstance(); + + ///// FIT ///// + thrust::host_vector<double> datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host; + thrust::host_vector<float> tau_host; + vector<ColumnVector> datam_vec; + vector<Matrix> bvecs_vec, bvals_vec; + + prepare_data_gpu_FIT(datam,bvecs,bvals,gradm,Qform,Qform_inv,datam_vec, bvecs_vec, bvals_vec, datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host, tau_host); + + int nvox = datam.Ncols(); + int nfib= opts.nfibres.value(); + + thrust::device_vector<double> datam_gpu=datam_host; + thrust::device_vector<double> bvecs_gpu=bvecs_host; + thrust::device_vector<double> bvals_gpu=bvals_host; + thrust::device_vector<double> params_gpu=params_host; + thrust::host_vector<int> vox_repeat; //contains the id's of voxels repeated + vox_repeat.resize(nvox); + int nrepeat=0; + + fit(datam_vec,bvecs_vec,bvals_vec,datam_host,bvecs_host,bvals_host,datam_gpu,bvecs_gpu,bvals_gpu,gpu_log,params_gpu,vox_repeat,nrepeat); + + if(opts.rician.value()){ + calculate_tau(datam_gpu,params_gpu,bvecs_gpu,bvals_gpu,vox_repeat,nrepeat,gpu_log, tau_host); + } + + bvecs_gpu.clear(); //free bvecs_gpu + bvecs_gpu.shrink_to_fit(); + + ////// RUN MCMC ////// + thrust::host_vector<double> signals_host,isosignals_host; + thrust::host_vector<FibreGPU> fibres_host; + thrust::host_vector<MultifibreGPU> multifibres_host; + + prepare_data_gpu_MCMC(nvox, nfib, signals_host, isosignals_host, fibres_host, multifibres_host); + + thrust::device_vector<double> signals_gpu=signals_host; + thrust::device_vector<double> isosignals_gpu=isosignals_host; + thrust::device_vector<FibreGPU> fibres_gpu=fibres_host; + thrust::device_vector<MultifibreGPU> multifibres_gpu=multifibres_host; + thrust::device_vector<float> tau_gpu = tau_host; + thrust::device_vector<double> alpha_gpu=alpha_host; + thrust::device_vector<double> beta_gpu=beta_host; + + init_Fibres_Multifibres(datam_gpu, params_gpu, tau_gpu, bvals_gpu, alpha_gpu, beta_gpu, gpu_log, fibres_gpu, multifibres_gpu, signals_gpu, isosignals_gpu); + + srand(opts.seed.value()); + //double seed1 = rand(); + //double seed2 = rand(); + + runmcmc_burnin(datam_gpu, bvals_gpu, alpha_gpu, beta_gpu, rand(), gpu_log, fibres_gpu,multifibres_gpu, signals_gpu, isosignals_gpu); + + thrust::device_vector<int> multirecords_gpu; + thrust::device_vector<float> rf0_gpu, rtau_gpu, rs0_gpu, rd_gpu, rdstd_gpu, rth_gpu, rph_gpu, rf_gpu, rlikelihood_energy_gpu; + + prepare_data_gpu_MCMC_record(nvox, multirecords_gpu, rf0_gpu, rtau_gpu, rs0_gpu, rd_gpu, rdstd_gpu, rth_gpu, rph_gpu, rf_gpu, rlikelihood_energy_gpu); + + runmcmc_record(datam_gpu, bvals_gpu, alpha_gpu,beta_gpu, fibres_gpu, multifibres_gpu, signals_gpu, isosignals_gpu, rand(), gpu_log, multirecords_gpu, rf0_gpu, rtau_gpu, rs0_gpu, rd_gpu, rdstd_gpu, rth_gpu, rph_gpu, rf_gpu, rlikelihood_energy_gpu); + + /////// FINISH ALL VOXELS /////// + record_finish_voxels(vol2matrixkey, matrix2volkey, mask, multirecords_gpu, rf0_gpu, rtau_gpu, rs0_gpu, rd_gpu, rdstd_gpu, rth_gpu, rph_gpu, rf_gpu, rlikelihood_energy_gpu, nvox); +} + + +// Correct bvals/bvecs accounting for Gradient Nonlinearities +// ColumnVector grad_nonlin has 9 entries, corresponding to the 3 components of each of the x,y and z gradient deviation +void correct_bvals_bvecs(const Matrix& bvals,const Matrix& bvecs, const ColumnVector& grad_nonlin, const Matrix& Qform, const Matrix& Qform_inv, Matrix& bvals_c, Matrix& bvecs_c){ + bvals_c=bvals; bvecs_c=bvecs; + Matrix L(3,3); //gradient coil tensor + float mag; + L(1,1)=grad_nonlin(1); L(1,2)=grad_nonlin(4); L(1,3)=grad_nonlin(7); + L(2,1)=grad_nonlin(2); L(2,2)=grad_nonlin(5); L(2,3)=grad_nonlin(8); + L(3,1)=grad_nonlin(3); L(3,2)=grad_nonlin(6); L(3,3)=grad_nonlin(9); + + IdentityMatrix Id(3); + + for (int l=1; l<=bvals.Ncols(); l++){ + if (bvals(1,l)>0){ //do not correct b0s + //Rotate bvecs to scanner's coordinate system + ColumnVector bvec_tmp(3); + bvec_tmp=Qform*bvecs.Column(l); + bvec_tmp(1)=-bvec_tmp(1); //Sign-flip X coordinate + + //Correct for grad-nonlin in scanner's coordinate system + bvecs_c.Column(l)=(Id+L)*bvec_tmp;//bvecs.Column(l); + mag=sqrt(bvecs_c(1,l)*bvecs_c(1,l)+bvecs_c(2,l)*bvecs_c(2,l)+bvecs_c(3,l)*bvecs_c(3,l)); + if (mag!=0) + bvecs_c.Column(l)=bvecs_c.Column(l)/mag; + bvals_c(1,l)=mag*mag*bvals(1,l); + bvec_tmp=bvecs_c.Column(l); + + //Rotate corrected bvecs back to voxel coordinate system + bvec_tmp(1)=-bvec_tmp(1); //Sign-flip X coordinate + bvecs_c.Column(l)=Qform_inv*bvec_tmp; + } + } +} + +////// FIT ////// +void fit( //INPUT + const vector<ColumnVector> datam_vec, + const vector<Matrix> bvecs_vec, + const vector<Matrix> bvals_vec, + thrust::host_vector<double> datam_host, + thrust::host_vector<double> bvecs_host, + thrust::host_vector<double> bvals_host, + thrust::device_vector<double> datam_gpu, + thrust::device_vector<double> bvecs_gpu, + thrust::device_vector<double> bvals_gpu, + string output_file, + //OUTPUT + thrust::device_vector<double>& params_gpu, + thrust::host_vector<int>& vox_repeat, //for get residuals with or withot f0 + int& nrepeat) +{ + std::ofstream myfile; + myfile.open (output_file.data(), ios::out | ios::app ); + myfile << "----------------------------------------------------- " << "\n"; + myfile << "------------------- FIT IN GPU ---------------------- " << "\n"; + myfile << "----------------------------------------------------- " << "\n"; + + struct timeval t1,t2; + double time; + gettimeofday(&t1,NULL); + + xfibresOptions& opts = xfibresOptions::getInstance(); + int nvox = datam_vec.size(); + int nfib= opts.nfibres.value(); + int nparams_fit = 2+3*opts.nfibres.value(); + if(opts.modelnum.value()==2) nparams_fit++; + if(opts.f0.value()) nparams_fit++; + + if(opts.modelnum.value()==1){ + if(opts.nonlin.value()){ + fit_PVM_single(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,opts.f0.value(),params_gpu); + + if (opts.f0.value()){ + float md,mf,f0; + + for(int vox=0;vox<nvox;vox++){ + md = params_gpu[vox*nparams_fit+(1)]; + mf = params_gpu[vox*nparams_fit+(2)]; + f0 = params_gpu[vox*nparams_fit+(nparams_fit-1)]; + if ((opts.nfibres.value()>0 && mf<0.05) || md>0.007 || f0>0.4){ //if true we need to repeat this voxel + vox_repeat[nrepeat]=vox; + nrepeat++; + } + } + if(nrepeat>0){ + //prepare structures for the voxels that need to be reprocessed + vector<ColumnVector> datam_repeat_vec; + vector<Matrix> bvecs_repeat_vec; + vector<Matrix> bvals_repeat_vec; + thrust::host_vector<double> datam_repeat_host; + thrust::host_vector<double> bvecs_repeat_host; + thrust::host_vector<double> bvals_repeat_host; + thrust::host_vector<double> params_repeat_host; + + prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host); + + thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host; + thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host; + thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host; + thrust::device_vector<double> params_repeat_gpu=params_repeat_host; + + fit_PVM_single(datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_gpu, bvecs_repeat_gpu, bvals_repeat_gpu, false, params_repeat_gpu); + + //mix all the parameteres: repeated and not repeated + mix_params(params_repeat_gpu,vox_repeat, nrepeat, params_gpu); + } + } + }else{ + fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,opts.f0.value(),params_gpu); + + if (opts.f0.value()){ + float md,mf,f0; + + for(int vox=0;vox<nvox;vox++){ + md = params_gpu[vox*nparams_fit+(1)]; + mf = params_gpu[vox*nparams_fit+(2)]; + f0 = params_gpu[vox*nparams_fit+(nparams_fit-1)]; + if ((opts.nfibres.value()>0 && mf<0.05) || md>0.007 || f0>0.4){ //if true we need to repeat this voxel + vox_repeat[nrepeat]=vox; + nrepeat++; + } + } + if(nrepeat>0){ + //prepare structures for the voxels that need to be reprocessed + vector<ColumnVector> datam_repeat_vec; + vector<Matrix> bvecs_repeat_vec; + vector<Matrix> bvals_repeat_vec; + thrust::host_vector<double> datam_repeat_host; + thrust::host_vector<double> bvecs_repeat_host; + thrust::host_vector<double> bvals_repeat_host; + thrust::host_vector<double> params_repeat_host; + + prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host); + + thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host; + thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host; + thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host; + thrust::device_vector<double> params_repeat_gpu=params_repeat_host; + + fit_PVM_single_c(datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_gpu, bvecs_repeat_gpu, bvals_repeat_gpu, false, params_repeat_gpu); + + //mix all the parameteres: repeated and not repeated + mix_params(params_repeat_gpu ,vox_repeat, nrepeat, params_gpu); + } + } + } + }else{ + //model 2 : non-mono-exponential + fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,opts.f0.value(),params_gpu); + + fit_PVM_multi(datam_gpu,bvecs_gpu,bvals_gpu,nvox,opts.f0.value(),params_gpu); + + if (opts.f0.value()){ + float md,mf,f0; + + for(int vox=0;vox<nvox;vox++){ + md = params_gpu[vox*nparams_fit+(1)]; + mf = params_gpu[vox*nparams_fit+(3)]; + f0 = params_gpu[vox*nparams_fit+(nparams_fit-1)]; + if ((opts.nfibres.value()>0 && mf<0.05) || md>0.007 || f0>0.4){ //if true we need to repeat this voxel + vox_repeat[nrepeat]=vox; + nrepeat++; + } + } + if(nrepeat>0){ + //prepare structures for the voxels that need to be reprocessed + vector<ColumnVector> datam_repeat_vec; + vector<Matrix> bvecs_repeat_vec; + vector<Matrix> bvals_repeat_vec; + thrust::host_vector<double> datam_repeat_host; + thrust::host_vector<double> bvecs_repeat_host; + thrust::host_vector<double> bvals_repeat_host; + thrust::host_vector<double> params_repeat_host; + + prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host); + + thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host; + thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host; + thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host; + thrust::device_vector<double> params_repeat_gpu=params_repeat_host; + + fit_PVM_single_c(datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_gpu, bvecs_repeat_gpu, bvals_repeat_gpu, false, params_repeat_gpu); + + fit_PVM_multi(datam_repeat_gpu, bvecs_repeat_gpu, bvals_repeat_gpu, nrepeat, false, params_repeat_gpu); + + //mix all the parameteres: repeated and not repeated + mix_params(params_repeat_gpu ,vox_repeat, nrepeat,params_gpu); + } + } + } + + gettimeofday(&t2,NULL); + time=timeval_diff(&t2,&t1); + myfile << "TIME TOTAL: " << time << " seconds\n"; + myfile << "--------------------------------------------" << "\n\n" ; + myfile.close(); + +} + +//prepare the structures for copy all neccesary data to FIT in GPU +void prepare_data_gpu_FIT( //INPUT + const Matrix datam, + const Matrix bvecs, + const Matrix bvals, + const Matrix gradm, + const Matrix Qform, + const Matrix Qform_inv, + //OUTPUT + vector<ColumnVector>& datam_vec, + vector<Matrix>& bvecs_vec, + vector<Matrix>& bvals_vec, + thrust::host_vector<double>& datam_host, //data prepared for copy to GPU + thrust::host_vector<double>& bvecs_host, + thrust::host_vector<double>& bvals_host, + thrust::host_vector<double>& alpha_host, + thrust::host_vector<double>& beta_host, + thrust::host_vector<double>& params_host, + thrust::host_vector<float>& tau_host) +{ + xfibresOptions& opts = xfibresOptions::getInstance(); + int nvox = datam.Ncols(); + + datam_vec.resize(nvox); + datam_host.resize(nvox*NDIRECTIONS); + for(int vox=0;vox<nvox;vox++){ + datam_vec[vox]=datam.Column(vox+1); + for(int j=0;j<NDIRECTIONS;j++){ + datam_host[vox*NDIRECTIONS+j]=datam(j+1,vox+1); + } + } + + bvecs_vec.resize(nvox); + bvals_vec.resize(nvox); + bvecs_host.resize(nvox*bvecs.Nrows()*bvecs.Ncols()); + bvals_host.resize(nvox*bvals.Ncols()); + + alpha_host.resize(nvox*bvecs.Ncols()); + beta_host.resize(nvox*bvecs.Ncols()); + + ColumnVector alpha,beta; + + if (opts.grad_file.set()){ + for(int vox=0;vox<nvox;vox++){ + correct_bvals_bvecs(bvals,bvecs, gradm.Column(vox+1),Qform,Qform_inv,bvals_vec[vox],bvecs_vec[vox]); //correct for gradient nonlinearities + MISCMATHS::cart2sph(bvecs_vec[vox],alpha,beta); + + for(int dir=0;dir<NDIRECTIONS;dir++){ + bvecs_host[vox*NDIRECTIONS*3+dir] = bvecs_vec[vox](1,dir+1); + bvecs_host[vox*NDIRECTIONS*3+NDIRECTIONS+dir] = bvecs_vec[vox](2,dir+1); + bvecs_host[vox*NDIRECTIONS*3+NDIRECTIONS*2+dir] = bvecs_vec[vox](3,dir+1); + bvals_host[vox*NDIRECTIONS+dir] = bvals_vec[vox](1,dir+1); + + alpha_host[vox*NDIRECTIONS+dir] = alpha(dir+1); + beta_host[vox*NDIRECTIONS+dir] = beta(dir+1); + } + } + + }else{ + MISCMATHS::cart2sph(bvecs,alpha,beta); + + for(int vox=0;vox<nvox;vox++){ + bvecs_vec[vox]=bvecs; + bvals_vec[vox]=bvals; + for(int dir=0;dir<NDIRECTIONS;dir++){ + bvecs_host[vox*NDIRECTIONS*3+dir] = bvecs(1,dir+1); + bvecs_host[vox*NDIRECTIONS*3+NDIRECTIONS+dir] = bvecs(2,dir+1); + bvecs_host[vox*NDIRECTIONS*3+NDIRECTIONS*2+dir] = bvecs(3,dir+1); + bvals_host[vox*NDIRECTIONS+dir] = bvals(1,dir+1); + + alpha_host[vox*NDIRECTIONS+dir] = alpha(dir+1); + beta_host[vox*NDIRECTIONS+dir] = beta(dir+1); + } + } + } + + int nfib= opts.nfibres.value(); + int nparams; + + if(opts.f0.value()) nparams=3+nfib*3; + else nparams=2+nfib*3; + if(opts.modelnum.value()==2) nparams++; + + params_host.resize(nvox*nparams); + tau_host.resize(nvox); +} + +//prepare the structures for copy all neccesary data to FIT in GPU when is repeated because f0. Only some voxels +void prepare_data_gpu_FIT_repeat( //INPUT + thrust::host_vector<double> datam_host, + thrust::host_vector<double> bvecs_host, + thrust::host_vector<double> bvals_host, + thrust::host_vector<int> vox_repeat, + int nrepeat, + //OUTPUT + vector<ColumnVector>& datam_repeat_vec, + vector<Matrix>& bvecs_repeat_vec, + vector<Matrix>& bvals_repeat_vec, + thrust::host_vector<double>& datam_repeat_host, //data prepared for copy to GPU + thrust::host_vector<double>& bvecs_repeat_host, + thrust::host_vector<double>& bvals_repeat_host, + thrust::host_vector<double>& params_repeat_host) +{ + xfibresOptions& opts = xfibresOptions::getInstance(); + + ColumnVector datam(NDIRECTIONS); + Matrix bvecs(3,NDIRECTIONS); + Matrix bvals(1,NDIRECTIONS); + + datam_repeat_vec.resize(nrepeat); + datam_repeat_host.resize(nrepeat*NDIRECTIONS); + bvecs_repeat_vec.resize(nrepeat); + bvals_repeat_vec.resize(nrepeat); + bvecs_repeat_host.resize(nrepeat*3*NDIRECTIONS); + bvals_repeat_host.resize(nrepeat*NDIRECTIONS); + + for(int vox=0;vox<nrepeat;vox++){ + for(int dir=0;dir<NDIRECTIONS;dir++){ + datam(dir+1)= datam_host[vox_repeat[vox]*NDIRECTIONS+dir]; + datam_repeat_host[vox*NDIRECTIONS+dir]=datam_host[vox_repeat[vox]*NDIRECTIONS+dir]; + + bvecs_repeat_host[vox*NDIRECTIONS*3+dir] = bvecs_host[vox_repeat[vox]*NDIRECTIONS*3+dir]; + bvecs_repeat_host[vox*NDIRECTIONS*3+NDIRECTIONS+dir] = bvecs_host[vox_repeat[vox]*NDIRECTIONS*3+NDIRECTIONS+dir]; + bvecs_repeat_host[vox*NDIRECTIONS*3+NDIRECTIONS*2+dir] = bvecs_host[vox_repeat[vox]*NDIRECTIONS*3+NDIRECTIONS*2+dir]; + bvals_repeat_host[vox*NDIRECTIONS+dir] = bvals_host[vox_repeat[vox]*NDIRECTIONS+dir]; + + bvecs(1,dir+1)= bvecs_host[vox_repeat[vox]*NDIRECTIONS*3+dir]; + bvecs(2,dir+1)= bvecs_host[vox_repeat[vox]*NDIRECTIONS*3+NDIRECTIONS+dir]; + bvecs(3,dir+1)= bvecs_host[vox_repeat[vox]*NDIRECTIONS*3+NDIRECTIONS*2+dir]; + bvals(1,dir+1)= bvals_host[vox_repeat[vox]*NDIRECTIONS+dir]; + } + datam_repeat_vec[vox]=datam; + bvecs_repeat_vec[vox]=bvecs; + bvals_repeat_vec[vox]=bvals; + } + + int nfib= opts.nfibres.value(); + int nparams; + + nparams=2+nfib*3; + if(opts.modelnum.value()==2) nparams++; + + params_repeat_host.resize(nrepeat*nparams); +} + + +void mix_params( //INPUT + thrust::device_vector<double> params_repeat_gpu, + thrust::host_vector<int> vox_repeat, + int nrepeat, + //INPUT-OUTPUT + thrust::device_vector<double>& params_gpu) +{ + xfibresOptions& opts = xfibresOptions::getInstance(); + int nfib= opts.nfibres.value(); + int nparams; + nparams=2+nfib*3; + if(opts.modelnum.value()==2) nparams++; + + for(int vox=0;vox<nrepeat;vox++){ + for(int par=0;par<nparams;par++){ + params_gpu[vox_repeat[vox]*(nparams+1)+par] = params_repeat_gpu[vox*nparams+par]; //(nparams+1) to count f0 + } + params_gpu[vox_repeat[vox]*(nparams+1)+nparams] = 0.001; //pvmf0=0.001 + } +} + +void prepare_data_gpu_MCMC( //INPUT + int nvox, + int nfib, + //OUTPUT + thrust::host_vector<double>& signals_host, + thrust::host_vector<double>& isosignals_host, + thrust::host_vector<FibreGPU>& fibres_host, + thrust::host_vector<MultifibreGPU>& multifibres_host) +{ + signals_host.resize(nvox*nfib*NDIRECTIONS); + isosignals_host.resize(nvox*NDIRECTIONS); + fibres_host.resize(nvox*nfib); + multifibres_host.resize(nvox); +} + +void prepare_data_gpu_MCMC_record( //INPUT + int nvox, + //OUTPUT + thrust::device_vector<int>& multirecords_gpu, + thrust::device_vector<float>& rf0_gpu, + thrust::device_vector<float>& rtau_gpu, + thrust::device_vector<float>& rs0_gpu, + thrust::device_vector<float>& rd_gpu, + thrust::device_vector<float>& rdstd_gpu, + thrust::device_vector<float>& rth_gpu, + thrust::device_vector<float>& rph_gpu, + thrust::device_vector<float>& rf_gpu, + thrust::device_vector<float>& rlikelihood_energy_gpu) +{ + xfibresOptions& opts = xfibresOptions::getInstance(); + + int nfib = opts.nfibres.value(); + int nrecords = (opts.njumps.value()/opts.sampleevery.value()); + + multirecords_gpu.resize(nvox*nrecords); + if(opts.f0.value()) rf0_gpu.resize(nvox*nrecords); + if(opts.rician.value()) rtau_gpu.resize(nvox*nrecords); + rs0_gpu.resize(nvox*nrecords); + rd_gpu.resize(nvox*nrecords); + if(opts.modelnum.value()==2) rdstd_gpu.resize(nvox*nrecords); + rth_gpu.resize(nvox*nrecords*nfib); + rph_gpu.resize(nvox*nrecords*nfib); + rf_gpu.resize(nvox*nrecords*nfib); + rlikelihood_energy_gpu.resize(nvox*nrecords); +} + +void record_finish_voxels( //INPUT + const NEWIMAGE::volume<int> vol2matrixkey, + const NEWMAT::Matrix matrix2volkey, + const NEWIMAGE::volume<float> mask, + thrust::device_vector<int>& multirecords_gpu, + thrust::device_vector<float>& rf0_gpu, + thrust::device_vector<float>& rtau_gpu, + thrust::device_vector<float>& rs0_gpu, + thrust::device_vector<float>& rd_gpu, + thrust::device_vector<float>& rdstd_gpu, + thrust::device_vector<float>& rth_gpu, + thrust::device_vector<float>& rph_gpu, + thrust::device_vector<float>& rf_gpu, + thrust::device_vector<float>& rlikelihood_energy_gpu, + int nvox) +{ + xfibresOptions& opts = xfibresOptions::getInstance(); + + int nfib = opts.nfibres.value(); + int nrecords = (opts.njumps.value()/opts.sampleevery.value()); + + thrust::host_vector<int> multirecords_host; + thrust::host_vector<float> rf0_host,rtau_host,rs0_host,rd_host,rdstd_host,rth_host,rph_host,rf_host,rlikelihood_energy_host; + + multirecords_host.resize(nvox*nrecords); + rf0_host.resize(nvox*nrecords); + rtau_host.resize(nvox*nrecords); + rs0_host.resize(nvox*nrecords); + rd_host.resize(nvox*nrecords); + rdstd_host.resize(nvox*nrecords); + rth_host.resize(nvox*nfib*nrecords); + rph_host.resize(nvox*nfib*nrecords); + rf_host.resize(nvox*nfib*nrecords); + rlikelihood_energy_host.resize(nvox*nrecords); + + thrust::copy(multirecords_gpu.begin(), multirecords_gpu.end(), multirecords_host.begin()); + if(opts.f0.value()) thrust::copy(rf0_gpu.begin(), rf0_gpu.end(), rf0_host.begin()); + if(opts.rician.value()) thrust::copy(rtau_gpu.begin(), rtau_gpu.end(), rtau_host.begin()); + thrust::copy(rs0_gpu.begin(), rs0_gpu.end(), rs0_host.begin()); + thrust::copy(rd_gpu.begin(), rd_gpu.end(), rd_host.begin()); + if(opts.modelnum.value()==2) thrust::copy(rdstd_gpu.begin(), rdstd_gpu.end(), rdstd_host.begin()); + thrust::copy(rth_gpu.begin(), rth_gpu.end(), rth_host.begin()); + thrust::copy(rph_gpu.begin(), rph_gpu.end(), rph_host.begin()); + thrust::copy(rf_gpu.begin(), rf_gpu.end(), rf_host.begin()); + thrust::copy(rlikelihood_energy_gpu.begin(), rlikelihood_energy_gpu.end(), rlikelihood_energy_host.begin()); + + Samples samples(vol2matrixkey,matrix2volkey,nvox,NDIRECTIONS); + + float ard,arf0,artau,ardstd,ars0,arlikelihood_energy; + float *arth = new float[nfib]; + float *arph = new float[nfib]; + float *arf = new float[nfib]; + int samp; + + for(int vox=0;vox<nvox;vox++){ + for(int rec=0;rec<nrecords;rec++){ + ard=rd_host[(vox*nrecords)+rec]; + if(opts.f0.value()){ + arf0=rf0_host[(vox*nrecords)+rec]; + } + + if(opts.rician.value()){ + artau=rtau_host[(vox*nrecords)+rec]; + } + + if(opts.modelnum.value()==2){ + ardstd=rdstd_host[(vox*nrecords)+rec]; + } + + ars0=rs0_host[(vox*nrecords)+rec]; + + arlikelihood_energy=rlikelihood_energy_host[(vox*nrecords)+rec]; + + for(int j=0;j<nfib;j++){ + arth[j]=rth_host[(vox*nfib*nrecords)+(j*nrecords)+rec]; + arph[j]=rph_host[(vox*nfib*nrecords)+(j*nrecords)+rec]; + arf[j]=rf_host[(vox*nfib*nrecords)+(j*nrecords)+rec]; + + } + + samp=multirecords_host[(vox*nrecords)+rec]; + samples.record(ard,arf0,artau,ardstd,ars0,arlikelihood_energy,arth,arph,arf,vox+1,samp); + } + samples.finish_voxel(vox+1); + } + + samples.save(mask); +} +