Skip to content
Snippets Groups Projects
Commit 87efb1bf authored by Moises Fernandez's avatar Moises Fernandez
Browse files

Avoid extra copies of bvals-bvecs if consider gradient nonlinearities is not activated

parent 87b058d4
No related branches found
No related tags found
No related merge requests found
......@@ -35,6 +35,7 @@ void fit_PVM_single( //INPUT
int ndirections,
int nfib,
bool m_include_f0,
bool gradnonlin,
string output_file,
//OUTPUT
thrust::device_vector<double>& params_gpu)
......@@ -54,7 +55,10 @@ void fit_PVM_single( //INPUT
for(int vox=0;vox<nvox;vox++){
// initialise with a tensor
DTI dti(datam_vec[vox],bvecs_vec[vox],bvals_vec[vox]);
int pos_bv;
if (gradnonlin) pos_bv=vox;
else pos_bv=0;
DTI dti(datam_vec[vox],bvecs_vec[pos_bv],bvals_vec[pos_bv]);
dti.linfit();
// set starting parameters for nonlinear fitting
......@@ -95,7 +99,7 @@ void fit_PVM_single( //INPUT
myfile << "Shared Memory Used in fit_PVM_single: " << amount_shared << "\n";
fit_PVM_single_kernel<<<Dim_Grid, Dim_Block, amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()) ,nvox, ndirections, nfib, nparams, m_include_f0, thrust::raw_pointer_cast(params_gpu.data()));
fit_PVM_single_kernel<<<Dim_Grid, Dim_Block, amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()) ,nvox, ndirections, nfib, nparams, m_include_f0, gradnonlin,thrust::raw_pointer_cast(params_gpu.data()));
sync_check("fit_PVM_single_kernel");
myfile.close();
......@@ -110,7 +114,8 @@ void fit_PVM_single_c( //INPUT
thrust::device_vector<double> bvals_gpu,
int ndirections,
int nfib,
bool m_include_f0,
bool m_include_f0,
bool gradnonlin,
string output_file,
//OUTPUT
thrust::device_vector<double>& params_gpu)
......@@ -130,7 +135,10 @@ void fit_PVM_single_c( //INPUT
for(int vox=0;vox<nvox;vox++){
// initialise with a tensor
DTI dti(datam_vec[vox],bvecs_vec[vox],bvals_vec[vox]);
int pos_bv;
if (gradnonlin) pos_bv=vox;
else pos_bv=0;
DTI dti(datam_vec[vox],bvecs_vec[pos_bv],bvals_vec[pos_bv]);
dti.linfit();
// set starting parameters for nonlinear fitting
......@@ -151,7 +159,7 @@ void fit_PVM_single_c( //INPUT
}
// do a better job for initializing the volume fractions
PVM_single_c pvm(datam_vec[vox],bvecs_vec[vox],bvals_vec[vox],nfib,false,m_include_f0,false);
PVM_single_c pvm(datam_vec[vox],bvecs_vec[pos_bv],bvals_vec[pos_bv],nfib,false,m_include_f0,false);
pvm.fit_pvf(start);
for(int i=0;i<nparams;i++){
......@@ -169,7 +177,7 @@ void fit_PVM_single_c( //INPUT
myfile << "Shared Memory Used in fit_PVM_single_c: " << amount_shared << "\n";
fit_PVM_single_c_kernel<<<Dim_Grid, Dim_Block, amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()) ,nvox, ndirections, nfib, nparams, false, m_include_f0, false, thrust::raw_pointer_cast(params_gpu.data()));
fit_PVM_single_c_kernel<<<Dim_Grid, Dim_Block, amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()) ,nvox, ndirections, nfib, nparams, false, m_include_f0, false, gradnonlin,thrust::raw_pointer_cast(params_gpu.data()));
sync_check("fit_PVM_single_c_kernel");
myfile.close();
......@@ -183,6 +191,7 @@ void fit_PVM_multi( //INPUT
int ndirections,
int nfib,
bool m_include_f0,
bool gradnonlin,
string output_file,
//OUTPUT
thrust::device_vector<double>& params_gpu)
......@@ -209,7 +218,7 @@ void fit_PVM_multi( //INPUT
myfile << "Shared Memory Used in fit_PVM_multi: " << amount_shared << "\n";
fit_PVM_multi_kernel<<<Dim_Grid, Dim_Block, amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_PVM_single_c_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()) ,nvox, ndirections, nfib, nparams, m_include_f0, thrust::raw_pointer_cast(params_gpu.data()));
fit_PVM_multi_kernel<<<Dim_Grid, Dim_Block, amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_PVM_single_c_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()) ,nvox, ndirections, nfib, nparams, m_include_f0, gradnonlin,thrust::raw_pointer_cast(params_gpu.data()));
sync_check("fit_PVM_multi_kernel");
myfile.close();
......@@ -227,6 +236,7 @@ void calculate_tau( //INPUT
int model,
bool m_include_f0,
bool nonlin,
bool gradnonlin,
string output_file,
//OUTPUT
thrust::host_vector<float>& tau)
......@@ -269,16 +279,16 @@ void calculate_tau( //INPUT
if(model==1){
if(nonlin){
get_residuals_PVM_single_kernel<<<Dim_Grid, Dim_Block,amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()), nvox, ndirections, nfib, nparams, m_include_f0, thrust::raw_pointer_cast(includes_f0_gpu.data()), thrust::raw_pointer_cast(residuals_gpu.data()));
get_residuals_PVM_single_kernel<<<Dim_Grid, Dim_Block,amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()), nvox, ndirections, nfib, nparams, m_include_f0,gradnonlin, thrust::raw_pointer_cast(includes_f0_gpu.data()), thrust::raw_pointer_cast(residuals_gpu.data()));
sync_check("get_residuals_PVM_single_kernel");
}else{
get_residuals_PVM_single_c_kernel<<<Dim_Grid, Dim_Block,amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()), nvox, ndirections, nfib, nparams, m_include_f0, thrust::raw_pointer_cast(includes_f0_gpu.data()), thrust::raw_pointer_cast(residuals_gpu.data()));
get_residuals_PVM_single_c_kernel<<<Dim_Grid, Dim_Block,amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()), nvox, ndirections, nfib, nparams, m_include_f0, gradnonlin,thrust::raw_pointer_cast(includes_f0_gpu.data()), thrust::raw_pointer_cast(residuals_gpu.data()));
sync_check("get_residuals_PVM_single_c_kernel");
}
}else{
//model 2 : non-mono-exponential
get_residuals_PVM_multi_kernel<<<Dim_Grid, Dim_Block,amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()), nvox, ndirections, nfib, nparams, m_include_f0, thrust::raw_pointer_cast(includes_f0_gpu.data()), thrust::raw_pointer_cast(residuals_gpu.data()));
get_residuals_PVM_multi_kernel<<<Dim_Grid, Dim_Block,amount_shared>>>(thrust::raw_pointer_cast(datam_gpu.data()), thrust::raw_pointer_cast(params_gpu.data()), thrust::raw_pointer_cast(bvecs_gpu.data()), thrust::raw_pointer_cast(bvals_gpu.data()), nvox, ndirections, nfib, nparams, m_include_f0, gradnonlin,thrust::raw_pointer_cast(includes_f0_gpu.data()), thrust::raw_pointer_cast(residuals_gpu.data()));
sync_check("get_residuals_PVM_multi_kernel");
}
......
......@@ -68,6 +68,7 @@ void xfibres_gpu( //INPUT
int nvox = datam.Ncols();
int ndirections = datam.Nrows();
int nfib= opts.nfibres.value();
bool gradnonlin=opts.grad_file.set();
if(nvox>0){
thrust::host_vector<double> datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host;
......@@ -89,7 +90,7 @@ void xfibres_gpu( //INPUT
fit(datam_vec,bvecs_vec,bvals_vec,datam_host,bvecs_host,bvals_host,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,gpu_log,params_gpu,vox_repeat,nrepeat);
if(opts.rician.value()){
calculate_tau(datam_gpu,params_gpu,bvecs_gpu,bvals_gpu,vox_repeat,nrepeat,ndirections,nfib,opts.modelnum.value(),opts.f0.value(),opts.nonlin.value(),gpu_log,tau_host);
calculate_tau(datam_gpu,params_gpu,bvecs_gpu,bvals_gpu,vox_repeat,nrepeat,ndirections,nfib,opts.modelnum.value(),opts.f0.value(),opts.nonlin.value(),gradnonlin,gpu_log,tau_host);
}
bvecs_gpu.clear(); //free bvecs_gpu
......@@ -187,10 +188,11 @@ void fit( //INPUT
int nparams_fit = 2+3*opts.nfibres.value();
if(opts.modelnum.value()==2) nparams_fit++;
if(opts.f0.value()) nparams_fit++;
bool gradnonlin=opts.grad_file.set();
if(opts.modelnum.value()==1){
if(opts.nonlin.value()){
fit_PVM_single(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),output_file,params_gpu);
fit_PVM_single(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);
if (opts.f0.value()){
float md,mf,f0;
......@@ -223,14 +225,14 @@ void fit( //INPUT
thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
fit_PVM_single(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,output_file,params_repeat_gpu);
fit_PVM_single(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());
//mix all the parameteres: repeated and not repeated
mix_params(params_repeat_host,vox_repeat, nrepeat, nvox, params_gpu);
}
}
}else{
fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),output_file,params_gpu);
fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);
if (opts.f0.value()){
float md,mf,f0;
......@@ -263,7 +265,7 @@ void fit( //INPUT
thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,output_file,params_repeat_gpu);
fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());
//mix all the parameteres: repeated and not repeated
......@@ -273,9 +275,9 @@ void fit( //INPUT
}
}else{
//model 2 : non-mono-exponential
fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),output_file,params_gpu);
fit_PVM_single_c(datam_vec,bvecs_vec,bvals_vec,datam_gpu,bvecs_gpu,bvals_gpu,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);
fit_PVM_multi(datam_gpu,bvecs_gpu,bvals_gpu,nvox,ndirections,nfib,opts.f0.value(),output_file,params_gpu);
fit_PVM_multi(datam_gpu,bvecs_gpu,bvals_gpu,nvox,ndirections,nfib,opts.f0.value(),gradnonlin,output_file,params_gpu);
if (opts.f0.value()){
float md,mf,f0;
......@@ -308,9 +310,9 @@ void fit( //INPUT
thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,output_file,params_repeat_gpu);
fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
fit_PVM_multi(datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,nrepeat,ndirections,nfib,false,output_file,params_repeat_gpu);
fit_PVM_multi(datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,nrepeat,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());
//mix all the parameteres: repeated and not repeated
......@@ -357,14 +359,22 @@ void prepare_data_gpu_FIT( //INPUT
}
}
bvecs_vec.resize(nvox);
bvals_vec.resize(nvox);
bvecs_host.resize(nvox*bvecs.Nrows()*bvecs.Ncols());
bvals_host.resize(nvox*bvals.Ncols());
if (opts.grad_file.set()){
bvecs_vec.resize(nvox);
bvals_vec.resize(nvox);
bvecs_host.resize(nvox*bvecs.Nrows()*bvecs.Ncols());
bvals_host.resize(nvox*bvals.Ncols());
alpha_host.resize(nvox*bvecs.Ncols());
beta_host.resize(nvox*bvecs.Ncols());
}else{
bvecs_vec.resize(1);
bvals_vec.resize(1);
bvecs_host.resize(1*bvecs.Nrows()*bvecs.Ncols());
bvals_host.resize(1*bvals.Ncols());
alpha_host.resize(1*bvecs.Ncols());
beta_host.resize(1*bvecs.Ncols());
}
alpha_host.resize(nvox*bvecs.Ncols());
beta_host.resize(nvox*bvecs.Ncols());
ColumnVector alpha,beta;
if (opts.grad_file.set()){
......@@ -386,18 +396,16 @@ void prepare_data_gpu_FIT( //INPUT
}else{
MISCMATHS::cart2sph(bvecs,alpha,beta);
for(int vox=0;vox<nvox;vox++){
bvecs_vec[vox]=bvecs;
bvals_vec[vox]=bvals;
for(int dir=0;dir<ndirections;dir++){
bvecs_host[vox*ndirections*3+dir] = bvecs(1,dir+1);
bvecs_host[vox*ndirections*3+ndirections+dir] = bvecs(2,dir+1);
bvecs_host[vox*ndirections*3+ndirections*2+dir] = bvecs(3,dir+1);
bvals_host[vox*ndirections+dir] = bvals(1,dir+1);
bvecs_vec[0]=bvecs;
bvals_vec[0]=bvals;
for(int dir=0;dir<ndirections;dir++){
bvecs_host[ndirections*3+dir] = bvecs(1,dir+1);
bvecs_host[ndirections*3+ndirections+dir] = bvecs(2,dir+1);
bvecs_host[ndirections*3+ndirections*2+dir] = bvecs(3,dir+1);
bvals_host[ndirections+dir] = bvals(1,dir+1);
alpha_host[vox*ndirections+dir] = alpha(dir+1);
beta_host[vox*ndirections+dir] = beta(dir+1);
}
alpha_host[ndirections+dir] = alpha(dir+1);
beta_host[ndirections+dir] = beta(dir+1);
}
}
......@@ -437,29 +445,58 @@ void prepare_data_gpu_FIT_repeat( //INPUT
datam_repeat_vec.resize(nrepeat);
datam_repeat_host.resize(nrepeat*ndirections);
bvecs_repeat_vec.resize(nrepeat);
bvals_repeat_vec.resize(nrepeat);
bvecs_repeat_host.resize(nrepeat*3*ndirections);
bvals_repeat_host.resize(nrepeat*ndirections);
if (opts.grad_file.set()){
bvecs_repeat_vec.resize(nrepeat);
bvals_repeat_vec.resize(nrepeat);
bvecs_repeat_host.resize(nrepeat*3*ndirections);
bvals_repeat_host.resize(nrepeat*ndirections);
}else{
bvecs_repeat_vec.resize(1);
bvals_repeat_vec.resize(1);
bvecs_repeat_host.resize(1*3*ndirections);
bvals_repeat_host.resize(1*ndirections);
}
for(int vox=0;vox<nrepeat;vox++){
for(int dir=0;dir<ndirections;dir++){
datam(dir+1)= datam_host[vox_repeat[vox]*ndirections+dir];
datam_repeat_host[vox*ndirections+dir]=datam_host[vox_repeat[vox]*ndirections+dir];
}
datam_repeat_vec[vox]=datam;
}
bvecs_repeat_host[vox*ndirections*3+dir] = bvecs_host[vox_repeat[vox]*ndirections*3+dir];
bvecs_repeat_host[vox*ndirections*3+ndirections+dir] = bvecs_host[vox_repeat[vox]*ndirections*3+ndirections+dir];
bvecs_repeat_host[vox*ndirections*3+ndirections*2+dir] = bvecs_host[vox_repeat[vox]*ndirections*3+ndirections*2+dir];
bvals_repeat_host[vox*ndirections+dir] = bvals_host[vox_repeat[vox]*ndirections+dir];
if (opts.grad_file.set()){
for(int vox=0;vox<nrepeat;vox++){
for(int dir=0;dir<ndirections;dir++){
bvecs_repeat_host[vox*ndirections*3+dir] = bvecs_host[vox_repeat[vox]*ndirections*3+dir];
bvecs_repeat_host[vox*ndirections*3+ndirections+dir] = bvecs_host[vox_repeat[vox]*ndirections*3+ndirections+dir];
bvecs_repeat_host[vox*ndirections*3+ndirections*2+dir] = bvecs_host[vox_repeat[vox]*ndirections*3+ndirections*2+dir];
bvals_repeat_host[vox*ndirections+dir] = bvals_host[vox_repeat[vox]*ndirections+dir];
bvecs(1,dir+1)= bvecs_host[vox_repeat[vox]*ndirections*3+dir];
bvecs(2,dir+1)= bvecs_host[vox_repeat[vox]*ndirections*3+ndirections+dir];
bvecs(3,dir+1)= bvecs_host[vox_repeat[vox]*ndirections*3+ndirections*2+dir];
bvals(1,dir+1)= bvals_host[vox_repeat[vox]*ndirections+dir];
}
bvecs_repeat_vec[vox]=bvecs;
bvals_repeat_vec[vox]=bvals;
}
}else{
for(int dir=0;dir<ndirections;dir++){
bvecs_repeat_host[ndirections*3+dir] = bvecs_host[ndirections*3+dir];
bvecs_repeat_host[ndirections*3+ndirections+dir] = bvecs_host[ndirections*3+ndirections+dir];
bvecs_repeat_host[ndirections*3+ndirections*2+dir] = bvecs_host[ndirections*3+ndirections*2+dir];
bvals_repeat_host[ndirections+dir] = bvals_host[ndirections+dir];
bvecs(1,dir+1)= bvecs_host[vox_repeat[vox]*ndirections*3+dir];
bvecs(2,dir+1)= bvecs_host[vox_repeat[vox]*ndirections*3+ndirections+dir];
bvecs(3,dir+1)= bvecs_host[vox_repeat[vox]*ndirections*3+ndirections*2+dir];
bvals(1,dir+1)= bvals_host[vox_repeat[vox]*ndirections+dir];
bvecs(1,dir+1)= bvecs_host[ndirections*3+dir];
bvecs(2,dir+1)= bvecs_host[ndirections*3+ndirections+dir];
bvecs(3,dir+1)= bvecs_host[ndirections*3+ndirections*2+dir];
bvals(1,dir+1)= bvals_host[ndirections+dir];
}
datam_repeat_vec[vox]=datam;
bvecs_repeat_vec[vox]=bvecs;
bvals_repeat_vec[vox]=bvals;
bvecs_repeat_vec[0]=bvecs;
bvals_repeat_vec[0]=bvals;
}
int nfib= opts.nfibres.value();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment