Skip to content
Snippets Groups Projects
Commit bd259c2b authored by Moises Fernandez's avatar Moises Fernandez
Browse files

Reduce precision (double to float)

parent c06bbe5d
No related branches found
No related tags found
No related merge requests found
......@@ -12,15 +12,15 @@
//MATRIX INVERSE AS NEWMAT LU SOLVER
//implemented in NEWMAT:newmat7.cpp GeneralSolvI.
__device__ void solver( //INPUT
double *A,
double *P,
float *A,
float *P,
int length,
//TO USE
double *C,
double *el,
float *C,
float *el,
int *indx,
//OUTPUT
double *B)
float *B)
{
//double C[NPARAMS*NPARAMS];
......@@ -33,15 +33,15 @@ __device__ void solver( //INPUT
bool d=true;
//int indx[NPARAMS];
double* akk = C;
double big = fabs(*akk);
float* akk = C;
float big = fabs(*akk);
int mu = 0;
double* ai = akk;
float* ai = akk;
int k;
for (k = 1; k<length; k++){
ai += length;
const double trybig = fabs(*ai);
const float trybig = fabs(*ai);
if (big < trybig){
big = trybig;
mu = k;
......@@ -52,18 +52,18 @@ __device__ void solver( //INPUT
indx[k] = mu;
if (mu != k){
double* a1 = C + length*k;
double* a2 = C + length*mu;
float* a1 = C + length*k;
float* a2 = C + length*mu;
d = !d;
int j = length;
while (j--){
const double temp = *a1;
const float temp = *a1;
*a1++ = *a2;
*a2++ = temp;
}
}
double diag = *akk;
float diag = *akk;
big = 0;
mu = k + 1;
if (diag != 0){
......@@ -71,24 +71,24 @@ __device__ void solver( //INPUT
int i = length - k - 1;
while (i--){
ai += length;
double* al = ai;
double mult = *al / diag;
float* al = ai;
float mult = *al / diag;
*al = mult;
int l = length - k - 1;
double* aj = akk;
float* aj = akk;
if (l-- != 0){
double aux=al[1]-(mult* *(++aj));
float aux=al[1]-(mult* *(++aj));
*(++al) = aux;
//*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler
const double trybig = fabs(*al);
const float trybig = fabs(*al);
if (big < trybig){
big = trybig;
mu = length - i - 1;
}
while (l--){
double aux= al[1]-(mult* *(++aj));
float aux= al[1]-(mult* *(++aj));
*(++al) = aux;
//*(++al) = __dadd_rn (*al,-mult* *(++aj)); //FAIL in cuda 4.2 compiler
}
......@@ -111,7 +111,7 @@ __device__ void solver( //INPUT
int j;
int ii = length;
int ip;
double temp;
float temp;
int i;
for (i=0; i<length; i++){
......@@ -122,8 +122,8 @@ __device__ void solver( //INPUT
if (temp != 0.0) { ii = i; break; }
}
double* bi;
double* ai2;
float* bi;
float* ai2;
i = ii + 1;
if (i < length){
......@@ -131,10 +131,10 @@ __device__ void solver( //INPUT
ai2 = C + ii + i * length;
for (;;){
int ip = indx[i];
double sum = el[ip];
float sum = el[ip];
el[ip] = el[i];
double* aij = ai2;
double* bj = bi;
float* aij = ai2;
float* bj = bi;
j = i - ii;
while (j--){
sum -= *aij++* *bj++;
......@@ -148,11 +148,11 @@ __device__ void solver( //INPUT
ai2 = C + length*length;
for (i = length - 1; i >= 0; i--){
double* bj = el+i;
float* bj = el+i;
ai2 -= length;
double* ajx = ai2+i;
double sum = *bj;
double diag = *ajx;
float* ajx = ai2+i;
float sum = *bj;
float diag = *ajx;
j = length - i;
while(--j){
sum -= *(++ajx)* *(++bj);
......
......@@ -71,7 +71,8 @@ void xfibres_gpu( //INPUT
bool gradnonlin=opts.grad_file.set();
if(nvox>0){
thrust::host_vector<double> datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host;
thrust::host_vector<float> datam_host, bvecs_host, bvals_host, params_host;
thrust::host_vector<double> alpha_host, beta_host;
thrust::host_vector<float> tau_host;
vector<ColumnVector> datam_vec;
vector<Matrix> bvecs_vec, bvals_vec;
......@@ -79,10 +80,10 @@ void xfibres_gpu( //INPUT
///// FIT /////
prepare_data_gpu_FIT(datam,bvecs,bvals,gradm,datam_vec, bvecs_vec, bvals_vec, datam_host, bvecs_host, bvals_host, alpha_host, beta_host, params_host, tau_host);
thrust::device_vector<double> datam_gpu=datam_host;
thrust::device_vector<double> bvecs_gpu=bvecs_host;
thrust::device_vector<double> bvals_gpu=bvals_host;
thrust::device_vector<double> params_gpu=params_host;
thrust::device_vector<float> datam_gpu=datam_host;
thrust::device_vector<float> bvecs_gpu=bvecs_host;
thrust::device_vector<float> bvals_gpu=bvals_host;
thrust::device_vector<float> params_gpu=params_host;
thrust::host_vector<int> vox_repeat; //contains the id's of voxels repeated
vox_repeat.resize(nvox);
int nrepeat=0;
......@@ -161,16 +162,16 @@ void fit( //INPUT
const vector<ColumnVector> datam_vec,
const vector<Matrix> bvecs_vec,
const vector<Matrix> bvals_vec,
thrust::host_vector<double> datam_host,
thrust::host_vector<double> bvecs_host,
thrust::host_vector<double> bvals_host,
thrust::device_vector<double> datam_gpu,
thrust::device_vector<double> bvecs_gpu,
thrust::device_vector<double> bvals_gpu,
thrust::host_vector<float> datam_host,
thrust::host_vector<float> bvecs_host,
thrust::host_vector<float> bvals_host,
thrust::device_vector<float> datam_gpu,
thrust::device_vector<float> bvecs_gpu,
thrust::device_vector<float> bvals_gpu,
int ndirections,
string output_file,
//OUTPUT
thrust::device_vector<double>& params_gpu,
thrust::device_vector<float>& params_gpu,
thrust::host_vector<int>& vox_repeat, //for get residuals with or withot f0
int& nrepeat)
{
......@@ -196,7 +197,7 @@ void fit( //INPUT
if (opts.f0.value()){
float md,mf,f0;
thrust::host_vector<double> params_host;
thrust::host_vector<float> params_host;
params_host.resize(nvox*nparams_fit);
thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
for(int vox=0;vox<nvox;vox++){
......@@ -213,17 +214,17 @@ void fit( //INPUT
vector<ColumnVector> datam_repeat_vec;
vector<Matrix> bvecs_repeat_vec;
vector<Matrix> bvals_repeat_vec;
thrust::host_vector<double> datam_repeat_host;
thrust::host_vector<double> bvecs_repeat_host;
thrust::host_vector<double> bvals_repeat_host;
thrust::host_vector<double> params_repeat_host;
thrust::host_vector<float> datam_repeat_host;
thrust::host_vector<float> bvecs_repeat_host;
thrust::host_vector<float> bvals_repeat_host;
thrust::host_vector<float> params_repeat_host;
prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);
thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<float> params_repeat_gpu=params_repeat_host;
fit_PVM_single(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());
......@@ -236,7 +237,7 @@ void fit( //INPUT
if (opts.f0.value()){
float md,mf,f0;
thrust::host_vector<double> params_host;
thrust::host_vector<float> params_host;
params_host.resize(nvox*nparams_fit);
thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
for(int vox=0;vox<nvox;vox++){
......@@ -253,17 +254,17 @@ void fit( //INPUT
vector<ColumnVector> datam_repeat_vec;
vector<Matrix> bvecs_repeat_vec;
vector<Matrix> bvals_repeat_vec;
thrust::host_vector<double> datam_repeat_host;
thrust::host_vector<double> bvecs_repeat_host;
thrust::host_vector<double> bvals_repeat_host;
thrust::host_vector<double> params_repeat_host;
thrust::host_vector<float> datam_repeat_host;
thrust::host_vector<float> bvecs_repeat_host;
thrust::host_vector<float> bvals_repeat_host;
thrust::host_vector<float> params_repeat_host;
prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);
thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<float> params_repeat_gpu=params_repeat_host;
fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
thrust::copy(params_repeat_gpu.begin(), params_repeat_gpu.end(), params_repeat_host.begin());
......@@ -281,7 +282,7 @@ void fit( //INPUT
if (opts.f0.value()){
float md,mf,f0;
thrust::host_vector<double> params_host;
thrust::host_vector<float> params_host;
params_host.resize(nvox*nparams_fit);
thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
for(int vox=0;vox<nvox;vox++){
......@@ -298,17 +299,17 @@ void fit( //INPUT
vector<ColumnVector> datam_repeat_vec;
vector<Matrix> bvecs_repeat_vec;
vector<Matrix> bvals_repeat_vec;
thrust::host_vector<double> datam_repeat_host;
thrust::host_vector<double> bvecs_repeat_host;
thrust::host_vector<double> bvals_repeat_host;
thrust::host_vector<double> params_repeat_host;
thrust::host_vector<float> datam_repeat_host;
thrust::host_vector<float> bvecs_repeat_host;
thrust::host_vector<float> bvals_repeat_host;
thrust::host_vector<float> params_repeat_host;
prepare_data_gpu_FIT_repeat(datam_host, bvecs_host, bvals_host, vox_repeat, nrepeat, ndirections, datam_repeat_vec, bvecs_repeat_vec, bvals_repeat_vec, datam_repeat_host, bvecs_repeat_host, bvals_repeat_host, params_repeat_host);
thrust::device_vector<double> datam_repeat_gpu=datam_repeat_host;
thrust::device_vector<double> bvecs_repeat_gpu=bvecs_repeat_host;
thrust::device_vector<double> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<double> params_repeat_gpu=params_repeat_host;
thrust::device_vector<float> datam_repeat_gpu=datam_repeat_host;
thrust::device_vector<float> bvecs_repeat_gpu=bvecs_repeat_host;
thrust::device_vector<float> bvals_repeat_gpu=bvals_repeat_host;
thrust::device_vector<float> params_repeat_gpu=params_repeat_host;
fit_PVM_single_c(datam_repeat_vec,bvecs_repeat_vec,bvals_repeat_vec,datam_repeat_gpu,bvecs_repeat_gpu,bvals_repeat_gpu,ndirections,nfib,false,gradnonlin,output_file,params_repeat_gpu);
......@@ -338,12 +339,12 @@ void prepare_data_gpu_FIT( //INPUT
vector<ColumnVector>& datam_vec,
vector<Matrix>& bvecs_vec,
vector<Matrix>& bvals_vec,
thrust::host_vector<double>& datam_host, //data prepared for copy to GPU
thrust::host_vector<double>& bvecs_host,
thrust::host_vector<double>& bvals_host,
thrust::host_vector<float>& datam_host, //data prepared for copy to GPU
thrust::host_vector<float>& bvecs_host,
thrust::host_vector<float>& bvals_host,
thrust::host_vector<double>& alpha_host,
thrust::host_vector<double>& beta_host,
thrust::host_vector<double>& params_host,
thrust::host_vector<float>& params_host,
thrust::host_vector<float>& tau_host)
{
xfibresOptions& opts = xfibresOptions::getInstance();
......@@ -421,9 +422,9 @@ void prepare_data_gpu_FIT( //INPUT
//prepare the structures for copy all neccesary data to FIT in GPU when is repeated because f0. Only some voxels
void prepare_data_gpu_FIT_repeat( //INPUT
thrust::host_vector<double> datam_host,
thrust::host_vector<double> bvecs_host,
thrust::host_vector<double> bvals_host,
thrust::host_vector<float> datam_host,
thrust::host_vector<float> bvecs_host,
thrust::host_vector<float> bvals_host,
thrust::host_vector<int> vox_repeat,
int nrepeat,
int ndirections,
......@@ -431,10 +432,10 @@ void prepare_data_gpu_FIT_repeat( //INPUT
vector<ColumnVector>& datam_repeat_vec,
vector<Matrix>& bvecs_repeat_vec,
vector<Matrix>& bvals_repeat_vec,
thrust::host_vector<double>& datam_repeat_host, //data prepared for copy to GPU
thrust::host_vector<double>& bvecs_repeat_host,
thrust::host_vector<double>& bvals_repeat_host,
thrust::host_vector<double>& params_repeat_host)
thrust::host_vector<float>& datam_repeat_host, //data prepared for copy to GPU
thrust::host_vector<float>& bvecs_repeat_host,
thrust::host_vector<float>& bvals_repeat_host,
thrust::host_vector<float>& params_repeat_host)
{
xfibresOptions& opts = xfibresOptions::getInstance();
......@@ -509,19 +510,19 @@ void prepare_data_gpu_FIT_repeat( //INPUT
void mix_params( //INPUT
thrust::host_vector<double> params_repeat_host,
thrust::host_vector<float> params_repeat_host,
thrust::host_vector<int> vox_repeat,
int nrepeat,
int nvox,
//INPUT-OUTPUT
thrust::device_vector<double>& params_gpu)
thrust::device_vector<float>& params_gpu)
{
xfibresOptions& opts = xfibresOptions::getInstance();
int nfib= opts.nfibres.value();
int nparams = 2+3*opts.nfibres.value();
if(opts.modelnum.value()==2) nparams++;
thrust::host_vector<double> params_host;
thrust::host_vector<float> params_host;
params_host.resize(nvox*(nparams+1));
thrust::copy(params_gpu.begin(), params_gpu.end(), params_host.begin());
......
......@@ -17,16 +17,16 @@ void fit( //INPUT
const vector<ColumnVector> datam_vec,
const vector<Matrix> bvecs_vec,
const vector<Matrix> bvals_vec,
thrust::host_vector<double> datam_host,
thrust::host_vector<double> bvecs_host,
thrust::host_vector<double> bvals_host,
thrust::device_vector<double> datam_gpu,
thrust::device_vector<double> bvecs_gpu,
thrust::device_vector<double> bvals_gpu,
thrust::host_vector<float> datam_host,
thrust::host_vector<float> bvecs_host,
thrust::host_vector<float> bvals_host,
thrust::device_vector<float> datam_gpu,
thrust::device_vector<float> bvecs_gpu,
thrust::device_vector<float> bvals_gpu,
int ndirections,
string output_file,
//OUTPUT
thrust::device_vector<double>& params_gpu,
thrust::device_vector<float>& params_gpu,
thrust::host_vector<int>& vox_repeat,
int& nrepeat);
......@@ -40,20 +40,20 @@ void prepare_data_gpu_FIT( //INPUT
vector<ColumnVector>& datam_vec,
vector<Matrix>& bvecs_vec,
vector<Matrix>& bvals_vec,
thrust::host_vector<double>& datam_host,
thrust::host_vector<double>& bvecs_host,
thrust::host_vector<double>& bvals_host,
thrust::host_vector<float>& datam_host,
thrust::host_vector<float>& bvecs_host,
thrust::host_vector<float>& bvals_host,
thrust::host_vector<double>& alpha_host,
thrust::host_vector<double>& beta_host,
thrust::host_vector<double>& params_host,
thrust::host_vector<float>& params_host,
thrust::host_vector<float>& tau_host);
//implemented and used in xfibres_gpu.cu
void prepare_data_gpu_FIT_repeat( //INPUT
thrust::host_vector<double> datam_host,
thrust::host_vector<double> bvecs_host,
thrust::host_vector<double> bvals_host,
thrust::host_vector<float> datam_host,
thrust::host_vector<float> bvecs_host,
thrust::host_vector<float> bvals_host,
thrust::host_vector<int> vox_repeat,
int nrepeat,
int ndirections,
......@@ -61,19 +61,19 @@ void prepare_data_gpu_FIT_repeat( //INPUT
vector<ColumnVector>& datam_repeat_vec,
vector<Matrix>& bvecs_repeat_vec,
vector<Matrix>& bvals_repeat_vec,
thrust::host_vector<double>& datam_repeat_host,
thrust::host_vector<double>& bvecs_repeat_host,
thrust::host_vector<double>& bvals_repeat_host,
thrust::host_vector<double>& params_repeat_host);
thrust::host_vector<float>& datam_repeat_host,
thrust::host_vector<float>& bvecs_repeat_host,
thrust::host_vector<float>& bvals_repeat_host,
thrust::host_vector<float>& params_repeat_host);
//implemented and used in xfibres_gpu.cu
void mix_params( //INPUT
thrust::host_vector<double> params_repeat_gpu,
thrust::host_vector<float> params_repeat_gpu,
thrust::host_vector<int> vox_repeat,
int nrepeat,
int nvox,
//INPUT-OUTPUT
thrust::device_vector<double>& params_gpu);
thrust::device_vector<float>& params_gpu);
//implemented and used in xfibres_gpu.cu
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment