Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
fdt
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
FSL
fdt
Commits
d4a9f9f7
Commit
d4a9f9f7
authored
12 years ago
by
Moises Fernandez
Browse files
Options
Downloads
Patches
Plain Diff
new version Levenberg: several threads per voxel
parent
6b4a6a7d
No related branches found
No related tags found
No related merge requests found
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
CUDA/PVM_multi.cu
+388
-290
388 additions, 290 deletions
CUDA/PVM_multi.cu
CUDA/PVM_single.cu
+364
-269
364 additions, 269 deletions
CUDA/PVM_single.cu
CUDA/PVM_single_c.cu
+496
-431
496 additions, 431 deletions
CUDA/PVM_single_c.cu
CUDA/levenberg_marquardt.cu
+226
-160
226 additions, 160 deletions
CUDA/levenberg_marquardt.cu
with
1474 additions
and
1150 deletions
CUDA/PVM_multi.cu
+
388
−
290
View file @
d4a9f9f7
...
@@ -17,42 +17,48 @@
...
@@ -17,42 +17,48 @@
/////////////////////////////////////
/////////////////////////////////////
__device__
inline
double
isoterm_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double
*
bvals
){
__device__
inline
double
isoterm_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double
*
bvals
){
return
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
_b
))
))
;
return
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
_b
));
}
}
__device__
inline
double
isoterm_a_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double
*
bvals
){
__device__
inline
double
isoterm_a_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double
*
bvals
){
return
-
log
(
double
(
1
+
bvals
[
pt
]
*
_b
)
)
*
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
_b
))
))
;
return
-
log
(
1
+
bvals
[
pt
]
*
_b
)
*
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
_b
));
}
}
__device__
inline
double
isoterm_b_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double
*
bvals
){
__device__
inline
double
isoterm_b_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double
*
bvals
){
return
-
_a
*
bvals
[
pt
]
/
(
1
+
bvals
[
pt
]
*
_b
)
*
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
_b
))
))
;
return
-
_a
*
bvals
[
pt
]
/
(
1
+
bvals
[
pt
]
*
_b
)
*
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
_b
));
}
}
__device__
inline
double
anisoterm_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
*
bvecs
,
const
double
*
bvals
){
__device__
inline
double
anisoterm_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
*
bvecs
,
const
double
*
bvals
){
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
return
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
_b
*
(
dp
*
dp
)))
))
;
return
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
_b
*
(
dp
*
dp
)));
}
}
__device__
inline
double
anisoterm_a_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
*
bvecs
,
const
double
*
bvals
){
__device__
inline
double
anisoterm_a_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
*
bvecs
,
const
double
*
bvals
){
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
return
-
log
(
double
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
)
*
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
))
))
;
return
-
log
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
*
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
));
}
}
__device__
inline
double
anisoterm_b_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
*
bvecs
,
const
double
*
bvals
){
__device__
inline
double
anisoterm_b_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
*
bvecs
,
const
double
*
bvals
){
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
return
(
-
_a
*
bvals
[
pt
]
*
(
dp
*
dp
)
/
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
*
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)))
))
;
return
(
-
_a
*
bvals
[
pt
]
*
(
dp
*
dp
)
/
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
*
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)));
}
}
__device__
inline
double
anisoterm_th_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
_th
,
const
double
_ph
,
const
double
*
bvecs
,
const
double
*
bvals
){
__device__
inline
double
anisoterm_th_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
_th
,
const
double
_ph
,
const
double
*
bvecs
,
const
double
*
bvals
){
double
sinth
,
costh
,
sinph
,
cosph
;
sincos
(
_th
,
&
sinth
,
&
costh
);
sincos
(
_ph
,
&
sinph
,
&
cosph
);
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
double
dp1
=
cos
(
double
(
_th
))
*
(
bvecs
[
pt
]
*
cos
(
double
(
_ph
))
+
bvecs
[
NDIRECTIONS
+
pt
]
*
sin
(
double
(
_ph
))
)
-
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
sin
(
double
(
_th
))
;
double
dp1
=
cos
th
*
(
bvecs
[
pt
]
*
cos
ph
+
bvecs
[
NDIRECTIONS
+
pt
]
*
sin
ph
)
-
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
sin
th
;
return
(
-
_a
*
_b
*
bvals
[
pt
]
/
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
*
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
))
))
*
2
*
dp
*
dp1
);
return
(
-
_a
*
_b
*
bvals
[
pt
]
/
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
*
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
))
*
2
*
dp
*
dp1
);
}
}
__device__
inline
double
anisoterm_ph_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
_th
,
const
double
_ph
,
const
double
*
bvecs
,
const
double
*
bvals
){
__device__
inline
double
anisoterm_ph_PVM_multi
(
const
int
pt
,
const
double
_a
,
const
double
_b
,
const
double3
x
,
const
double
_th
,
const
double
_ph
,
const
double
*
bvecs
,
const
double
*
bvals
){
double
sinth
,
sinph
,
cosph
;
sinth
=
sin
(
_th
);
sincos
(
_ph
,
&
sinph
,
&
cosph
);
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
double
dp
=
bvecs
[
pt
]
*
x
.
x
+
bvecs
[
NDIRECTIONS
+
pt
]
*
x
.
y
+
bvecs
[(
2
*
NDIRECTIONS
)
+
pt
]
*
x
.
z
;
double
dp1
=
sin
(
double
(
_th
))
*
(
-
bvecs
[
pt
]
*
sin
(
double
(
_ph
))
+
bvecs
[
NDIRECTIONS
+
pt
]
*
cos
(
double
(
_ph
))
);
double
dp1
=
sin
th
*
(
-
bvecs
[
pt
]
*
sin
ph
+
bvecs
[
NDIRECTIONS
+
pt
]
*
cos
ph
);
return
(
-
_a
*
_b
*
bvals
[
pt
]
/
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
*
exp
(
double
(
-
_a
*
log
(
double
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
))
))
*
2
*
dp
*
dp1
);
return
(
-
_a
*
_b
*
bvals
[
pt
]
/
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
)
*
exp
(
-
_a
*
log
(
1
+
bvals
[
pt
]
*
(
dp
*
dp
)
*
_b
))
*
2
*
dp
*
dp1
);
}
}
//in diffmodel.cc
//in diffmodel.cc
...
@@ -79,147 +85,99 @@ __device__ void fix_fsum_PVM_multi( //INPUT
...
@@ -79,147 +85,99 @@ __device__ void fix_fsum_PVM_multi( //INPUT
}
}
//in diffmodel.cc
//in diffmodel.cc
__device__
void
sort_PVM_multi
(
int
nfib
,
int
nparams
,
double
*
params
)
__device__
void
sort_PVM_multi
(
int
nfib
,
double
*
params
)
{
{
double
temp_f
,
temp_th
,
temp_ph
;
double
temp_f
,
temp_th
,
temp_ph
;
// Order vector descending using f parameters as index
// Order vector descending using f parameters as index
for
(
int
i
=
1
;
i
<
(
nfib
);
i
++
){
for
(
int
i
=
1
;
i
<
(
nfib
);
i
++
){
for
(
int
j
=
0
;
j
<
(
nfib
-
i
);
j
++
){
for
(
int
j
=
0
;
j
<
(
nfib
-
i
);
j
++
){
if
(
params
[
3
+
j
*
3
]
<
params
[
3
+
i
*
3
]){
if
(
params
[
3
+
j
*
3
]
<
params
[
3
+
(
j
+
1
)
*
3
]){
temp_f
=
params
[
3
+
j
*
3
];
temp_f
=
params
[
3
+
j
*
3
];
temp_th
=
params
[
3
+
j
*
3
+
1
];
temp_th
=
params
[
3
+
j
*
3
+
1
];
temp_ph
=
params
[
3
+
j
*
3
+
2
];
temp_ph
=
params
[
3
+
j
*
3
+
2
];
params
[
3
+
j
*
3
]
=
params
[
3
+
i
*
3
];
params
[
3
+
j
*
3
]
=
params
[
3
+
(
j
+
1
)
*
3
];
params
[
3
+
j
*
3
+
1
]
=
params
[
3
+
i
*
3
+
1
];
params
[
3
+
j
*
3
+
1
]
=
params
[
3
+
(
j
+
1
)
*
3
+
1
];
params
[
3
+
j
*
3
+
2
]
=
params
[
3
+
i
*
3
+
2
];
params
[
3
+
j
*
3
+
2
]
=
params
[
3
+
(
j
+
1
)
*
3
+
2
];
params
[
3
+
i
*
3
]
=
temp_f
;
params
[
3
+
(
j
+
1
)
*
3
]
=
temp_f
;
params
[
3
+
i
*
3
+
1
]
=
temp_th
;
params
[
3
+
(
j
+
1
)
*
3
+
1
]
=
temp_th
;
params
[
3
+
i
*
3
+
2
]
=
temp_ph
;
params
[
3
+
(
j
+
1
)
*
3
+
2
]
=
temp_ph
;
}
}
}
}
}
}
}
}
//in diffmodels.cc -- for calculate residuals
__device__
void
forwardModel_PVM_multi
(
//INPUT
const
double
*
p
,
const
double
*
bvecs
,
const
double
*
bvals
,
const
int
nfib
,
const
int
nparams
,
const
bool
m_include_f0
,
//OUTPUT
double
*
predicted_signal
)
{
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
predicted_signal
[
i
]
=
0
;
//pred = 0;
}
double
val
;
double
_a
=
abs
(
p
[
1
]);
double
_b
=
abs
(
p
[
2
]);
////////////////////////////////////
double
fs
[
NFIBRES
];
double
x
[
NFIBRES
*
3
];
double
sumf
=
0
;
double3
x2
;
for
(
int
k
=
0
;
k
<
nfib
;
k
++
){
int
kk
=
3
+
3
*
k
;
fs
[
k
]
=
x2f_gpu
(
p
[
kk
]);
sumf
+=
fs
[
k
];
x
[
k
*
3
]
=
sin
(
p
[
kk
+
1
])
*
cos
(
p
[
kk
+
2
]);
x
[
k
*
3
+
1
]
=
sin
(
p
[
kk
+
1
])
*
sin
(
p
[
kk
+
2
]);
x
[
k
*
3
+
2
]
=
cos
(
p
[
kk
+
1
]);
}
////////////////////////////////////
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
val
=
0.0
;
for
(
int
k
=
0
;
k
<
nfib
;
k
++
){
x2
.
x
=
x
[
k
*
3
];
x2
.
y
=
x
[
k
*
3
+
1
];
x2
.
z
=
x
[
k
*
3
+
2
];
val
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
i
,
_a
,
_b
,
x2
,
bvecs
,
bvals
);
}
if
(
m_include_f0
){
double
temp_f0
=
x2f_gpu
(
p
[
nparams
-
1
]);
predicted_signal
[
i
]
=
abs
(
p
[
0
])
*
(
temp_f0
+
(
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
)
+
val
);
}
else
predicted_signal
[
i
]
=
abs
(
p
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
)
+
val
);
}
}
//in diffmodels.cc -- for calculate residuals
__device__
void
get_prediction_PVM_multi
(
//INPUT
const
double
*
params
,
const
double
*
bvecs
,
const
double
*
bvals
,
const
int
nfib
,
const
int
nparams
,
const
bool
m_include_f0
,
//OUTPUT
double
*
predicted_signal
)
{
//m_s0-myparams[0] m_d-myparams[1] m_d_std-myparams[2] m_f-m_th-m_ph-myparams[3,4,5,6 etc..] m_f0-myparams[nparams-1]
double
p
[
NPARAMS
];
p
[
0
]
=
params
[
0
];
p
[
1
]
=
params
[
1
]
*
params
[
1
]
/
params
[
2
]
/
params
[
2
];
//m_d*m_d/m_d_std/m_d_std;
p
[
2
]
=
params
[
2
]
*
params
[
2
]
/
params
[
1
];
//m_d_std*m_d_std/m_d; // =1/beta
for
(
int
k
=
0
;
k
<
nfib
;
k
++
){
int
kk
=
3
+
3
*
k
;
p
[
kk
]
=
f2x_gpu
(
params
[
kk
]);
p
[
kk
+
1
]
=
params
[
kk
+
1
];
p
[
kk
+
2
]
=
params
[
kk
+
2
];
}
if
(
m_include_f0
)
p
[
nparams
-
1
]
=
f2x_gpu
(
params
[
nparams
-
1
]);
forwardModel_PVM_multi
(
p
,
bvecs
,
bvals
,
nfib
,
nparams
,
m_include_f0
,
predicted_signal
);
}
//cost function PVM_multi
//cost function PVM_multi
__device__
double
cf_PVM_multi
(
//INPUT
__device__
void
cf_PVM_multi
(
//INPUT
const
double
*
params
,
const
double
*
params
,
const
double
*
mdata
,
const
double
*
mdata
,
const
double
*
bvecs
,
const
double
*
bvecs
,
const
double
*
bvals
,
const
double
*
bvals
,
const
int
nparams
,
const
int
nparams
,
const
bool
m_include_f0
)
const
bool
m_include_f0
,
const
int
idB
,
double
*
shared
,
//shared memory
double
*
fs
,
//shared memory
double
*
x
,
//shared memory
double
&
_a
,
//shared memory
double
&
_b
,
//shared memory
double
&
sumf
,
//shared memory
//OUTPUT
double
&
cfv
)
{
{
double
cfv
=
0.0
;
if
(
idB
<
NFIBRES
){
double
err
;
int
kk
=
3
+
3
*
(
idB
);
double
_a
=
abs
(
params
[
1
]);
double
sinth
,
costh
,
sinph
,
cosph
;
double
_b
=
abs
(
params
[
2
]);
sincos
(
params
[
kk
+
1
],
&
sinth
,
&
costh
);
double
fs
[
NFIBRES
];
sincos
(
params
[
kk
+
2
],
&
sinph
,
&
cosph
);
double
x
[
NFIBRES
*
3
];
fs
[
idB
]
=
x2f_gpu
(
params
[
kk
]);
double
sumf
=
0
;
x
[
idB
*
3
]
=
sinth
*
cosph
;
x
[
idB
*
3
+
1
]
=
sinth
*
sinph
;
x
[
idB
*
3
+
2
]
=
costh
;
}
if
(
idB
==
0
){
_a
=
abs
(
params
[
1
]);
_b
=
abs
(
params
[
2
]);
cfv
=
0.0
;
sumf
=
0
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
)
sumf
+=
fs
[
k
];
}
int
ndir
=
NDIRECTIONS
/
THREADS_X_BLOCK_FIT
;
if
(
idB
<
(
NDIRECTIONS
%
THREADS_X_BLOCK_FIT
))
ndir
++
;
double
err
;
double3
x2
;
double3
x2
;
int
dir_iter
=
idB
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
__syncthreads
();
int
kk
=
3
+
3
*
(
k
);
fs
[
k
]
=
x2f_gpu
(
params
[
kk
]);
shared
[
idB
]
=
0
;
sumf
+=
fs
[
k
];
for
(
int
dir
=
0
;
dir
<
ndir
;
dir
++
){
x
[
k
*
3
]
=
sin
(
params
[
kk
+
1
])
*
cos
(
params
[
kk
+
2
]);
x
[
k
*
3
+
1
]
=
sin
(
params
[
kk
+
1
])
*
sin
(
params
[
kk
+
2
]);
x
[
k
*
3
+
2
]
=
cos
(
params
[
kk
+
1
]);
}
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
err
=
0.0
;
err
=
0.0
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
x2
.
x
=
x
[
k
*
3
];
x2
.
x
=
x
[
k
*
3
];
x2
.
y
=
x
[
k
*
3
+
1
];
x2
.
y
=
x
[
k
*
3
+
1
];
x2
.
z
=
x
[
k
*
3
+
2
];
x2
.
z
=
x
[
k
*
3
+
2
];
err
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
i
,
_a
,
_b
,
x2
,
bvecs
,
bvals
);
err
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
x2
,
bvecs
,
bvals
);
}
}
if
(
m_include_f0
){
if
(
m_include_f0
){
double
temp_f0
=
x2f_gpu
(
params
[
nparams
-
1
]);
double
temp_f0
=
x2f_gpu
(
params
[
nparams
-
1
]);
err
=
(
abs
(
params
[
0
])
*
(
temp_f0
+
((
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
)
+
err
)))
-
mdata
[
i
];
err
=
(
abs
(
params
[
0
])
*
(
temp_f0
+
((
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
)
+
err
)))
-
mdata
[
dir_iter
];
}
else
{
}
else
{
err
=
abs
(
params
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
)
+
err
)
-
mdata
[
i
];
err
=
abs
(
params
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
)
+
err
)
-
mdata
[
dir_iter
];
}
}
cfv
+=
err
*
err
;
shared
[
idB
]
+=
err
*
err
;
dir_iter
+=
THREADS_X_BLOCK_FIT
;
}
}
return
(
cfv
);
}
__syncthreads
();
if
(
idB
==
0
){
for
(
int
i
=
0
;
i
<
THREADS_X_BLOCK_FIT
;
i
++
){
cfv
+=
shared
[
i
];
}
}
}
//gradient function PVM_multi
//gradient function PVM_multi
__device__
void
grad_PVM_multi
(
//INPUT
__device__
void
grad_PVM_multi
(
//INPUT
...
@@ -229,66 +187,94 @@ __device__ void grad_PVM_multi( //INPUT
...
@@ -229,66 +187,94 @@ __device__ void grad_PVM_multi( //INPUT
const
double
*
bvals
,
const
double
*
bvals
,
const
int
nparams
,
const
int
nparams
,
const
bool
m_include_f0
,
const
bool
m_include_f0
,
const
int
idB
,
double
*
shared
,
//shared memory
double
*
fs
,
//shared memory
double
*
x
,
//shared memory
double
&
_a
,
//shared memory
double
&
_b
,
//shared memory
double
&
sumf
,
//shared memory
//OUTPUT
//OUTPUT
double
*
grad
)
double
*
grad
)
{
{
double
_a
=
abs
(
params
[
1
]);
if
(
idB
<
NFIBRES
){
double
_b
=
abs
(
params
[
2
]);
int
kk
=
3
+
3
*
(
idB
);
double
fs
[
NFIBRES
];
double
sinth
,
costh
,
sinph
,
cosph
;
double
x
[
NFIBRES
*
3
];
sincos
(
params
[
kk
+
1
],
&
sinth
,
&
costh
);
double3
xx
;
sincos
(
params
[
kk
+
2
],
&
sinph
,
&
cosph
);
double
sumf
=
0
;
fs
[
idB
]
=
x2f_gpu
(
params
[
kk
]);
x
[
idB
*
3
]
=
sinth
*
cosph
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
x
[
idB
*
3
+
1
]
=
sinth
*
sinph
;
int
kk
=
3
+
3
*
(
k
);
x
[
idB
*
3
+
2
]
=
costh
;
fs
[
k
]
=
x2f_gpu
(
params
[
kk
]);
sumf
+=
fs
[
k
];
x
[
k
*
3
]
=
sin
(
params
[
kk
+
1
])
*
cos
(
params
[
kk
+
2
]);
x
[
k
*
3
+
1
]
=
sin
(
params
[
kk
+
1
])
*
sin
(
params
[
kk
+
2
]);
x
[
k
*
3
+
2
]
=
cos
(
params
[
kk
+
1
]);
}
}
if
(
idB
==
0
){
_a
=
abs
(
params
[
1
]);
_b
=
abs
(
params
[
2
]);
sumf
=
0
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
)
sumf
+=
fs
[
k
];
for
(
int
p
=
0
;
p
<
nparams
;
p
++
)
grad
[
p
]
=
0
;
}
double
J
[
NPARAMS
];
int
ndir
=
NDIRECTIONS
/
THREADS_X_BLOCK_FIT
;
double
diff
;
if
(
idB
<
(
NDIRECTIONS
%
THREADS_X_BLOCK_FIT
))
ndir
++
;
double
sig
;
int
max_dir
=
NDIRECTIONS
/
THREADS_X_BLOCK_FIT
;
if
(
NDIRECTIONS
%
THREADS_X_BLOCK_FIT
)
max_dir
++
;
for
(
int
p
=
0
;
p
<
nparams
;
p
++
)
grad
[
p
]
=
0
;
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
double
J
[
NPARAMS
];
sig
=
0
;
double
diff
;
for
(
int
a
=
0
;
a
<
nparams
;
a
++
)
J
[
a
]
=
0
;
double
sig
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
double3
xx
;
int
kk
=
3
+
3
*
(
k
);
int
dir_iter
=
idB
;
xx
.
x
=
x
[
k
*
3
];
xx
.
y
=
x
[
k
*
3
+
1
];
__syncthreads
();
xx
.
z
=
x
[
k
*
3
+
2
];
sig
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
for
(
int
dir
=
0
;
dir
<
max_dir
;
dir
++
){
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_a_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
for
(
int
p
=
0
;
p
<
nparams
;
p
++
)
J
[
p
]
=
0
;
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_b_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
if
(
dir
<
ndir
){
J
[
kk
]
=
abs
(
params
[
0
])
*
(
anisoterm_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
)
-
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
))
*
two_pi_gpu
*
sign_gpu
(
params
[
kk
])
*
1
/
(
1
+
params
[
kk
]
*
params
[
kk
]);
sig
=
0
;
J
[
kk
+
1
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_th_PVM_multi
(
i
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
J
[
kk
+
2
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_ph_PVM_multi
(
i
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
int
kk
=
3
+
3
*
(
k
);
}
xx
.
x
=
x
[
k
*
3
];
if
(
m_include_f0
){
xx
.
y
=
x
[
k
*
3
+
1
];
double
temp_f0
=
x2f_gpu
(
params
[
nparams
-
1
]);
xx
.
z
=
x
[
k
*
3
+
2
];
J
[
nparams
-
1
]
=
abs
(
params
[
0
])
*
(
1
-
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
))
*
two_pi_gpu
*
sign_gpu
(
params
[
nparams
-
1
])
*
1
/
(
1
+
params
[
nparams
-
1
]
*
params
[
nparams
-
1
]);
sig
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
sig
=
abs
(
params
[
0
])
*
((
temp_f0
+
(
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
))
+
sig
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_a_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_a_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_b_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_b_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
J
[
kk
]
=
abs
(
params
[
0
])
*
(
anisoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
)
-
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
))
*
two_pi_gpu
*
sign_gpu
(
params
[
kk
])
*
1
/
(
1
+
params
[
kk
]
*
params
[
kk
]);
}
else
{
J
[
kk
+
1
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_th_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
sig
=
abs
(
params
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
)
+
sig
);
J
[
kk
+
2
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_ph_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_a_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
}
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_b_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
if
(
m_include_f0
){
}
double
temp_f0
=
x2f_gpu
(
params
[
nparams
-
1
]);
J
[
nparams
-
1
]
=
abs
(
params
[
0
])
*
(
1
-
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
))
*
two_pi_gpu
*
sign_gpu
(
params
[
nparams
-
1
])
*
1
/
(
1
+
params
[
nparams
-
1
]
*
params
[
nparams
-
1
]);
sig
=
abs
(
params
[
0
])
*
((
temp_f0
+
(
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
))
+
sig
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_a_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_b_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
}
else
{
sig
=
abs
(
params
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
)
+
sig
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_a_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_b_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
}
diff
=
sig
-
mdata
[
i
];
diff
=
sig
-
mdata
[
dir_iter
];
J
[
0
]
=
(
params
[
0
]
>
0
?
1.0
:-
1.0
)
*
sig
/
params
[
0
];
J
[
0
]
=
(
params
[
0
]
>
0
?
1.0
:-
1.0
)
*
sig
/
params
[
0
];
}
for
(
int
p
=
0
;
p
<
nparams
;
p
++
)
grad
[
p
]
+=
2
*
J
[
p
]
*
diff
;
for
(
int
p
=
0
;
p
<
nparams
;
p
++
){
shared
[
idB
]
=
2
*
J
[
p
]
*
diff
;
__syncthreads
();
if
(
idB
==
0
){
for
(
int
i
=
0
;
i
<
THREADS_X_BLOCK_FIT
;
i
++
){
grad
[
p
]
+=
shared
[
i
];
}
}
__syncthreads
();
}
dir_iter
+=
THREADS_X_BLOCK_FIT
;
}
}
}
}
//hessian function PVM_multi
//hessian function PVM_multi
__device__
void
hess_PVM_multi
(
//INPUT
__device__
void
hess_PVM_multi
(
//INPUT
const
double
*
params
,
const
double
*
params
,
...
@@ -296,75 +282,105 @@ __device__ void hess_PVM_multi( //INPUT
...
@@ -296,75 +282,105 @@ __device__ void hess_PVM_multi( //INPUT
const
double
*
bvals
,
const
double
*
bvals
,
const
int
nparams
,
const
int
nparams
,
const
bool
m_include_f0
,
const
bool
m_include_f0
,
const
int
idB
,
double
*
shared
,
//shared memory
double
*
fs
,
//shared memory
double
*
x
,
//shared memory
double
&
_a
,
//shared memory
double
&
_b
,
//shared memory
double
&
sumf
,
//shared memory
//OUTPUT
double
*
hess
)
double
*
hess
)
{
{
double
_a
=
abs
(
params
[
1
]);
if
(
idB
<
NFIBRES
){
double
_b
=
abs
(
params
[
2
]);
int
kk
=
3
+
3
*
(
idB
);
double
fs
[
NFIBRES
];
double
sinth
,
costh
,
sinph
,
cosph
;
double
x
[
NFIBRES
*
3
];
sincos
(
params
[
kk
+
1
],
&
sinth
,
&
costh
);
double3
xx
;
sincos
(
params
[
kk
+
2
],
&
sinph
,
&
cosph
);
double
sumf
=
0
;
fs
[
idB
]
=
x2f_gpu
(
params
[
kk
]);
x
[
idB
*
3
]
=
sinth
*
cosph
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
x
[
idB
*
3
+
1
]
=
sinth
*
sinph
;
int
kk
=
3
+
3
*
(
k
);
x
[
idB
*
3
+
2
]
=
costh
;
fs
[
k
]
=
x2f_gpu
(
params
[
kk
]);
sumf
+=
fs
[
k
];
x
[
k
*
3
]
=
sin
(
params
[
kk
+
1
])
*
cos
(
params
[
kk
+
2
]);
x
[
k
*
3
+
1
]
=
sin
(
params
[
kk
+
1
])
*
sin
(
params
[
kk
+
2
]);
x
[
k
*
3
+
2
]
=
cos
(
params
[
kk
+
1
]);
}
}
if
(
idB
==
0
){
_a
=
abs
(
params
[
1
]);
_b
=
abs
(
params
[
2
]);
sumf
=
0
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
)
sumf
+=
fs
[
k
];
for
(
int
p
=
0
;
p
<
nparams
;
p
++
){
for
(
int
p2
=
0
;
p2
<
nparams
;
p2
++
){
hess
[
p
*
nparams
+
p2
]
=
0
;
}
}
}
int
ndir
=
NDIRECTIONS
/
THREADS_X_BLOCK_FIT
;
if
(
idB
<
(
NDIRECTIONS
%
THREADS_X_BLOCK_FIT
))
ndir
++
;
int
max_dir
=
NDIRECTIONS
/
THREADS_X_BLOCK_FIT
;
if
(
NDIRECTIONS
%
THREADS_X_BLOCK_FIT
)
max_dir
++
;
double
J
[
NPARAMS
];
double
J
[
NPARAMS
];
double
sig
;
double
sig
;
double3
xx
;
int
dir_iter
=
idB
;
for
(
int
p
=
0
;
p
<
nparams
;
p
++
){
__syncthreads
();
for
(
int
p2
=
0
;
p2
<
nparams
;
p2
++
){
hess
[
p
*
nparams
+
p2
]
=
0
;
}
}
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
sig
=
0
;
for
(
int
a
=
0
;
a
<
nparams
;
a
++
)
J
[
a
]
=
0
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
int
kk
=
3
+
3
*
(
k
);
xx
.
x
=
x
[
k
*
3
];
xx
.
y
=
x
[
k
*
3
+
1
];
xx
.
z
=
x
[
k
*
3
+
2
];
sig
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
double
cov
=
two_pi_gpu
*
sign_gpu
(
params
[
kk
])
*
1
/
(
1
+
params
[
kk
]
*
params
[
kk
]);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_a_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_b_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
J
[
kk
]
=
abs
(
params
[
0
])
*
(
anisoterm_PVM_multi
(
i
,
_a
,
_b
,
xx
,
bvecs
,
bvals
)
-
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
))
*
cov
;
J
[
kk
+
1
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_th_PVM_multi
(
i
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
J
[
kk
+
2
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_ph_PVM_multi
(
i
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
}
if
(
m_include_f0
){
double
temp_f0
=
x2f_gpu
(
params
[
nparams
-
1
]);
J
[
nparams
-
1
]
=
abs
(
params
[
0
])
*
(
1
-
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
))
*
two_pi_gpu
*
sign_gpu
(
params
[
nparams
-
1
])
*
1
/
(
1
+
params
[
nparams
-
1
]
*
params
[
nparams
-
1
]);
sig
=
abs
(
params
[
0
])
*
(
temp_f0
+
(
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
)
+
sig
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_a_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_b_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
}
else
{
sig
=
abs
(
params
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
i
,
_a
,
_b
,
bvals
)
+
sig
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_a_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_b_PVM_multi
(
i
,
_a
,
_b
,
bvals
);
}
J
[
0
]
=
sig
/
params
[
0
];
for
(
int
dir
=
0
;
dir
<
max_dir
;
dir
++
){
for
(
int
p
=
0
;
p
<
nparams
;
p
++
)
J
[
p
]
=
0
;
if
(
dir
<
ndir
){
sig
=
0
;
for
(
int
k
=
0
;
k
<
NFIBRES
;
k
++
){
int
kk
=
3
+
3
*
(
k
);
xx
.
x
=
x
[
k
*
3
];
xx
.
y
=
x
[
k
*
3
+
1
];
xx
.
z
=
x
[
k
*
3
+
2
];
sig
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
double
cov
=
two_pi_gpu
*
sign_gpu
(
params
[
kk
])
*
1
/
(
1
+
params
[
kk
]
*
params
[
kk
]);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_a_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_b_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
);
J
[
kk
]
=
abs
(
params
[
0
])
*
(
anisoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
bvecs
,
bvals
)
-
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
))
*
cov
;
J
[
kk
+
1
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_th_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
J
[
kk
+
2
]
=
abs
(
params
[
0
])
*
fs
[
k
]
*
anisoterm_ph_PVM_multi
(
dir_iter
,
_a
,
_b
,
xx
,
params
[
kk
+
1
],
params
[
kk
+
2
],
bvecs
,
bvals
);
}
if
(
m_include_f0
){
double
temp_f0
=
x2f_gpu
(
params
[
nparams
-
1
]);
J
[
nparams
-
1
]
=
abs
(
params
[
0
])
*
(
1
-
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
))
*
two_pi_gpu
*
sign_gpu
(
params
[
nparams
-
1
])
*
1
/
(
1
+
params
[
nparams
-
1
]
*
params
[
nparams
-
1
]);
sig
=
abs
(
params
[
0
])
*
(
temp_f0
+
(
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
)
+
sig
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_a_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
-
temp_f0
)
*
isoterm_b_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
}
else
{
sig
=
abs
(
params
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
)
+
sig
);
J
[
1
]
+=
(
params
[
1
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_a_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
J
[
2
]
+=
(
params
[
2
]
>
0
?
1.0
:-
1.0
)
*
abs
(
params
[
0
])
*
(
1
-
sumf
)
*
isoterm_b_PVM_multi
(
dir_iter
,
_a
,
_b
,
bvals
);
}
J
[
0
]
=
sig
/
params
[
0
];
}
for
(
int
p
=
0
;
p
<
nparams
;
p
++
){
for
(
int
p
=
0
;
p
<
nparams
;
p
++
){
for
(
int
p2
=
p
;
p2
<
nparams
;
p2
++
){
for
(
int
p2
=
p
;
p2
<
nparams
;
p2
++
){
hess
[
p
*
nparams
+
p2
]
+=
2
*
(
J
[
p
]
*
J
[
p2
]);
shared
[
idB
]
=
2
*
(
J
[
p
]
*
J
[
p2
]);
__syncthreads
();
if
(
idB
==
0
){
for
(
int
i
=
0
;
i
<
THREADS_X_BLOCK_FIT
;
i
++
){
hess
[
p
*
nparams
+
p2
]
+=
shared
[
i
];
}
}
__syncthreads
();
}
}
}
}
dir_iter
+=
THREADS_X_BLOCK_FIT
;
}
}
for
(
int
j
=
0
;
j
<
nparams
;
j
++
)
{
if
(
idB
==
0
){
for
(
int
i
=
j
+
1
;
i
<
nparams
;
i
++
)
{
for
(
int
j
=
0
;
j
<
nparams
;
j
++
)
{
hess
[
i
*
nparams
+
j
]
=
hess
[
j
*
nparams
+
i
];
for
(
int
i
=
j
+
1
;
i
<
nparams
;
i
++
)
{
}
hess
[
i
*
nparams
+
j
]
=
hess
[
j
*
nparams
+
i
];
}
}
}
}
}
}
//in diffmodel.cc
//in diffmodel.cc
...
@@ -379,63 +395,80 @@ extern "C" __global__ void fit_PVM_multi_kernel( //INPUT
...
@@ -379,63 +395,80 @@ extern "C" __global__ void fit_PVM_multi_kernel( //INPUT
//OUTPUT
//OUTPUT
double
*
params
)
double
*
params
)
{
{
int
id
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
idB
=
threadIdx
.
x
;
if
(
id
>=
nvox
)
{
return
;
}
int
idVOX
=
blockIdx
.
x
;
int
nparams
;
__shared__
double
myparams
[
NPARAMS
];
if
(
m_include_f0
)
__shared__
int
nparams
;
nparams
=
nfib
*
3
+
4
;
else
__shared__
double
step
[
NPARAMS
];
nparams
=
nfib
*
3
+
3
;
__shared__
double
grad
[
NPARAMS
];
__shared__
double
hess
[
NPARAMS
*
NPARAMS
];
int
nparams_single_c
=
nparams
-
1
;
__shared__
double
inverse
[
NPARAMS
];
__shared__
double
pcf
;
double
myparams
[
NPARAMS
];
__shared__
double
ncf
;
double
myparams_aux
[
NPARAMS
];
__shared__
double
lambda
;
double
mydata
[
NDIRECTIONS
];
__shared__
double
cftol
;
__shared__
double
ltol
;
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
__shared__
double
olambda
;
mydata
[
i
]
=
data
[(
id
*
NDIRECTIONS
)
+
i
];
__shared__
bool
success
;
}
__shared__
bool
end
;
for
(
int
i
=
0
;
i
<
nparams_single_c
;
i
++
){
__shared__
double
shared
[
THREADS_X_BLOCK_FIT
];
myparams_aux
[
i
]
=
params_PVM_single_c
[(
id
*
nparams_single_c
)
+
i
];
__shared__
double
fs
[
NFIBRES
];
}
__shared__
double
x
[
NFIBRES
*
3
];
__shared__
double
_a
;
myparams
[
0
]
=
myparams_aux
[
0
];
//pvm1.get_s0();
__shared__
double
_b
;
myparams
[
1
]
=
1.0
;
//start with d=d_std
__shared__
double
sumf
;
for
(
int
i
=
0
,
ii
=
3
;
i
<
nfib
;
i
++
,
ii
+=
3
){
myparams
[
ii
]
=
f2x_gpu
(
myparams_aux
[
ii
-
1
]);
if
(
idB
==
0
){
myparams
[
ii
+
1
]
=
myparams_aux
[
ii
];
if
(
m_include_f0
)
myparams
[
ii
+
2
]
=
myparams_aux
[
ii
+
1
];
nparams
=
nfib
*
3
+
4
;
}
else
myparams
[
2
]
=
myparams_aux
[
1
];
//pvm1.get_d();
nparams
=
nfib
*
3
+
3
;
if
(
m_include_f0
)
myparams
[
nparams
-
1
]
=
f2x_gpu
(
myparams_aux
[
nparams_single_c
-
1
]);
int
nparams_single_c
=
nparams
-
1
;
myparams
[
0
]
=
params_PVM_single_c
[(
idVOX
*
nparams_single_c
)
+
0
];
//pvm1.get_s0();
myparams
[
1
]
=
1.0
;
//start with d=d_std
for
(
int
i
=
0
,
ii
=
3
;
i
<
nfib
;
i
++
,
ii
+=
3
){
myparams
[
ii
]
=
f2x_gpu
(
params_PVM_single_c
[(
idVOX
*
nparams_single_c
)
+
ii
-
1
]);
myparams
[
ii
+
1
]
=
params_PVM_single_c
[(
idVOX
*
nparams_single_c
)
+
ii
];
myparams
[
ii
+
2
]
=
params_PVM_single_c
[(
idVOX
*
nparams_single_c
)
+
ii
+
1
];
}
myparams
[
2
]
=
params_PVM_single_c
[(
idVOX
*
nparams_single_c
)
+
1
]
;
//pvm1.get_d();
if
(
m_include_f0
)
myparams
[
nparams
-
1
]
=
f2x_gpu
(
params_PVM_single_c
[(
idVOX
*
nparams_single_c
)
+
nparams_single_c
-
1
]);
}
__syncthreads
();
//do the fit
//do the fit
levenberg_marquardt_PVM_multi_gpu
(
mydata
,
&
bvecs
[
id
*
3
*
NDIRECTIONS
],
&
bvals
[
id
*
NDIRECTIONS
],
nparams
,
m_include_f0
,
myparams
);
levenberg_marquardt_PVM_multi_gpu
(
&
data
[
idVOX
*
NDIRECTIONS
],
&
bvecs
[
idVOX
*
3
*
NDIRECTIONS
],
&
bvals
[
idVOX
*
NDIRECTIONS
],
nparams
,
m_include_f0
,
idB
,
step
,
grad
,
hess
,
inverse
,
pcf
,
ncf
,
lambda
,
cftol
,
ltol
,
olambda
,
success
,
end
,
shared
,
fs
,
x
,
_a
,
_b
,
sumf
,
myparams
);
__syncthreads
();
// finalise parameters
// finalise parameters
//m_s0-myparams[0] m_d-myparams[1] m_d_std-myparams[2] m_f-m_th-m_ph-myparams[3,4,5,6 etc..] m_f0-myparams[nparams-1]
//m_s0-myparams[0] m_d-myparams[1] m_d_std-myparams[2] m_f-m_th-m_ph-myparams[3,4,5,6 etc..] m_f0-myparams[nparams-1]
myparams_aux
[
1
]
=
myparams
[
1
];
if
(
idB
==
0
){
double
aux
=
myparams
[
1
];
myparams
[
1
]
=
abs
(
myparams_aux
[
1
]
*
myparams
[
2
]);
myparams
[
1
]
=
abs
(
aux
*
myparams
[
2
]);
myparams
[
2
]
=
sqrt
(
double
(
abs
(
myparams_aux
[
1
]
*
myparams
[
2
]
*
myparams
[
2
])));
myparams
[
2
]
=
sqrt
(
double
(
abs
(
aux
*
myparams
[
2
]
*
myparams
[
2
])));
for
(
int
i
=
3
,
k
=
0
;
k
<
nfib
;
i
+=
3
,
k
++
){
for
(
int
i
=
3
,
k
=
0
;
k
<
nfib
;
i
+=
3
,
k
++
){
myparams
[
i
]
=
x2f_gpu
(
myparams
[
i
]);
myparams
[
i
]
=
x2f_gpu
(
myparams
[
i
]);
}
}
if
(
m_include_f0
)
if
(
m_include_f0
)
myparams
[
nparams
-
1
]
=
x2f_gpu
(
myparams
[
nparams
-
1
]);
myparams
[
nparams
-
1
]
=
x2f_gpu
(
myparams
[
nparams
-
1
]);
sort_PVM_multi
(
nfib
,
nparams
,
myparams
);
sort_PVM_multi
(
nfib
,
myparams
);
fix_fsum_PVM_multi
(
m_include_f0
,
nfib
,
nparams
,
myparams
);
fix_fsum_PVM_multi
(
m_include_f0
,
nfib
,
nparams
,
myparams
);
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
params
[(
id
*
nparams
)
+
i
]
=
myparams
[
i
];
params
[(
id
VOX
*
nparams
)
+
i
]
=
myparams
[
i
];
//printf("PARAM[%i]: %.20f\n",i,myparams[i]);
}
}
}
}
}
//in diffmodel.cc
//in diffmodel.cc
...
@@ -451,34 +484,99 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT
...
@@ -451,34 +484,99 @@ extern "C" __global__ void get_residuals_PVM_multi_kernel( //INPUT
//OUTPUT
//OUTPUT
double
*
residuals
)
double
*
residuals
)
{
{
int
id
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
idB
=
threadIdx
.
x
;
if
(
id
>=
nvox
)
{
return
;
}
int
idVOX
=
blockIdx
.
x
;
__shared__
double
myparams
[
NPARAMS
];
__shared__
int
nparams
;
__shared__
bool
my_include_f0
;
__shared__
double
val
;
__shared__
double
_a
;
__shared__
double
_b
;
__shared__
double
fs
[
NFIBRES
];
__shared__
double
x
[
NFIBRES
*
3
];
__shared__
double
sumf
;
double
predicted_signal
;
double
mydata
;
if
(
idB
==
0
){
if
(
m_include_f0
)
nparams
=
nfib
*
3
+
4
;
else
nparams
=
nfib
*
3
+
3
;
int
nparams
;
my_include_f0
=
includes_f0
[
idVOX
];
if
(
m_include_f0
)
nparams
=
nfib
*
3
+
4
;
//m_s0-myparams[0] m_d-myparams[1] m_d_std-myparams[2] m_f-m_th-m_ph-myparams[3,4,5,6 etc..] m_f0-myparams[nparams-1]
else
nparams
=
nfib
*
3
+
3
;
myparams
[
0
]
=
params
[(
idVOX
*
nparams
)
+
0
];
double
aux1
=
params
[(
idVOX
*
nparams
)
+
1
];
double
aux2
=
params
[(
idVOX
*
nparams
)
+
2
];
//m_d*m_d/m_d_std/m_d_std;
myparams
[
1
]
=
aux1
*
aux1
/
aux2
/
aux2
;
myparams
[
2
]
=
aux2
*
aux2
/
aux1
;
//m_d_std*m_d_std/m_d; // =1/beta
if
(
my_include_f0
)
myparams
[
nparams
-
1
]
=
f2x_gpu
(
params
[(
idVOX
*
nparams
)
+
nparams
-
1
]);
}
bool
my_include_f0
=
includes_f0
[
id
];
if
(
idB
<
nfib
){
int
kk
=
3
+
3
*
idB
;
double
sinth
,
costh
,
sinph
,
cosph
;
myparams
[
kk
]
=
f2x_gpu
(
params
[(
idVOX
*
nparams
)
+
kk
]);
myparams
[
kk
+
1
]
=
params
[(
idVOX
*
nparams
)
+
kk
+
1
];
myparams
[
kk
+
2
]
=
params
[(
idVOX
*
nparams
)
+
kk
+
2
];
sincos
(
myparams
[
kk
+
1
],
&
sinth
,
&
costh
);
sincos
(
myparams
[
kk
+
2
],
&
sinph
,
&
cosph
);
fs
[
idB
]
=
x2f_gpu
(
myparams
[
kk
]);
x
[
idB
*
3
]
=
sinth
*
cosph
;
x
[
idB
*
3
+
1
]
=
sinth
*
sinph
;
x
[
idB
*
3
+
2
]
=
costh
;
}
double
myparams
[
NPARAMS
];
__syncthreads
();
double
mydata
[
NDIRECTIONS
];
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
if
(
idB
==
0
){
myparams
[
i
]
=
params
[(
id
*
nparams
)
+
i
];
_a
=
abs
(
myparams
[
1
]);
}
_b
=
abs
(
myparams
[
2
]);
sumf
=
0
;
for
(
int
k
=
0
;
k
<
nfib
;
k
++
){
sumf
+=
fs
[
k
];
}
int
ndir
=
NDIRECTIONS
/
THREADS_X_BLOCK_FIT
;
if
(
idB
<
(
NDIRECTIONS
%
THREADS_X_BLOCK_FIT
))
ndir
++
;
double3
x2
;
int
dir_iter
=
idB
;
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
__syncthreads
();
mydata
[
i
]
=
data
[(
id
*
NDIRECTIONS
)
+
i
];
}
double
predicted_signal
[
NDIRECTIONS
];
for
(
int
dir
=
0
;
dir
<
ndir
;
dir
++
){
mydata
=
data
[(
idVOX
*
NDIRECTIONS
)
+
dir_iter
];
predicted_signal
=
0
;
//pred = 0;
val
=
0.0
;
for
(
int
k
=
0
;
k
<
nfib
;
k
++
){
x2
.
x
=
x
[
k
*
3
];
x2
.
y
=
x
[
k
*
3
+
1
];
x2
.
z
=
x
[
k
*
3
+
2
];
val
+=
fs
[
k
]
*
anisoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
x2
,
&
bvecs
[
idVOX
*
3
*
NDIRECTIONS
],
&
bvals
[
idVOX
*
NDIRECTIONS
]);
}
if
(
my_include_f0
){
double
temp_f0
=
x2f_gpu
(
myparams
[
nparams
-
1
]);
predicted_signal
=
abs
(
myparams
[
0
])
*
(
temp_f0
+
(
1
-
sumf
-
temp_f0
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
&
bvals
[
idVOX
*
NDIRECTIONS
])
+
val
);
}
else
predicted_signal
=
abs
(
myparams
[
0
])
*
((
1
-
sumf
)
*
isoterm_PVM_multi
(
dir_iter
,
_a
,
_b
,
&
bvals
[
idVOX
*
NDIRECTIONS
])
+
val
);
}
get_prediction_PVM_multi
(
myparams
,
&
bvecs
[
id
*
3
*
NDIRECTIONS
],
&
bvals
[
id
*
NDIRECTIONS
],
nfib
,
nparams
,
my_include_f0
,
predicted_signal
);
//residuals=m_data-predicted_signal;
residuals
[
idVOX
*
NDIRECTIONS
+
dir_iter
]
=
mydata
-
predicted_signal
;
for
(
int
i
=
0
;
i
<
NDIRECTIONS
;
i
++
){
//residuals=m_data-predicted_signal;
dir_iter
+=
THREADS_X_BLOCK_FIT
;
residuals
[
id
*
NDIRECTIONS
+
i
]
=
mydata
[
i
]
-
predicted_signal
[
i
];
}
}
}
}
This diff is collapsed.
Click to expand it.
CUDA/PVM_single.cu
+
364
−
269
View file @
d4a9f9f7
This diff is collapsed.
Click to expand it.
CUDA/PVM_single_c.cu
+
496
−
431
View file @
d4a9f9f7
This diff is collapsed.
Click to expand it.
CUDA/levenberg_marquardt.cu
+
226
−
160
View file @
d4a9f9f7
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
#include
"solver_mult_inverse.cu"
#include
"solver_mult_inverse.cu"
#include
"diffmodels.cuh"
#include
"diffmodels.cuh"
#include
"dim_blocks.h"
#include
"options.h"
#include
"options.h"
//CPU version in nonlin.h
//CPU version in nonlin.h
...
@@ -27,72 +28,93 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT
...
@@ -27,72 +28,93 @@ __device__ void levenberg_marquardt_PVM_single_gpu( //INPUT
const
double
*
bvals
,
const
double
*
bvals
,
const
int
nparams
,
const
int
nparams
,
const
bool
m_include_f0
,
const
bool
m_include_f0
,
const
int
idB
,
double
*
step
,
//shared memory
double
*
grad
,
//shared memory
double
*
hess
,
//shared memory
double
*
inverse
,
//shared memory
double
&
pcf
,
//shared memory
double
&
ncf
,
//shared memory
double
&
lambda
,
//shared memory
double
&
cftol
,
//shared memory
double
&
ltol
,
//shared memory
double
&
olambda
,
//shared memory
bool
&
success
,
//shared memory
bool
&
end
,
//shared memory
double
*
shared
,
//shared memory
double
*
fs
,
//shared memory
double
*
x
,
//shared memory
double
&
_d
,
//shared memory
double
&
sumf
,
//shared memory
//INPUT-OUTPUT
//INPUT-OUTPUT
double
*
myparams
)
double
*
myparams
)
//shared memory
{
{
double
pcf
;
int
niter
=
0
;
double
lambda
=
0.1
;
int
maxiter
=
200
;
double
cftol
=
1.0e-8
;
double
ltol
=
1.0e20
;
if
(
idB
==
0
){
end
=
false
;
bool
success
=
true
;
lambda
=
0.1
;
double
olambda
=
0.0
;
cftol
=
1.0e-8
;
double
grad
[
NPARAMS
];
ltol
=
1.0e20
;
double
hess
[
NPARAMS
*
NPARAMS
];
success
=
true
;
double
inverse
[
NPARAMS
];
olambda
=
0.0
;
double
step
[
NPARAMS
];
ncf
=
0
;
}
double
ncf
=
0
;
cf_PVM_single
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_d
,
sumf
,
pcf
);
int
maxiter
=
200
;
__syncthreads
();
int
niter
=
0
;
while
(
!
(
success
&&
niter
++>=
maxiter
)){
//if success we not increase niter (first condition is true)
pcf
=
cf_PVM_single
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
);
//function cost has decreise, we have advanced.
while
(
!
(
success
&&
niter
++
>=
maxiter
)){
//if success we not increase niter (first condition is true)
//function cost has decreise, we have advanced.
if
(
success
){
if
(
success
){
grad_PVM_single
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
grad
);
grad_PVM_single
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_d
,
sumf
,
grad
);
hess_PVM_single
(
myparams
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
hess
);
__syncthreads
();
}
hess_PVM_single
(
myparams
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_d
,
sumf
,
hess
);
for
(
int
i
=
0
;
i
<
nparams
;
i
++
)
{
hess
[(
i
*
nparams
)
+
i
]
+=
lambda
-
olambda
;
}
}
solver
(
hess
,
grad
,
nparams
,
inverse
);
if
(
idB
==
0
){
for
(
int
i
=
0
;
i
<
nparams
;
i
++
)
{
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
hess
[(
i
*
nparams
)
+
i
]
+=
lambda
-
olambda
;
//Levenberg LM_L
step
[
i
]
=-
inverse
[
i
];
}
}
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
solver
(
hess
,
grad
,
nparams
,
inverse
);
step
[
i
]
=
myparams
[
i
]
+
step
[
i
];
}
ncf
=
cf_PVM_single
(
step
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
);
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
step
[
i
]
=-
inverse
[
i
];
}
if
(
success
=
(
ncf
<
pcf
))
{
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
olambda
=
0.0
;
step
[
i
]
=
myparams
[
i
]
+
step
[
i
];
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
myparams
[
i
]
=
step
[
i
];
}
}
lambda
=
lambda
/
10.0
;
}
if
(
zero_cf_diff_conv
(
pcf
,
ncf
,
cftol
)){
__syncthreads
();
return
;
cf_PVM_single
(
step
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_d
,
sumf
,
ncf
);
}
pcf
=
ncf
;
if
(
idB
==
0
){
}
else
{
if
(
success
=
(
ncf
<
pcf
)){
olambda
=
lambda
;
olambda
=
0.0
;
lambda
=
lambda
*
10.0
;
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
if
(
lambda
>
ltol
){
myparams
[
i
]
=
step
[
i
];
return
;
}
lambda
=
lambda
/
10.0
;
if
(
zero_cf_diff_conv
(
pcf
,
ncf
,
cftol
)){
end
=
true
;
}
pcf
=
ncf
;
}
else
{
olambda
=
lambda
;
lambda
=
lambda
*
10.0
;
if
(
lambda
>
ltol
){
end
=
true
;
}
}
}
}
}
__syncthreads
();
if
(
end
)
return
;
}
}
return
;
}
}
__device__
void
levenberg_marquardt_PVM_single_c_gpu
(
//INPUT
__device__
void
levenberg_marquardt_PVM_single_c_gpu
(
//INPUT
...
@@ -101,72 +123,94 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT
...
@@ -101,72 +123,94 @@ __device__ void levenberg_marquardt_PVM_single_c_gpu( //INPUT
const
double
*
bvals
,
const
double
*
bvals
,
const
int
nparams
,
const
int
nparams
,
const
bool
m_include_f0
,
const
bool
m_include_f0
,
const
int
idB
,
double
*
step
,
//shared memory
double
*
grad
,
//shared memory
double
*
hess
,
//shared memory
double
*
inverse
,
//shared memory
double
&
pcf
,
//shared memory
double
&
ncf
,
//shared memory
double
&
lambda
,
//shared memory
double
&
cftol
,
//shared memory
double
&
ltol
,
//shared memory
double
&
olambda
,
//shared memory
bool
&
success
,
//shared memory
bool
&
end
,
//shared memory
double
*
shared
,
//shared memory
double
*
fs
,
//shared memory
double
*
f_deriv
,
//shared memory
double
*
x
,
//shared memory
double
&
_d
,
//shared memory
double
&
sumf
,
//shared memory
//INPUT-OUTPUT
//INPUT-OUTPUT
double
*
myparams
)
double
*
myparams
)
//shared memory
{
{
double
pcf
;
int
niter
=
0
;
double
lambda
=
0.1
;
int
maxiter
=
200
;
double
cftol
=
1.0e-8
;
double
ltol
=
1.0e20
;
if
(
idB
==
0
){
end
=
false
;
bool
success
=
true
;
lambda
=
0.1
;
double
olambda
=
0.0
;
cftol
=
1.0e-8
;
double
grad
[
NPARAMS
];
ltol
=
1.0e20
;
double
hess
[
NPARAMS
*
NPARAMS
];
success
=
true
;
double
inverse
[
NPARAMS
];
olambda
=
0.0
;
double
step
[
NPARAMS
];
ncf
=
0
;
}
double
ncf
=
0
;
cf_PVM_single_c
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_d
,
sumf
,
pcf
);
int
maxiter
=
200
;
__syncthreads
();
int
niter
=
0
;
pcf
=
cf_PVM_single_c
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
);
while
(
!
(
success
&&
niter
++
>=
maxiter
)){
//if success we not increase niter (first condition is true)
while
(
!
(
success
&&
niter
++
>=
maxiter
)){
//if success we not increase niter (first condition is true)
//function cost has decreise, we have advanced.
//function cost has decreise, we have advanced.
if
(
success
){
if
(
success
){
grad_PVM_single_c
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
grad
);
grad_PVM_single_c
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
f_deriv
,
x
,
_d
,
sumf
,
grad
);
hess_PVM_single_c
(
myparams
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
hess
);
__syncthreads
();
hess_PVM_single_c
(
myparams
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
f_deriv
,
x
,
_d
,
sumf
,
hess
);
}
}
for
(
int
i
=
0
;
i
<
nparams
;
i
++
)
{
if
(
idB
==
0
){
hess
[(
i
*
nparams
)
+
i
]
+=
lambda
-
olambda
;
for
(
int
i
=
0
;
i
<
nparams
;
i
++
)
{
}
hess
[(
i
*
nparams
)
+
i
]
+=
lambda
-
olambda
;
//Levenberg LM_L
}
solver
(
hess
,
grad
,
nparams
,
inverse
);
solver
(
hess
,
grad
,
nparams
,
inverse
);
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
step
[
i
]
=-
inverse
[
i
];
step
[
i
]
=-
inverse
[
i
];
}
}
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
step
[
i
]
=
myparams
[
i
]
+
step
[
i
];
step
[
i
]
=
myparams
[
i
]
+
step
[
i
];
}
ncf
=
cf_PVM_single_c
(
step
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
);
if
(
success
=
(
ncf
<
pcf
))
{
olambda
=
0.0
;
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
myparams
[
i
]
=
step
[
i
];
}
}
lambda
=
lambda
/
10.0
;
}
if
(
zero_cf_diff_conv
(
pcf
,
ncf
,
cftol
)){
__syncthreads
();
return
;
cf_PVM_single_c
(
step
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_d
,
sumf
,
ncf
);
}
pcf
=
ncf
;
if
(
idB
==
0
){
}
else
{
if
(
success
=
(
ncf
<
pcf
))
{
olambda
=
lambda
;
olambda
=
0.0
;
lambda
=
lambda
*
10.0
;
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
if
(
lambda
>
ltol
){
myparams
[
i
]
=
step
[
i
];
return
;
}
}
lambda
=
lambda
/
10.0
;
}
if
(
zero_cf_diff_conv
(
pcf
,
ncf
,
cftol
)){
end
=
true
;
}
pcf
=
ncf
;
}
else
{
olambda
=
lambda
;
lambda
=
lambda
*
10.0
;
if
(
lambda
>
ltol
){
end
=
true
;
}
}
}
__syncthreads
();
if
(
end
)
return
;
}
}
return
;
}
}
...
@@ -176,71 +220,93 @@ __device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT
...
@@ -176,71 +220,93 @@ __device__ void levenberg_marquardt_PVM_multi_gpu( //INPUT
const
double
*
bvals
,
const
double
*
bvals
,
const
int
nparams
,
const
int
nparams
,
const
bool
m_include_f0
,
const
bool
m_include_f0
,
const
int
idB
,
double
*
step
,
//shared memory
double
*
grad
,
//shared memory
double
*
hess
,
//shared memory
double
*
inverse
,
//shared memory
double
&
pcf
,
//shared memory
double
&
ncf
,
//shared memory
double
&
lambda
,
//shared memory
double
&
cftol
,
//shared memory
double
&
ltol
,
//shared memory
double
&
olambda
,
//shared memory
bool
&
success
,
//shared memory
bool
&
end
,
//shared memory
double
*
shared
,
//shared memory
double
*
fs
,
//shared memory
double
*
x
,
//shared memory
double
&
_a
,
//shared memory
double
&
_b
,
//shared memory
double
&
sumf
,
//shared memory
//INPUT-OUTPUT
//INPUT-OUTPUT
double
*
myparams
)
double
*
myparams
)
//shared memory
{
{
double
pcf
;
int
niter
=
0
;
double
lambda
=
0.1
;
int
maxiter
=
200
;
double
cftol
=
1.0e-8
;
double
ltol
=
1.0e20
;
if
(
idB
==
0
){
end
=
false
;
bool
success
=
true
;
lambda
=
0.1
;
double
olambda
=
0.0
;
cftol
=
1.0e-8
;
double
grad
[
NPARAMS
];
ltol
=
1.0e20
;
double
hess
[
NPARAMS
*
NPARAMS
];
success
=
true
;
double
inverse
[
NPARAMS
];
olambda
=
0.0
;
double
step
[
NPARAMS
];
ncf
=
0
;
}
double
ncf
=
0
;
cf_PVM_multi
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_a
,
_b
,
sumf
,
pcf
);
int
maxiter
=
200
;
__syncthreads
();
int
niter
=
0
;
pcf
=
cf_PVM_multi
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
);
while
(
!
(
success
&&
niter
++
>=
maxiter
)){
//if success we not increase niter (first condition is true)
while
(
!
(
success
&&
niter
++
>=
maxiter
)){
//if success we not increase niter (first condition is true)
//function cost has decreise, we have advanced.
//function cost has decreise, we have advanced.
if
(
success
){
if
(
success
){
grad_PVM_multi
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
grad
);
grad_PVM_multi
(
myparams
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_a
,
_b
,
sumf
,
grad
);
hess_PVM_multi
(
myparams
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
hess
);
__syncthreads
();
}
hess_PVM_multi
(
myparams
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_a
,
_b
,
sumf
,
hess
);
for
(
int
i
=
0
;
i
<
nparams
;
i
++
)
{
hess
[(
i
*
nparams
)
+
i
]
+=
lambda
-
olambda
;
}
}
solver
(
hess
,
grad
,
nparams
,
inverse
);
if
(
idB
==
0
){
for
(
int
i
=
0
;
i
<
nparams
;
i
++
)
{
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
hess
[(
i
*
nparams
)
+
i
]
+=
lambda
-
olambda
;
//Levenberg LM_L
step
[
i
]
=-
inverse
[
i
];
}
}
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
solver
(
hess
,
grad
,
nparams
,
inverse
);
step
[
i
]
=
myparams
[
i
]
+
step
[
i
];
}
ncf
=
cf_PVM_multi
(
step
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
);
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
step
[
i
]
=-
inverse
[
i
];
}
if
(
success
=
(
ncf
<
pcf
))
{
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
olambda
=
0.0
;
step
[
i
]
=
myparams
[
i
]
+
step
[
i
];
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
myparams
[
i
]
=
step
[
i
];
}
}
lambda
=
lambda
/
10.0
;
}
if
(
zero_cf_diff_conv
(
pcf
,
ncf
,
cftol
)){
__syncthreads
();
return
;
cf_PVM_multi
(
step
,
mydata
,
bvecs
,
bvals
,
nparams
,
m_include_f0
,
idB
,
shared
,
fs
,
x
,
_a
,
_b
,
sumf
,
ncf
);
}
pcf
=
ncf
;
if
(
idB
==
0
){
}
else
{
if
(
success
=
(
ncf
<
pcf
))
{
olambda
=
lambda
;
olambda
=
0.0
;
lambda
=
lambda
*
10.0
;
for
(
int
i
=
0
;
i
<
nparams
;
i
++
){
if
(
lambda
>
ltol
){
myparams
[
i
]
=
step
[
i
];
return
;
}
}
lambda
=
lambda
/
10.0
;
}
if
(
zero_cf_diff_conv
(
pcf
,
ncf
,
cftol
)){
end
=
true
;
}
pcf
=
ncf
;
}
else
{
olambda
=
lambda
;
lambda
=
lambda
*
10.0
;
if
(
lambda
>
ltol
){
end
=
true
;
}
}
}
__syncthreads
();
if
(
end
)
return
;
}
}
return
;
}
}
#endif
#endif
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment