-
Matthew Webster authored
Cherrypick from master 2015-09-14 14:53:31 UTC Matthew Webster <mwebster@fmrib.ox.ac.uk> 'fix for tica/migp problem': dual_regression melgmix.cc melgmix.h melhlprfns.h melica.cc melica.h melpca.cc melpca.h
Matthew Webster authoredCherrypick from master 2015-09-14 14:53:31 UTC Matthew Webster <mwebster@fmrib.ox.ac.uk> 'fix for tica/migp problem': dual_regression melgmix.cc melgmix.h melhlprfns.h melica.cc melica.h melpca.cc melpca.h
melgmix.cc 18.54 KiB
/* MELODIC - Multivariate exploratory linear optimized decomposition into
independent components
melgmix.cc - Gaussian Mixture Model
Christian F. Beckmann, FMRIB Analysis Group
Copyright (C) 1999-2013 University of Oxford */
/* CCOPYRIGHT */
#include "newimage/newimageall.h"
#include "melgmix.h"
#include "utils/log.h"
#include "miscmaths/miscprob.h"
#include <time.h>
#include "libvis/miscplot.h"
#include "libvis/miscpic.h"
using namespace Utilities;
using namespace NEWIMAGE;
using namespace MISCPLOT;
using namespace MISCPIC;
string float2str(float f,int width, int prec, bool scientif){
ostringstream os;
int redw = int(std::abs(std::log10(std::abs(f))))+1;
if(width>0)
os.width(width);
if(scientif)
os.setf(ios::scientific);
os.precision(redw+std::abs(prec));
os.setf(ios::internal, ios::adjustfield);
os << f;
return os.str();
}
namespace Melodic{
void MelGMix::setup(const RowVector& dat,
const string dirname,
int cnum, volume<float> themask,
volume<float> themean,
int num_mix, float eps, bool fixit){
cnumber = cnum;
Mask = themask;
Mean = themean;
prefix = string("IC_")+num2str(cnum);
fitted = false;
nummix = num_mix;
numdata = dat.Ncols();
//normalise data
datamean = mean(dat,2).AsScalar();
datastdev= stdev(dat,2).AsScalar();
data=(dat - datamean)/datastdev;
dbgmsg(" mapdat; mean: " << datamean << " std: " <<datastdev << endl);
props=zeros(1,nummix);
vars=zeros(1,nummix);
means=zeros(1,nummix);
Params=zeros(1,nummix);
logprobY = 1.0;
props = std::pow(float(nummix),float(-1.0));
Matrix tmp1;
tmp1 = data * data.t() / numdata;
vars = tmp1.AsScalar();
float Dmin, Dmax, IntSize;
Dmin = min(data).AsScalar(); Dmax = max(data).AsScalar();
IntSize = Dmax / nummix;
means(1)=mean(data,2).AsScalar();
for (int ctr=2; ctr <= means.Ncols(); ctr++){
means(ctr) = Dmax - (ctr - 1.5) * IntSize;
}
means(2)=means(1)+2*sqrt(vars(1));
if(nummix>2)
means(3)=means(1)-2*sqrt(vars(1));
epsilon = eps;
if(epsilon >=0 && epsilon < 0.0000001)
{epsilon = std::log(float(dat.Ncols()))/1000 ;}
fixdim = fixit;
dbgmsg(" parameters; " << means << " : " << vars << " : " << props << endl);
}
Matrix MelGMix::threshold(const RowVector& dat, string levels){
Matrix Res;
Res = 1.0;
string tmpstr;
tmpstr = levels;
//cerr << " Levels : " << levels << endl << endl;
Matrix levls(1,4);
levls = 0;
Matrix fpr;
Matrix fdr;
Matrix nht;
char *p;
char t[1024];
const char *discard = ", [];{(})abceghijklmoqstuvwxyzABCEGHIJKLMNOQSTUVWXYZ~!@#$%^&*_-=+|\':></?";
strcpy(t, tmpstr.c_str());
p=strtok(t,discard);
while(p){
Matrix New(1,1);
New(1,1) = atof(p);
if(strchr(p,'d')){
levls(1,3)++;
if(fdr.Storage()>0){
fdr = fdr | New;
}else{
fdr = New;
}
}else{
if(strchr(p,'p')){
levls(1,2)++;
if(fpr.Storage()>0){
fpr = fpr | New;
}else{
fpr = New;
}
}else{
if(strchr(p,'n')){
levls(1,4)++;
if(nht.Storage()>0){
nht = nht | New;
}else{
nht = New;
}
}else{
levls(1,1)++;
levls = levls | New;
}
}
}
p=strtok(NULL,discard);
}
if(fpr.Storage()>0){levls = levls | fpr;}
if(fdr.Storage()>0){levls = levls | fdr;}
if(nht.Storage()>0){levls = levls | nht;}
// cerr << " levles : " << levls << endl << endl;
Res = threshold(data, levls);
set_threshmaps(Res);
return Res;
}
Matrix MelGMix::threshold(const RowVector& dat, Matrix& levels){
Matrix tests;
tests=levels;
Matrix Nprobs;
//if only single Gaussian: resort to nht
if(nummix==1||props(1)>0.999||probmap.Sum()<0.05){
if(levels(1,4)==0){
Matrix New(1,6);
New(1,5)=0.05;
New(1,6)=0.01;
New(1,4)=2;New(1,1)=0;New(1,2)=0;New(1,3)=0;tests=New;
}else{
Matrix New;
New = levels.Columns(int(1+levels(1,1)+levels(1,2)
+levels(1,3)),levels.Ncols());
New(1,4) = levels(1,4);
New(1,1)=0;New(1,1)=0;New(1,3)=0;
tests=New;
}
}
int numtests = int(tests(1,1)+tests(1,2)+tests(1,3)+tests(1,4));
Matrix Res(numtests,numdata);
Res = 0.0;
int next = 1;
for(int ctr1=1;ctr1<=tests(1,1);ctr1++){
if(4+next <= tests.Ncols()){
message(" alternative hypothesis test at p > " << tests(1,4+next) << endl);
add_infstr(" alternative hypothesis test at p > "+float2str(tests(1,4+next),0,2,false));
Matrix tmpNull;
tmpNull = dat;
/*
float cutoffpos, cutoffneg;
cutoffpos = means(1)+100*std::sqrt(vars(1)+0.0000001);
cutoffneg = means(1)-100*std::sqrt(vars(1)+0.0000001);
for(int ctr=1; ctr<=tmpNull.Ncols(); ctr++)
if( probmap(ctr) > tests(1,4+next) ){
if( dat(ctr) > means(1) )
cutoffpos = std::min(cutoffpos, float(dat(ctr)));
else
cutoffneg = std::max(cutoffneg, float(dat(ctr)));
}
for(int ctr=1; ctr<=tmpNull.Ncols(); ctr++)
if( (dat(ctr) > cutoffneg) && (dat(ctr) < cutoffpos) )
tmpNull(1,ctr)=0.0;
*/
for(int ctr=1; ctr<=tmpNull.Ncols(); ctr++)
if( probmap(ctr) < tests(1,4+next) ){
tmpNull(1,ctr)=0.0;
}
Res.Row(next) << tmpNull;
}
next++;
}
for(int ctr1=1;ctr1<=tests(1,2);ctr1++){
if(4+next <=tests.Ncols()){
cerr << " false positives control " << tests(1,4+next)<<endl;
Matrix tmp1;
tmp1 = normcdf(dat,means(1),vars(1));
Matrix tmpNull;
tmpNull = dat;
for(int ctr=1; ctr<=tmp1.Ncols(); ctr++)
if(tmp1(1,ctr) < tests(1,4+next))
tmpNull(1,ctr)=0.0;
Res.Row(next) << tmpNull;
}
next++;
}
for(int ctr1=1;ctr1<=tests(1,3);ctr1++){
if(4+next <=tests.Ncols()){
message(" Local False Discovery Rate control at p < " << tests(1,4+next) << endl);
add_infstr(" Local False Discovery Rate control at p < "+float2str(tests(1,4+next),0,2,false));
RowVector tmp=dat;
SortAscending(tmp);
RowVector newcdf(tmp);
newcdf << normcdf(tmp,means(1),vars(1));
float thrp = tmp(tmp.Ncols())+0.01;
float thrn = tmp(1)-0.01;
int ctr=tmp.Ncols();
do{
thrp = tmp(ctr);
ctr-=1;
}while(ctr>0 && ( (1.0-newcdf(ctr))*tmp.Ncols() < (tests(1,4+next)*(tmp.Ncols()-ctr+1)) ));
ctr=1;
do{
thrn = tmp(ctr);
ctr+=1;
}while(ctr<=tmp.Ncols() && ( (newcdf(ctr))*tmp.Ncols() < (tests(1,4+next)*ctr)));
tmp = dat;
for(ctr=1; ctr<=tmp.Ncols();ctr++)
if((tmp(ctr) < thrp)&&(tmp(ctr) > thrn))
tmp(ctr) = 0.0;
Res.Row(next) << tmp;
}
next++;
}
for(int ctr1=1;ctr1<=tests(1,4);ctr1++){
if(4+next <=tests.Ncols()){
message(" 2-sided null hypothesis test at " << tests(1,4+next)<<endl);
add_infstr(" 2-sided null hypothesis test at "+float2str(tests(1,4+next),0,2,false));
double mu, sig;
mu = dat.Sum()/numdata;
sig = var(dat,2).AsScalar();
Matrix tmp1;
tmp1 = normcdf(dat,mu,std::abs(sig));
Matrix tmpNull;
tmpNull = dat;
for(int ctr=1; ctr<=tmp1.Ncols(); ctr++)
if((tmp1(1,ctr) < 1-0.5*(tests(1,4+next))&&
(tmp1(1,ctr) > 0.5*(tests(1,4+next)))))
tmpNull(1,ctr)=0.0;
Res.Row(next) << tmpNull;
}
next++;
}
return Res;
}
/* GMM fitting */
void MelGMix::gmmupdate(){
int it_ctr = 1;
bool exitloop = false;
float oldll;
Matrix tmp0;Matrix tmp1;Matrix prob_K__y_theta;
Matrix kdata;
RowVector prob_Y__theta;RowVector Nbar;
RowVector mubar;RowVector sigmabar;RowVector pibar;
do{
oldll = logprobY;
//make sure all variances are acceptable
for(int ctr1=1; ctr1 <=vars.Ncols(); ctr1++)
if(vars(ctr1)<0.0001){
vars(ctr1) = 0.0001;
}
tmp0 = normpdf(data,means,vars);
tmp1 = SP(props.t()*ones(1,numdata),tmp0);
prob_Y__theta << sum(tmp1,1);
logprobY = log(prob_Y__theta).Sum();
prob_K__y_theta = SP(tmp1,pow(ones(nummix,1)*prob_Y__theta,-1));
Nbar << sum(prob_K__y_theta,2).t();
pibar = Nbar / numdata;
kdata = ones(nummix,1)*data;
mubar <<SP(sum(SP(kdata,prob_K__y_theta),2).t(),pow(Nbar,-1));
kdata -= mubar.t()*ones(1,numdata);
kdata = pow(kdata,2);
sigmabar << SP(sum(SP(kdata,prob_K__y_theta),2).t(),pow(Nbar,-1));
means = mubar;
vars = sigmabar;
props = pibar;
if(epsilon<0){exitloop = it_ctr >= -epsilon;}
else{exitloop = (((logprobY-oldll < epsilon)&&(it_ctr>20))
||(it_ctr>400));}
it_ctr++;
}while(!exitloop);
}
void MelGMix::gmmfit(){
int i,j;
if(fixdim){
if(nummix>1){
gmmupdate();
add_params(means,vars,props,logprobY,MDL,Evi,true);
}else{
means.ReSize(1);
means = data.Sum()/numdata;
vars.ReSize(1);
vars = var(data,2);
props.ReSize(1);
props = 1.0;
gmmevidence();
}
}else{
RowVector Score(Params.Ncols());
do{
gmmupdate();
Score(nummix) = gmmevidence();
int idx1,idx2;
RowVector pitmp = props;
pitmp.MaximumAbsoluteValue1(idx1);
pitmp(idx1)=0.0;
pitmp.MaximumAbsoluteValue1(idx2);
if(props.MaximumAbsoluteValue1(i)<0.9){
if((vars(idx2)>0.15)&&
(std::abs(means(idx2)-means(idx1))<0.5*vars(idx1))){
Score(nummix) = Score(nummix)/(2*(means(idx1)));
}
}
add_params(means,vars,props,logprobY,MDL,Evi,true);
gmmreducemm();
means = means.Columns(1,nummix);
vars = vars.Columns(1,nummix);
props = props.Columns(1,nummix);
}while(nummix>1);
means.ReSize(1);
means = data.Sum()/numdata;
vars.ReSize(1);
vars = var(data,2);
props.ReSize(1);
props = 1.0;
Score(nummix) = gmmevidence();
add_params(means,vars,props,logprobY,MDL,Evi,true);
//identify best MM
Score.MinimumAbsoluteValue2(i,j);
means.ReSize(j);
vars.ReSize(j);
props.ReSize(j);
nummix = j;
int index; index = 3 + (j-1)*5;
means = Params.SubMatrix(index,index,1,j);
vars = Params.SubMatrix(index+1,index+1,1,j);
props = Params.SubMatrix(index+2,index+2,1,j);
}
props.MaximumAbsoluteValue2(i,j);
if(j>1){
float tmp;
tmp = means(1);means(1) = means(j);means(j)=tmp;
tmp = vars(1);vars(1) = vars(j);vars(j)=tmp;
tmp = props(1);props(1) = props(j);props(j)=tmp;
}
add_params(means,vars,props,logprobY,MDL,Evi,true);
if(nummix==1)
probmap << normcdf(data,means(1),vars(1));
else{
Matrix Nprobs;
Matrix tmp0;
tmp0 = normpdf(data,means,vars);
Nprobs = SP(props.t()*ones(1,numdata),tmp0);
tmp0 = ones(Nprobs.Nrows(),1)*pow(sum(Nprobs,1),-1);
Nprobs = SP(tmp0,Nprobs);
probmap << SP(sum(Nprobs.Rows(2,Nprobs.Nrows()),1),
pow(sum(Nprobs,1),-1));
}
}
float MelGMix::gmmevidence(){
Matrix tmp0;
if(means.Ncols()>1){
tmp0 = normpdf(data,means,vars);
}else{
tmp0 = normpdf(data,means.AsScalar(),vars.AsScalar());
}
Matrix tmp1;
tmp1 = SP(props.t()*ones(1,numdata),tmp0);
tmp0 = SP(tmp0,pow(ones(nummix,1)*sum(tmp1,1),-1));
tmp0 = pow(tmp0-ones(nummix,1)*tmp0.Row(nummix),2);
float logH = 0;
if(means.Ncols()>1){
logH = sum(log(sum(tmp0.Rows(1,nummix-1),2)),1).AsScalar();
}
logH = logH + 2*sum(log(std::sqrt(2.0)*numdata*props),2).AsScalar();
logH = logH - 2*sum(props,2).AsScalar();
RowVector prob_Y__theta;
prob_Y__theta << sum(tmp1,1);
logprobY = log(prob_Y__theta).Sum();
MDL = -logprobY + (1.5*nummix-0.5)* std::log(float(numdata));
Evi = -logprobY +nummix*std::log(2.0)-std::log(MISCMATHS::gamma(nummix))
-3*nummix/2*std::log(M_PI)+0.5*logH;
return Evi;
}
void MelGMix::gmmreducemm(){
Matrix dlm(nummix,nummix);
Matrix mus(nummix,nummix);
Matrix rs(nummix,nummix);
for(int ctri=1;ctri<=nummix; ctri++){
for(int ctrj=1;ctrj<=nummix; ctrj++){
mus(ctri,ctrj) = (props(ctri)*means(ctri)+props(ctrj)*means(ctrj))
/(props(ctri)+props(ctrj));
rs(ctri,ctrj) = (props(ctri)*(vars(ctri)+
std::pow(means(ctri)-mus(ctri,ctrj),2) )
+ props(ctrj)*(vars(ctrj)
+ std::pow(means(ctrj)-mus(ctri,ctrj),2)))
/ (props(ctri)+props(ctrj));
dlm(ctri,ctrj) = 0.5*numdata*(
props(ctri)*std::log(
std::abs(rs(ctri,ctrj))/std::abs(vars(ctri)))
+ props(ctrj)*std::log(std::abs(rs(ctri,ctrj))
/ std::abs(vars(ctrj))));
}
}
dlm += IdentityMatrix(nummix)*dlm.Maximum();
int i,j;
float val;
val=dlm.MinimumAbsoluteValue2(i,j);
nummix--;
RowVector newmean(nummix);
RowVector newvars(nummix);
RowVector newprop(nummix);
int ctr0 = 1;
for(int ctr=1; ctr<=nummix+1; ctr++){
if(ctr!=i&&ctr!=j){
newmean(ctr0) = means(ctr);
newvars(ctr0) = vars(ctr);
newprop(ctr0) = props(ctr);
ctr0++;
}
}
//cerr << "ctr0 " << ctr0 << endl;
if(ctr0<=nummix){
newmean(ctr0) = mus(i,j);
newvars(ctr0) = rs(i,j);
newprop(ctr0) = props(i)+props(j);
means = newmean;
vars=newvars;
props=newprop;
}
}
void MelGMix::ggmfit(){
// fit a mixture of a Gaussian and multiple Gamma functions to the histogram
float log_p_y_theta = 1.0;
float old_ll = 2.0;
float g_eps = 0.000001;
int it_ctr = 0;
double Dmax, Dmin;
Dmax = 2 * data.Maximum();
Dmin = -2 * data.Minimum();
//resize means, vars and props
if(nummix > 3)
nummix = 3;
means = means.Columns(1,nummix);
vars = vars.Columns(1,nummix);
props = props.Columns(1,nummix);
means(1) = -2*mean(data,2).AsScalar();
Matrix tmp1;Matrix prob_K__y_theta;
Matrix kdata;
RowVector prob_Y__theta;RowVector Nbar;
RowVector mubar;RowVector sigmabar;RowVector pibar;
Matrix p_ygx(nummix,numdata);
offset = 0.0;
float const2;
Matrix negdata(data);
negdata = -data;
while((it_ctr<30) ||
((std::abs(old_ll - log_p_y_theta)>g_eps) && (it_ctr<500))){ // fit GGM
it_ctr++;
//offset = (std::min(0.2,1-props(1)))*std::sqrt(vars(1));
//make sure all variances are acceptable
for(int ctr1=1; ctr1 <=nummix; ctr1++)
if(vars(ctr1)<0.0001){
vars(ctr1) = 0.0001;
}
p_ygx = 0.0;
p_ygx.Row(1) << normpdf(data,means(1),vars(1));
const2 = (2.6-props(1))*sqrt(vars(1))+means(1); //min. nht level
means(2) = (std::max(means(2), std::max(0.001,
0.5 * ( const2 + std::sqrt( const2*const2 + 4*vars(2))))));
vars(2) = std::max(std::min(vars(2), 0.5*std::pow(means(2),2)),0.0001);
p_ygx.Row(2) << gammapdf(data,means(2),vars(2));
if(nummix>2){
const2 = (2.6-props(1))*sqrt(vars(1))-means(1);
means(3) = -(std::max(-means(3), std::max(0.001,
0.5 * ( const2 + std::sqrt( const2*const2 + 4*vars(3))))));
vars(3) = std::max(std::min(vars(3), 0.5*std::pow(means(3),2)),0.0001);
p_ygx.Row(3) << gammapdf(negdata,-means(3),vars(3));
}
tmp1 = SP(props.t()*ones(1,numdata),p_ygx);
prob_Y__theta << sum(tmp1,1);
//deal with non-modelled voxels
for(int ctr=1; ctr<=tmp1.Ncols(); ctr++)
if(prob_Y__theta(ctr) < 0.0001)
prob_Y__theta(ctr) = 0.0001;
old_ll = log_p_y_theta;
log_p_y_theta = log(prob_Y__theta).Sum();
if((it_ctr<30) ||
((std::abs(old_ll - log_p_y_theta)>g_eps) && (it_ctr<300))){//update
prob_K__y_theta = SP(tmp1,pow(ones(nummix,1)*prob_Y__theta,-1));
Nbar << sum(prob_K__y_theta,2).t();
for(int ctr=1; ctr<=Nbar.Ncols(); ctr++)
if(Nbar(ctr) < 0.0001 * numdata)
Nbar = Nbar + 0.0001;
pibar= Nbar / numdata;
// cerr << "pibar :" << pibar << endl;
kdata = ones(nummix,1)*data;
mubar <<SP(sum(SP(kdata,prob_K__y_theta),2).t(),pow(Nbar,-1));
// cerr << "mubar :" << mubar << endl;
kdata -= mubar.t()*ones(1,numdata);
kdata = pow(kdata,2);
sigmabar << SP(sum(SP(kdata,prob_K__y_theta),2).t(),pow(Nbar,-1));
means = mubar;
vars = sigmabar;
props = pibar;
}//update
} //while loop
props = props / sum(props,2).AsScalar();
add_params(means,vars,props,logprobY,MDL,Evi,true);
probmap << SP(sum(tmp1.Rows(2,tmp1.Nrows()),1),
pow(sum(tmp1,1),-1));
dbgmsg(" mu: " << means << " sig: " << vars << " prop: " << props << endl);
if(props(1)<0.4 ){
//set up GMM
message(" try Gaussian Mixture Model " << endl);
props=zeros(1,nummix);
vars=zeros(1,nummix);
means=zeros(1,nummix);
Params=zeros(1,nummix);
logprobY = 1.0;
props = std::pow(float(nummix),float(-1.0));
tmp1 = data * data.t() / numdata;
vars = tmp1.AsScalar();
float Dmin, Dmax, IntSize;
Dmin = min(data).AsScalar(); Dmax = max(data).AsScalar();
IntSize = Dmax / nummix;
means(1)=mean(data,2).AsScalar();
for (int ctr=2; ctr <= means.Ncols(); ctr++){
means(ctr) = Dmax - (ctr - 1.5) * IntSize;
}
means(2)=means(1)+sqrt(vars(1));
if(nummix>2)
means(3)=means(1)-sqrt(vars(1));
fit(string("GMM"));
}
}
/* INPUT / OUTPUT */
void MelGMix::add_params(Matrix& mu, Matrix& sig, Matrix& pi,
float logLH, float MDL, float Evi, bool advance){
int size = Params.Ncols();
if(size<2){size=2;}
Matrix New(5,size);
New = 0;
New.SubMatrix(3,3,1,mu.Ncols())=mu;
New.SubMatrix(4,4,1,mu.Ncols())=sig;
New.SubMatrix(5,5,1,mu.Ncols())=pi;
New(1,1)=nummix;
New(1,2)=-logLH;
New(2,1)=Evi;
New(2,2)=MDL;
if(Params.Storage()>nummix){
Params = New & Params;
}else{
Params = New;
}
}
void MelGMix::get_params(int index, Matrix& mu, Matrix& sig, Matrix& pi,
float logLH, float MDL, float Evi){
}
void MelGMix::save(){
}
void MelGMix::status(const string &txt){
cerr << txt << "epsilon " << epsilon << endl;
cerr << txt << "nummix " << nummix << endl;
cerr << txt << "numdata " << numdata << endl;
cerr << txt << "means " << means << endl;
cerr << txt << "vars " << vars << endl;
cerr << txt << "props " << props << endl;
//write_ascii_matrix(logger.appendDir(string(txt + "means")),means);
//write_ascii_matrix(logger.appendDir(string(txt + "vars")),vars);
//write_ascii_matrix(logger.appendDir(string(txt + "props")),props);
}
void MelGMix::create_rep(){
}
}