-
Mark Jenkinson authoredMark Jenkinson authored
melpca.cc 16.08 KiB
/* MELODIC - Multivariate exploratory linear optimized decomposition into
independent components
melpca.cc - PCA and whitening
Christian F. Beckmann, FMRIB Image Analysis Group
Copyright (C) 1999-200 University of Oxford */
/* CCOPYRIGHT */
#include "newimage/newimageall.h"
#include "utils/log.h"
#include "meloptions.h"
#include "meldata.h"
#include "melodic.h"
#include "newmat/newmatap.h"
#include "newmat/newmatio.h"
#include "melpca.h"
#include "libprob/libprob.h"
using namespace Utilities;
using namespace NEWIMAGE;
namespace Melodic{
void MelodicPCA::perf_pca(const Matrix &Data)
{
message("Starting PCA ... ");
SymmetricMatrix Corr;
if(opts.segment.value().length()>0){
Matrix Res;
Res = ones(Data.Nrows(),1)*melodat.get_RXweight();
Res = SP(melodat.get_Data(),Res);
Corr = cov(Res.t());
}
else{
Corr = cov(melodat.get_Data().t());
}
Matrix tmpE;
DiagonalMatrix tmpD;
EigenValues(Corr,tmpD,tmpE);
if(opts.tsmooth.value()){
message(" temporal smoothing of Eigenvectors " << endl);
tmpE=melodat.smoothColumns(tmpE);
}
melodat.set_pcaE(tmpE);
melodat.set_pcaD(tmpD);
RowVector AdjEV;
AdjEV = tmpD.AsRow().Reverse();
SortDescending(AdjEV);
RowVector PercEV(AdjEV);
PercEV = cumsum(AdjEV / sum(AdjEV,2).AsScalar());
write_ascii_matrix(logger.appendDir("eigenvalues_percent"),PercEV);
melodat.set_EVP(PercEV);
AdjEV = (AdjEV - min(AdjEV).AsScalar())/(max(AdjEV).AsScalar() - min(AdjEV).AsScalar());
melodat.set_EV((AdjEV));
message("done" << endl);
}
void MelodicPCA::perf_white(const Matrix &Data)
{
Matrix RE;
DiagonalMatrix RD;
Matrix tmpWhite;
Matrix tmpDeWhite;
int N = melodat.get_pcaE().Ncols();
if(opts.pca_dim.value() > N){
message("dimensionality too large - using -dim " << N <<
" instead " << endl);
opts.pca_dim.set_T(N);
}
if(opts.pca_dim.value() < 0){
if(opts.remove_meanvol.value()){
opts.pca_dim.set_T(N-2);
}else{
opts.pca_dim.set_T(N-1);
}
}
if(opts.pca_dim.value() ==0){
opts.pca_dim.set_T(pcadim());
if(melodat.get_Data().Nrows()<20){
opts.pca_dim.set_T(N-2);
message("too few data points for full estimation, using "
<< opts.pca_dim.value() << " instead"<< endl);
}
}
if(opts.approach.value()==string("jade") && opts.pca_dim.value() > 30){
message("dimensionality too large for jade estimation - using --dim 30 instead" << endl);
opts.pca_dim.set_T(30);
}
message("Start whitening using "<< opts.pca_dim.value()<<" dimensions ... " << endl);
RowVector tmpEVP;
tmpEVP << melodat.get_EVP();
float varp = 1.0;
if(opts.pca_dim.value() <= tmpEVP.Ncols()){
varp = tmpEVP(opts.pca_dim.value());
}
message(" retaining "<< varp*100 <<" percent of the variability " << endl);
RE = melodat.get_pcaE().Columns(N-opts.pca_dim.value()+1,N);
RD << abs(melodat.get_pcaD().SymSubMatrix(N-opts.pca_dim.value()+1,N));
tmpWhite = sqrt(abs(RD.i()))*RE.t();
tmpDeWhite = RE*sqrt(RD);
melodat.set_white(tmpWhite);
melodat.set_dewhite(tmpDeWhite);
message(" ... done"<< endl << endl);
}
int MelodicPCA::pcadim()
{
message("Estimating the number of sources from the data (PPCA) ..." << endl);
//First, estimate the smoothness of the data;
// set up all strings
string SM_path = opts.binpath + "smoothest";
string Mask_fname = logger.appendDir("mask");
if(opts.segment.value().length()>0){
Mask_fname = opts.segment.value();
}
// Setup external call to smoothest:
char callSMOOTHESTstr[1000];
ostrstream osc(callSMOOTHESTstr,1000);
osc << SM_path << " -d " << melodat.data_dim()
<< " -r " << opts.inputfname.value() << " -m "
<< Mask_fname << " > " << logger.appendDir("smoothest") << '\0';
message(" Calling Smoothest: " << callSMOOTHESTstr << endl);
system(callSMOOTHESTstr);
//read back the results
ifstream in;
string str;
float Resel = 1.0;
in.open(logger.appendDir("smoothest").c_str(), ios::in);
if(in>0){
for(int ctr=1; ctr<7; ctr++){ in >> str;}
in.close();
if(str!="nan"){
Resel = atof(str.c_str());
}
}
//cerr << " Resels : " << Resel << endl << endl;
melodat.set_resels(Resel);
Matrix PPCAest;
// if(!opts.varnorm.value()){
SymmetricMatrix Corr;
if(opts.segment.value().length()>0){
Matrix Res;
Res = ones(melodat.get_DataVN().Nrows(),1)*melodat.get_RXweight();
Res = SP(melodat.get_DataVN(),Res);
Corr = cov(Res.t());
}
else{
Corr = cov(melodat.get_DataVN().t());
}
DiagonalMatrix tmpD;
Matrix tmpE;
EigenValues(Corr,tmpD,tmpE);
// }
RowVector AdjEV;
AdjEV << tmpD.AsRow();
AdjEV = AdjEV.Columns(3,AdjEV.Ncols());
AdjEV = AdjEV.Reverse();
RowVector CircleLaw;
int NumVox = (int) floor(melodat.data_samples()/(2.5*Resel));
CircleLaw = Feta(int(AdjEV.Ncols()), NumVox);
for(int ctr=1;ctr<=CircleLaw.Ncols(); ctr++){
if(CircleLaw(ctr)<5*10e-10){CircleLaw(ctr) = 5*10e-10;}
}
//write_ascii_matrix(logger.appendDir("tmpA1"),AdjEV);
//AdjEV = AdjEV.Columns(2,AdjEV.Ncols());
//write_ascii_matrix(logger.appendDir("tmpA2"),AdjEV);
//cerr<< AdjEV.Nrows() << " x " << AdjEV.Ncols() << endl;
//cerr<< CircleLaw.Nrows() << " x " << CircleLaw.Ncols() << endl;
float slope;
slope = CircleLaw.Columns(int(AdjEV.Ncols()/4),AdjEV.Ncols() -
int(AdjEV.Ncols()/4)).Sum() /
AdjEV.Columns(int(AdjEV.Ncols()/4),AdjEV.Ncols() -
int(AdjEV.Ncols()/4)).Sum();
//CircleLaw = slope * (CircleLaw-1) + 1;
// write_ascii_matrix(logger.appendDir("claw"),CircleLaw.Columns(1,AdjEV.Ncols()));
RowVector PercEV(AdjEV);
PercEV = cumsum(AdjEV / sum(AdjEV,2).AsScalar());
// write_ascii_matrix(logger.appendDir("ev"),AdjEV);
//cerr << int(AdjEV.Ncols()) << " " << NumVox << " " << slope << endl;
AdjEV << SP(AdjEV,pow(CircleLaw.Columns(1,AdjEV.Ncols()),-1));
// cerr << "recalculated" << endl;
SortDescending(AdjEV);
int maxEV = 1;
float threshold = 0.98;
for(int ctr_i = 1; ctr_i < PercEV.Ncols(); ctr_i++){
if((PercEV(ctr_i)<threshold)&&(PercEV(ctr_i+1)>=threshold)){maxEV=ctr_i;}
}
if(maxEV<3){maxEV=PercEV.Ncols()/2;}
RowVector NewEV;
Matrix temp1;
temp1 = abs(AdjEV);
NewEV << temp1.SubMatrix(1,1,1,maxEV);
PPCAest = ppca_est(NewEV, NumVox);
RowVector estimators(5);
estimators = 1.0;
Matrix PPCA2(PPCAest);
for(int ctr=1; ctr<=PPCA2.Ncols(); ctr++){
PPCA2.Column(ctr) = (PPCA2.Column(ctr) -
min(PPCA2.Column(ctr)).AsScalar()) /
( max(PPCA2.Column(ctr)).AsScalar() -
min(PPCA2.Column(ctr)).AsScalar());
}
int ctr_i = 1;
while((ctr_i< PPCAest.Nrows()-1)&&
(PPCAest(ctr_i,2) < PPCAest(ctr_i+1,2))&&(ctr_i<maxEV))
{estimators(1)=ctr_i+1;ctr_i++;}
ctr_i = 1;
while((ctr_i< PPCAest.Nrows()-1)&&
(PPCAest(ctr_i,3) < PPCAest(ctr_i+1,3))&&(ctr_i<maxEV))
{estimators(2)=ctr_i+1;ctr_i++;}
ctr_i = 1;
while((ctr_i< PPCAest.Nrows()-1)&&
(PPCAest(ctr_i,4) < PPCAest(ctr_i+1,4))&&(ctr_i<maxEV))
{estimators(3)=ctr_i+1;ctr_i++;}
ctr_i = 1;
while((ctr_i< PPCAest.Nrows()-1)&&
(PPCAest(ctr_i,5) < PPCAest(ctr_i+1,5))&&(ctr_i<maxEV))
{estimators(4)=ctr_i+1;ctr_i++;}
ctr_i = 1;
while((ctr_i< PPCAest.Nrows()-1)&&
(PPCAest(ctr_i,6) < PPCAest(ctr_i+1,6))&&(ctr_i<maxEV))
{estimators(5)=ctr_i+1;ctr_i++;}
int res = 0;
ColumnVector PPCA;
if(opts.pca_est.value() == string("lap")){
res = int(estimators(1));
PPCA << PPCA2.Column(2);
}
if(opts.pca_est.value() == string("bic")){
res = int(estimators(2));
PPCA << PPCA2.Column(2);
}
if(opts.pca_est.value() == string("mdl")){
res = int(estimators(3));
PPCA << PPCA2.Column(4);
}
if(opts.pca_est.value() == string("aic")){
res = int(estimators(5));
PPCA << PPCA2.Column(6);
}
if(res==0){//median estimator
PPCA = PPCA2.Column(2);
for(int ctr=1; ctr<=PPCA2.Nrows(); ctr++){
RowVector tmp = PPCA2.SubMatrix(ctr,ctr,2,6);
// SortAscending(tmp);
// float themean = float(tmp.Sum()/5);
// if(std::abs(int(tmp(2)-themean)) < std::abs(int(tmp(3)-themean)))
// PPCA(ctr) = tmp(2);
// else
// PPCA(ctr) = tmp(3);
PPCA(ctr) = float(tmp.Sum()/5);
}
ctr_i = 1;
while((PPCA(ctr_i) < PPCA(ctr_i+1))&&(ctr_i<maxEV)){
res=ctr_i+1;ctr_i++;
}
}
// cerr << estimators << " " << res << endl;
//write_ascii_matrix(logger.appendDir("PPCA2"),PPCA2);
AdjEV = (AdjEV - min(AdjEV).AsScalar())/(max(AdjEV).AsScalar() - min(AdjEV).AsScalar());
write_ascii_matrix(logger.appendDir("PPCA"),PPCA);
write_ascii_matrix(logger.appendDir("eigenvalues_adjusted"),AdjEV.t());
write_ascii_matrix(logger.appendDir("eigenvalues_percent"),PercEV.t());
melodat.set_EVP(PercEV);
melodat.set_EV(AdjEV);
melodat.set_PPCA(PPCA);
//PPCA << sum(PPCAest.Columns(2,6),2);
//ctr_i = 1;
//while((PPCA(ctr_i) < PPCA(ctr_i+1))&&(ctr_i<maxEV)){
// res=ctr_i+1;ctr_i++;
//}
//res = int(sum(estimators,2).AsScalar()/5);
// res = int(estimators(1)); // Laplace approximation
// SortAscending(estimators);
// if(std::abs(int(estimators(2))-res) < std::abs(int(estimators(3))-res))
// res = int(estimators(2));
// else
// res = int(estimators(3));
//write_ascii_matrix(logger.appendDir("PPCA"),PPCAest);
//write_ascii_matrix(logger.appendDir("dimest"),estimators);
return res;
}
RowVector MelodicPCA::Feta(int n1, int n2)
{
float nu = (float) n1/n2;
float bm = pow((1-sqrt(nu)),2.0);
float bp = pow((1+sqrt(nu)),2.0);
//cerr << "nu, bm, bp " << nu << " " <<bm << " " << bp << endl;
float lrange = 0.9*bm;
float urange = 1.1*bp;
// int dummy;
RowVector eta(30*n1);
float rangestepsize = (urange - lrange) / eta.Ncols();
for(int ctr_i = 0; ctr_i < eta.Ncols(); ctr_i++){
eta(ctr_i+1) = lrange + rangestepsize * (ctr_i);
}
RowVector teta(10*n1);
teta = 0;
float stepsize = (bp - bm) / teta.Ncols();
for(int ctr_i = 0; ctr_i < teta.Ncols(); ctr_i++){
teta(ctr_i+1) = stepsize*(ctr_i);
}
//cerr << teta(1)+bm << " " << teta(1000)+bm << endl;
//cerr << eta(1)<< " " << eta(eta.Ncols())<< endl;
//cerr << "BP1" << endl;
//write_ascii_matrix(logger.appendDir("teta"),teta.t());
// RowVector tmp1(teta);
// tmp1 = teta + bm;
// cerr << "tmp1" << endl;
// tmp1 = pow(2*M_PI*nu*(tmp1),-1);
// cerr << "tmp1" << endl;
// RowVector tmp2(teta);
// cerr << "tmp2" << endl;
// tmp2 = SP(teta, bp-bm-teta);
// cerr << "tmp2" << endl;
// tmp2=abs(tmp2);
// cerr << "tmp2" << endl;
RowVector feta(teta);
feta = SP(pow(2*M_PI*nu*(teta + bm),-1), pow(SP(teta, bp-bm-teta),0.5));
//Matrix location;
teta = teta + bm;
//cerr << "teta : " << teta.Nrows() << " x " << teta.Ncols() << endl;
//cerr << "eta : " << eta.Nrows() << " x " << eta.Ncols() << endl;
//cerr << "feta : " << feta.Nrows() << " x " << feta.Ncols() << endl;
//c/err << "vor location (input)" << endl;
//cin >> dummy;
//location = SP(teta.t()*ones(1,eta.Ncols()),pow(ones(teta.Ncols(),1)*eta,-1));
//cerr << "nach location (input)" << endl;
//cin >> dummy;
//cerr << " weiter " << endl;
//for(int ctr_i = 1; ctr_i <= location.Ncols(); ctr_i++){
// for(int ctr_j = 1; ctr_j <= location.Nrows(); ctr_j++){
// if(location(ctr_j,ctr_i)<1){location(ctr_j,ctr_i)=1;}
// else {location(ctr_j,ctr_i)=0;}
// }
// }
//write_ascii_matrix(logger.appendDir("location"),location);
// write_ascii_matrix(logger.appendDir("teta"),teta);
//write_ascii_matrix(logger.appendDir("eta"),eta);
//write_ascii_matrix(logger.appendDir("feta"),feta);
//RowVector claw;
// claw = n1*(1-sum(SP(stepsize*feta.t()*ones(1,eta.Ncols()),location),1).AsRow());
RowVector claw(eta);
claw = 0;
for(int ctr_i = 1; ctr_i <= eta.Ncols(); ctr_i++){
double tmpval = 0.0;
for(int ctr_j = 1; ctr_j <= teta.Ncols(); ctr_j++){
if(( double(teta(ctr_j))/double(eta(ctr_i)) )<1)
tmpval += feta(ctr_j);
}
claw(ctr_i) = n1*(1-stepsize*tmpval);
}
//write_ascii_matrix(logger.appendDir("claw"),claw);
//cerr << "BP1" << endl;
RowVector Res(n1); //invert the CDF
//cerr << "n1=" << n1 << endl;
for(int ctr_i = 1; ctr_i < eta.Ncols(); ctr_i++){
if(floor(claw(ctr_i))>floor(claw(ctr_i+1))){
// cerr << int(floor(claw(ctr_i))) << " ";
Res(int(floor(claw(ctr_i)))) = eta(ctr_i);
}
}
//cerr << endl;
// cerr << " Done with loop " << int(floor(tmp4b(ctr_i))) << endl;
//write_ascii_matrix(logger.appendDir("claw-dstn"),Res);
return Res;
}
RowVector MelodicPCA::cumsum(const RowVector& Inp)
{
UpperTriangularMatrix UT(Inp.Ncols());
UT=1.0;
RowVector Res;
Res = Inp * UT;
return Res;
}
Matrix MelodicPCA::ppca_est(const RowVector& eigenvalues, const int N)
{
RowVector logLambda(eigenvalues);
logLambda = log(eigenvalues);
int d = logLambda.Ncols();
RowVector k(d);
for(int ctr_i = 1; ctr_i <=d; ctr_i++){
k(ctr_i)=ctr_i;
}
RowVector m(d);
m=d*k-0.5*SP(k,k+1);
RowVector loggam(d);
loggam = 0.5*k.Reverse();
for(int ctr_i = 1; ctr_i <=d; ctr_i++){
loggam(ctr_i)=lgam(loggam(ctr_i));
}
loggam = cumsum(loggam);
RowVector l_probU(d);
l_probU = -log(2)*k + loggam - cumsum(0.5*log(M_PI)*k.Reverse());
RowVector tmp1;
tmp1 = -cumsum(eigenvalues).Reverse()+sum(eigenvalues,2).AsScalar();
tmp1(1) = 0.95*tmp1(2);
tmp1=tmp1.Reverse();
RowVector tmp2;
tmp2 = -cumsum(logLambda).Reverse()+sum(logLambda,2).AsScalar();
tmp2(1)=tmp2(2);
tmp2=tmp2.Reverse();
RowVector tmp3;
tmp3 = d-k;
tmp3(d) = 1.0;
RowVector tmp4;
tmp4 = SP(tmp1,pow(tmp3,-1));
for(int ctr_i = 1; ctr_i <=d; ctr_i++){
if(tmp4(ctr_i)<0.01){tmp4(ctr_i)=0.01;}
if(tmp3(ctr_i)<0.01){tmp3(ctr_i)=0.01;}
if(tmp1(ctr_i)<0.01){tmp1(ctr_i)=0.01;}
}
RowVector l_nu;
l_nu = SP(-N/2*(d-k),log(tmp4));
l_nu(d) = 0;
RowVector l_lam;
l_lam = -(N/2)*cumsum(logLambda);
RowVector l_lhood;
l_lhood = SP(pow(tmp3,-1),tmp2)-log(SP(pow(tmp3,-1),tmp1));
Matrix t1,t2, t3;
UpperTriangularMatrix triu(d);
triu = 1.0;
for(int ctr_i = 1; ctr_i <= triu.Ncols(); ctr_i++){
triu(ctr_i,ctr_i)=0.0;
}
t1 = (ones(d,1) * eigenvalues);
t1 = SP(triu,t1.t() - t1);
t2 = pow(tmp4,-1).t()*ones(1,d);
t3 = ones(d,1)*pow(eigenvalues,-1);
t2 = SP(triu, t2.t()-t3.t());
for(int ctr_i = 1; ctr_i <= t1.Ncols(); ctr_i++){
for(int ctr_j = 1; ctr_j <= t1.Nrows(); ctr_j++){
if(t1(ctr_j,ctr_i)<=0){t1(ctr_j,ctr_i)=1;}
}
}
for(int ctr_i = 1; ctr_i <= t2.Ncols(); ctr_i++){
for(int ctr_j = 1; ctr_j <= t2.Nrows(); ctr_j++){
if(t2(ctr_j,ctr_i)<=0){t2(ctr_j,ctr_i)=1;}
}
}
t1 = cumsum(sum(log(t1),2).AsRow());
t2 = cumsum(sum(log(t2),2).AsRow());
RowVector l_Az(d);
l_Az << (t1+t2);
RowVector l_lap;
l_lap = l_probU + l_nu +l_Az + l_lam + 0.5*log(2*M_PI)*(m+k)-0.5*log(N)*k;
RowVector l_BIC;
l_BIC = l_lam + l_nu - 0.5*log(N)*(m+k);
RowVector l_RRN;
l_RRN = -0.5*N*SP(k,log(SP(cumsum(eigenvalues),pow(k,-1))))+l_nu;
RowVector l_AIC;
l_AIC = -2*N*SP(tmp3,l_lhood)+ 2*(1+d*k+0.5*(k-1));
l_AIC = -l_AIC;
RowVector l_MDL;
l_MDL = -N*SP(tmp3,l_lhood)+ 0.5*(1+d*k+0.5*(k-1))*log(N);
l_MDL = -l_MDL;
Matrix Res;
Res = eigenvalues.t();
Res |= l_lap.t();
Res |= l_BIC.t();
Res |= l_MDL.t();
Res |= l_RRN.t();
Res |= l_AIC.t();
return Res;
}
}