/* minimize
   Tim Behrens, FMRIB Image Analysis Group
   Copyright (C) 1999-2000 University of Oxford */
/* CCOPYRIGHT */
// WANT_STREAM and WANT_MATH must be defined before the newmat headers
// are included in order to take effect
#define WANT_STREAM
#define WANT_MATH

#include <string>
#include <iostream>
#include <fstream>
#include <unistd.h>
#include <cmath>
#include <vector>
#include <algorithm>
#include "newmatap.h"
#include "newmatio.h"
#include "miscmaths.h"
#include "minimize.h"

using namespace NEWMAT;
using namespace std;
///////////////////////////////////////////////////////
namespace MISCMATHS {
float diff1(const ColumnVector& x, const EvalFunction& func, int i, float h, int errorord)
{
  //computes the first derivative of func wrt the i^th parameter at point "x"
  //with step size h. errorord selects the finite-difference scheme:
  //1 = forward difference, 2 = central difference, anything else = the
  //five-point fourth-order stencil.
  ColumnVector xtmp=x;
  float deriv=0;
  if(errorord==1){
    xtmp(i)=xtmp(i)+h;
    float en_plus=func.evaluate(xtmp);
    float en=func.evaluate(x);
    deriv=(en_plus-en)/h;
  }
  else if(errorord==2){
    xtmp(i)=xtmp(i)+h;
    float en_plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-(2*h);
    float en_minus=func.evaluate(xtmp);
    deriv=(en_plus-en_minus)/(2*h);
  }
  else{
    xtmp(i)=xtmp(i)+(2*h);
    float en_2plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-(2*h);
    float en_minus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_2minus=func.evaluate(xtmp);
    deriv=(-en_2plus+8*en_plus-8*en_minus+en_2minus)/(12*h);
  }
  return deriv;
}
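// For reference, the three first-derivative stencils used above are the
// standard finite-difference formulas:
//   errorord==1 : f'(x) ~ ( f(x+h) - f(x) ) / h                               O(h)
//   errorord==2 : f'(x) ~ ( f(x+h) - f(x-h) ) / (2h)                          O(h^2)
//   otherwise   : f'(x) ~ ( -f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h) ) / (12h)  O(h^4)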
float diff2(const ColumnVector& x, const EvalFunction& func, int i, float h, int errorord)
{
  //computes the second derivative of func wrt the i^th parameter at point "x" with step size h
  ColumnVector xtmp=x;
  float deriv=0;
  if(errorord==1){
    xtmp(i)=xtmp(i)+(2*h);
    float en_2plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_plus=func.evaluate(xtmp);
    float en=func.evaluate(x);
    deriv=(en_2plus-2*en_plus+en)/(h*h);
  }
  else if(errorord==2){
    xtmp(i)=xtmp(i)+h;
    float en_plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-(2*h);
    float en_minus=func.evaluate(xtmp);
    float en=func.evaluate(x);
    deriv=(en_plus-2*en+en_minus)/(h*h);
  }
  else{
    xtmp(i)=xtmp(i)+(2*h);
    float en_2plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-(2*h);
    float en_minus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_2minus=func.evaluate(xtmp);
    float en=func.evaluate(x);
    deriv=(-en_2plus+16*en_plus-30*en+16*en_minus-en_2minus)/(12*h*h);
  }
  return deriv;
}
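// The corresponding second-derivative stencils:
//   errorord==1 : f''(x) ~ ( f(x+2h) - 2f(x+h) + f(x) ) / h^2                 O(h)
//   errorord==2 : f''(x) ~ ( f(x+h) - 2f(x) + f(x-h) ) / h^2                  O(h^2)
//   otherwise   : f''(x) ~ ( -f(x+2h) + 16f(x+h) - 30f(x)
//                            + 16f(x-h) - f(x-2h) ) / (12h^2)                 O(h^4)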
float diff2(const ColumnVector& x, const EvalFunction& func, int i, int j, float h, int errorord)
{
  //computes the cross derivative of func wrt the i^th and j^th parameters at point "x" with step size h
  ColumnVector xtmp=x;
  float deriv=0;
  if(errorord==1){
    xtmp(i)=xtmp(i)+h; xtmp(j)=xtmp(j)+h;
    float en_iplus_jplus=func.evaluate(xtmp);
    xtmp(j)=xtmp(j)-h;
    float en_iplus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h; xtmp(j)=xtmp(j)+h;
    float en_jplus=func.evaluate(xtmp);
    float en=func.evaluate(x);
    deriv=(en_iplus_jplus-en_iplus-en_jplus+en)/(h*h);
  }
  else if(errorord==2){
    xtmp(i)=xtmp(i)+h; xtmp(j)=xtmp(j)+h;
    float en_iplus_jplus=func.evaluate(xtmp);
    xtmp(j)=xtmp(j)-2*h;
    float en_iplus_jminus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-2*h; xtmp(j)=xtmp(j)+2*h;
    float en_iminus_jplus=func.evaluate(xtmp);
    xtmp(j)=xtmp(j)-2*h;
    float en_iminus_jminus=func.evaluate(xtmp);
    deriv=(en_iplus_jplus-en_iplus_jminus-en_iminus_jplus+en_iminus_jminus)/(4*h*h);
  }
  else{
    //walk xtmp around the 16 stencil points with offsets in {+-h,+-2h}
    //in each of the i and j directions
    xtmp(i)=xtmp(i)+2*h; xtmp(j)=xtmp(j)+2*h;
    float en_i2plus_j2plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_iplus_j2plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-2*h;
    float en_iminus_j2plus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_i2minus_j2plus=func.evaluate(xtmp);
    xtmp(j)=xtmp(j)-h;
    float en_i2minus_jplus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)+h;
    float en_iminus_jplus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)+2*h;
    float en_iplus_jplus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)+h;
    float en_i2plus_jplus=func.evaluate(xtmp);
    xtmp(j)=xtmp(j)-2*h;
    float en_i2plus_jminus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_iplus_jminus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-2*h;
    float en_iminus_jminus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)-h;
    float en_i2minus_jminus=func.evaluate(xtmp);
    xtmp(j)=xtmp(j)-h;
    float en_i2minus_j2minus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)+h;
    float en_iminus_j2minus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)+2*h;
    float en_iplus_j2minus=func.evaluate(xtmp);
    xtmp(i)=xtmp(i)+h;
    float en_i2plus_j2minus=func.evaluate(xtmp);
    deriv=(en_i2plus_j2plus-8*en_iplus_j2plus+8*en_iminus_j2plus-en_i2minus_j2plus
           -8*en_i2plus_jplus+64*en_iplus_jplus-64*en_iminus_jplus+8*en_i2minus_jplus
           +8*en_i2plus_jminus-64*en_iplus_jminus+64*en_iminus_jminus-8*en_i2minus_jminus
           -en_i2plus_j2minus+8*en_iplus_j2minus-8*en_iminus_j2minus+en_i2minus_j2minus)/(144*h*h);
  }
  return deriv;
}
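// The cross-derivative stencils above are tensor products of the
// corresponding one-dimensional first-derivative stencils: e.g. the
// fourth-order weights (+-1, +-8, +-64) are products of the 1D weights
// (-1, 8, -8, 1)/(12h) applied first in i and then in j, hence the
// overall factor of 1/(144h^2).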
ReturnMatrix gradient(const ColumnVector& x, const EvalFunction& func, float h, int errorord)
{
  ColumnVector deriv(x.Nrows());
  deriv = 0;
  for(int i=1;i<=x.Nrows();i++){
    deriv(i) = diff1(x,func,i,h,errorord);
  }
  deriv.Release();
  return deriv;
}
ReturnMatrix hessian(const ColumnVector& x, const EvalFunction& func, float h, int errorord)
{ //evaluates the Hessian of the function at x in parameter space
  //errorord=4 requires something like 8n^2-3n evaluations
  //errorord=2 requires something like 2n^2+n evaluations
  //errorord=1 requires the same as errorord=2, so there is no point in using it here.
  // NB hessian will compute _all_ derivatives even if non-varying parameters
  // exist. The user must prune out rows/columns that are not needed.
  SymmetricMatrix hess(x.Nrows());
  for(int i=1;i<=x.Nrows();i++){
    for(int j=1;j<=i;j++){
      if(i!=j) hess(i,j)=diff2(x,func,i,j,h,errorord);
      else     hess(i,j)=diff2(x,func,i,h,errorord);
    }
  }
  hess.Release();
  return hess;
}
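// Illustrative usage sketch, not part of the library. QuadraticCost is a
// hypothetical EvalFunction subclass (EvalFunction is declared in
// minimize.h); it assumes the "float evaluate(const ColumnVector&) const"
// virtual interface used throughout this file.
//
//   class QuadraticCost : public EvalFunction {
//   public:
//     // f(x) = sum_i x_i^2, so grad_i = 2*x_i and the Hessian is 2*I
//     float evaluate(const ColumnVector& x) const { return DotProduct(x,x); }
//   };
//
//   QuadraticCost f;
//   ColumnVector x(2);
//   x(1)=1.0; x(2)=-2.0;
//   ColumnVector g = gradient(x,f,1e-3,4);    // approximately (2,-4)
//   SymmetricMatrix H = hessian(x,f,1e-3,4);  // approximately 2*I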
void minsearch(ColumnVector& x, const EvalFunction& func, ColumnVector& paramstovary)
{
  //perform generic function minimization without gradient info
  //(Nelder-Mead simplex search)

  // Number of nonvarying parameters
  int n_nonvary=0;
  for(int i=1;i<=paramstovary.Nrows();i++){
    if(paramstovary(i)>0){
      paramstovary(i)=1;
    }
    else{
      paramstovary(i)=0;
      n_nonvary++;
    }
  }
  //Number of parameters to estimate
  int n=x.Nrows()-n_nonvary, maxiter=200*n, iter=0;
  int func_evals=0;
  // Some things we'll need.
  float rho=1,chi=2,psi=0.5,sigma=0.5;
  float tolx=1e-6,tolf=1e-6;
  // We want to store the best n+1 parameter estimates
  // I'm going to store them as a vector of pairs of floats and ColVecs
  // so I can sort them based on energy
  vector<pair<float, ColumnVector> > v;
  float en=func.evaluate(x);
  func_evals++;
  pair<float, ColumnVector> tmppair;
  tmppair.first=en;
  tmppair.second=x;
  v.push_back(tmppair);
  float usual_delta=0.05,zero_term_delta=0.00025;
  //perturb each varying parameter by a bit, and store the cost,
  //giving the n+1 vertices of the initial simplex.
  ColumnVector y;
  for(int i=1;i<=x.Nrows();i++){
    // The values of nonvarying parameters should be the same in
    // all of the optional param vectors and therefore in all
    // combinations of them in the remainder of the code.
    if(paramstovary(i)==1){
      y=x;
      if(y(i)!=0){y(i)=(1+usual_delta)*y(i);}
      else{y(i)=(1+zero_term_delta);}
      en=func.evaluate(y);
      func_evals++;
      tmppair.first=en;
      tmppair.second=y;
      v.push_back(tmppair);
    }
  }
  sort(v.begin(),v.end(),pair_comparer()); //wasn't that easy...
  string how="";
  ColumnVector xbar(x.Nrows()),xr,xe,xc,xcc;
  //cerr<<"starting loop"<<endl;
  while(iter<=maxiter){
    iter++;
    //check for convergence: best and worst energies within tolf,
    //and all vertices within tolx of each other elementwise
    if(v[n].first-v[0].first < tolf){
      ColumnVector tmpvec1,tmpvec2;
      bool stopsearch=true;
      for(int i=0;i<n;i++){//iterate over paramsets
        tmpvec1=v[i].second;
        tmpvec2=v[i+1].second;
        for(int j=1;j<=tmpvec1.Nrows();j++){//iterate over params
          if(fabs( tmpvec1(j)-tmpvec2(j) ) > tolx){stopsearch=false;}
        }
      }
      if(stopsearch){break;}
    }
    //compute reflection point
    // xbar is average of best n paramsets.
    xbar=0;
    for(int i=0;i<n;i++){
      xbar=xbar+v[i].second;
    }
    xbar=xbar/n;
    xr=(1+rho)*xbar-rho*v[n].second; //reflection point
    float en_xr=func.evaluate(xr); func_evals++;
    if(en_xr < v[0].first){ //en_xr is better than our current best
      //compute expansion point
      xe=(1+rho*chi)*xbar-rho*chi*v[n].second;
      float en_xe=func.evaluate(xe); func_evals++;
      if(en_xe<en_xr){
        tmppair.first=en_xe;
        tmppair.second=xe;
        v[n]=tmppair;
        how="expand";
      }
      else{ //en_xr < en_xe
        tmppair.first=en_xr;
        tmppair.second=xr;
        v[n]=tmppair;
        how="reflect1";
      }
    }
    else{ //en_xr is worse than our current best
      if(en_xr<=v[n-1].first){ //en_xr is better than our current second worst
        tmppair.first=en_xr;
        tmppair.second=xr;
        v[n]=tmppair;
        how="reflect2";
      }
      else{ //en_xr is worse than our current second worst
        //perform contraction
        if(en_xr<v[n].first){ //en_xr better than current worst
          //perform outside contraction
          xc=(1+rho*psi)*xbar-rho*psi*v[n].second;
          float en_xc = func.evaluate(xc); func_evals++;
          if(en_xc<=v[n].first){ //en_xc is better than our current worst
            tmppair.first=en_xc;
            tmppair.second=xc;
            v[n]=tmppair;
            how="contract outside";
          }
          else{ //xc no good
            //perform a shrink
            how="shrink";
          }
        }
        else{ //en_xr worse than current worst
          //perform inside contraction
          xcc = (1-psi)*xbar + psi*v[n].second;
          float en_xcc=func.evaluate(xcc); func_evals++;
          if(en_xcc<v[n].first){
            tmppair.first=en_xcc;
            tmppair.second=xcc;
            v[n]=tmppair;
            how="contract inside";
          }
          else{
            //perform a shrink
            how="shrink";
          }
        }
        if(how=="shrink"){
          //shrink all vertices except the best towards the best
          for(int i=1;i<n+1;i++){
            tmppair.second=v[0].second+sigma*(v[i].second-v[0].second);
            tmppair.first=func.evaluate(tmppair.second); func_evals++;
            v[i]=tmppair;
          }
        }
      }
    }
    sort(v.begin(),v.end(),pair_comparer()); //pair_comparer() constructs a temporary comparator object
  } //closing while
  x=v[0].second; //return the best vertex found
}
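// The constants above (rho=1 reflection, chi=2 expansion, psi=0.5
// contraction, sigma=0.5 shrink, and the 5%/0.00025 initial perturbations)
// are the standard Nelder-Mead choices, the same ones used by MATLAB's
// fminsearch. Illustrative usage sketch, reusing the hypothetical
// QuadraticCost class from the comment after hessian():
//
//   QuadraticCost f;
//   ColumnVector x(3), ptv(3);
//   x = 1; ptv = 1;         // start at (1,1,1), vary all three parameters
//   minsearch(x, f, ptv);   // on return x holds the best vertex found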
void scg(ColumnVector& x, const gEvalFunction& func, ColumnVector& paramstovary, float tol, float eps, int niters)
{
  //number of nonvarying parameters.
  int n_nonvary=0;
  for(int i=1;i<=paramstovary.Nrows();i++){
    if(paramstovary(i)>0){
      paramstovary(i)=1;
    }
    else{
      paramstovary(i)=0;
      n_nonvary++;
    }
  }
  int fevals=0;
  int gevals=0;
  int nparams=x.Nrows();
  float sigma0 = 1.0e-4;
  float fold=func.evaluate(x); fevals++;
  float fnow=0,fnew=0;
  //Compute the gradient, zeroing the elements for parameters
  //which we do not wish to estimate
  ColumnVector gradold=func.g_evaluate(x); gevals++;
  gradold=SP(gradold,paramstovary);
  ColumnVector gradnew=gradold;
  ColumnVector d=-gradnew; // search direction
  ColumnVector xplus,xnew,gplus;
  bool success=true;
  int nsuccess=0;
  float lambda=1.0;
  float lambdamin = 1.0e-15;
  float lambdamax = 1.0e15;
  int j = 1;
  float mu=0,kappa=0,sigma=0,gamma=0,alpha=0,delta=0,Delta=0,beta=0;
  // main loop..
  while(j<niters){
    if(success){
      mu=(d.t()*gradnew).AsScalar();
      if(mu >= 0){ //d is not a descent direction - restart along the gradient
        d=-gradnew;
        mu=(d.t()*gradnew).AsScalar();
      }
      kappa=(d.t()*d).AsScalar();
      if(kappa<eps){
        break;
      }
      //estimate the curvature along d from a finite difference of gradients
      sigma=sigma0/std::sqrt(kappa);
      xplus = x + sigma*d;
      gplus=func.g_evaluate(xplus); gevals++;
      gplus=SP(gplus,paramstovary);
      gamma = (d.t()*(gplus - gradnew)).AsScalar()/sigma;
    }
    //regularise the curvature estimate to keep delta positive
    delta = gamma + lambda*kappa;
    if (delta <= 0){
      delta = lambda*kappa;
      lambda = lambda - gamma/kappa;
    }
    alpha = -mu/delta; //step size along d
    xnew = x + alpha*d;
    fnew=func.evaluate(xnew); fevals++;
    //comparison ratio between actual and predicted decrease
    Delta = 2*(fnew - fold)/(alpha*mu);
    if (Delta >= 0){
      success = true;
      nsuccess = nsuccess + 1;
      x = xnew;
      fnow = fnew;
    }
    else{
      success = false;
      fnow = fold;
    }
    if (success){
      //Test for termination...
      if ( (max(abs(d*alpha))).AsScalar() < tol && std::abs(fnew-fold) < tol){
        break;
      }
      else{
        fold = fnew;
        gradold = gradnew;
        gradnew=func.g_evaluate(x); gevals++;
        gradnew=SP(gradnew,paramstovary);
        if ((gradnew.t()*gradnew).AsScalar() == 0){
          break;
        }
      }
    }
    //adjust lambda according to the comparison ratio
    if (Delta < 0.25){
      // lambda = min(4.0*lambda, lambdamax);
      lambda = 4.0*lambda < lambdamax ? 4.0*lambda : lambdamax;
    }
    if (Delta > 0.75){
      // lambda = max(0.5*lambda, lambdamin);
      lambda = 0.5*lambda > lambdamin ? 0.5*lambda : lambdamin;
    }
    if (nsuccess == nparams){
      //after nparams successes, restart along steepest descent
      d = -gradnew;
      nsuccess = 0;
    }
    else{
      if (success){
        beta = ((gradold - gradnew).t()*gradnew).AsScalar()/mu;
        d = beta*d - gradnew;
      }
    }
    j++;
  }
}
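// scg follows Moller's scaled conjugate gradient algorithm (the structure
// mirrors Netlab's scg.m): lambda is a Levenberg-Marquardt style scale
// parameter grown or shrunk via the comparison ratio Delta. Illustrative
// usage sketch; QuadraticGradCost is a hypothetical gEvalFunction subclass
// (gEvalFunction is declared in minimize.h) supplying the analytic
// gradient through g_evaluate, as called in scg above:
//
//   class QuadraticGradCost : public gEvalFunction {
//   public:
//     float evaluate(const ColumnVector& x) const { return DotProduct(x,x); }
//     ReturnMatrix g_evaluate(const ColumnVector& x) const {
//       ColumnVector g = 2.0*x;  // gradient of sum_i x_i^2
//       g.Release(); return g;
//     }
//   };
//
//   QuadraticGradCost f;
//   ColumnVector x(2), ptv(2);
//   x(1)=3.0; x(2)=-1.0; ptv=1;
//   scg(x, f, ptv, 1e-6, 1e-10, 100);  // x converges towards (0,0)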
} // namespace MISCMATHS