/*! \file nonlin.h
\brief Declarations for nonlinear optimisation
\author Jesper Andersson
\version 1.0b, 2009.
*/
// Contains declarations for nonlinear optimisation
//
// nonlin.h
//
// Jesper Andersson, FMRIB Image Analysis Group
//
// Copyright (C) 2009 University of Oxford
//
#ifndef nonlin_h
#define nonlin_h
#include <string>
#include <vector>
#include <iostream>
#include <iomanip>
#include <boost/shared_ptr.hpp>
#include "bfmatrix.h"
#include "newmat.h"
namespace MISCMATHS {
enum NLMethod {NL_VM,   // Variable-Metric (see NRinC)
NL_CG,   // Conjugate-Gradient (see NRinC)
NL_SCG,  // Scaled Conjugate-Gradient (see Moller 1993)
NL_LM,   // Levenberg-Marquardt (see NRinC)
NL_GD,   // Gradient Descent
NL_NM};  // Nelder-Mead simplex method (see NRinC)
enum LMType {LM_L, LM_LM}; // Levenberg or Levenberg-Marquardt
enum VMUpdateType {VM_DFP, VM_BFGS}; // See NRinC chapter 10.
enum CGUpdateType {CG_FR, CG_PR}; // Fletcher-Reeves, Polak-Ribiere
enum VMMatrixType {VM_OPT, // Let the code decide: full matrix for small problems, column-updates for large ones
VM_COL, // Store all rank-one updates as column-vectors
VM_FULL}; // Store full estimate of inverse Hessian
enum LinOut {LM_MAXITER, // Too many iterations in line-minimisation
LM_LAMBDA_NILL, // Could not find a minimum along this direction
LM_CONV}; // Line-minimisation converged.
enum NonlinOut {NL_UNDEFINED, // Initial value before minimisation
NL_MAXITER, // Too many iterations
NL_LM_MAXITER, // Too many iterations during a line-minimisation
NL_PARCONV, // Convergence. Step in parameter space small
NL_GRADCONV, // Convergence. Gradient small
NL_CFCONV, // Convergence. Change in cost-function small
NL_LCONV}; // Convergence, lambda very large
const double EPS = 2.0e-16; // Loosely based on NRinC 20.1
class NonlinException: public std::exception
{
private:
std::string m_msg;
public:
NonlinException(const std::string& msg) throw(): m_msg("Nonlin: msg=" + msg) {}
virtual const char * what() const throw() {
return(m_msg.c_str());
}
~NonlinException() throw() {}
};
// NonlinParam is a class that contains the
// information about "how" the
// minimisation should be performed, i.e. it
// contains things like choice of minimisation
// algorithm, # of parameters, convergence
// criteria etc.
class NonlinParam
{
public:
NonlinParam(int pnpar,
NLMethod pmtd,
NEWMAT::ColumnVector ppar=NEWMAT::ColumnVector(),
bool plogcf=false,
bool ploglambda=false,
bool plogpar=false,
int pmaxiter=200,
double pcftol=1.0e-8,
double pgtol=1.0e-8,
double pptol=4.0*EPS,
VMUpdateType pvmut=VM_BFGS,
double palpha=1.0e-4,
double pstepmax=10,
int plm_maxiter=50,
int pmaxrestart=0,
bool pautoscale=true,
CGUpdateType pcgut=CG_PR,
double plm_ftol=1.0e-3,
LMType plmtype=LM_LM,
double pltol=1.0e20,
int pcg_maxiter=200,
double pcg_tol=1.0e-6,
double plambda=0.1)
: npar(pnpar), mtd(pmtd), logcf(plogcf), loglambda(ploglambda),
logpar(plogpar), verb(false), maxiter(pmaxiter), pw(10), pp(5),
cfw(10), cfp(5), cftol(pcftol), gtol(pgtol), ptol(pptol),
vmut(pvmut), alpha(palpha), stepmax(pstepmax), lm_maxiter(plm_maxiter),
maxrestart(pmaxrestart), autoscale(pautoscale), cgut(pcgut),
lm_ftol(plm_ftol), lmtype(plmtype), ltol(pltol), cg_maxiter(pcg_maxiter),
cg_tol(pcg_tol), lambda(), cf(), par(), niter(0),
nrestart(0), status(NL_UNDEFINED)
{
lambda.push_back(plambda);
if (ppar.Nrows()) SetStartingEstimate(ppar);
else {
NEWMAT::ColumnVector tmp(npar);
tmp = 0.0;
SetStartingEstimate(tmp);
}
amoeba_start.ReSize(npar);
amoeba_start = 1.0;
}
~NonlinParam() {}
// Routines to check values
int NPar() const {return(npar);}
NLMethod Method() const {return(mtd);}
int MaxIter() const {return(maxiter);}
int NIter() const {return(niter);}
double FractionalCFTolerance() const {return(cftol);}
double FractionalGradientTolerance() const {return(gtol);}
double FractionalParameterTolerance() const {return(ptol);}
VMUpdateType VariableMetricUpdate() const {return(vmut);}
double VariableMetricAlpha() const {return(alpha);}
int MaxVariableMetricRestarts() const {return(maxrestart);}
int VariableMetricRestarts() const {return(nrestart);}
bool VariableMetricAutoScale() const {return(autoscale);}
double LineSearchMaxStep() const {return(stepmax);}
int LineSearchMaxIterations() const {return(lm_maxiter);}
CGUpdateType ConjugateGradientUpdate() const {return(cgut);}
double LineSearchFractionalParameterTolerance() const {return(lm_ftol);}
LMType GaussNewtonType() const {return(lmtype);}
double LambdaConvergenceCriterion() const {return(ltol);}
int EquationSolverMaxIter() const {return(cg_maxiter);}
double EquationSolverTol() const {return(cg_tol);}
NEWMAT::ColumnVector GetAmoebaStart() const {return(amoeba_start);}
bool LoggingParameters() const {return(logpar);}
bool LoggingCostFunction() const {return(logcf);}
bool LoggingLambda() const {return(loglambda);}
bool Verbose() const {return(verb);}
// Routines to get output
double Lambda() const {return(lambda.back());}
double InitialLambda() const {if (loglambda) return(lambda[0]); else throw NonlinException("InitialLambda: Lambda not logged"); }
const std::vector<double>& LambdaHistory() const {if (loglambda) return(lambda); else throw NonlinException("LambdaHistory: Lambda not logged"); }
const NEWMAT::ColumnVector& Par() const {return(par.back());}
const NEWMAT::ColumnVector& InitialPar() const {if (logpar) return(par[0]); else throw NonlinException("InitialPar: Parameters not logged"); }
const std::vector<NEWMAT::ColumnVector>& ParHistory() const {if (logpar) return(par); else throw NonlinException("ParHistory: Parameters not logged"); }
double CF() const {return(cf.back());}
double InitialCF() const {if (logcf) return(cf[0]); else throw NonlinException("InitialCF: Cost-function not logged"); }
const std::vector<double>& CFHistory() const {if (logcf) return(cf); else throw NonlinException("CFHistory: Cost-function not logged"); }
NonlinOut Status() const {return(status);}
bool Success() const { switch(status) { case NL_UNDEFINED: case NL_MAXITER: case NL_LM_MAXITER: return(false); default: return(true); } };
// Routines to set values of steering parameters
void SetVerbose(bool on=true) {verb = on;}
void SetPrintWidthPrecision(int ppw, int ppp, int pcfw, int pcfp) {pw=ppw; pp=ppp; cfw=pcfw; cfp=pcfp;}
void SetMethod(NLMethod pmtd) {mtd = pmtd;}
void LogCF(bool flag=true) {logcf = flag;}
void LogPar(bool flag=true) {logpar = flag;}
void LogLambda(bool flag=true) {loglambda = flag;}
void SetStartingEstimate(const NEWMAT::ColumnVector& sp) {
if (niter) throw NonlinException("SetStartingEstimates: Object has to be reset before setting new starting parameters");
SetPar(sp);
}
void SetMaxIter(unsigned int pmiter) {maxiter = pmiter;}
void SetFractionalCFTolerance(double pcftol) {
if (pcftol>0.5) throw NonlinException("SetFractionalCFTolerance: Nonsensically large tolerance");
else if (pcftol <= 0.0) throw NonlinException("SetFractionalCFTolerance: Tolerance must be non-zero and positive");
cftol = pcftol;
}
void SetFractionalGradientTolerance(double pgtol) {
if (pgtol>0.5) throw NonlinException("SetFractionalGradientTolerance: Nonsensically large tolerance");
else if (pgtol <= 0.0) throw NonlinException("SetFractionalGradientTolerance: Tolerance must be non-zero and positive");
gtol = pgtol;
}
void SetFractionalParameterTolerance(double pptol) {
if (pptol>0.5) throw NonlinException("SetFractionalParameterTolerance: Nonsensically large tolerance");
else if (pptol <= 0.0) throw NonlinException("SetFractionalParameterTolerance: Tolerance must be non-zero and positive");
ptol = pptol;
}
void SetVariableMetricUpdate(VMUpdateType pvmut) {vmut = pvmut;}
void SetVariableMetricAlpha(double palpha) {
if (palpha>=1.0 || palpha<=0.0) throw NonlinException("SetVariableMetricAlpha: Alpha must be between 0 and 1");
alpha = palpha;
}
void SetMaxVariableMetricRestarts(unsigned int pmaxrestart) {maxrestart = pmaxrestart;}
void SetVariableMetricAutoScale(bool flag=true) {autoscale = flag;}
void SetLineSearchMaxStep(double pstepmax) {
if (pstepmax<=0) throw NonlinException("SetLineSearchMaxStep: maxstep must be non-zero and positive");
stepmax = pstepmax;
}
void SetLineMinimisationMaxIterations(unsigned int plm_maxiter) {lm_maxiter = plm_maxiter;}
void SetConjugateGradientUpdate(CGUpdateType pcgut) {cgut = pcgut;}
void SetLineMinimisationFractionalParameterTolerance(double plm_ftol) {
if (plm_ftol>0.5) throw NonlinException("SetLineMinimisationFractionalParameterTolerance: Nonsensically large tolerance");
else if (plm_ftol <= 0.0) throw NonlinException("SetLineMinimisationFractionalParameterTolerance: Tolerance must be non-zero and positive");
lm_ftol = plm_ftol;
}
void SetGaussNewtonType(LMType plmtype) {lmtype = plmtype;}
void SetLambdaConvergenceCriterion(double pltol) {
if (pltol<1.0) throw NonlinException("SetLambdaConvergenceCriterion: Nonsensically small tolerance");
ltol = pltol;
}
void SetEquationSolverMaxIter(int pcg_maxiter) {cg_maxiter = pcg_maxiter;}
void SetEquationSolverTol(double pcg_tol) {cg_tol = pcg_tol;}
void SetStartAmoeba(const NEWMAT::ColumnVector& l) {
if (l.Nrows() != npar) throw NonlinException("SetStartAmoeba: npar and amoeba mismatch");
else amoeba_start = l;
}
// Reset is used to reset a NonlinParam object after it has run to convergence, thereby allowing it
// to be reused with a different CF object. This is to avoid the cost of creating the object many
// times when fitting for example multiple voxels.
void Reset() {}
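//
// Example of that reuse pattern (a sketch only; VoxelCF, vox_data, n_voxels and
// p0 are hypothetical stand-ins for the caller's own cost-function and data):
//
//   MISCMATHS::NonlinParam nlp(p0.Nrows(),MISCMATHS::NL_CG);
//   for (unsigned int v=0; v<n_voxels; v++) {
//     nlp.Reset();
//     nlp.SetStartingEstimate(p0);
//     VoxelCF cfo(vox_data[v]);          // User-defined subclass of NonlinCF (see below)
//     MISCMATHS::nonlin(nlp,cfo);
//     // ... harvest nlp.Par(), nlp.CF() and nlp.Status() for this voxel ...
//   }
//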
// Routines used by the (global) non-linear fitting routines. Note that these can
// all be called for const objects.
void SetPar(const NEWMAT::ColumnVector& p) const {
if (p.Nrows() != npar) throw NonlinException("SetPar: Mismatch between starting vector and # of parameters");
if (logpar || !par.size()) par.push_back(p);
else par[0] = p;
if (Verbose()) std::cout << std::setw(pw) << std::setprecision(pp) << "p = " << p.t();
}
void SetCF(double pcf) const {
if (logcf || !cf.size()) cf.push_back(pcf);
else cf[0] = pcf;
if (Verbose()) std::cout << std::setw(cfw) << std::setprecision(cfp) << "cf = " << pcf << std::endl;
}
void SetLambda(double pl) const {
if (loglambda || !lambda.size()) lambda.push_back(pl);
else lambda[0] = pl;
}
bool NextIter(bool success=true) const {if (success && niter++ >= maxiter) return(false); else return(true);}
bool NextRestart() const {if (nrestart++ >= maxrestart) return(false); else return(true);}
void SetStatus(NonlinOut pstatus) const {status = pstatus;}
private:
// INPUT PARAMETERS
//
// Parameters that apply to all algorithms
const int npar; // # of parameters
NLMethod mtd; // Minimisation method
bool logcf; // If true, history of cost-function is logged
bool loglambda; // If true, history of lambda is logged
bool logpar; // If true, history of parameters is logged
bool verb; // Prints parameter and cf values for each iteration
int maxiter; // Maximum # of iterations allowed
int pw; // Print-width of parameter values
int pp; // Print-precision of parameter values
int cfw; // Print-width of cost function
int cfp; // Print-precision of cost function
double cftol; // Tolerance for cost-function convergence criterion
double gtol; // Tolerance for gradient convergence criterion
double ptol; // Tolerance for parameter convergence criterion
// Parameters that apply to Variable-Metric Algorithm
VMUpdateType vmut; // DFP or BFGS
double alpha; // Criterion for convergence in line minimisation
double stepmax; // Maximum step length for line minimisation
int lm_maxiter; // Maximum # of iterations for line minimisation
int maxrestart; // Maximum # of restarts that should be done.
bool autoscale; // "Automatic" search for optimal scaling
// Parameters that apply to CG algorithm
CGUpdateType cgut; // Fletcher-Reeves or Polak-Ribiere
double lm_ftol; // Convergence criterion for line-search
// Parameters that apply to LM algorithm
LMType lmtype; // Levenberg or Levenberg-Marquardt
double ltol; // Convergence criterion based on large lambda
int cg_maxiter; // Maximum # of iterations for iterative "inverse" of Hessian
double cg_tol; // Tolerance for iterative "inverse" of Hessian
// Parameters that apply to amoeba/simplex algorithm
NEWMAT::ColumnVector amoeba_start; // If set, specifies what the initial amoeba looks like.
//
// OUTPUT PARAMETERS
//
mutable std::vector<double> lambda; // (History of) lambda (LM and SCG type minimisation)
mutable std::vector<double> cf; // (History of) cost-function
mutable std::vector<NEWMAT::ColumnVector> par; // (History of) Parameter estimates
mutable int niter; // Number of iterations
mutable int nrestart; // Number of restarts
mutable NonlinOut status; // Output status
NonlinParam& operator=(const NonlinParam& rhs); // Hide assignment
};
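//
// Example of setting up a NonlinParam object (a minimal sketch; the parameter
// count, tolerances and method shown are purely illustrative):
//
//   NEWMAT::ColumnVector p0(4); p0 = 0.0;                // Starting guess
//   MISCMATHS::NonlinParam nlp(4,MISCMATHS::NL_CG,p0);   // 4 parameters, conjugate-gradient
//   nlp.SetMaxIter(100);                                 // Allow at most 100 iterations
//   nlp.SetFractionalCFTolerance(1.0e-6);                // Stricter cost-function convergence
//   nlp.LogCF();                                         // Keep the history of cost-function values
//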
// NonlinCF (Cost Function) is a virtual
// base class that defines a minimal interface.
// By subclassing NonlinCF the user can wrap
// his/her own cost-function so that it can be
// minimised by the nonlin() routine below.
class NonlinCF
{
private:
NonlinCF& operator=(const NonlinCF& rhs); // Hide assignment
public:
NonlinCF() {}
virtual ~NonlinCF() {}
virtual double sf() const {return(1.0);}
virtual NEWMAT::ReturnMatrix grad(const NEWMAT::ColumnVector& p) const;
virtual boost::shared_ptr<BFMatrix> hess(const NEWMAT::ColumnVector& p,
boost::shared_ptr<BFMatrix> iptr=boost::shared_ptr<BFMatrix>()) const;
virtual double cf(const NEWMAT::ColumnVector& p) const = 0;
};
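//
// Example of a user-defined cost-function (a minimal sketch; the class name
// SumOfSquaresCF and the quadratic cost it implements are purely illustrative):
//
//   class SumOfSquaresCF: public MISCMATHS::NonlinCF
//   {
//   public:
//     explicit SumOfSquaresCF(const NEWMAT::ColumnVector& y) : m_y(y) {}
//     // Only cf() is pure virtual and must be implemented; grad() and
//     // hess() have overridable default implementations.
//     virtual double cf(const NEWMAT::ColumnVector& p) const {
//       double ssq = 0.0;
//       for (int i=1; i<=m_y.Nrows(); i++) ssq += (m_y(i)-p(1))*(m_y(i)-p(1));
//       return(ssq);
//     }
//   private:
//     NEWMAT::ColumnVector m_y;
//   };
//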
// VarmetMatrix is a "helper" class
// that makes it a little easier to
// implement variable-metric minimisation.
class VarmetMatrix
{
private:
int sz;
VMMatrixType mtp;
VMUpdateType utp;
NEWMAT::Matrix mat;
std::vector<double> sf;
std::vector<NEWMAT::ColumnVector> vec;
VarmetMatrix& operator=(const VarmetMatrix& rhs); // Hide assignment
public:
explicit VarmetMatrix(int psz, VMMatrixType pmtp, VMUpdateType putp)
: sz(psz), mtp(pmtp), utp(putp)
{
if (sz > 0 && mtp == VM_OPT) {
if (sz < 100) {
mtp = VM_FULL;
NEWMAT::IdentityMatrix tmp(sz);
mat = tmp;
}
else {
mtp = VM_COL;
}
}
}
~VarmetMatrix() {}
int size() {return(sz);}
VMUpdateType updatetype() {return(utp);}
VMMatrixType matrixtype() {return(mtp);}
void print() const;
void reset()
{
if (sz > 0) {
if (mtp == VM_FULL) {
NEWMAT::IdentityMatrix tmp(sz);
mat = tmp;
}
else if (mtp == VM_COL) {
sf.clear();
vec.clear();
}
}
}
void update(const NEWMAT::ColumnVector& pdiff, // x_{i+1} - x_i
const NEWMAT::ColumnVector& gdiff); // \nabla f_{i+1} - \nabla f_i
friend NEWMAT::ColumnVector operator*(const VarmetMatrix& m,
const NEWMAT::ColumnVector& v);
};
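//
// Example of how VarmetMatrix might be used inside one variable-metric
// iteration (a schematic sketch; x, g, new_x and new_g are assumed to be
// maintained by the surrounding algorithm and its line-search):
//
//   MISCMATHS::VarmetMatrix H(x.Nrows(),MISCMATHS::VM_OPT,MISCMATHS::VM_BFGS);
//   NEWMAT::ColumnVector dir = -(H*g);   // Search direction from current inverse-Hessian estimate
//   // ... line-minimise along dir to obtain new_x, and evaluate new_g there ...
//   H.update(new_x-x,new_g-g);           // BFGS (or DFP) update of the inverse-Hessian estimate
//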
// Declaration of (global) main function for minimisation
NonlinOut nonlin(const NonlinParam& p, const NonlinCF& cfo);
// Declaration of global utility functions
std::pair<NEWMAT::ColumnVector,NEWMAT::ColumnVector> check_grad(const NEWMAT::ColumnVector& par,
const NonlinCF& cfo);
std::pair<boost::shared_ptr<BFMatrix>,boost::shared_ptr<BFMatrix> > check_hess(const NEWMAT::ColumnVector& par,
const NonlinCF& cfo);
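//
// Example of a complete fit (a minimal sketch; SumOfSquaresCF is the hypothetical
// cost-function sketched above, and y/p0 are the caller's data and starting guess):
//
//   SumOfSquaresCF cfo(y);
//   MISCMATHS::NonlinParam nlp(p0.Nrows(),MISCMATHS::NL_VM,p0);
//   MISCMATHS::NonlinOut status = MISCMATHS::nonlin(nlp,cfo);
//   if (nlp.Success()) {
//     const NEWMAT::ColumnVector& phat = nlp.Par();   // Final parameter estimates
//     double cfmin = nlp.CF();                        // Final cost-function value
//   }
//   // check_grad/check_hess return pairs of derivative estimates that can be
//   // compared against each other when debugging an analytical grad() or hess().
//   std::pair<NEWMAT::ColumnVector,NEWMAT::ColumnVector> gg = MISCMATHS::check_grad(p0,cfo);
//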
} // End namespace MISCMATHS
#endif // end #ifndef nonlin_h