Commit 51e3cb03 authored by Jesper Andersson
Non-linear optimisation using a variety of algorithms

parent 5b02f4fb
// Definitions for module nonlin
#include <iostream>
#include <fstream>
#include <iomanip>
#include <string>
#include <vector>
#include <cmath>
#include "newmat.h"
#include "newmatio.h"
#include "bfmatrix.h"
#include "nonlin.h"
using namespace std;
using namespace NEWMAT;
namespace MISCMATHS {
// Declarations of routines for use only in this module
// Main routine for Variable-Metric optimisation
NonlinOut varmet(const NonlinParam& p, const NonlinCF& cfo);
// Main routine for Conjugate-Gradient optimisation
NonlinOut congra(const NonlinParam& p, const NonlinCF& cfo);
// Main routine for scaled conjugate-gradient optimisation
NonlinOut sccngr(const NonlinParam& p, const NonlinCF& cfo);
// Main routine for Levenberg-Marquardt optimisation
NonlinOut levmar(const NonlinParam& p, const NonlinCF& cfo);
LinOut linsrch(// Input
const ColumnVector& pdir, // Search direction
const ColumnVector& p0, // Current parameter values
const ColumnVector& grad, // Gradient at p0
const NonlinCF& cfo, // Cost-function object
double f0, // Current cost-function value
double sf, // Scale factor for cost-function
int maxiter, // Max # of iterations in line minimisation
double sm, // Stepmax
double alpha, // Alpha (sufficient-decrease constant)
double ptol, // Tolerance in parameter space
// Output
double *lambda,// Resulting step length
double *of, // Value of cost-function on output
ColumnVector *np); // New parameters
double scale_factor(const ColumnVector& p, // Current parameter values
const ColumnVector& pdir, // Search direction
const NonlinCF& cfo, // Cost-function object
int maxiter, // Max # of iterations
double sf); // Scale factor.
LinOut linmin(// Input
const ColumnVector& p, // Current parameter values
const ColumnVector& pdir, // Search direction
const NonlinCF& cfo, // Cost-function object
double sf, // Scale factor for cost-function
pair<double,double> lp, // Left point
pair<double,double> mp, // Point somewhere in interval
pair<double,double> rp, // Right point
double ftol, // Fractional tolerance
int maxiter,// Max # of iterations
// Output
pair<double,double> *x); // Best point
pair<double,double> bracket(// Input
const ColumnVector& p, // Current parameter values
const ColumnVector& pdir, // Search direction
const NonlinCF& cfo, // Cost-function object
double ptol, // Relative tolerance for parameter values
double sf, // Scale factor of cost-function
// Output
pair<double,double> *p_0, // Cost-function value at p
pair<double,double> *p_m); // Point between p_0 and p_l
// Utility routines that check for convergence based on various criteria
// Based on zero (negligible) gradient
bool zero_grad_conv(const ColumnVector& par,
const ColumnVector& grad,
double cf,
double gtol);
// Based on zero (negligible) decrease in cost-function
bool zero_cf_diff_conv(double cfo,
double cfn,
double cftol);
// Based on zero (negligible) step in parameter space
bool zero_par_step_conv(const ColumnVector& par,
const ColumnVector& step,
double ptol);
void print_newmat(const NEWMAT::GeneralMatrix& m,
std::string fname);
// If the user chooses not to override the grad-method of the NonlinCF
// base class this routine will be used to calculate numerical derivatives.
ReturnMatrix NonlinCF::grad(const ColumnVector& p) const
{
ColumnVector gradv(p.Nrows());
ColumnVector tmpp = p;
double tiny = 1e-8;
double cf0 = cf(tmpp);
for (int i=0; i<p.Nrows(); i++) {
double step = tiny * max(tmpp.element(i),1.0);
tmpp.element(i) += step;
gradv.element(i) = (cf(tmpp) - cf0) / step;
tmpp.element(i) -= step;
}
gradv.Release();
return(gradv);
}
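// As a sketch of the alternative, a hypothetical subclass (QuadraticCF,
// not part of this module) could supply an analytic gradient instead,
// avoiding the O(step) truncation error of the forward differences above:
//
// class QuadraticCF: public NonlinCF
// {
// public:
//   virtual double cf(const ColumnVector& p) const {return(DotProduct(p,p));}
//   virtual ReturnMatrix grad(const ColumnVector& p) const {
//     ColumnVector gradv = 2.0*p; // Analytic gradient of f(p) = p'*p
//     gradv.Release();
//     return(gradv);
//   }
// };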
// If the user chooses not to override the hess-method of the NonlinCF
// base class this routine will calculate numerical 2nd derivatives.
// Note also that the Hessian will only be used for the Levenberg-
// Marquardt minimisation.
// It is in general _not_ a good idea to use a method that explicitly
// uses the Hessian (i.e. LM) when calculating it numerically.
// Note that it returns a (safe) pointer to BFMatrix. BFMatrix has two
// subclasses, FullBFMatrix and SparseBFMatrix, which can be used
// interchangeably depending on whether the structure of the Hessian
// renders it sparse or not.
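// The approximations used below are, for diagonal elements,
//   d2f/dx_i^2 ~ (f(x+h_i) + f(x-h_i) - 2*f(x)) / h_i^2
// and, for off-diagonal elements,
//   d2f/dx_i dx_j ~ (f(x+h_i+h_j) + f(x) - f(x+h_i) - f(x+h_j)) / (h_i*h_j)
// where h_i is the step for the i:th parameter.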
boost::shared_ptr<BFMatrix> NonlinCF::hess(const ColumnVector& p,
boost::shared_ptr<BFMatrix> iptr) const
{
boost::shared_ptr<BFMatrix> hessm;
if (iptr && int(iptr->Nrows())==p.Nrows() && int(iptr->Ncols())==p.Nrows()) hessm = iptr;
else hessm = boost::shared_ptr<BFMatrix>(new FullBFMatrix(p.Nrows(),p.Nrows()));
ColumnVector tmpp = p;
double tiny = 1e-4;
double fx0y0 = cf(tmpp);
ColumnVector fdx(p.Nrows());
ColumnVector step(p.Nrows());
// First calculate all f(x+dx_i) values
for (int i=0; i<p.Nrows(); i++) {
step.element(i) = tiny * max(tmpp.element(i),1.0);
tmpp.element(i) += step.element(i);
fdx.element(i) = cf(tmpp);
tmpp.element(i) -= step.element(i);
}
// Then values of matrix
for (int i=0; i<p.Nrows(); i++) {
for (int j=i; j<p.Nrows(); j++) {
if (i==j) { // If diagonal element
tmpp.element(i) -= step.element(i);
double tmp = cf(tmpp);
hessm->Set(i+1,i+1,(fdx.element(i) + tmp - 2.0*fx0y0) / (step.element(i)*step.element(i)));
tmpp.element(i) += step.element(i);
}
else { // If off-diagonal element
tmpp.element(i) += step.element(i);
tmpp.element(j) += step.element(j);
hessm->Set(i+1,j+1,(cf(tmpp)+fx0y0-fdx.element(i)-fdx.element(j)) / (step.element(i)*step.element(j)));
hessm->Set(j+1,i+1,hessm->Peek(i+1,j+1));
tmpp.element(i) -= step.element(i);
tmpp.element(j) -= step.element(j);
}
}
}
return(hessm);
}
// Display (for debug purposes) matrix if it is small enough for that to make sense
void VarmetMatrix::print() const
{
if (sz > 10) {
cout << "Matrix too big to be meaningful to display" << endl;
return;
}
else {
if (mtp == VM_FULL) {
cout << setw(10) << setprecision(5) << mat;
}
else if (mtp == VM_COL) {
Matrix tmp = IdentityMatrix(sz);
for (unsigned int i=0; i<sf.size(); i++) {
tmp += sf[i] * vec[i]*(vec[i]).t();
}
cout << setw(10) << setprecision(5) << tmp;
}
}
return;
}
// Update estimate of inverse Hessian based on latest step
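// With s = pdiff, y = gdiff and H the current estimate of the inverse
// Hessian, the DFP update implemented below is
//   H <- H + (s*s')/(s'*y) - (H*y)*(H*y)'/(y'*H*y)
// and the BFGS update adds the extra term
//   (y'*H*y)*u*u',  where u = s/(s'*y) - (H*y)/(y'*H*y)
// (cf. NRinC sect. 10.7). The update is skipped when s'*y is so small
// that it would be dominated by round-off.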
void VarmetMatrix::update(const NEWMAT::ColumnVector& pdiff, // x_{i+1} - x_i
const NEWMAT::ColumnVector& gdiff) // \nabla f_{i+1} - \nabla f_i
{
// Sort out if this call defines size of problem
if (pdiff.Nrows() != sz || gdiff.Nrows() != sz) {
if (sf.size() == 0 && pdiff.Nrows()==gdiff.Nrows()) {
sz = pdiff.Nrows();
if (mtp == VM_OPT) {
if (sz < 100) {mtp = VM_FULL;}
else {mtp = VM_COL;}
}
}
else {throw NonlinException("VarmetMatrix::update: mismatch between vector and matrix sizes");}
}
// Now do the actual update
double sf1 = DotProduct(pdiff,gdiff);
if ((sf1*sf1) > MISCMATHS::EPS*DotProduct(pdiff,pdiff)*DotProduct(gdiff,gdiff)) {
sf1 = 1.0 / sf1;
ColumnVector v2 = (*this) * gdiff;
double sf2 = -1.0 / DotProduct(gdiff,v2);
if (mtp == VM_FULL) {
mat += sf1 * pdiff * pdiff.t();
mat += sf2 * v2 * v2.t();
}
else {
vec.push_back(pdiff);
vec.push_back(v2);
sf.push_back(sf1);
sf.push_back(sf2);
}
if (utp == VM_BFGS) {
if (mtp == VM_FULL) {
ColumnVector u = sf1*pdiff + sf2*v2;
mat -= (1.0/sf2) * u * u.t();
}
else {
vec.push_back(sf1*pdiff + sf2*v2);
sf.push_back(-1.0/sf2);
}
}
}
}
// Multiply representation of matrix with vector
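// In VM_COL form the matrix is represented implicitly as
//   I + sum_i sf[i]*vec[i]*vec[i]'
// so the product can be formed as
//   v + sum_i sf[i]*(vec[i]'*v)*vec[i]
// without ever building the full matrix.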
ColumnVector operator*(const VarmetMatrix& m, const ColumnVector& v)
{
if (m.mtp == VM_FULL) {return(m.mat*v);}
else {
ColumnVector ov = v; // Multiplication with identity matrix
if (m.sf.size() != 0) {
std::vector<double>::const_iterator sfp;
std::vector<NEWMAT::ColumnVector>::const_iterator vep;
for (sfp=m.sf.begin(), vep=m.vec.begin(); sfp!=m.sf.end(); ++sfp, ++vep) {
double tmp = (*sfp) * DotProduct((*vep),v);
ov += tmp * (*vep);
}
}
return(ov);
}
}
// Gateway function to routines for non-linear optimisation
NonlinOut nonlin(const NonlinParam& p, const NonlinCF& cfo)
{
NonlinOut status = NL_MAXITER;
// Call functions that actually do the job
switch (p.Method()) {
case NL_VM:
status = varmet(p,cfo);
break;
case NL_CG:
status = congra(p,cfo);
break;
case NL_SCG:
status = sccngr(p,cfo);
break;
case NL_LM:
status = levmar(p,cfo);
break;
}
return(status);
}
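// A minimal usage sketch, where MyCF is a hypothetical subclass of
// NonlinCF implementing (at least) the cf-method:
//
// MyCF cfo;
// NonlinParam params(n,NL_LM);           // n parameters, Levenberg-Marquardt
// NonlinOut status = nonlin(params,cfo);
// if (status == NL_MAXITER) { /* Did not converge */ }
// ColumnVector best = params.Par();      // Best parameters found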
// Main routine for Levenberg-Marquardt optimisation
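// Each iteration solves the damped equation
//   (H + lambda*diag(H)) * step = -gradient   (LM_LM, Marquardt)
// or
//   (H + lambda*I) * step = -gradient         (LM_L, Levenberg)
// where lambda is decreased tenfold after a successful step and
// increased tenfold after a failed one.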
NonlinOut levmar(const NonlinParam& p, const NonlinCF& cfo)
{
// Calculate initial values
p.SetCF(cfo.cf(p.Par())); // Cost-function evaluated at current parameters
ColumnVector g = cfo.grad(p.Par()); // Gradient evaluated at current parameters
boost::shared_ptr<BFMatrix> H = cfo.hess(p.Par()); // Hessian evaluated at current parameters
double olambda = 0.0;
bool success = true; // True if last step decreased CF
while (p.NextIter(success)) {
for (int i=1; i<=p.NPar(); i++) { // Nudge diagonal of H
if (p.GaussNewtonType() == LM_LM) {
H->AddTo(i,i,(p.Lambda()-olambda)*H->Peek(i,i));
}
else if (p.GaussNewtonType() == LM_L) {
H->AddTo(i,i,p.Lambda()-olambda);
}
}
ColumnVector step = -H->SolveForx(g,SYM_POSDEF,p.EquationSolverTol(),p.EquationSolverMaxIter());
double ncf = cfo.cf(p.Par()+step);
if ((success = (ncf < p.CF()))) { // If last step successful (N.B. intentional assignment)
olambda = 0.0; // New H, so no need to undo old lambda
p.SetLambda(p.Lambda()/10.0);
p.SetPar(p.Par()+step);
// Check for convergence based on small decrease of cf
if (zero_cf_diff_conv(p.CF(),ncf,p.FractionalCFTolerance())) {
p.SetCF(ncf); p.SetStatus(NL_CFCONV); return(p.Status());
}
p.SetCF(ncf);
g = cfo.grad(p.Par());
H = cfo.hess(p.Par(),H);
}
else { // If last step was unsuccessful
olambda = p.Lambda(); // Returning to same H, so must undo old lambda
p.SetLambda(10.0*p.Lambda());
p.SetCF(p.CF()); // Push another copy
// Check for convergence based on _really_ large lambda
if (p.Lambda() > p.LambdaConvergenceCriterion()) {
p.SetStatus(NL_LCONV); return(p.Status());
}
}
}
// This means we did too many iterations
p.SetStatus(NL_MAXITER);
return(p.Status());
}
// Main routine for conjugate-gradient optimisation. The
// implementation follows that of Numerical Recipes
// reasonably closely.
NonlinOut congra(const NonlinParam& np, const NonlinCF& cfo)
{
// Set up initial values
np.SetCF(cfo.cf(np.Par()));
ColumnVector r = -cfo.grad(np.Par());
ColumnVector p = r;
while (np.NextIter()) {
// Check for convergence based on zero gradient
if (zero_grad_conv(np.Par(),r,np.CF(),np.FractionalGradientTolerance())) {
np.SetStatus(NL_GRADCONV); return(np.Status());
}
// Bracket minimum along p
pair<double,double> lp, mp; // Leftmost and middle point of bracket
pair<double,double> rp = bracket(np.Par(),p,cfo,np.FractionalParameterTolerance(),1.0,&lp,&mp); // Rightmost point of bracket
if (rp == lp) { // If no smaller point along p
np.SetStatus(NL_PARCONV); return(np.Status()); // Assume it is because we are at minimum
}
// Find minimum along p between lp and rp
pair<double,double> minp; // Minimum along p
LinOut lm_status = linmin(np.Par(),p,cfo,1.0,lp,mp,rp,
np.LineSearchFractionalParameterTolerance(),
np.LineSearchMaxIterations(),&minp);
// Check for problems with line-search
if (lm_status == LM_MAXITER) {np.SetStatus(NL_LM_MAXITER); return(np.Status());} // Ouch!
// Set new cf value and parameters, remembering the previous cf value
double oldcf = np.CF();
np.SetPar(np.Par() + minp.first*p);
np.SetCF(minp.second);
// Check for convergence based on small decrease of cost-function
if (zero_cf_diff_conv(oldcf,minp.second,np.FractionalCFTolerance())) {np.SetStatus(NL_CFCONV); return(np.Status());}
// Check for convergence based on neglible move in parameter space
else if (zero_par_step_conv(np.Par(),minp.first*p,np.FractionalParameterTolerance())) {np.SetStatus(NL_PARCONV); return(np.Status());}
else { // If no convergence
if (((np.NIter())%np.NPar()) == 0) { // Explicitly reset directions after npar iterations
r = -cfo.grad(np.Par());
p = r;
}
else {
ColumnVector oldr = r;
r = -cfo.grad(np.Par());
if (np.ConjugateGradientUpdate() == CG_FR) { // Get conjugate direction Fletcher-Reeves flavour
p = r + (DotProduct(r,r)/DotProduct(oldr,oldr)) * p;
}
else if (np.ConjugateGradientUpdate() == CG_PR) { // Get conjugate direction Polak-Ribiere flavour
p = r + (DotProduct(r-oldr,r)/DotProduct(oldr,oldr)) * p;
}
}
}
}
// If we get here we have used too many iterations
np.SetStatus(NL_MAXITER);
return(np.Status());
}
// Main routine for scaled conjugate-gradient optimisation. The
// idea of the algorithm is similar to that of Levenberg-
// Marquardt. In the LM algorithm the search direction is a
// "compromise" between the Newton direction and the gradient
// direction, where the compromise depends on a factor lambda.
// A large lambda means that it is close to the gradient and a
// small lambda that it is close to the Newton direction. The
// value of lambda is updated each iteration depending on the
// success of the last step. In this method the compromise is
// between the "conjugate gradient" direction and the gradient
// direction. The variable names follow the (excellent!) paper
// by Martin Møller (1993), Neural Networks 6:525-533.
// I have tried to follow the notation he uses in his paper, thus
// enabling that to be the "documentation" for the routine below.
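// Briefly, with p the current search direction and r the negative
// gradient, each iteration forms
//   s ~ H*p                                    (finite difference of gradients)
//   delta = p'*s                               (curvature along p)
//   mu = p'*r,  alpha = mu/delta               (step length along p)
//   Delta = 2*delta*(E(w) - E(w+alpha*p))/mu^2 (comparison parameter)
// where Delta governs both acceptance of the step and the update of lambda.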
NonlinOut sccngr(const NonlinParam& np, const NonlinCF& cfo)
{
// Set up initial values
np.SetCF(cfo.cf(np.Par())); // Current value for cost-function (E in Møller 1993).
double sigma = 1.0e-2; // Step-length when estimating H*p from g(w+sigma*p)-g(w)
double lambda_bar = 0.0; // Update for lambda if approximate hessian not positive definite
ColumnVector r = -cfo.grad(np.Par()); // Negative gradient
ColumnVector p = r; // Search direction
bool success = true; // True if previous step was successful
double delta = 0.0; // Used to check pos def of H in loop below
ColumnVector s(np.NPar()); // Used as approximation to H*p in loop below
while (np.NextIter()) {
double p2 = DotProduct(p,p); // p'*p, Temporary variable to save some time
if (success == true) { // If last step led to reduction of cost-function
double sigma_k = sigma/sqrt(p2); // Normalised step-length when estimating H*p
s = (cfo.grad(np.Par()+sigma_k*p) + r) / sigma_k; // Approximation to H*p
delta = DotProduct(p,s); // Approximation to p'*H*p
}
s += (np.Lambda()-lambda_bar)*p; // Equivalent to adding (l-lb)*I to H
delta += (np.Lambda()-lambda_bar)*p2; // If <0 then H+(l-lb)*I not positive definite
if (delta <= 0) { // If H is not positive definite
s += (np.Lambda() - 2.0*(delta/p2)) * p; // Make H more diagonal dominant to ensure pos def
lambda_bar = 2.0*(np.Lambda() - delta/p2);
delta = np.Lambda()*p2 - delta;
np.SetLambda(lambda_bar);
}
double mu = DotProduct(p,r);
double alpha = mu/delta; // Step size in direction p
double tmp_cf = cfo.cf(np.Par()+alpha*p); // Value of cost-function at attempted new point
// Debug output left in from development; writes the gradient and step
// for each iteration to hard-coded files
// char fname[100];
// sprintf(fname,"/Users/jesper/Desktop/gradient_%02d.txt",np.NIter());
// print_newmat(r,fname);
// sprintf(fname,"/Users/jesper/Desktop/step_%02d.txt",np.NIter());
// ColumnVector step(p); step *= alpha;
// print_newmat(step,fname);
double Delta = 2.0*delta*(np.CF()-tmp_cf) / (mu*mu); // > 0 means attempted step reduced cost-function
if (Delta >= 0) { // If step reduces cost-function
np.SetCF(tmp_cf); // Update lowest observed value of cost-function
np.SetPar(np.Par() + alpha*p); // Update best set of parameters
lambda_bar = 0.0;
success = true;
if ((np.NIter()%np.NPar()) == 0) { // If npar iterations since last resetting of directions
r = -cfo.grad(np.Par()); // Reset search direction to negative gradient
p = r;
}
else {
ColumnVector oldr = r;
r = -cfo.grad(np.Par());
double beta = (DotProduct(r,r)-DotProduct(oldr,r)) / mu;
p = r + beta*p; // New search direction
}
if (Delta > 0.75) { // If attempted step was \emph{REALLY} good
np.SetLambda(np.Lambda()/2.0);
}
}
else { // If step doesn't reduce cost-function
lambda_bar = np.Lambda();
success = false;
}
if (Delta < 0.25) { // If step reduced cost-function only "a little" (or not at all)
np.SetLambda(4.0*np.Lambda());
}
if (zero_grad_conv(np.Par(),r,np.CF(),np.FractionalGradientTolerance())) { // If gradient is (practically) zero
np.SetStatus(NL_GRADCONV); return(np.Status());
}
}
// If we get here we have exceeded allowed # of iterations
np.SetStatus(NL_MAXITER);
return(np.Status());
}
// Main routine for variable-metric optimisation. This implements
// the variable-metric optimisation with the BFGS or DFP updating
// schemes. The implementation details are mostly quite close to
// those described in Numerical Recipes in C.
NonlinOut varmet(const NonlinParam& p, const NonlinCF& cfo)
{
// Get scale factor to ensure a relative scale between
// parameters and cost-function such that fast and robust
// convergence is achieved.
double sf = cfo.sf(); // Suggestion by "user"
ColumnVector grad = sf*cfo.grad(p.Par()); // Gradient of cost-function
if (p.VariableMetricAutoScale()) {
sf = scale_factor(p.Par(),-grad,cfo,p.LineSearchMaxIterations(),sf); // Refinement by "me"
if (sf == 0.0) { // No minimum in indicated direction
p.SetStatus(NL_PARCONV); // Assume this means we are already at minimum
return(p.Status());
}
grad = (sf/cfo.sf()) * grad;
}
VarmetMatrix iH(p.NPar(),VM_OPT,p.VariableMetricUpdate()); // Inverse Hessian
p.SetCF(sf*cfo.cf(p.Par())); // Current value of cost-function
ColumnVector pdir = -(iH*grad); // Direction to search in
double lambda = 0.0; // Step-length returned by linsrch
double newcf = 0.0; // New value for cost-function
ColumnVector newpar(p.NPar()); // New point in parameter space
while (p.NextIter()) {
// Do a line-search to find a new point in parameter space
LinOut status = linsrch(pdir,p.Par(),grad,cfo,p.CF(),sf,p.LineSearchMaxIterations(),
p.LineSearchMaxStep(),p.VariableMetricAlpha(),
p.LineSearchFractionalParameterTolerance(),&lambda,&newcf,&newpar);
// Check for convergence/problems based on outcome of linsrch
if (status == LM_MAXITER) {p.SetStatus(NL_LM_MAXITER); return(p.Status());}
else if (status == LM_LAMBDA_NILL) { // This means we might be heading uphill and should restart
if (p.NextRestart()) { // If we have spare restarts
p.SetCF(p.CF()); // Another copy of old value
iH.reset(); // Back to being identity matrix
pdir = -grad;
continue;
}
else {
p.SetStatus(NL_PARCONV); return(p.Status());
}
}
// Test for convergence based on distance between points in parameter space
ColumnVector dpar = newpar - p.Par();
p.SetPar(newpar);
p.SetCF(newcf);
if (zero_par_step_conv(p.Par(),dpar,p.FractionalParameterTolerance())) {p.SetStatus(NL_PARCONV); return(p.Status());}
// Get gradient at new point
ColumnVector newgrad = sf*cfo.grad(p.Par());
// Test for convergence based on "zero" gradient
if (zero_grad_conv(p.Par(),newgrad,p.CF(),p.FractionalGradientTolerance())) {p.SetStatus(NL_GRADCONV); return(p.Status());}
// Update estimate of inverse Hessian
iH.update(dpar,newgrad-grad);
// Update parameters and get new direction to go in
grad = newgrad;
pdir = -(iH*grad); // N.B. no unary - op for iH, parenthesis necessary
}
// If we get here we have exceeded the allowed # of iterations
p.SetStatus(NL_MAXITER);
return(p.Status());
}
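// linsrch implements a backtracking line-search: a step is accepted when
// the sufficient-decrease (Armijo-style) condition
//   f(p0 + lambda*pdir) < f(p0) + alpha*lambda*grad'*(np-p0)
// holds. A full step (lambda = 1) is tried first; after that lambda is
// chosen from a quadratic (first backtrack) or cubic (subsequent
// backtracks) model of f(lambda), much as in the lnsrch routine of NRinC.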
LinOut linsrch(// Input
const ColumnVector& dir, // Search direction
const ColumnVector& p0, // Current parameter values
const ColumnVector& grad, // Gradient at p0
const NonlinCF& cfo, // Cost-function object
double f0, // Current cost-function value
double sf, // Scale factor for cost-function
int maxiter, // Max # of iterations
double sm, // Stepmax
double alpha, // Alpha (sufficient-decrease constant)
double ptol, // Tolerance in parameter space
// Output
double *lambda, // Resulting step length
double *of, // Value of cost-function on output
ColumnVector *np) // New parameters
{
const double lmin = 0.1;
const double lmax = 0.5;
// First make sure that the step-length suggested
// by pdir isn't completely unreasonable.
double totstep=sqrt(DotProduct(dir,dir));
ColumnVector pdir(dir);
if (totstep > sm) {pdir *= sm/totstep;}
// Calculate expected rate of change in the direction
// given by pdir.
double fp0 = DotProduct(grad,pdir);
// Calculate smallest meaningful lambda given what is
// smallest meaningful change in parameter value.
double almin=0.0;
for (int i=0; i<p0.Nrows(); i++) {
almin = max(almin,abs(pdir.element(i))/max(abs(p0.element(i)),1.0));
}
almin = ptol / almin;
// First try a step of full lambda
*lambda = 1.0; // Start with that
(*np) = p0 + (*lambda)*pdir; // First new parameters to try
double f2 = sf * cfo.cf(*np); // Cost-function value for par
// See if that does it (fat chance!)
if (f2 < f0 + alpha*(*lambda)*DotProduct(grad,(*np)-p0)) {*of = f2; return(LM_CONV);}
// Calculate Quadratic based on f(0), f'(0)
// and f(1) and find minimum of that quadratic.
*lambda = - fp0 / (2.0*(f2-f0-fp0)); // Minimum of quadratic model of f(lambda)
// Make sure new lambda is 0.1*old_l < lambda < 0.5*old_l
*lambda = max(lmin,*lambda);
*lambda = min(lmax,*lambda);
(*np) = p0 + (*lambda)*pdir; // Second set of new parameters to try
double f1 = sf * cfo.cf(*np); // Cost-function value for par
// Now we will start fitting cubics to f(0), f'(0),
// f(lambda_1) and f(lambda_2) where lambda_1 is the
// lambda most recently tested and where lambda_2 is
// the lambda tested second to last.
double l2 = 1.0; // Second to last lambda
double l1 = *lambda; // Last lambda
Matrix X(2,2);
ColumnVector y(2);
for (int iter=0; iter<maxiter; iter++) {
// See if present lambda might be too small
if (*lambda < almin) {*of = f1; return(LM_LAMBDA_NILL);}
// See if present value is acceptable
if (f1 < f0 + alpha*(*lambda)*DotProduct(grad,(*np)-p0)) {*of = f1; return(LM_CONV);}
// Find parameter values for cubic and square on lambda
X << pow(l1,3) << pow(l1,2) << pow(l2,3) << pow(l2,2);
y << f1-fp0*l1-f0 << f2-fp0*l2-f0;
ColumnVector b = X.i()*y;
// Find value for lambda that yield minimum of cubic
*lambda = (-b.element(1) + sqrt(pow(b.element(1),2) - 3.0*b.element(0)*fp0)) / (3.0*b.element(0));
// Make sure new lambda is 0.1*old_l < lambda < 0.5*old_l
*lambda = max(lmin*l1,*lambda);
*lambda = min(lmax*l1,*lambda);
// Get new function value and update parameters
f2 = f1;
(*np) = p0 + (*lambda)*pdir;
f1 = sf * cfo.cf(*np);
l2 = l1;
l1 = *lambda;
}
// If we are here we have exceeded # of iterations
*of = f1;
return(LM_MAXITER);
}
// Will try and find a scale factor for the cost-function such that
// the step length (lambda) for the first iteration of the variable-
// metric method is ~0.25. Empirically I have found that a scaling
// that yields a first step length in the range 0.1 -- 0.5 will yield
// robust and fast convergence. N.B. though that I have only tested that
// for non-linear registration, and it is conceivable that it is different
// for other applications with fewer parameters.
double scale_factor(const ColumnVector& p, // Current parameter values
const ColumnVector& pdir, // Search direction
const NonlinCF& cfo, // Cost-function object
int maxiter, // Max # of iterations
double sf) // Scale factor.
{
const double dl = 0.25; // Desired Lambda
const double ftol = 0.01; // Fractional tolerance for minimum
// Start out by finding an upper bound for lambda such that
// a minimum is guaranteed to be between 0 and rp
pair<double,double> lp;
pair<double,double> mp;
pair<double,double> rp = bracket(p,pdir,cfo,ftol,sf,&lp,&mp);
if (rp == mp) { // If there is no minimum in the indicated direction
return(0.0);
}
// Now find a minimum with a fractional accuracy of ~1%
pair<double,double> minpoint;
if (linmin(p,pdir,cfo,sf,lp,mp,rp,ftol,maxiter,&minpoint) == LM_MAXITER) {
throw NonlinException("Failed to find minimum along search direction");
}
sf *= minpoint.first/dl;
return(sf);
}
// Will find the minimum of the cost-function as a function of
// lambda. This routine will find the minimum to a fractional
// tolerance of lambda. This is NOT practical to use for the
// Variable Metric minimisation (too slow), but is used for
// the conjugate-gradient method and for finding the initial
// scaling between parameters and cost-function for the
// variable metric method.
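// The algorithm is essentially Brent's method (cf. NRinC sect. 10.2):
// a parabola is fitted through the three best points and its minimum
// used as the next trial, falling back on a golden-section step into
// the larger sub-interval whenever the parabolic step is unusable.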
LinOut linmin(// Input
const ColumnVector& p, // Current parameter values
const ColumnVector& pdir, // Search direction
const NonlinCF& cfo, // Cost-function object
double sf, // Scale factor for cost-function
pair<double,double> lp, // Left point
pair<double,double> mp, // Point somewhere in interval
pair<double,double> rp, // Right point
double ftol, // Fractional tolerance
int maxiter,// Max # of iterations
// Output
pair<double,double> *x) // Best point
{
const double gold = 0.382; // Golden section
pair<double,double> test; // New point to test
pair<double,double> w = mp; // Second best point
pair<double,double> v = mp; // Last value of second best point
double step = 0.0;
double ostep = 0.0; // Length of 2nd to last step taken
double d = 0.0; // Length of last step taken
ColumnVector y(3); // Used for fitting parabolic
Matrix X(3,3); // Used for fitting parabolic
*x = mp; // Initialise "best" point
for (int i=0; i<maxiter; i++) {
double midp = (rp.first+lp.first)/2.0; // Midpoint of bracketing points
double tol = 2.0*ftol*abs(x->first)+MISCMATHS::EPS; // Absolute tolerance
if (abs(x->first-midp) <= (tol-0.5*(rp.first-lp.first))) { // Convergence check
return(LM_CONV);
}
// Try parabolic fit, but not before third iteration
double tmp = 10.0*sqrt(MISCMATHS::EPS);
if (abs(ostep) > tol/2.0 && // If second to last step big enough
abs(x->first-w.first) > tmp &&
abs(x->first-v.first) > tmp &&
abs(w.first-v.first) > tmp) { // And points not degenerate
step = ostep;
ostep = d;
y << x->second << w.second << v.second;
X << pow(x->first,2.0) << x->first << 1.0 <<
pow(w.first,2.0) << w.first << 1.0 <<
pow(v.first,2.0) << v.first << 1.0;
ColumnVector b = X.i() * y;
if (b.element(0) < 4*MISCMATHS::EPS || // If on line or going for maximum
(test.first = -b.element(1)/(2.0*b.element(0))) <= lp.first
|| test.first >= rp.first || // If outside bracketed interval
abs(test.first-x->first) > 0.5*step) { // Or if step too big (indicates oscillation)
// Take golden step into larger interval
if (rp.first-x->first > x->first-lp.first) { // If right interval larger
test.first = x->first + gold * (rp.first - x->first);
}
else {
test.first = x->first - gold * (x->first - lp.first);
}
}
}
else { // Take golden step into larger interval
if (x->first < midp) { // If right interval larger
ostep = rp.first - x->first;
test.first = x->first + gold * (rp.first - x->first);
}
else {
ostep = x->first - lp.first;
test.first = x->first - gold * (x->first - lp.first);
}
}
d = test.first - x->first; // Signed length of step
test.second = sf*cfo.cf(p+test.first*pdir); // Evaluate cf at new point
// Now we have a new point, and we need to figure out what to do with it
if (test.second <= x->second) { // If it beats the best step
if (test.first > x->first) {lp = *x;}
else {rp = *x;}
v = w; w = *x; *x = test;
}
else {
if (test.first < x->first) {lp = test;}
else {rp = test;}
if (test.second <= w.second || w.first == x->first) {
v = w; w = test;
}
else if (test.second <= v.second || v.first == x->first || v.first == w.first) {
v = test;
}
}
}
// If we are here we have used too many iterations
return(LM_MAXITER); // Error status
}
// Will return a value lambda such that a function minimum is guaranteed to
// lie somewhere in the interval between p and p+lambda*pdir. The second value
// of the returned pair is the cost-function value at the point.
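// The interval is grown by golden-ratio expansion (the factors gr and
// 2+gr below), with a parabolic extrapolation tried first where the
// points allow it, much as in the mnbrak routine of NRinC.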
pair<double,double> bracket(// Input
const ColumnVector& p, // Current parameter values
const ColumnVector& pdir, // Search direction
const NonlinCF& cfo, // Cost-function object
double ptol, // Relative tolerance for parameter values
double sf, // Scale factor of cost-function
// Output
pair<double,double> *p_0, // Cost-function value at p
pair<double,double> *p_m) // Point between p_0 and p_l
{
pair<double,double> p_l;
const double gr = 0.618034;
const double maxstep = 100.0;
p_0->first = 0.0;
double cf0 = sf*cfo.cf(p);
double l1 = 1.0;
double cf1 = sf*cfo.cf(p+l1*pdir);
// Find maximum relative component of search direction
double test = 0.0;
for (int i=0; i<pdir.Nrows(); i++) {test = max(test,abs(pdir.element(i))/max(p.element(i),1.0));}
// If the cost-function is not finite at the initial trial point, shrink the step until it is
while (!isfinite(cf1)) {
l1 *= 0.1;
cf1 = sf*cfo.cf(p+l1*pdir);
}
// Get third point to get us started
double l2 = 0.0;
double cf2 = 0.0;
if (cf1 < cf0) {l2 = (2.0 + gr)*l1; cf2 = sf*cfo.cf(p+l2*pdir);}
else {l2 = l1; cf2 = cf1; l1 = gr * l2; cf1 = sf*cfo.cf(p+l1*pdir);}
// Check if we already have a bracket
if (cf1 < cf0 && cf1 < cf2) {
p_l.first = l2; p_l.second = cf2;
p_m->first = l1; p_m->second = cf1;
p_0->second = cf0;
return(p_l);
}
Matrix X(2,2);
ColumnVector y(2);
double lt = 0.0;
double cft = 0.0;
while (!(cf1 < cf0 && cf1 < cf2)) { // If minimum still not bracketed
if (l2*test < ptol) { // If interval ridiculously small
p_l = *p_0;
*p_m = *p_0;
return(p_l);
}
// Let's see if a parabolic might help us
if (abs(l2-l1) > 10.0*sqrt(MISCMATHS::EPS)) {
X << pow(l1,2.0) << l1 << pow(l2,2.0) << l2;
y << cf1 << cf2;
ColumnVector b = X.i()*y;
if (b.element(0) > 4.0*MISCMATHS::EPS) { // Check they are not on a line and not for maximum
lt = - (b.element(1) / (2.0 * b.element(0))); // Tentative point
if (lt > 0 && lt < l2) { // If in range of previous points
cft = sf*cfo.cf(p+lt*pdir);
if (cft < cf0 && cft < cf2) {l1=lt; cf1=cft; continue;}
else if (cft > cf0 && lt < l1 && cft < cf1) {l2=l1; cf2=cf1; l1=lt; cf1=cft; continue;}
else if (cft > cf0 && lt > l1 && cft < cf2) {l2=lt; cf2=cft; continue;}
}
else if (lt > 0 && lt < maxstep*l2) { // If expansion in allowed range
cft = sf*cfo.cf(p+lt*pdir);
l1 = l2; cf1 = cf2;
l2 = lt; cf2 = cft;
continue;
}
}
}
// If we are here the parabolic was of no use
if (cf2 < cf0) { // We need to expand
lt = (2.0 + gr)*l2;
cft = sf*cfo.cf(p+lt*pdir);
l1 = l2; cf1 = cf2;
l2 = lt; cf2 = cft;
}
else { // We need to contract
lt = gr * l1;
cft = sf*cfo.cf(p+lt*pdir);
l2 = l1; cf2 = cf1;
l1 = lt; cf1 = cft;
}
}
// If we are here we know that there is a minimum
// somewhere between 0 and l2;
p_0->second = cf0;
p_m->first = l1;
p_m->second = cf1;
p_l.first = l2;
p_l.second = cf2;
return(p_l);
}
// Utility routines that check for convergence based on various criteria
// Based on zero (negligible) gradient
bool zero_grad_conv(const ColumnVector& par,
const ColumnVector& grad,
double cf,
double gtol)
{
double test = 0.0; // test will be largest relative component of gradient
for (int i=0; i<par.Nrows(); i++) {
test = max(test,abs(grad.element(i))*max(abs(par.element(i)),1.0));
}
test /= max(cf,1.0); // Protect against near-zero values for cost-function
return(test < gtol);
}
// Based on zero (negligible) decrease in cost-function
bool zero_cf_diff_conv(double cfo,
double cfn,
double cftol)
{
return(2.0*abs(cfo-cfn) <= cftol*(abs(cfo)+abs(cfn)+MISCMATHS::EPS));
}
// Based on zero (negligible) step in parameter space
bool zero_par_step_conv(const ColumnVector& par,
const ColumnVector& step,
double ptol)
{
double test = 0.0;
for (int i=0; i<par.Nrows(); i++) {
test = max(test,abs(step.element(i))/max(abs(par.element(i)),1.0));
}
return(test < ptol);
}
// Utility routines that allow the user to check accuracy of their own grad and hess functions
pair<ColumnVector,ColumnVector> check_grad(const ColumnVector& par,
const NonlinCF& cfo)
{
pair<ColumnVector,ColumnVector> rv;
rv.first = cfo.NonlinCF::grad(par);
rv.second = cfo.grad(par);
return(rv);
}
pair<boost::shared_ptr<BFMatrix>,boost::shared_ptr<BFMatrix> > check_hess(const ColumnVector& par,
const NonlinCF& cfo)
{
pair<boost::shared_ptr<BFMatrix>,boost::shared_ptr<BFMatrix> > rv;
rv.first = cfo.NonlinCF::hess(par);
rv.second = cfo.hess(par);
return(rv);
}
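// A typical use (sketch) is to compare the two versions returned, e.g.
// for the gradient:
//
// pair<ColumnVector,ColumnVector> g = check_grad(par,cfo);
// double maxdiff = (g.first - g.second).MaximumAbsoluteValue();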
void print_newmat(const NEWMAT::GeneralMatrix& m,
std::string fname)
{
if (!fname.length()) {
cout << endl << m << endl;
}
else {
try {
std::ofstream fout(fname.c_str());
fout << setprecision(10) << m;
}
catch(...) {
std::string errmsg("print_newmat: Failed to write to file " + fname);
throw NonlinException(errmsg);
}
}
}
} // End namespace MISCMATHS
nonlin.h (new file, mode 100644)
// Declarations for nonlinear optimisation
#ifndef nonlin_h
#define nonlin_h
#include <string>
#include <vector>
#include <utility>
#include <exception>
#include <boost/shared_ptr.hpp>
#include "bfmatrix.h"
#include "newmat.h"
namespace MISCMATHS {
enum NLMethod {NL_VM, // Variable-Metric (see NRinC)
NL_CG, // Conjugate-Gradient (see NRinC)
NL_SCG, // Scaled Conjugate-Gradient (see Møller 1993).
NL_LM}; // Levenberg-Marquardt (see NRinC)
enum LMType {LM_L, LM_LM}; // Levenberg or Levenberg-Marquardt
enum VMUpdateType {VM_DFP, VM_BFGS}; // See NRinC chapter 10.
enum CGUpdateType {CG_FR, CG_PR}; // Fletcher-Reeves, Polak-Ribiere
enum VMMatrixType {VM_OPT, // Let VarmetMatrix pick VM_FULL or VM_COL based on size
VM_COL, // Store all rank-one updates as column-vectors
VM_FULL}; // Store full estimate of inverse Hessian
enum LinOut {LM_MAXITER, // Too many iterations in line-minimisation
LM_LAMBDA_NILL, // Could not find a minimum along this direction
LM_CONV}; // Line-minimisation converged.
enum NonlinOut {NL_UNDEFINED, // Initial value before minimisation
NL_MAXITER, // Too many iterations
NL_LM_MAXITER, // Too many iterations during a line-minimisation
NL_PARCONV, // Convergence. Step in parameter space small
NL_GRADCONV, // Convergence. Gradient small
NL_CFCONV, // Convergence. Change in cost-function small
NL_LCONV}; // Convergence, lambda very large
const double EPS = 2.0e-16; // Loosely based on NRinC 20.1
class NonlinException: public std::exception
{
private:
std::string m_msg;
public:
NonlinException(const std::string& msg) throw(): m_msg("Nonlin: msg=" + msg) {}
virtual const char * what() const throw() {
return(m_msg.c_str()); // N.B. must not return .c_str() of a temporary
}
~NonlinException() throw() {}
};
// NonlinParam is a class that contains the
// information about "how" the
// minimisation should be performed, i.e. it
// contains things like choice of minimisation
// algorithm, # of parameters, convergence
// criteria etc.
class NonlinParam
{
public:
NonlinParam(int pnpar,
NLMethod pmtd,
NEWMAT::ColumnVector ppar=NEWMAT::ColumnVector(),
bool plogcf=false,
bool ploglambda=false,
bool plogpar=false,
int pmaxiter=200,
double pcftol=1.0e-8,
double pgtol=1.0e-8,
double pptol=4.0*EPS,
VMUpdateType pvmut=VM_BFGS,
double palpha=1.0e-4,
double pstepmax=10,
int plm_maxiter=50,
int pmaxrestart=0,
bool pautoscale=true,
CGUpdateType pcgut=CG_PR,
double plm_ftol=1.0e-3,
LMType plmtype=LM_LM,
double pltol=1.0e20,
int pcg_maxiter=200,
double pcg_tol=1.0e-6,
double plambda=0.1)
: npar(pnpar), mtd(pmtd), logcf(plogcf),
loglambda(ploglambda), logpar(plogpar), maxiter(pmaxiter),
cftol(pcftol), gtol(pgtol), ptol(pptol), vmut(pvmut),
alpha(palpha), stepmax(pstepmax), lm_maxiter(plm_maxiter),
maxrestart(pmaxrestart), autoscale(pautoscale), cgut(pcgut),
lm_ftol(plm_ftol), lmtype(plmtype), ltol(pltol), cg_maxiter(pcg_maxiter),
cg_tol(pcg_tol), lambda(), cf(), par(), niter(0), nrestart(0), status(NL_UNDEFINED)
{
lambda.push_back(plambda);
if (ppar.Nrows()) SetStartingEstimate(ppar);
else {
NEWMAT::ColumnVector tmp(npar);
tmp = 0.0;
SetStartingEstimate(tmp);
}
}
~NonlinParam() {}
// Routines to check values
int NPar() const {return(npar);}
NLMethod Method() const {return(mtd);}
int MaxIter() const {return(maxiter);}
int NIter() const {return(niter);}
double FractionalCFTolerance() const {return(cftol);}
double FractionalGradientTolerance() const {return(gtol);}
double FractionalParameterTolerance() const {return(ptol);}
VMUpdateType VariableMetricUpdate() const {return(vmut);}
double VariableMetricAlpha() const {return(alpha);}
int MaxVariableMetricRestarts() const {return(maxrestart);}
int VariableMetricRestarts() const {return(nrestart);}
bool VariableMetricAutoScale() const {return(autoscale);}
double LineSearchMaxStep() const {return(stepmax);}
int LineSearchMaxIterations() const {return(lm_maxiter);}
CGUpdateType ConjugateGradientUpdate() const {return(cgut);}
double LineSearchFractionalParameterTolerance() const {return(lm_ftol);}
LMType GaussNewtonType() const {return(lmtype);}
double LambdaConvergenceCriterion() const {return(ltol);}
int EquationSolverMaxIter() const {return(cg_maxiter);}
double EquationSolverTol() const {return(cg_tol);}
bool LoggingParameters() const {return(logpar);}
bool LoggingCostFunction() const {return(logcf);}
bool LoggingLambda() const {return(loglambda);}
// Routines to get output
double Lambda() const {return(lambda.back());}
double InitialLambda() const {if (loglambda) return(lambda[0]); else {throw NonlinException("InitialLambda: Lambda not logged"); return(0.0);}}
const std::vector<double>& LambdaHistory() const {if (loglambda) return(lambda); else {throw NonlinException("LambdaHistory: Lambda not logged"); return(lambda);}}
const NEWMAT::ColumnVector& Par() const {return(par.back());}
const NEWMAT::ColumnVector& InitialPar() const {if (logpar) return(par[0]); else {throw NonlinException("InitialPar: Parameters not logged"); return(par[0]);}}
const std::vector<NEWMAT::ColumnVector>& ParHistory() const {if (logpar) return(par); else {throw NonlinException("ParHistory: Parameters not logged"); return(par);}}
double CF() const {return(cf.back());}
double InitialCF() const {if (logcf) return(cf[0]); else {throw NonlinException("InitialCF: Cost-function not logged"); return(cf[0]);}}
const std::vector<double> CFHistory() const {if (logcf) return(cf); else {throw NonlinException("CFHistory: Cost-function not logged"); return(cf);}}
NonlinOut Status() const {return(status);}
// Routines to set values of steering parameters
void SetMethod(NLMethod pmtd) {mtd = pmtd;}
void LogCF(bool flag=true) {logcf = flag;}
void LogPar(bool flag=true) {logpar = flag;}
void LogLambda(bool flag=true) {loglambda = flag;}
void SetStartingEstimate(const NEWMAT::ColumnVector& sp) {
if (niter) throw NonlinException("SetStartingEstimate: Object has to be reset before setting new starting parameters");
SetPar(sp);
}
void SetMaxIter(unsigned int pmiter) {maxiter = pmiter;}
void SetFractionalCFTolerance(double pcftol) {
if (pcftol>0.5) throw NonlinException("SetFractionalCFTolerance: Nonsensically large tolerance");
else if (pcftol <= 0.0) throw NonlinException("SetFractionalCFTolerance: Tolerance must be non-zero and positive");
cftol = pcftol;
}
void SetFractionalGradientTolerance(double pgtol) {
if (pgtol>0.5) throw NonlinException("SetFractionalGradientTolerance: Nonsensically large tolerance");
else if (pgtol <= 0.0) throw NonlinException("SetFractionalGradientTolerance: Tolerance must be non-zero and positive");
gtol = pgtol;
}
void SetFractionalParameterTolerance(double pptol) {
if (pptol>0.5) throw NonlinException("SetFractionalParameterTolerance: Nonsensically large tolerance");
else if (pptol <= 0.0) throw NonlinException("SetFractionalParameterTolerance: Tolerance must be non-zero and positive");
ptol = pptol;
}
void SetVariableMetricUpdate(VMUpdateType pvmut) {vmut = pvmut;}
void SetVariableMetricAlpha(double palpha) {
if (palpha>=1.0 || palpha<=0.0) throw NonlinException("SetVariableMetricAlpha: Alpha must be between 0 and 1");
alpha = palpha;
}
void SetMaxVariableMetricRestarts(unsigned int pmaxrestart) {maxrestart = pmaxrestart;}
void SetVariableMetricAutoScale(bool flag=true) {autoscale = flag;}
void SetLineSearchMaxStep(double pstepmax) {
if (pstepmax<=0) throw NonlinException("SetLineSearchMaxStep: maxstep must be non-zero and positive");
stepmax = pstepmax;
}
void SetLineMinimisationMaxIterations(unsigned int plm_maxiter) {lm_maxiter = plm_maxiter;}
void SetConjugateGradientUpdate(CGUpdateType pcgut) {cgut = pcgut;}
void SetLineMinimisationFractionalParameterTolerance(double plm_ftol) {
if (plm_ftol>0.5) throw NonlinException("SetLineMinimisationFractionalParameterTolerance: Nonsensically large tolerance");
else if (plm_ftol <= 0.0) throw NonlinException("SetLineMinimisationFractionalParameterTolerance: Tolerance must be non-zero and positive");
lm_ftol = plm_ftol;
}
void SetGaussNewtonType(LMType plmtype) {lmtype = plmtype;}
void SetLambdaConvergenceCriterion(double pltol) {
if (pltol<1.0) throw NonlinException("SetLambdaConvergenceCriterion: Nonsensically small tolerance");
ltol = pltol;
}
void SetEquationSolverMaxIter(int pcg_maxiter) {cg_maxiter = pcg_maxiter;}
void SetEquationSolverTol(double pcg_tol) {cg_tol = pcg_tol;}
// Reset is used to reset a NonlinParam object after it has run to convergence, thereby allowing it
// to be reused with a different CF object. This is to avoid the cost of creating the object many
// times when fitting for example multiple voxels.
void Reset() {} // N.B. currently a no-op; resetting is not yet implemented
// Routines used by the (global) non-linear fitting routines. Note that these can
// all be called for const objects.
void SetPar(const NEWMAT::ColumnVector& p) const {
if (p.Nrows() != npar) throw NonlinException("SetPar: Mismatch between starting vector and # of parameters");
if (logpar || !par.size()) par.push_back(p);
else par[0] = p;
}
void SetCF(double pcf) const {
if (logcf || !cf.size()) cf.push_back(pcf);
else cf[0] = pcf;
}
void SetLambda(double pl) const {
if (loglambda || !lambda.size()) lambda.push_back(pl);
else lambda[0] = pl;
}
bool NextIter(bool success=true) const {if (success && niter++ >= maxiter) return(false); else return(true);}
bool NextRestart() const {if (nrestart++ >= maxrestart) return(false); else return(true);}
void SetStatus(NonlinOut pstatus) const {status = pstatus;}
private:
// INPUT PARAMETERS
//
// Parameters that apply to all algorithms
const int npar; // # of parameters
NLMethod mtd; // Minimisation method
bool logcf; // If true, history of cost-function is logged
bool loglambda; // If true, history of lambda is logged
bool logpar; // If true history of parameters is logged
int maxiter; // Maximum # of iterations allowed
double cftol; // Tolerance for cost-function convergence criterion
double gtol; // Tolerance for gradient convergence criterion
double ptol; // Tolerance for parameter convergence criterion
// Parameters that apply to Variable-Metric Algorithm
VMUpdateType vmut; // DFP or BFGS
double alpha; // Criterion for convergence in line minimisation
double stepmax; // Maximum step length for line minimisation
int lm_maxiter; // Maximum # of iterations for line minimisation
int maxrestart; // Maximum # of restarts that should be done.
bool autoscale; // "Automatic" search for optimal scaling
// Parameters that apply to CG algorithm
CGUpdateType cgut; // Fletcher-Reeves or Polak-Ribiere
double lm_ftol; // Convergence criterion for line-search
// Parameters that apply to LM algorithm
LMType lmtype; // Levenberg or Levenberg-Marquardt
double ltol; // Convergence criterion based on large lambda
int cg_maxiter; // Maximum # of iterations for iterative "inverse" of Hessian
double cg_tol; // Tolerance for iterative "inverse" of Hessian
//
// OUTPUT PARAMETERS
//
mutable std::vector<double> lambda; // (History of) lambda (LM and SCG type minimisation)
mutable std::vector<double> cf; // (History of) cost-function
mutable std::vector<NEWMAT::ColumnVector> par; // (History of) Parameter estimates
mutable int niter; // Number of iterations
mutable int nrestart; // Number of restarts
mutable NonlinOut status; // Output status
NonlinParam& operator=(const NonlinParam& rhs); // Hide assignment
};
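// Logging is off by default; to record the full history of a search,
// switch it on before calling nonlin, e.g. (sketch):
//
// NonlinParam np(n,NL_VM);
// np.LogCF(); np.LogPar();
// nonlin(np,cfo);
// const std::vector<double>& cfhist = np.CFHistory();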
// NonlinCF (Cost Function) is a virtual
// class that defines a minimal interface.
// By subclassing NonlinCF the "user" can
// create a class that allows him/her to
// use NONLIN to minimise his/her function.
class NonlinCF
{
private:
NonlinCF& operator=(const NonlinCF& rhs); // Hide assignment
public:
NonlinCF() {}
virtual ~NonlinCF() {}
virtual double sf() const {return(1.0);}
virtual NEWMAT::ReturnMatrix grad(const NEWMAT::ColumnVector& p) const;
virtual boost::shared_ptr<BFMatrix> hess(const NEWMAT::ColumnVector& p,
boost::shared_ptr<BFMatrix> iptr=boost::shared_ptr<BFMatrix>()) const;
virtual double cf(const NEWMAT::ColumnVector& p) const = 0;
};
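// Example (sketch): minimising f(p) = (p_1-1)^2 + 2*(p_2+1)^2 could
// look like
//
// class MyCF: public NonlinCF
// {
// public:
//   virtual double cf(const NEWMAT::ColumnVector& p) const {
//     return(std::pow(p(1)-1.0,2) + 2.0*std::pow(p(2)+1.0,2));
//   }
// };
//
// MyCF cfo;
// NonlinParam np(2,NL_CG);   // Two parameters, conjugate-gradient
// nonlin(np,cfo);            // np.Par() should now be close to [1 -1]'
//
// The grad- and hess-methods fall back on the numerical approximations
// in the base class unless overridden.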
// Varmet matrix is a "helper" class
// that makes it a little easier to
// implement variable-metric minimisation.
class VarmetMatrix
{
private:
int sz;
VMMatrixType mtp;
VMUpdateType utp;
NEWMAT::Matrix mat;
std::vector<double> sf;
std::vector<NEWMAT::ColumnVector> vec;
VarmetMatrix& operator=(const VarmetMatrix& rhs); // Hide assignment
public:
explicit VarmetMatrix(int psz, VMMatrixType pmtp, VMUpdateType putp)
: sz(psz), mtp(pmtp), utp(putp)
{
if (sz > 0) {
if (mtp == VM_OPT) { // Pick representation based on problem size
if (sz < 100) {mtp = VM_FULL;}
else {mtp = VM_COL;}
}
if (mtp == VM_FULL) { // Initialise explicit matrix to identity
NEWMAT::IdentityMatrix tmp(sz);
mat = tmp;
}
}
}
~VarmetMatrix() {}
int size() {return(sz);}
VMUpdateType updatetype() {return(utp);}
VMMatrixType matrixtype() {return(mtp);}
void print() const;
void reset()
{
if (sz > 0) {
if (mtp == VM_FULL) {
NEWMAT::IdentityMatrix tmp(sz);
mat = tmp;
}
else if (mtp == VM_COL) {
sf.clear();
vec.clear();
}
}
}
void update(const NEWMAT::ColumnVector& pdiff, // x_{i+1} - x_i
const NEWMAT::ColumnVector& gdiff); // \nabla f_{i+1} - \nabla f_i
friend NEWMAT::ColumnVector operator*(const VarmetMatrix& m,
const NEWMAT::ColumnVector& v);
};
// Declaration of (global) main function for minimisation
NonlinOut nonlin(const NonlinParam& p, const NonlinCF& cfo);
// Declaration of global utility functions
std::pair<NEWMAT::ColumnVector,NEWMAT::ColumnVector> check_grad(const NEWMAT::ColumnVector& par,
const NonlinCF& cfo);
std::pair<boost::shared_ptr<BFMatrix>,boost::shared_ptr<BFMatrix> > check_hess(const NEWMAT::ColumnVector& par,
const NonlinCF& cfo);
} // End namespace MISCMATHS
#endif // end #ifndef nonlin_h