diff --git a/nonlin.cpp b/nonlin.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5047a362ae6bb09fa6a3ff544ed06c75bd1d2d18
--- /dev/null
+++ b/nonlin.cpp
@@ -0,0 +1,982 @@
+// Definitions for module nonlin
+
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <string>
+#include <vector>
+#include <cmath>
+#include "newmat.h"
+#include "newmatio.h"
+#include "bfmatrix.h"
+#include "nonlin.h"
+
+using namespace std;
+using namespace NEWMAT;
+
+namespace MISCMATHS {
+
+// Declarations of routines for use only in this module
+
+// Main routine for Variable-Metric optimisation
+
+NonlinOut varmet(const NonlinParam& p, const NonlinCF& cfo);
+
+// Main routine for Conjugate-Gradient optimisation
+
+NonlinOut congra(const NonlinParam& p, const NonlinCF& cfo);
+
+// Main routine for Scaled Conjugate-Gradient optimisation
+
+NonlinOut sccngr(const NonlinParam& p, const NonlinCF& cfo);
+
+// Main routine for Levenberg-Marquardt optimisation
+
+NonlinOut levmar(const NonlinParam& p, const NonlinCF& cfo);
+
+LinOut linsrch(// Input
+               const ColumnVector&  pdir,    // Search direction
+               const ColumnVector&  p0,      // Current parameter values
+               const ColumnVector&  grad,    // Gradient at p0
+               const NonlinCF&      cfo,     // Cost-function object
+               double               f0,      // Current cost-function value
+               double               sf,      // Scale factor for cost-function
+               int                  maxiter, // Max # of iterations in line minimisation
+               double               sm,      // Stepmax
+               double               alpha,   // Alpha (sorry).
+               double               ptol,    // Tolerance in parameter space
+               // Output
+               double               *lambda, // Resulting step length
+               double               *of,     // Value of cost-function on output
+               ColumnVector         *np);    // New parameters
+
+double scale_factor(const ColumnVector&  p,       // Current parameter values
+                    const ColumnVector&  pdir,    // Search direction
+                    const NonlinCF&      cfo,     // Cost-function object
+                    int                  maxiter, // Max # of iterations
+                    double               sf);     // Scale factor.
+
+LinOut linmin(// Input
+              const ColumnVector&  p,      // Current parameter values
+              const ColumnVector&  pdir,   // Search direction
+              const NonlinCF&      cfo,    // Cost-function object
+              double               sf,     // Scale factor for cost-function
+              pair<double,double>  lp,     // Left point
+              pair<double,double>  mp,     // Point somewhere in interval
+              pair<double,double>  rp,     // Right point
+              double               ftol,   // Fractional tolerance
+              int                  maxiter,// Max # of iterations
+              // Output
+              pair<double,double>  *x);    // Best point
+
+pair<double,double> bracket(// Input
+                            const ColumnVector&  p,    // Current parameter values
+                            const ColumnVector&  pdir, // Search direction
+                            const NonlinCF&      cfo,  // Cost-function object
+                            double               ptol, // Relative tolerance for parameter values
+                            double               sf,   // Scale factor of cost-function
+                            // Output
+                            pair<double,double>  *p_0, // Cost-function value at p
+                            pair<double,double>  *p_m);// Point between p_0 and p_l
+
+// Utility routines that check for convergence based on various criteria
+// Based on zero (negligible) gradient
+bool zero_grad_conv(const ColumnVector&  par,
+                    const ColumnVector&  grad,
+                    double               cf,
+                    double               gtol);
+// Based on zero (negligible) decrease in cost-function
+bool zero_cf_diff_conv(double  cfo,
+                       double  cfn,
+                       double  cftol);
+// Based on zero (negligible) step in parameter space
+bool zero_par_step_conv(const ColumnVector&  step,
+                        const ColumnVector&  par,
+                        double               ptol);
+
+void print_newmat(const NEWMAT::GeneralMatrix&  m,
+                  std::string                   fname);
+
+
+// If the user chooses not to override the grad-method of the NonlinCF
+// base class this routine will be used to calculate numerical derivatives.
+
+ReturnMatrix NonlinCF::grad(const ColumnVector& p) const
+{
+  ColumnVector gradv(p.Nrows());
+  ColumnVector tmpp = p;
+  double tiny = 1e-8;
+  double cf0 = cf(tmpp);
+  for (int i=0; i<p.Nrows(); i++) {
+    double step = tiny * max(tmpp.element(i),1.0);
+    tmpp.element(i) += step;
+    gradv.element(i) = (cf(tmpp) - cf0) / step;
+    tmpp.element(i) -= step;
+  }
+  gradv.Release();
+  return(gradv);
+}
+
+// If the user chooses not to override the hess-method of the NonlinCF
+// base class this routine will calculate numerical 2nd derivatives.
+// Note also that the Hessian will only be used for the Levenberg-
+// Marquardt minimisation.
+// It is in general _not_ a good idea to use a method that explicitly
+// uses the Hessian (i.e. LM) when calculating it numerically.
+// Note that it returns a (safe) pointer to BFMatrix. BFMatrix has two
+// subclasses, FullBFMatrix and SparseBFMatrix, which can be used
+// interchangeably depending on whether the structure of the Hessian
+// renders it sparse or not.
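+
+// For reference, the finite-difference approximations implemented by the
+// default grad() above and hess() below are (with e_i the i:th unit vector
+// and h_i the step used for the i:th parameter):
+//
+//   df/dp_i       ~  ( f(p + h_i*e_i) - f(p) ) / h_i
+//   d2f/dp_i^2    ~  ( f(p + h_i*e_i) + f(p - h_i*e_i) - 2*f(p) ) / h_i^2
+//   d2f/dp_i dp_j ~  ( f(p + h_i*e_i + h_j*e_j) + f(p)
+//                      - f(p + h_i*e_i) - f(p + h_j*e_j) ) / (h_i*h_j)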
+
+boost::shared_ptr<BFMatrix> NonlinCF::hess(const ColumnVector&          p,
+                                           boost::shared_ptr<BFMatrix>  iptr) const
+{
+  boost::shared_ptr<BFMatrix> hessm;
+  if (iptr && int(iptr->Nrows())==p.Nrows() && int(iptr->Ncols())==p.Nrows()) hessm = iptr;
+  else hessm = boost::shared_ptr<BFMatrix>(new FullBFMatrix(p.Nrows(),p.Nrows()));
+  ColumnVector tmpp = p;
+  double tiny = 1e-4;
+  double fx0y0 = cf(tmpp);
+  ColumnVector fdx(p.Nrows());
+  ColumnVector step(p.Nrows());
+
+  // First calculate all f(x+dx_i) values
+
+  for (int i=0; i<p.Nrows(); i++) {
+    step.element(i) = tiny * max(tmpp.element(i),1.0);
+    tmpp.element(i) += step.element(i);
+    fdx.element(i) = cf(tmpp);
+    tmpp.element(i) -= step.element(i);
+  }
+
+  // Then values of matrix
+
+  for (int i=0; i<p.Nrows(); i++) {
+    for (int j=i; j<p.Nrows(); j++) {
+      if (i==j) { // If diagonal element
+        tmpp.element(i) -= step.element(i);
+        double tmp = cf(tmpp);
+        hessm->Set(i+1,i+1,(fdx.element(i) + tmp - 2.0*fx0y0) / (step.element(i)*step.element(i)));
+        tmpp.element(i) += step.element(i);
+      }
+      else { // If off-diagonal element
+        tmpp.element(i) += step.element(i);
+        tmpp.element(j) += step.element(j);
+        hessm->Set(i+1,j+1,(cf(tmpp)+fx0y0-fdx.element(i)-fdx.element(j)) / (step.element(i)*step.element(j)));
+        hessm->Set(j+1,i+1,hessm->Peek(i+1,j+1));
+        tmpp.element(i) -= step.element(i);
+        tmpp.element(j) -= step.element(j);
+      }
+    }
+  }
+
+  return(hessm);
+}
+
+// Display (for debug purposes) matrix if it is small enough for that to make sense
+
+void VarmetMatrix::print() const
+{
+  if (sz > 10) {
+    cout << "Matrix too big to be meaningful to display" << endl;
+    return;
+  }
+  else {
+    if (mtp == VM_FULL) {
+      cout << setw(10) << setprecision(5) << mat;
+    }
+    else if (mtp == VM_COL) {
+      Matrix tmp = IdentityMatrix(sz);
+      for (unsigned int i=0; i<sf.size(); i++) {
+        tmp += sf[i] * vec[i]*(vec[i]).t();
+      }
+      cout << setw(10) << setprecision(5) << tmp;
+    }
+  }
+  return;
+}
+
+// Update estimate of inverse Hessian based on latest step
+
+void VarmetMatrix::update(const NEWMAT::ColumnVector&  pdiff,  // x_{i+1} - x_i
+                          const NEWMAT::ColumnVector&  gdiff)  // \nabla f_{i+1} - \nabla f_i
+{
+  // Sort out if this call defines size of problem
+  if (pdiff.Nrows() != sz || gdiff.Nrows() != sz) {
+    if (sf.size() == 0 && pdiff.Nrows()==gdiff.Nrows()) {
+      sz = pdiff.Nrows();
+      if (mtp == VM_OPT) {
+        if (sz < 100) {mtp = VM_FULL;}
+        else {mtp = VM_COL;}
+      }
+    }
+    else {throw NonlinException("VarmetMatrix::update: mismatch between vector and matrix sizes");}
+  }
+  // Now do the actual update
+  double sf1 = DotProduct(pdiff,gdiff);
+  if ((sf1*sf1) > MISCMATHS::EPS*DotProduct(pdiff,pdiff)*DotProduct(gdiff,gdiff)) {
+    sf1 = 1.0 / sf1;
+    ColumnVector v2 = (*this) * gdiff;
+    double sf2 = -1.0 / DotProduct(gdiff,v2);
+    if (mtp == VM_FULL) {
+      mat += sf1 * pdiff * pdiff.t();
+      mat += sf2 * v2 * v2.t();
+    }
+    else {
+      vec.push_back(pdiff);
+      vec.push_back(v2);
+      sf.push_back(sf1);
+      sf.push_back(sf2);
+    }
+    if (utp == VM_BFGS) {
+      if (mtp == VM_FULL) {
+        ColumnVector u = sf1*pdiff + sf2*v2;
+        mat -= (1.0/sf2) * u * u.t();
+      }
+      else {
+        vec.push_back(sf1*pdiff + sf2*v2);
+        sf.push_back(-1.0/sf2);
+      }
+    }
+  }
+}
+
+// Multiply representation of matrix with vector
+
+ColumnVector operator*(const VarmetMatrix& m, const ColumnVector& v)
+{
+  if (m.mtp == VM_FULL) {return(m.mat*v);}
+  else {
+    ColumnVector ov = v;   // Multiplication with unity matrix
+    if (m.sf.size() != 0) {
+      std::vector<double>::const_iterator               sfp;
+      std::vector<NEWMAT::ColumnVector>::const_iterator vep;
+      for (sfp=m.sf.begin(), vep=m.vec.begin(); sfp!=m.sf.end(); ++sfp, ++vep) {
+        double tmp = (*sfp) * DotProduct((*vep),v);
+        ov += tmp * (*vep);
+      }
+    }
+    return(ov);
+  }
+}
+
+// Gateway function to routines for non-linear optimisation
+
+NonlinOut nonlin(const NonlinParam& p, const NonlinCF& cfo)
+{
+  NonlinOut status = NL_MAXITER;
+
+  // Call functions that actually do the job
+
+  switch (p.Method()) {
+  case NL_VM:
+    status = varmet(p,cfo);
+    break;
+  case NL_CG:
+    status = congra(p,cfo);
+    break;
+  case NL_SCG:
+    status = sccngr(p,cfo);
+    break;
+  case NL_LM:
+    status = levmar(p,cfo);
+    break;
+  }
+
+  return(status);
+}
+
+// Main routine for Levenberg-Marquardt optimisation
+
+NonlinOut levmar(const NonlinParam& p, const NonlinCF& cfo)
+{
+  // Calculate initial values
+  p.SetCF(cfo.cf(p.Par()));                           // Cost-function evaluated at current parameters
+  ColumnVector g = cfo.grad(p.Par());                 // Gradient evaluated at current parameters
+  boost::shared_ptr<BFMatrix> H = cfo.hess(p.Par());  // Hessian evaluated at current parameters
+
+  double olambda = 0.0;
+  bool success = true;   // True if last step decreased CF
+
+  while (p.NextIter(success)) {
+    for (int i=1; i<=p.NPar(); i++) {   // Nudge diagonal of H
+      if (p.GaussNewtonType() == LM_LM) {
+        H->AddTo(i,i,(p.Lambda()-olambda)*H->Peek(i,i));
+      }
+      else if (p.GaussNewtonType() == LM_L) {
+        H->AddTo(i,i,p.Lambda()-olambda);
+      }
+    }
+    ColumnVector step = -H->SolveForx(g,SYM_POSDEF,p.EquationSolverTol(),p.EquationSolverMaxIter());
+    double ncf = cfo.cf(p.Par()+step);
+    success = (ncf < p.CF());
+    if (success) {             // If last step successful
+      olambda = 0.0;           // New H, so no need to undo old lambda
+      p.SetLambda(p.Lambda()/10.0);
+      p.SetPar(p.Par()+step);
+      // Check for convergence based on small decrease of cf
+      if (zero_cf_diff_conv(p.CF(),ncf,p.FractionalCFTolerance())) {
+        p.SetCF(ncf); p.SetStatus(NL_CFCONV); return(p.Status());
+      }
+      p.SetCF(ncf);
+      g = cfo.grad(p.Par());
+      H = cfo.hess(p.Par(),H);
+    }
+    else {                     // If last step was unsuccessful
+      olambda = p.Lambda();    // Returning to same H, so must undo old lambda
+      p.SetLambda(10.0*p.Lambda());
+      p.SetCF(p.CF());         // Push another copy
+      // Check for convergence based on _really_ large lambda
+      if (p.Lambda() > p.LambdaConvergenceCriterion()) {
+        p.SetStatus(NL_LCONV); return(p.Status());
+      }
+    }
+  }
+  // This means we did too many iterations
+  p.SetStatus(NL_MAXITER);
+
+  return(p.Status());
+}
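+
+// For reference, the diagonal nudge in levmar() above implements the two
+// standard damping flavours (lambda being the current damping factor):
+//
+//   Levenberg-Marquardt (LM_LM):  H_ii <- H_ii * (1 + lambda)
+//   Levenberg           (LM_L):   H_ii <- H_ii + lambda
+//
+// When a step fails the Hessian is not recomputed, so the (Lambda()-olambda)
+// terms re-damp the same H with the new, larger lambda. For LM_L this undoes
+// the previous damping exactly; for LM_LM only approximately, since Peek(i,i)
+// then returns an already-damped diagonal element.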
+
+// Main routine for conjugate-gradient optimisation. The
+// implementation follows that of Numerical Recipes
+// reasonably closely.
+
+NonlinOut congra(const NonlinParam& np, const NonlinCF& cfo)
+{
+  // Set up initial values
+  np.SetCF(cfo.cf(np.Par()));
+  ColumnVector r = -cfo.grad(np.Par());
+  ColumnVector p = r;
+
+  while (np.NextIter()) {
+    // Check for convergence based on zero gradient
+    if (zero_grad_conv(np.Par(),r,np.CF(),np.FractionalGradientTolerance())) {
+      np.SetStatus(NL_GRADCONV); return(np.Status());
+    }
+    // Bracket minimum along p
+    pair<double,double> lp, mp;   // Leftmost and middle point of bracket
+    pair<double,double> rp = bracket(np.Par(),p,cfo,np.FractionalParameterTolerance(),1.0,&lp,&mp);   // Rightmost point of bracket
+    if (rp == lp) {   // If no smaller point along p
+      np.SetStatus(NL_PARCONV); return(np.Status());   // Assume it is because we are at minimum
+    }
+    // Find minimum along p between lp and rp
+    pair<double,double> minp;   // Minimum along p
+    LinOut lm_status = linmin(np.Par(),p,cfo,1.0,lp,mp,rp,
+                              np.LineSearchFractionalParameterTolerance(),
+                              np.LineSearchMaxIterations(),&minp);
+    // Check for problems with line-search
+    if (lm_status == LM_MAXITER) {np.SetStatus(NL_LM_MAXITER); return(np.Status());}   // Ouch!
+    // Set new cf value and parameters, remembering the old cf
+    // value so that convergence can be assessed below
+    double oldcf = np.CF();
+    np.SetPar(np.Par() + minp.first*p);
+    np.SetCF(minp.second);
+    // Check for convergence based on small decrease of cost-function
+    if (zero_cf_diff_conv(oldcf,minp.second,np.FractionalCFTolerance())) {np.SetStatus(NL_CFCONV); return(np.Status());}
+    // Check for convergence based on negligible move in parameter space
+    else if (zero_par_step_conv(minp.first*p,np.Par(),np.FractionalParameterTolerance())) {np.SetStatus(NL_PARCONV); return(np.Status());}
+    else {   // If no convergence
+      if (((np.NIter())%np.NPar()) == 0) {   // Explicitly reset directions after npar iterations
+        r = -cfo.grad(np.Par());
+        p = r;
+      }
+      else {
+        ColumnVector oldr = r;
+        r = -cfo.grad(np.Par());
+        if (np.ConjugateGradientUpdate() == CG_FR) {        // Get conjugate direction, Fletcher-Reeves flavour
+          p = r + (DotProduct(r,r)/DotProduct(oldr,oldr)) * p;
+        }
+        else if (np.ConjugateGradientUpdate() == CG_PR) {   // Get conjugate direction, Polak-Ribiere flavour
+          p = r + (DotProduct(r-oldr,r)/DotProduct(oldr,oldr)) * p;
+        }
+      }
+    }
+  }
+  // If we get here we have used too many iterations
+  np.SetStatus(NL_MAXITER);
+
+  return(np.Status());
+}
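+
+// For reference, with r_k denoting the negative gradient at iteration k, the
+// two direction updates in congra() above are
+//
+//   Fletcher-Reeves:  beta_k = (r_k+1' * r_k+1) / (r_k' * r_k)
+//   Polak-Ribiere:    beta_k = ((r_k+1 - r_k)' * r_k+1) / (r_k' * r_k)
+//
+// followed in both cases by  p_k+1 = r_k+1 + beta_k * p_k.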
+
+// Main routine for scaled conjugate-gradient optimisation. The
+// idea of the algorithm is similar to that of Levenberg-
+// Marquardt. In the LM algorithm the search direction is a
+// "compromise" between the Newton direction and the gradient
+// direction, where the compromise depends on a factor lambda.
+// A large lambda means that it is close to the gradient and a
+// small lambda that it is close to the Newton direction. The
+// value of lambda is updated each iteration depending on the
+// success of the last step. In this method the compromise is
+// between the "conjugate gradient" direction and the gradient
+// direction. The variable names follow the (excellent!) paper
+// by Martin Møller (1993) Neural Networks 6:525-533.
+// I have tried to follow the notation he uses in his paper, thus
+// enabling that to be the "documentation" for the routine below.
+
+NonlinOut sccngr(const NonlinParam& np, const NonlinCF& cfo)
+{
+  // Set up initial values
+  np.SetCF(cfo.cf(np.Par()));            // Current value for cost-function (E in Møller's paper).
+  double sigma = 1.0e-2;                 // Step-length when estimating H*p from g(w+sigma*p)-g(w)
+  double lambda_bar = 0.0;               // Update for lambda if approximate Hessian not positive definite
+  ColumnVector r = -cfo.grad(np.Par());  // Negative gradient
+  ColumnVector p = r;                    // Search direction
+  bool success = true;                   // True if previous step was successful
+  double delta = 0.0;                    // Used to check pos def of H in loop below
+  ColumnVector s(np.NPar());             // Used as approximation to H*p in loop below
+
+  while (np.NextIter()) {
+    double p2 = DotProduct(p,p);                          // p'*p, temporary variable to save some time
+    if (success == true) {                                // If last step led to reduction of cost-function
+      double sigma_k = sigma/sqrt(p2);                    // Normalised step-length when estimating H*p
+      s = (cfo.grad(np.Par()+sigma_k*p) + r) / sigma_k;   // Approximation to H*p
+      delta = DotProduct(p,s);                            // Approximation to p'*H*p
+    }
+    s += (np.Lambda()-lambda_bar)*p;                      // Equivalent to adding (l-lb)*I to H
+    delta += (np.Lambda()-lambda_bar)*p2;                 // If <0 then H+(l-lb)*I not positive definite
+    if (delta <= 0) {                                     // If H is not positive definite
+      s += (np.Lambda() - 2.0*(delta/p2)) * p;            // Make H more diagonally dominant to ensure pos def
+      lambda_bar = 2.0*(np.Lambda() - delta/p2);
+      delta = np.Lambda()*p2 - delta;
+      np.SetLambda(lambda_bar);
+    }
+    double mu = DotProduct(p,r);
+    double alpha = mu/delta;                              // Step size in direction p
+    double tmp_cf = cfo.cf(np.Par()+alpha*p);             // Value of cost-function at attempted new point
+    double Delta = 2.0*delta*(np.CF()-tmp_cf) / (mu*mu);  // >= 0 means attempted step did not increase cost-function
+    if (Delta >= 0) {                    // If step reduces cost-function
+      np.SetCF(tmp_cf);                  // Update lowest observed value of cost-function
+      np.SetPar(np.Par() + alpha*p);     // Update best set of parameters
+      lambda_bar = 0.0;
+      success = true;
+      if ((np.NIter()%np.NPar()) == 0) { // If npar iterations since last resetting of directions
+        r = -cfo.grad(np.Par());         // Reset search direction to negative gradient
+        p = r;
+      }
+      else {
+        ColumnVector oldr = r;
+        r = -cfo.grad(np.Par());
+        double beta = (DotProduct(r,r)-DotProduct(oldr,r)) / mu;
+        p = r + beta*p;                  // New search direction
+      }
+      if (Delta > 0.75) {                // If attempted step was REALLY good
+        np.SetLambda(np.Lambda()/2.0);
+      }
+    }
+    else {   // If step doesn't reduce cost-function
+      lambda_bar = np.Lambda();
+      success = false;
+    }
+    if (Delta < 0.25) {   // If step reduced cost-function only "a little" (or not at all)
+      np.SetLambda(4.0*np.Lambda());
+    }
+    if (zero_grad_conv(np.Par(),r,np.CF(),np.FractionalGradientTolerance())) {   // If gradient is (practically) zero
+      np.SetStatus(NL_GRADCONV); return(np.Status());
+    }
+  }
+  // If we get here we have exceeded allowed # of iterations
+  np.SetStatus(NL_MAXITER);
+  return(np.Status());
+}
+
+// Main routine for variable-metric optimisation. This implements
+// the variable-metric optimisation with the BFGS or DFP updating
+// schemes. The implementation details are mostly quite close to
+// those described in Numerical Recipes in C.
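+
+// For reference, the inverse-Hessian updates applied by VarmetMatrix::update()
+// (used by varmet() below) are, with s = x_{i+1} - x_i, y = grad_{i+1} - grad_i
+// and H the current estimate of the inverse Hessian:
+//
+//   DFP:   H <- H + (s*s')/(s'*y) - (H*y)*(H*y)'/(y'*H*y)
+//   BFGS:  as DFP, plus the additional term (y'*H*y)*u*u',
+//          where u = s/(s'*y) - (H*y)/(y'*H*y)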
+
+NonlinOut varmet(const NonlinParam& p, const NonlinCF& cfo)
+{
+  // Get scale factor to ensure a relative scale between
+  // parameters and cost-function such that fast and robust
+  // convergence is achieved.
+
+  double sf = cfo.sf();                      // Suggestion by "user"
+  ColumnVector grad = sf*cfo.grad(p.Par());  // Gradient of cost-function
+  if (p.VariableMetricAutoScale()) {
+    sf = scale_factor(p.Par(),-grad,cfo,p.LineSearchMaxIterations(),sf);   // Refinement by "me"
+    if (sf == 0.0) {             // No minimum in indicated direction
+      p.SetStatus(NL_PARCONV);   // Assume this means we are already at minimum
+      return(p.Status());
+    }
+    grad = (sf/cfo.sf()) * grad;
+  }
+
+  VarmetMatrix iH(p.NPar(),VM_OPT,p.VariableMetricUpdate());   // Inverse Hessian
+  p.SetCF(sf*cfo.cf(p.Par()));                                 // Current value of cost-function
+  ColumnVector pdir = -(iH*grad);                              // Direction to search in
+
+  double lambda = 0.0;             // Step-length returned by linsrch
+  double newcf = 0.0;              // New value for cost-function
+  ColumnVector newpar(p.NPar());   // New point in parameter space
+
+  while (p.NextIter()) {
+    // Do a line-search to find a new point in parameter space
+    LinOut status = linsrch(pdir,p.Par(),grad,cfo,p.CF(),sf,p.LineSearchMaxIterations(),
+                            p.LineSearchMaxStep(),p.VariableMetricAlpha(),
+                            p.LineSearchFractionalParameterTolerance(),&lambda,&newcf,&newpar);
+    // Check for convergence/problems based on outcome of linsrch
+    if (status == LM_MAXITER) {p.SetStatus(NL_LM_MAXITER); return(p.Status());}
+    else if (status == LM_LAMBDA_NILL) {   // This means we might be heading uphill and should restart
+      if (p.NextRestart()) {               // If we have spare restarts
+        p.SetCF(p.CF());                   // Another copy of old value
+        iH.reset();                        // Back to being unity matrix
+        pdir = -grad;
+        continue;
+      }
+      else {
+        p.SetStatus(NL_PARCONV); return(p.Status());
+      }
+    }
+    // Test for convergence based on distance between points in parameter space
+    ColumnVector dpar = newpar - p.Par();
+    p.SetPar(newpar);
+    p.SetCF(newcf);
+    if (zero_par_step_conv(dpar,p.Par(),p.FractionalParameterTolerance())) {p.SetStatus(NL_PARCONV); return(p.Status());}
+    // Get gradient at new point
+    ColumnVector newgrad = sf*cfo.grad(p.Par());
+    // Test for convergence based on "zero" gradient
+    if (zero_grad_conv(p.Par(),newgrad,p.CF(),p.FractionalGradientTolerance())) {p.SetStatus(NL_GRADCONV); return(p.Status());}
+    // Update estimate of inverse Hessian
+    iH.update(dpar,newgrad-grad);
+    // Update parameters and get new direction to go in
+    grad = newgrad;
+    pdir = -(iH*grad);   // N.B. no unary - op for iH, parenthesis necessary
+  }
+
+  // If we get here we have exceeded the allowed # of iterations
+  p.SetStatus(NL_MAXITER);
+
+  return(p.Status());
+}
+
+LinOut linsrch(// Input
+               const ColumnVector&  dir,     // Search direction
+               const ColumnVector&  p0,      // Current parameter values
+               const ColumnVector&  grad,    // Gradient at p0
+               const NonlinCF&      cfo,     // Cost-function object
+               double               f0,      // Current cost-function value
+               double               sf,      // Scale factor for cost-function
+               int                  maxiter, // Max # of iterations
+               double               sm,      // Stepmax
+               double               alpha,   // Alpha (sorry).
+               double               ptol,    // Tolerance in parameter space
+               // Output
+               double               *lambda, // Resulting step length
+               double               *of,     // Value of cost-function on output
+               ColumnVector         *np)     // New parameters
+{
+  const double lmin = 0.1;
+  const double lmax = 0.5;
+
+  // First make sure that the step-length suggested
+  // by pdir isn't completely unreasonable.
+
+  double totstep=sqrt(DotProduct(dir,dir));
+  ColumnVector pdir(dir);
+  if (totstep > sm) {pdir *= sm/totstep;}
+
+  // Calculate expected rate of change in the direction
+  // given by pdir.
+
+  double fp0 = DotProduct(grad,pdir);
+
+  // Calculate smallest meaningful lambda given what is
+  // the smallest meaningful change in parameter value.
+
+  double almin=0.0;
+  for (int i=0; i<p0.Nrows(); i++) {
+    almin = max(almin,abs(pdir.element(i))/max(abs(p0.element(i)),1.0));
+  }
+  almin = ptol / almin;
+
+  // First try a step of full lambda
+
+  *lambda = 1.0;                  // Start with that
+  (*np) = p0 + (*lambda)*pdir;    // First new parameters to try
+  double f2 = sf * cfo.cf(*np);   // Cost-function value for par
+
+  // See if that does it (fat chance!)
+  if (f2 < f0 + alpha*(*lambda)*DotProduct(grad,(*np)-p0)) {*of = f2; return(LM_CONV);}
+
+  // Calculate quadratic based on f(0), f'(0)
+  // and f(1) and find minimum of that quadratic.
+
+  *lambda = - fp0 / (2.0*(f2-f0-fp0));   // Minimum of f(lambda)
+  // Make sure new lambda is 0.1*old_l < lambda < 0.5*old_l
+  *lambda = max(lmin,*lambda);
+  *lambda = min(lmax,*lambda);
+  (*np) = p0 + (*lambda)*pdir;    // Second set of new parameters to try
+  double f1 = sf * cfo.cf(*np);   // Cost-function value for par
+
+  // Now we will start fitting cubics to f(0), f'(0),
+  // f(lambda_1) and f(lambda_2) where lambda_1 is the
+  // lambda most recently tested and where lambda_2 is
+  // the lambda tested second to last.
+
+  double l2 = 1.0;       // Second to last lambda
+  double l1 = *lambda;   // Last lambda
+  Matrix X(2,2);
+  ColumnVector y(2);
+
+  for (int iter=0; iter<maxiter; iter++) {
+    // See if present lambda might be too small
+    if (*lambda < almin) {*of = f1; return(LM_LAMBDA_NILL);}
+    // See if present value is acceptable
+    if (f1 < f0 + alpha*(*lambda)*DotProduct(grad,(*np)-p0)) {*of = f1; return(LM_CONV);}
+    // Find parameter values for cubic and square terms of lambda
+    X << pow(l1,3) << pow(l1,2) << pow(l2,3) << pow(l2,2);
+    y << f1-fp0*l1-f0 << f2-fp0*l2-f0;
+    ColumnVector b = X.i()*y;
+    // Find value for lambda that yields minimum of cubic
+    *lambda = (-b.element(1) + sqrt(pow(b.element(1),2) - 3.0*b.element(0)*fp0)) / (3.0*b.element(0));
+    // Make sure new lambda is 0.1*old_l < lambda < 0.5*old_l
+    *lambda = max(lmin*l1,*lambda);
+    *lambda = min(lmax*l1,*lambda);
+    // Get new function value and update parameters
+    f2 = f1;
+    (*np) = p0 + (*lambda)*pdir;
+    f1 = sf * cfo.cf(*np);
+    l2 = l1;
+    l1 = *lambda;
+  }
+
+  // If we are here we have exceeded # of iterations
+
+  *of = f1;
+  return(LM_MAXITER);
+}
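+
+// For reference, linsrch() above accepts a step as soon as the sufficient-
+// decrease condition
+//
+//   f(p0 + lambda*pdir) < f(p0) + alpha*lambda * grad'*(p_new - p0)
+//
+// (with p_new - p0 = lambda*pdir) is satisfied. Otherwise it backtracks by
+// modelling f along pdir as the cubic
+//
+//   f(l) = a*l^3 + b*l^2 + f'(0)*l + f(0)
+//
+// with a and b solved for from the two most recent trial points, and taking
+// the minimum of that cubic,
+//
+//   l = ( -b + sqrt(b^2 - 3*a*f'(0)) ) / (3*a),
+//
+// clamped to [0.1, 0.5] times the previous lambda.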
+
+// Will try and find a scale factor for the cost-function such that
+// the step length (lambda) for the first iteration of the variable-
+// metric method is ~0.25. Empirically I have found that a scaling
+// that yields a first step length in the range 0.1 -- 0.5 will yield
+// robust and fast convergence. N.B. though that I have only tested that
+// for non-linear registration, and it is conceivable that it is different
+// for other applications with fewer parameters.
+
+double scale_factor(const ColumnVector&  p,       // Current parameter values
+                    const ColumnVector&  pdir,    // Search direction
+                    const NonlinCF&      cfo,     // Cost-function object
+                    int                  maxiter, // Max # of iterations
+                    double               sf)      // Scale factor.
+{
+  const double dl = 0.25;     // Desired Lambda
+  const double ftol = 0.01;   // Fractional tolerance for minimum
+
+  // Start out by finding an upper bound for lambda such that
+  // a minimum is guaranteed to be between 0 and rp
+
+  pair<double,double> lp;
+  pair<double,double> mp;
+  pair<double,double> rp = bracket(p,pdir,cfo,ftol,sf,&lp,&mp);
+  if (rp == mp) {   // If there is no minimum in the indicated direction
+    return(0.0);
+  }
+
+  // Now find a minimum with a fractional accuracy of ~1%
+
+  pair<double,double> minpoint;
+
+  if (linmin(p,pdir,cfo,sf,lp,mp,rp,ftol,maxiter,&minpoint) == LM_MAXITER) {
+    throw NonlinException("Failed to find minimum along search direction");
+  }
+
+  sf *= minpoint.first/dl;
+  return(sf);
+}
+
+// Will find the minimum of the cost-function as a function of
+// lambda. This routine will find the minimum to a fractional
+// tolerance of lambda. This is NOT practical to use for the
+// variable-metric minimisation (too slow), but is used for
+// the conjugate-gradient method and for finding the initial
+// scaling between parameters and cost-function for the
+// variable-metric method.
+
+LinOut linmin(// Input
+              const ColumnVector&  p,      // Current parameter values
+              const ColumnVector&  pdir,   // Search direction
+              const NonlinCF&      cfo,    // Cost-function object
+              double               sf,     // Scale factor for cost-function
+              pair<double,double>  lp,     // Left point
+              pair<double,double>  mp,     // Point somewhere in interval
+              pair<double,double>  rp,     // Right point
+              double               ftol,   // Fractional tolerance
+              int                  maxiter,// Max # of iterations
+              // Output
+              pair<double,double>  *x)     // Best point
+{
+  const double gold = 0.382;    // Golden section
+  pair<double,double> test;     // New point to test
+  pair<double,double> w = mp;   // Second best point
+  pair<double,double> v = mp;   // Last value of second best point
+  double step = 0.0;
+  double ostep = 0.0;           // Length of 2nd to last step taken
+  double d = 0.0;               // Length of last step taken
+  ColumnVector y(3);            // Used for fitting parabola
+  Matrix X(3,3);                // Used for fitting parabola
+  *x = mp;                      // Initialise "best" point
+
+  for (int i=0; i<maxiter; i++) {
+    double midp = (rp.first+lp.first)/2.0;                // Midpoint of bracketing points
+    double tol = 2.0*ftol*abs(x->first)+MISCMATHS::EPS;   // Absolute tolerance
+    if (abs(x->first-midp) <= (tol-0.5*(rp.first-lp.first))) {   // Convergence check
+      return(LM_CONV);
+    }
+    // Try parabolic fit, but not before third iteration
+    double tmp = 10.0*sqrt(MISCMATHS::EPS);
+    if (abs(ostep) > tol/2.0 &&          // If second to last step big enough
+        abs(x->first-w.first) > tmp &&
+        abs(x->first-v.first) > tmp &&
+        abs(w.first-v.first) > tmp) {    // And points not degenerate
+      step = ostep;
+      ostep = d;
+      y << x->second << w.second << v.second;
+      X << pow(x->first,2.0) << x->first << 1.0 <<
+           pow(w.first,2.0) << w.first << 1.0 <<
+           pow(v.first,2.0) << v.first << 1.0;
+      ColumnVector b = X.i() * y;
+      if (b.element(0) < 4*MISCMATHS::EPS ||                           // If points on a line, or parabola has a maximum
+          (test.first = -b.element(1)/(2.0*b.element(0))) <= lp.first
+          || test.first >= rp.first ||                                 // If outside bracketed interval
+          abs(test.first-x->first) > 0.5*step) {                       // Or if step too big (indicates oscillation)
+        // Take golden step into larger interval
+        if (rp.first-x->first > x->first-lp.first) {   // If right interval larger
+          test.first = x->first + gold * (rp.first - x->first);
+        }
+        else {
+          test.first = x->first - gold * (x->first - lp.first);
+        }
+      }
+    }
+    else {   // Take golden step into larger interval
+      if (x->first < midp) {   // If right interval larger
+        ostep = rp.first - x->first;
+        test.first = x->first + gold * (rp.first - x->first);
+      }
+      else {
+        ostep = x->first - lp.first;
+        test.first = x->first - gold * (x->first - lp.first);
+      }
+    }
+    d = test.first - x->first;                    // Signed length of step
+    test.second = sf*cfo.cf(p+test.first*pdir);   // Evaluate cf at new point
+    // Now we have a new point, and we need to figure out what to do with it
+    if (test.second <= x->second) {   // If it beats the best step
+      if (test.first > x->first) {lp = *x;}
+      else {rp = *x;}
+      v = w; w = *x; *x = test;
+    }
+    else {
+      if (test.first < x->first) {lp = test;}
+      else {rp = test;}
+      if (test.second <= w.second || w.first == x->first) {
+        v = w; w = test;
+      }
+      else if (test.second <= v.second || v.first == x->first || v.first == w.first) {
+        v = test;
+      }
+    }
+  }
+  // If we are here we have used too many iterations
+  return(LM_MAXITER);   // Error status
+}
+
+// Will return a value lambda such that a function minimum is guaranteed to
+// lie somewhere in the interval between p and p+lambda*pdir. The second value
+// of the returned pair is the cost-function value at that point.
+
+pair<double,double> bracket(// Input
+                            const ColumnVector&  p,    // Current parameter values
+                            const ColumnVector&  pdir, // Search direction
+                            const NonlinCF&      cfo,  // Cost-function object
+                            double               ptol, // Relative tolerance for parameter values
+                            double               sf,   // Scale factor of cost-function
+                            // Output
+                            pair<double,double>  *p_0, // Cost-function value at p
+                            pair<double,double>  *p_m) // Point between p_0 and p_l
+{
+  pair<double,double> p_l;
+  const double gr = 0.618034;
+  const double maxstep = 100.0;
+  p_0->first = 0.0;
+  double cf0 = sf*cfo.cf(p);
+  double l1 = 1.0;
+  double cf1 = sf*cfo.cf(p+l1*pdir);
+
+  // Find maximum relative component of search direction
+
+  double test = 0.0;
+  for (int i=0; i<pdir.Nrows(); i++) {test = max(test,abs(pdir.element(i))/max(abs(p.element(i)),1.0));}
+
+  // Shrink the step until the cost-function value is finite
+
+  while (!isfinite(cf1)) {
+    l1 *= 0.1;
+    cf1 = sf*cfo.cf(p+l1*pdir);
+  }
+
+  // Get third point to get us started
+  double l2 = 0.0;
+  double cf2 = 0.0;
+  if (cf1 < cf0) {l2 = (2.0 + gr)*l1; cf2 = sf*cfo.cf(p+l2*pdir);}
+  else {l2 = l1; cf2 = cf1; l1 = gr * l2; cf1 = sf*cfo.cf(p+l1*pdir);}
+  // Check if we already have a bracket
+  if (cf1 < cf0 && cf1 < cf2) {
+    p_l.first = l2; p_l.second = cf2;
+    p_m->first = l1; p_m->second = cf1;
+    p_0->second = cf0;
+    return(p_l);
+  }
+
+  Matrix X(2,2);
+  ColumnVector y(2);
+  double lt = 0.0;
+  double cft = 0.0;
+
+  while (!(cf1 < cf0 && cf1 < cf2)) {   // If minimum still not bracketed
+    if (l2*test < ptol) {               // If interval ridiculously small
+      p_0->second = cf0;                // Make sure output is fully defined
+      p_l = *p_0;
+      *p_m = *p_0;
+      return(p_l);
+    }
+    // Let's see if a parabola might help us
+    if (abs(l2-l1) > 10.0*sqrt(MISCMATHS::EPS)) {
+      X << pow(l1,2.0) << l1 << pow(l2,2.0) << l2;
+      y << cf1 << cf2;
+      ColumnVector b = X.i()*y;
+      if (b.element(0) > 4.0*MISCMATHS::EPS) {          // Check points are not on a line (and parabola not a maximum)
+        lt = - (b.element(1) / (2.0 * b.element(0)));   // Tentative point
+        if (lt > 0 && lt < l2) {                        // If in range of previous points
+          cft = sf*cfo.cf(p+lt*pdir);
+          if (cft < cf0 && cft < cf2) {l1=lt; cf1=cft; continue;}
+          else if (cft > cf0 && lt < l1 && cft < cf1) {l2=l1; cf2=cf1; l1=lt; cf1=cft; continue;}
+          else if (cft > cf0 && lt > l1 && cft < cf2) {l2=lt; cf2=cft; continue;}
+        }
+        else if (lt > 0 && lt < maxstep*l2) {   // If expansion in allowed range
+          cft = sf*cfo.cf(p+lt*pdir);
+          l1 = l2; cf1 = cf2;
+          l2 = lt; cf2 = cft;
+          continue;
+        }
+      }
+    }
+    // If we are here the parabola was of no use
+    if (cf2 < cf0) {   // We need to expand
+      lt = (2.0 + gr)*l2;
+      cft = sf*cfo.cf(p+lt*pdir);
+      l1 = l2; cf1 = cf2;
+      l2 = lt; cf2 = cft;
+    }
+    else {             // We need to contract
+      lt = gr * l1;
+      cft = sf*cfo.cf(p+lt*pdir);
+      l2 = l1; cf2 = cf1;
+      l1 = lt; cf1 = cft;
+    }
+  }
+
+  // If we are here we know that there is a minimum
+  // somewhere between 0 and l2;
+
+  p_0->second = cf0;
+  p_m->first = l1;
+  p_m->second = cf1;
+  p_l.first = l2;
+  p_l.second = cf2;
+  return(p_l);
+}
+
+// Utility routines that check for convergence based on various criteria
+
+// Based on zero (negligible) gradient
+
+bool zero_grad_conv(const ColumnVector&  par,
+                    const ColumnVector&  grad,
+                    double               cf,
+                    double               gtol)
+{
+  double test = 0.0;   // test will be largest relative component of gradient
+  for (int i=0; i<par.Nrows(); i++) {
+    test = max(test,abs(grad.element(i))*max(abs(par.element(i)),1.0));
+  }
+  test /= max(cf,1.0);   // Protect against near-zero values for cost-function
+
+  return(test < gtol);
+}
+
+// Based on zero (negligible) decrease in cost-function
+
+bool zero_cf_diff_conv(double  cfo,
+                       double  cfn,
+                       double  cftol)
+{
+  return(2.0*abs(cfo-cfn) <= cftol*(abs(cfo)+abs(cfn)+MISCMATHS::EPS));
+}
+
+// Based on zero (negligible) step in parameter space
+
+bool zero_par_step_conv(const ColumnVector&  step,
+                        const ColumnVector&  par,
+                        double               ptol)
+{
+  double test = 0.0;
+  for (int i=0; i<par.Nrows(); i++) {
+    test = max(test,abs(step.element(i))/max(abs(par.element(i)),1.0));
+  }
+  return(test < ptol);
+}
+
+// Utility routines that allow the user to check the accuracy of their own grad and hess functions
+
+pair<ColumnVector,ColumnVector> check_grad(const ColumnVector&  par,
+                                           const NonlinCF&      cfo)
+{
+  pair<ColumnVector,ColumnVector> rv;
+
+  rv.first = cfo.NonlinCF::grad(par);
+  rv.second = cfo.grad(par);
+
+  return(rv);
+}
+
+pair<boost::shared_ptr<BFMatrix>,boost::shared_ptr<BFMatrix> > check_hess(const ColumnVector&  par,
+                                                                          const NonlinCF&      cfo)
+{
+  pair<boost::shared_ptr<BFMatrix>,boost::shared_ptr<BFMatrix> > rv;
+
+  rv.first = cfo.NonlinCF::hess(par);
+  rv.second = cfo.hess(par);
+
+  return(rv);
+}
+
+
+void print_newmat(const NEWMAT::GeneralMatrix&  m,
+                  std::string                   fname)
+{
+  if (!fname.length()) {
+    cout << endl << m << endl;
+  }
+  else {
+    try {
+      std::ofstream fout(fname.c_str());
+      fout << setprecision(10) << m;
+    }
+    catch(...) {
+      std::string errmsg("print_newmat: Failed to write to file " + fname);
+      throw NonlinException(errmsg);
+    }
+  }
+}
+
+} // End namespace MISCMATHS
diff --git a/nonlin.h b/nonlin.h
new file mode 100644
index 0000000000000000000000000000000000000000..6f6edcc3ce30116c0bb2e4667adbfed262c60621
--- /dev/null
+++ b/nonlin.h
@@ -0,0 +1,364 @@
+// Declarations for nonlinear optimisation
+
+#ifndef nonlin_h
+#define nonlin_h
+
+#include <string>
+#include <vector>
+#include <boost/shared_ptr.hpp>
+#include "bfmatrix.h"
+#include "newmat.h"
+
+namespace MISCMATHS {
+
+enum NLMethod {NL_VM,    // Variable-Metric (see NRinC)
+               NL_CG,    // Conjugate-Gradient (see NRinC)
+               NL_SCG,   // Scaled Conjugate-Gradient (see Møller 1993)
+               NL_LM};   // Levenberg-Marquardt (see NRinC)
+
+enum LMType {LM_L, LM_LM};   // Levenberg or Levenberg-Marquardt
+
+enum VMUpdateType {VM_DFP, VM_BFGS};   // See NRinC chapter 10.
+
+enum CGUpdateType {CG_FR, CG_PR};   // Fletcher-Reeves, Polak-Ribiere
+
+enum VMMatrixType {VM_OPT,    // Let the size of the problem decide between VM_COL and VM_FULL
+                   VM_COL,    // Store all rank-one updates as column-vectors
+                   VM_FULL};  // Store full estimate of inverse Hessian
+
+enum LinOut {LM_MAXITER,      // Too many iterations in line-minimisation
+             LM_LAMBDA_NILL,  // Could not find a minimum along this direction
+             LM_CONV};        // Line-minimisation converged.
+
+enum NonlinOut {NL_UNDEFINED,  // Initial value before minimisation
+                NL_MAXITER,    // Too many iterations
+                NL_LM_MAXITER, // Too many iterations during a line-minimisation
+                NL_PARCONV,    // Convergence. Step in parameter space small
+                NL_GRADCONV,   // Convergence. Gradient small
+                NL_CFCONV,     // Convergence. Change in cost-function small
+                NL_LCONV};     // Convergence. Lambda very large
+
+const double EPS = 2.0e-16;    // Loosely based on NRinC 20.1
+
+class NonlinException: public std::exception
+{
+private:
+  std::string m_msg;
+public:
+  NonlinException(const std::string& msg) throw(): m_msg("Nonlin: msg=" + msg) {}
+
+  virtual const char * what() const throw() {
+    return m_msg.c_str();   // N.B. message is stored in m_msg so the pointer stays valid
+  }
+
+  ~NonlinException() throw() {}
+};
+
+// NonlinParam is a class that contains the
+// information about "how" the
+// minimisation should be performed. I.e. it
+// contains things like choice of minimisation
+// algorithm, # of parameters, convergence
+// criteria etc.
+
+class NonlinParam
+{
+public:
+
+  NonlinParam(int                   pnpar,
+              NLMethod              pmtd,
+              NEWMAT::ColumnVector  ppar=NEWMAT::ColumnVector(),
+              bool                  plogcf=false,
+              bool                  ploglambda=false,
+              bool                  plogpar=false,
+              int                   pmaxiter=200,
+              double                pcftol=1.0e-8,
+              double                pgtol=1.0e-8,
+              double                pptol=4.0*EPS,
+              VMUpdateType          pvmut=VM_BFGS,
+              double                palpha=1.0e-4,
+              double                pstepmax=10,
+              int                   plm_maxiter=50,
+              int                   pmaxrestart=0,
+              bool                  pautoscale=true,
+              CGUpdateType          pcgut=CG_PR,
+              double                plm_ftol=1.0e-3,
+              LMType                plmtype=LM_LM,
+              double                pltol=1.0e20,
+              int                   pcg_maxiter=200,
+              double                pcg_tol=1.0e-6,
+              double                plambda=0.1)
+  : npar(pnpar), mtd(pmtd), logcf(plogcf),
+    loglambda(ploglambda), logpar(plogpar), maxiter(pmaxiter),
+    cftol(pcftol), gtol(pgtol), ptol(pptol), vmut(pvmut),
+    alpha(palpha), stepmax(pstepmax), lm_maxiter(plm_maxiter),
+    maxrestart(pmaxrestart), autoscale(pautoscale), cgut(pcgut),
+    lm_ftol(plm_ftol), lmtype(plmtype), ltol(pltol), cg_maxiter(pcg_maxiter),
+    cg_tol(pcg_tol), lambda(), cf(), par(), niter(0), nrestart(0), status(NL_UNDEFINED)
+  {
+    lambda.push_back(plambda);
+    if (ppar.Nrows()) SetStartingEstimate(ppar);
+    else {
+      NEWMAT::ColumnVector tmp(npar);
+      tmp = 0.0;
+      SetStartingEstimate(tmp);
+    }
+  }
+  ~NonlinParam() {}
+
+  // Routines to check values
+
+  int NPar() const {return(npar);}
+  NLMethod Method() const {return(mtd);}
+  int MaxIter() const {return(maxiter);}
+  int NIter() const {return(niter);}
+  double FractionalCFTolerance() const {return(cftol);}
+  double FractionalGradientTolerance() const {return(gtol);}
+  double FractionalParameterTolerance() const {return(ptol);}
+  VMUpdateType VariableMetricUpdate() const {return(vmut);}
+  double VariableMetricAlpha() const {return(alpha);}
+  int MaxVariableMetricRestarts() const {return(maxrestart);}
+  int VariableMetricRestarts() const {return(nrestart);}
+  bool VariableMetricAutoScale() const {return(autoscale);}
+  double LineSearchMaxStep() const {return(stepmax);}
+  int LineSearchMaxIterations() const {return(lm_maxiter);}
+  CGUpdateType ConjugateGradientUpdate() const {return(cgut);}
+  double LineSearchFractionalParameterTolerance() const {return(lm_ftol);}
+  LMType GaussNewtonType() const {return(lmtype);}
+  double LambdaConvergenceCriterion() const {return(ltol);}
+  int EquationSolverMaxIter() const {return(cg_maxiter);}
+  double EquationSolverTol() const {return(cg_tol);}
+  bool LoggingParameters() const {return(logpar);}
+  bool LoggingCostFunction() const {return(logcf);}
+  bool LoggingLambda() const {return(loglambda);}
+
+  // Routines to get output
+
+  double Lambda() const {return(lambda.back());}
+  double InitialLambda() const {if (loglambda) return(lambda[0]); else {throw NonlinException("InitialLambda: Lambda not logged"); return(0.0);}}
+  const std::vector<double>& LambdaHistory() const {if (loglambda) return(lambda); else {throw NonlinException("LambdaHistory: Lambda not logged"); return(lambda);}}
+  const NEWMAT::ColumnVector& Par() const {return(par.back());}
+  const NEWMAT::ColumnVector& InitialPar() const {if (logpar) return(par[0]); else {throw NonlinException("InitialPar: Parameters not logged"); return(par[0]);}}
+  const std::vector<NEWMAT::ColumnVector>& ParHistory() const {if (logpar) return(par); else {throw NonlinException("ParHistory: Parameters not logged"); return(par);}}
+  double CF() const {return(cf.back());}
+  double InitialCF() const {if (logcf) return(cf[0]); else {throw NonlinException("InitialCF: Cost-function not logged"); return(cf[0]);}}
+  const std::vector<double> CFHistory() const {if (logcf) return(cf); else {throw NonlinException("CFHistory: Cost-function not logged"); return(cf);}}
+  NonlinOut Status() const {return(status);}
+
+  // Routines to set values of steering parameters
+  void SetMethod(NLMethod pmtd) {mtd = pmtd;}
+  void LogCF(bool flag=true) {logcf = flag;}
+  void LogPar(bool flag=true) {logpar = flag;}
+  void LogLambda(bool flag=true) {loglambda = flag;}
+  void SetStartingEstimate(NEWMAT::ColumnVector& sp) {
+    if (niter) throw NonlinException("SetStartingEstimate: Object has to be reset before setting new starting parameters");
+    SetPar(sp);
+  }
+  void SetMaxIter(unsigned int pmiter) {maxiter = pmiter;}
+  void SetFractionalCFTolerance(double pcftol) {
+    if (pcftol>0.5) throw NonlinException("SetFractionalCFTolerance: Nonsensically large tolerance");
+    else if (pcftol <= 0.0) throw NonlinException("SetFractionalCFTolerance: Tolerance must be non-zero and positive");
+    cftol = pcftol;
+  }
+  void SetFractionalGradientTolerance(double pgtol) {
+    if (pgtol>0.5) throw NonlinException("SetFractionalGradientTolerance: Nonsensically large tolerance");
+    else if (pgtol <= 0.0) throw NonlinException("SetFractionalGradientTolerance: Tolerance must be non-zero and positive");
+    gtol = pgtol;
+  }
+  void SetFractionalParameterTolerance(double pptol) {
+    if (pptol>0.5) throw NonlinException("SetFractionalParameterTolerance: Nonsensically large tolerance");
+    else if (pptol <= 0.0) throw NonlinException("SetFractionalParameterTolerance: Tolerance must be non-zero and positive");
+    ptol = pptol;
+  }
+  void SetVariableMetricUpdate(VMUpdateType pvmut) {vmut = pvmut;}
+  void SetVariableMetricAlpha(double palpha) {
+    if (palpha>=1.0 || palpha<=0.0) throw NonlinException("SetVariableMetricAlpha: Alpha must be between 0 and 1");
+    alpha = palpha;
+  }
+  void SetMaxVariableMetricRestarts(unsigned int pmaxrestart) {maxrestart = pmaxrestart;}
+  void SetVariableMetricAutoScale(bool flag=true) {autoscale = flag;}
+  void SetLineSearchMaxStep(double pstepmax) {
+    if (pstepmax<=0) throw NonlinException("SetLineSearchMaxStep: maxstep must be non-zero and positive");
+    stepmax = pstepmax;
+  }
+  void SetLineMinimisationMaxIterations(unsigned int plm_maxiter) {lm_maxiter = plm_maxiter;}
+  void SetConjugateGradientUpdate(CGUpdateType pcgut) {cgut = pcgut;}
+  void SetLineMinimisationFractionalParameterTolerance(double plm_ftol) {
+    if (plm_ftol>0.5) throw NonlinException("SetLineMinimisationFractionalParameterTolerance: Nonsensically large tolerance");
+    else if (plm_ftol <= 0.0) throw NonlinException("SetLineMinimisationFractionalParameterTolerance: Tolerance must be non-zero and positive");
+    lm_ftol = plm_ftol;
+  }
+  void SetGaussNewtonType(LMType plmtype) {lmtype = plmtype;}
+  void SetLambdaConvergenceCriterion(double pltol) {
+    if (pltol<1.0) throw NonlinException("SetLambdaConvergenceCriterion: Nonsensically small tolerance");
+    ltol = pltol;
+  }
+  void SetEquationSolverMaxIter(int pcg_maxiter) {cg_maxiter = pcg_maxiter;}
+  void SetEquationSolverTol(double pcg_tol) {cg_tol = pcg_tol;}
+
+
+  // Reset is used to reset a NonlinParam object after it has run to convergence, thereby allowing it
+  // to be reused with a different CF object. This is to avoid the cost of creating the object many
+  // times when fitting for example multiple voxels.
+  void Reset() {
+    // Minimal implementation: clears the iteration state so that a new
+    // starting estimate can be set. Any logged histories are kept.
+    niter = 0;
+    nrestart = 0;
+    status = NL_UNDEFINED;
+  }
+  // Routines used by the (global) non-linear fitting routines. Note that these can
+  // all be called for const objects.
+  void SetPar(const NEWMAT::ColumnVector& p) const {
+    if (p.Nrows() != npar) throw NonlinException("SetPar: Mismatch between starting vector and # of parameters");
+    if (logpar || !par.size()) par.push_back(p);
+    else par[0] = p;
+  }
+  void SetCF(double pcf) const {
+    if (logcf || !cf.size()) cf.push_back(pcf);
+    else cf[0] = pcf;
+  }
+  void SetLambda(double pl) const {
+    if (loglambda || !lambda.size()) lambda.push_back(pl);
+    else lambda[0] = pl;
+  }
+  bool NextIter(bool success=true) const {if (success && niter++ >= maxiter) return(false); else return(true);}
+  bool NextRestart() const {if (nrestart++ >= maxrestart) return(false); else return(true);}
+  void SetStatus(NonlinOut pstatus) const {status = pstatus;}
+private:
+
+  // INPUT PARAMETERS
+  //
+  // Parameters that apply to all algorithms
+
+  const int     npar;        // # of parameters
+  NLMethod      mtd;         // Minimisation method
+  bool          logcf;       // If true, history of cost-function is logged
+  bool          loglambda;   // If true, history of lambda is logged
+  bool          logpar;      // If true, history of parameters is logged
+  int           maxiter;     // Maximum # of iterations allowed
+  double        cftol;       // Tolerance for cost-function convergence criterion
+  double        gtol;        // Tolerance for gradient convergence criterion
+  double        ptol;        // Tolerance for parameter convergence criterion
+
+  // Parameters that apply to Variable-Metric algorithm
+
+  VMUpdateType  vmut;        // DFP or BFGS
+  double        alpha;       // Criterion for convergence in line minimisation
+  double        stepmax;     // Maximum step length for line minimisation
+  int           lm_maxiter;  // Maximum # of iterations for line minimisation
+  int           maxrestart;  // Maximum # of restarts that should be done.
+  bool          autoscale;   // "Automatic" search for optimal scaling
+
+  // Parameters that apply to CG algorithm
+
+  CGUpdateType  cgut;        // Fletcher-Reeves or Polak-Ribiere
+  double        lm_ftol;     // Convergence criterion for line-search
+
+  // Parameters that apply to LM algorithm
+
+  LMType        lmtype;      // Levenberg or Levenberg-Marquardt
+  double        ltol;        // Convergence criterion based on large lambda
+  int           cg_maxiter;  // Maximum # of iterations for iterative "inverse" of Hessian
+  double        cg_tol;      // Tolerance for iterative "inverse" of Hessian
+
+  //
+  // OUTPUT PARAMETERS
+  //
+  mutable std::vector<double>                lambda;    // (History of) lambda (LM and SCG type minimisation)
+  mutable std::vector<double>                cf;        // (History of) cost-function
+  mutable std::vector<NEWMAT::ColumnVector>  par;       // (History of) parameter estimates
+  mutable int                                niter;     // Number of iterations
+  mutable int                                nrestart;  // Number of restarts
+  mutable NonlinOut                          status;    // Output status
+
+  NonlinParam& operator=(const NonlinParam& rhs);   // Hide assignment
+
+};
+
+// NonlinCF (Cost Function) is a virtual
+// class that defines a minimal interface.
+// By subclassing NonlinCF the "user" can
+// create a class that allows him/her to
+// use NONLIN to minimise his/her function.
+class NonlinCF
+{
+private:
+  NonlinCF& operator=(const NonlinCF& rhs);   // Hide assignment
+public:
+  NonlinCF() {}
+  virtual ~NonlinCF() {}
+  virtual double sf() const {return(1.0);}
+  virtual NEWMAT::ReturnMatrix grad(const NEWMAT::ColumnVector& p) const;
+  virtual boost::shared_ptr<BFMatrix> hess(const NEWMAT::ColumnVector&  p,
+                                           boost::shared_ptr<BFMatrix>  iptr=boost::shared_ptr<BFMatrix>()) const;
+  virtual double cf(const NEWMAT::ColumnVector& p) const = 0;
+};
+
+// VarmetMatrix is a "helper" class
+// that makes it a little easier to
+// implement variable-metric minimisation.
+class VarmetMatrix
+{
+private:
+  int                                sz;
+  VMMatrixType                       mtp;
+  VMUpdateType                       utp;
+  NEWMAT::Matrix                     mat;
+  std::vector<double>                sf;
+  std::vector<NEWMAT::ColumnVector>  vec;
+
+  VarmetMatrix& operator=(const VarmetMatrix& rhs);   // Hide assignment
+
+public:
+  explicit VarmetMatrix(int psz, VMMatrixType pmtp, VMUpdateType putp)
+  : sz(psz), mtp(pmtp), utp(putp)
+  {
+    if (sz > 0 && mtp == VM_OPT) {
+      if (sz < 100) {
+        mtp = VM_FULL;
+        NEWMAT::IdentityMatrix tmp(sz);
+        mat = tmp;
+      }
+      else {
+        mtp = VM_COL;
+      }
+    }
+  }
+
+  ~VarmetMatrix() {}
+
+  int size() {return(sz);}
+  VMUpdateType updatetype() {return(utp);}
+  VMMatrixType matrixtype() {return(mtp);}
+  void print() const;
+
+  void reset()
+  {
+    if (sz > 0) {
+      if (mtp == VM_FULL) {
+        NEWMAT::IdentityMatrix tmp(sz);
+        mat = tmp;
+      }
+      else if (mtp == VM_COL) {
+        sf.clear();
+        vec.clear();
+      }
+    }
+  }
+
+  void update(const NEWMAT::ColumnVector& pdiff,   // x_{i+1} - x_i
+              const NEWMAT::ColumnVector& gdiff);  // \nabla f_{i+1} - \nabla f_i
+  friend NEWMAT::ColumnVector operator*(const VarmetMatrix&          m,
+                                        const NEWMAT::ColumnVector&  v);
+};
+
+// Declaration of (global) main function for minimisation
+
+NonlinOut nonlin(const NonlinParam& p, const NonlinCF& cfo);
+
+// Declarations of global utility functions
+
+std::pair<NEWMAT::ColumnVector,NEWMAT::ColumnVector> check_grad(const NEWMAT::ColumnVector&  par,
+                                                                const NonlinCF&              cfo);
+
+std::pair<boost::shared_ptr<BFMatrix>,boost::shared_ptr<BFMatrix> > check_hess(const NEWMAT::ColumnVector&  par,
+                                                                               const NonlinCF&              cfo);
+
+} // End namespace MISCMATHS
+
+#endif // end #ifndef nonlin_h
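+
+// ---------------------------------------------------------------------------
+// Illustrative sketch (not part of the library): one way a user might
+// subclass NonlinCF and call nonlin(). The cost-function below (a simple
+// quadratic with minimum at p_i = i) and all names in the example are
+// hypothetical; the inherited numerical grad() is used by the
+// variable-metric method.
+//
+//   #include <iostream>
+//   #include "nonlin.h"
+//
+//   class QuadraticCF : public MISCMATHS::NonlinCF
+//   {
+//   public:
+//     virtual double cf(const NEWMAT::ColumnVector& p) const
+//     {
+//       double v = 0.0;   // f(p) = sum_i (p_i - i)^2
+//       for (int i=1; i<=p.Nrows(); i++) v += (p(i)-double(i))*(p(i)-double(i));
+//       return(v);
+//     }
+//   };
+//
+//   int main()
+//   {
+//     QuadraticCF cf;
+//     MISCMATHS::NonlinParam np(4,MISCMATHS::NL_VM);   // 4 parameters, variable-metric
+//     MISCMATHS::NonlinOut status = MISCMATHS::nonlin(np,cf);
+//     std::cout << "status = " << status << ", cf = " << np.CF() << std::endl;
+//     return(0);
+//   }
+// ---------------------------------------------------------------------------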