Commit fd64d0cb authored by Jesper Andersson

Added gradient descent and fixed bug in conjugate gradient

parent 74b41900
@@ -24,6 +24,10 @@ namespace MISCMATHS {
NonlinOut varmet(const NonlinParam& p, const NonlinCF& cfo);
// Main routine for Gradient-descent optimisation
NonlinOut grades(const NonlinParam& p, const NonlinCF& cfo);
// Main routine for Conjugate-Gradient optimisation
NonlinOut congra(const NonlinParam& p, const NonlinCF& cfo);
@@ -102,6 +106,36 @@ void print_newmat(const NEWMAT::GeneralMatrix& m,
std::string fname);
std::string NonlinParam::TextStatus() const
{
  switch (status) {
  case NL_UNDEFINED:
    return(std::string("Status is undefined. Object has been created but no minimisation has been performed"));
    break;
  case NL_MAXITER:
    return(std::string("The optimisation did not converge because the maximum number of iterations was exceeded"));
    break;
  case NL_LM_MAXITER:
    return(std::string("The optimisation did not converge because the maximum number of iterations for a single line minimisation was exceeded"));
    break;
  case NL_PARCONV:
    return(std::string("The optimisation converged. The convergence criterion was that the last step in parameter space was very short"));
    break;
  case NL_GRADCONV:
    return(std::string("The optimisation converged. The convergence criterion was that all the elements of the gradient were very small"));
    break;
  case NL_CFCONV:
    return(std::string("The optimisation converged. The convergence criterion was that the last step changed the cost-function by an insignificant amount"));
    break;
  case NL_LCONV:
    return(std::string("The optimisation converged. The convergence criterion was that lambda became too large"));
    break;
  default:
    return(std::string("Impossible status. This indicates there is a bug"));
    break;
  }
}
// If the user chooses not to override the grad-method of the NonlinCF
// base class, this routine will be used to calculate numerical derivatives.
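
For orientation only (not part of this commit): such a fallback is typically a finite-difference scheme. Below is a minimal central-difference sketch, assuming only that NonlinCF::cf takes a NEWMAT::ColumnVector and returns a double, as used elsewhere in this file; the helper name and step-size rule are illustrative and are not the actual NonlinCF::grad implementation.

#include <cmath>
#include <algorithm>
// The library's newmat and nonlin headers are assumed to be included.

NEWMAT::ColumnVector numerical_grad_sketch(const MISCMATHS::NonlinCF& cfo,
                                           const NEWMAT::ColumnVector&  par)
{
  NEWMAT::ColumnVector g(par.Nrows());
  for (int i=1; i<=par.Nrows(); i++) {
    double h = 1e-6 * std::max(1.0, std::fabs(par(i)));   // Assumed relative step size
    NEWMAT::ColumnVector pp = par, pm = par;
    pp(i) += h; pm(i) -= h;                               // Perturb parameter i in both directions
    g(i) = (cfo.cf(pp) - cfo.cf(pm)) / (2.0*h);           // Central-difference estimate of d(cf)/d(par_i)
  }
  return g;
}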
@@ -286,6 +320,9 @@ NonlinOut nonlin(const NonlinParam& p, const NonlinCF& cfo)
case NL_LM:
status = levmar(p,cfo);
break;
case NL_GD:
status = grades(p,cfo);
break;
}
return(status);
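
Not part of the diff: a minimal usage sketch showing how the new NL_GD method might be selected and the outcome reported. The header name and the NonlinParam constructor arguments are assumptions; only nonlin(), TextStatus(), NL_GD and NL_UNDEFINED are taken from the code above.

#include <iostream>
#include "nonlin.h"   // Assumed header name for the declarations above

void report_fit(const NEWMAT::ColumnVector& start, const MISCMATHS::NonlinCF& cost)
{
  // Assumed constructor arguments: number of parameters, method, starting point
  MISCMATHS::NonlinParam params(start.Nrows(), MISCMATHS::NL_GD, start);
  MISCMATHS::NonlinOut status = MISCMATHS::nonlin(params, cost);   // Dispatches to grades() via the switch above
  if (status != MISCMATHS::NL_UNDEFINED) {
    std::cout << params.TextStatus() << std::endl;                 // Human-readable outcome (see TextStatus())
  }
}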
@@ -343,58 +380,53 @@ NonlinOut levmar(const NonlinParam& p, const NonlinCF& cfo)
return(p.Status());
}
-/*
-// Main routine for Levenberg-Marquardt optimisation
-NonlinOut levmar(const NonlinParam& p, const NonlinCF& cfo)
-{
-  // Calculate initial values
-  p.SetCF(cfo.cf(p.Par()));                            // Cost-function evaluated at current parameters
-  ColumnVector g = cfo.grad(p.Par());                  // Gradient evaluated at current parameters
-  boost::shared_ptr<BFMatrix> H = cfo.hess(p.Par());   // Hessian evaluated at current parameters
-
-  double olambda = 0.0;
-  bool success = true;                                 // True if last step decreased CF
-
-  while (p.NextIter(success)) {
-    for (int i=1; i<=p.NPar(); i++) {                  // Nudge diagonal of H
-      if (p.GaussNewtonType() == LM_LM) {
-        H->AddTo(i,i,(p.Lambda()-olambda)*H->Peek(i,i));
-      }
-      else if (p.GaussNewtonType() == LM_L) {
-        H->AddTo(i,i,p.Lambda()-olambda);
-      }
-    }
-    ColumnVector step = -H->SolveForx(g,SYM_POSDEF,p.EquationSolverTol(),p.EquationSolverMaxIter());
-    double ncf = cfo.cf(p.Par()+step);
-    if (success = (ncf < p.CF())) {                    // If last step successful
-      olambda = 0.0;                                   // New H, so no need to undo old lambda
-      p.SetLambda(p.Lambda()/10.0);
-      p.SetPar(p.Par()+step);
-      // Check for convergence based on small decrease of cf
-      if (zero_cf_diff_conv(p.CF(),ncf,p.FractionalCFTolerance())) {
-        p.SetCF(ncf); p.SetStatus(NL_CFCONV); return(p.Status());
-      }
-      p.SetCF(ncf);
-      g = cfo.grad(p.Par());
-      H = cfo.hess(p.Par(),H);
-    }
-    else {                                             // If last step was unsuccessful
-      olambda = p.Lambda();                            // Returning to same H, so must undo old lambda
-      p.SetLambda(10.0*p.Lambda());
-      p.SetCF(p.CF());                                 // Push another copy
-      // Check for convergence based on _really_ large lambda
-      if (p.Lambda() > p.LambdaConvergenceCriterion()) {
-        p.SetStatus(NL_LCONV); return(p.Status());
-      }
-    }
-  }
-  // This means we did too many iterations
-  p.SetStatus(NL_MAXITER);
-  return(p.Status());
-}
-*/
+// Main routine for gradient-descent optimisation. It is
+// included mainly as a debugging tool for when the more
+// advanced methods fail and one wants to pinpoint the
+// reasons for that.
+
+NonlinOut grades(const NonlinParam& np, const NonlinCF& cfo)
+{
+  // Set up initial values
+  np.SetCF(cfo.cf(np.Par()));
+  ColumnVector g = -cfo.grad(np.Par());
+
+  while (np.NextIter()) {
+    // Check for convergence based on zero gradient
+    if (zero_grad_conv(np.Par(),g,np.CF(),np.FractionalGradientTolerance())) {
+      np.SetStatus(NL_GRADCONV); return(np.Status());
+    }
+    // Bracket minimum along g
+    pair<double,double> lp, mp;       // Leftmost and middle point of bracket
+    pair<double,double> rp = bracket(np.Par(),g,cfo,np.FractionalParameterTolerance(),1.0,&lp,&mp);  // Rightmost point of bracket
+    if (rp == lp) {                   // If no smaller point along g
+      np.SetStatus(NL_PARCONV); return(np.Status());  // Assume it is because we are at minimum
+    }
+    // Find minimum along g between lp and rp
+    pair<double,double> minp;         // Minimum along g
+    LinOut lm_status = linmin(np.Par(),g,cfo,1.0,lp,mp,rp,
+                              np.LineSearchFractionalParameterTolerance(),
+                              np.LineSearchMaxIterations(),&minp);
+    // Check for problems with line-search
+    if (lm_status == LM_MAXITER) {np.SetStatus(NL_LM_MAXITER); return(np.Status());}  // Ouch!
+    // Set new cf value and parameters
+    np.SetPar(np.Par() + minp.first*g);
+    // Check for convergence based on small decrease of cost-function
+    if (zero_cf_diff_conv(np.CF(),minp.second,np.FractionalCFTolerance())) {np.SetCF(minp.second); np.SetStatus(NL_CFCONV); return(np.Status());}
+    // Check for convergence based on negligible move in parameter space
+    else if (zero_par_step_conv(minp.first*g,np.Par(),np.FractionalParameterTolerance())) {np.SetCF(minp.second); np.SetStatus(NL_PARCONV); return(np.Status());}
+    else {                            // If no convergence
+      np.SetCF(minp.second);
+      g = -cfo.grad(np.Par());
+    }
+  }
+  // If we get here we have used too many iterations
+  np.SetStatus(NL_MAXITER);
+  return(np.Status());
+}
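
Each grades() iteration thus performs the classic steepest-descent update (new parameters = old parameters + minp.first times the negative gradient), with the step length found by first bracketing and then line-minimising the cost along that direction; the 1.0 passed to bracket() and linmin() is the initial trial step. Purely as orientation (not the library's bracket() or linmin()), a simplified sketch of what bracketing a one-dimensional minimum along a direction means:

// The library's newmat and nonlin headers are assumed to be included.
double f1d(const MISCMATHS::NonlinCF& cfo, const NEWMAT::ColumnVector& x,
           const NEWMAT::ColumnVector& d, double a)
{
  return cfo.cf(x + a*d);                              // Cost along the ray x + a*d
}

bool bracket_sketch(const MISCMATHS::NonlinCF& cfo, const NEWMAT::ColumnVector& x,
                    const NEWMAT::ColumnVector& d, double& a, double& b, double& c)
{
  a = 0.0; b = 1.0;                                    // Unit initial trial step, as in grades()
  if (f1d(cfo,x,d,b) >= f1d(cfo,x,d,a)) return false;  // No decrease along d: nothing to bracket
  c = 2.0*b;
  while (f1d(cfo,x,d,c) < f1d(cfo,x,d,b)) {            // Expand until the cost rises again
    a = b; b = c; c *= 2.0;                            // (a real implementation also caps the expansion)
  }
  return true;                                         // Now f(a) >= f(b) <= f(c): minimum bracketed
}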
// Main routine for conjugate-gradient optimisation. The
// implementation follows that of Numerical Recipes
// reasonably closely.
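
congra keeps a residual r (the negative gradient) and a search direction p, and resets p to r every NPar iterations (visible at the end of the hunk below). For orientation only, and not code from this commit nor necessarily the variant congra uses: a sketch of the Polak-Ribiere direction update that Numerical Recipes-style conjugate gradient applies between such resets; the helper names are illustrative.

// The library's newmat header is assumed to be included.
static double dot(const NEWMAT::ColumnVector& a, const NEWMAT::ColumnVector& b)
{
  double s = 0.0;
  for (int i=1; i<=a.Nrows(); i++) s += a(i)*b(i);
  return s;
}

// r_new, r_old: negative gradients at the new and previous parameters; p_old: previous direction
NEWMAT::ColumnVector next_direction(const NEWMAT::ColumnVector& r_new,
                                    const NEWMAT::ColumnVector& r_old,
                                    const NEWMAT::ColumnVector& p_old)
{
  double beta = dot(r_new - r_old, r_new) / dot(r_old, r_old);   // Polak-Ribiere coefficient
  if (beta < 0.0) beta = 0.0;                                    // Restart with steepest descent
  return r_new + beta*p_old;                                     // New conjugate search direction
}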
@@ -426,12 +458,12 @@ NonlinOut congra(const NonlinParam& np, const NonlinCF& cfo)
   if (lm_status == LM_MAXITER) {np.SetStatus(NL_LM_MAXITER); return(np.Status());}  // Ouch!
   // Set new cf value and parameters
   np.SetPar(np.Par() + minp.first*p);
-  np.SetCF(minp.second);
   // Check for convergence based on small decrease of cost-function
-  if (zero_cf_diff_conv(np.CF(),minp.second,np.FractionalCFTolerance())) {np.SetStatus(NL_CFCONV); return(np.Status());}
+  if (zero_cf_diff_conv(np.CF(),minp.second,np.FractionalCFTolerance())) {np.SetCF(minp.second); np.SetStatus(NL_CFCONV); return(np.Status());}
   // Check for convergence based on negligible move in parameter space
-  else if (zero_par_step_conv(minp.first*p,np.Par(),np.FractionalParameterTolerance())) {np.SetStatus(NL_PARCONV); return(np.Status());}
+  else if (zero_par_step_conv(minp.first*p,np.Par(),np.FractionalParameterTolerance())) {np.SetCF(minp.second); np.SetStatus(NL_PARCONV); return(np.Status());}
   else {                            // If no convergence
+    np.SetCF(minp.second);
     if (((np.NIter())%np.NPar()) == 0) {  // Explicitly reset directions after npar iterations
       r = -cfo.grad(np.Par());
       p = r;
......
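
The conjugate-gradient change above fixes an ordering bug: previously np.SetCF(minp.second) ran before the cost-function convergence test, so np.CF() already returned minp.second and zero_cf_diff_conv effectively compared the value with itself, making congra report NL_CFCONV after the first line minimisation regardless of the actual improvement. A standalone illustration of the failure mode, using an assumed relative-difference test rather than the actual zero_cf_diff_conv:

#include <cmath>
#include <cassert>

// Assumed form of a relative cost-function-difference test; the real
// zero_cf_diff_conv may differ in detail, but any such test passes
// trivially when both arguments are equal.
static bool cf_diff_small(double oldcf, double newcf, double tol)
{
  return 2.0*std::fabs(oldcf-newcf) <= tol*(std::fabs(oldcf)+std::fabs(newcf)+1e-16);
}

int main()
{
  double newcf = 5.0;                            // A clearly significant improvement over 10.0
  // Old ordering: the stored cost is overwritten first ...
  double cf = newcf;
  assert(cf_diff_small(cf, newcf, 1e-8));        // ... so convergence is (wrongly) declared
  // New ordering: test against the previous cost, then store it
  double prev = 10.0;
  assert(!cf_diff_small(prev, newcf, 1e-8));     // No spurious convergence
  return 0;
}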