vnl_levenberg_marquardt.cxx
Go to the documentation of this file.
1 // This is core/vnl/algo/vnl_levenberg_marquardt.cxx
2 //:
3 // \file
4 // \author Andrew W. Fitzgibbon, Oxford RRG
5 // \date 31 Aug 96
6 //
7 //-----------------------------------------------------------------------------
8 
9 #include <iostream>
10 #include <cassert>
12 #include <vnl/vnl_fastops.h>
13 #include <vnl/vnl_matrix_ref.h>
15 #include <vnl/algo/vnl_netlib.h> // lmdif_()
16 
17 // see header
//: Convenience driver: copy `initial_estimate`, run LM minimization on it,
// and return the (possibly improved) solution vector.
// NOTE(review): this extraction dump is missing the opening line of the
// signature (original line 18) and the construction of the local minimizer
// `lm` (original line 22, presumably `vnl_levenberg_marquardt lm(f);`) —
// confirm against the pristine source file.
 19  vnl_vector<double> const& initial_estimate)
 20 {
 21  vnl_vector<double> x = initial_estimate;
 23  lm.minimize(x);
 24  return x;
 25 }
26 
27 // ctor
//: Initialise minimizer state for the least-squares function `f`:
// termination tolerances, workspace matrices, and covariance bookkeeping.
// NOTE(review): the signature line (original line 28) is missing from this
// dump — presumably `void vnl_levenberg_marquardt::init(vnl_least_squares_function* f)`;
// confirm against the pristine source.
 29 {
 30  f_ = f;
 31 
 32  // If changing these defaults, check the help comments in vnl_levenberg_marquardt.h,
 33  // and MAKE SURE they're consistent.
 34  xtol = 1e-8; // Termination tolerance on X (solution vector)
 35  maxfev = 400 * f->get_number_of_unknowns(); // Termination maximum number of iterations.
 36  ftol = xtol * 0.01; // Termination tolerance on F (sum of squared residuals)
 37  gtol = 1e-5; // Termination tolerance on Grad(F)' * F = 0
 38  epsfcn = xtol * 0.001; // Step length for FD Jacobian
 39 
 40  unsigned int m = f_->get_number_of_residuals(); // I Number of residuals, must be > #unknowns
 41  unsigned int n = f_->get_number_of_unknowns(); // I Number of unknowns
 42 
 43  set_covariance_ = false;
 // fdjac_ is allocated n x m (transposed relative to the m x n Jacobian)
 // because the netlib routines store it in FORTRAN column-major order.
 44  fdjac_.set_size(n,m);
 45  fdjac_.fill(0.0);
 46  ipvt_.set_size(n);
 47  ipvt_.fill(0);
 48  inv_covar_.set_size(n,n);
 49  inv_covar_.fill(0.0);
 // Historical heap allocations, kept for reference; members are now values.
 50  //fdjac_ = new vnl_matrix<double>(n,m);
 51  //ipvt_ = new vnl_vector<int>(n);
 52  //covariance_ = new vnl_matrix<double>(n,n);
 53 }
54 
//: Destructor — nothing to release; members own their storage by value.
// NOTE(review): the signature line (original line 55) is missing from this
// dump — presumably `vnl_levenberg_marquardt::~vnl_levenberg_marquardt()`.
 56 {
 57  //delete covariance_;
 58  //delete fdjac_;
 59  //delete ipvt_;
 60 }
61 
62 //--------------------------------------------------------------------------------
63 
64 void vnl_levenberg_marquardt::lmdif_lsqfun(long* n, // I Number of residuals
65  long* p, // I Number of unknowns
66  double* x, // I Solution vector, size n
67  double* fx, // O Residual vector f(x)
68  long* iflag, // IO 0 ==> print, -1 ==> terminate
69  void* userdata)
70 {
71  auto* self =
72  static_cast<vnl_levenberg_marquardt*>(userdata);
73  vnl_least_squares_function* f = self->f_;
74  assert(*p == (int)f->get_number_of_unknowns());
75  assert(*n == (int)f->get_number_of_residuals());
76  vnl_vector_ref<double> ref_x(*p, const_cast<double*>(x));
77  vnl_vector_ref<double> ref_fx(*n, fx);
78 
79  if (*iflag == 0) {
80  if (self->trace)
81  std::cerr << "lmdif: iter " << self->num_iterations_ << " err ["
82  << x[0] << ", " << x[1] << ", " << x[2] << ", " << x[3] << ", "
83  << x[4] << ", ... ] = " << ref_fx.magnitude() << '\n';
84 
85  f->trace(self->num_iterations_, ref_x, ref_fx);
86  ++(self->num_iterations_);
87  } else {
88  f->f(ref_x, ref_fx);
89  }
90 
91  if (self->start_error_ == 0)
92  self->start_error_ = ref_fx.rms();
93 
94  if (f->failure) {
95  f->clear_failure();
96  *iflag = -1; // fsm
97  }
98 }
99 
100 
101 // This function shouldn't be inlined, because (1) modification of the
102 // body will not cause the timestamp on the header to change, and so
103 // others will not be forced to recompile, and (2) the cost of making
104 // one more function call is far, far less than the cost of actually
105 // performing the minimisation, so the inline doesn't gain you
106 // anything.
107 //
//: Dispatch to the gradient-based or gradient-free minimizer depending on
// whether the user's function supplies an analytic Jacobian.
// NOTE(review): the signature line (original line 108) is missing from this
// dump — presumably `bool vnl_levenberg_marquardt::minimize(vnl_vector<double>& x)`.
 109 {
 110  if ( f_->has_gradient() )
 111  return minimize_using_gradient(x);
 112  else
 113  return minimize_without_gradient(x);
 114 }
115 
116 
117 //
//: Run MINPACK lmdif (finite-difference Jacobian) starting from x.
// Returns true iff the netlib return code indicates convergence (1..4).
// NOTE(review): this dump is missing the signature line (original line 118,
// presumably `bool vnl_levenberg_marquardt::minimize_without_gradient(vnl_vector<double>& x)`)
// and interior lines 131, 137 (presumably `failure_code_ = ERROR_DODGY_INPUT;`
// in the two early-out branches) and 169 (presumably the `fdjac_`/`ipvt_`
// workspace arguments of the lmdif call) — confirm against the pristine source.
 119 {
 120  //fsm
 121  if (f_->has_gradient()) {
 122  std::cerr << __FILE__ " : WARNING. calling minimize_without_gradient(), but f_ has gradient.\n";
 123  }
 124 
 125  // e04fcf
 126  long m = f_->get_number_of_residuals(); // I Number of residuals, must be > #unknowns
 127  long n = f_->get_number_of_unknowns(); // I Number of unknowns
 128 
 // Over-determined systems only: lmdif requires at least as many residuals as unknowns.
 129  if (m < n) {
 130  std::cerr << "vnl_levenberg_marquardt: Number of unknowns("<<n<<") greater than number of data ("<<m<<")\n";
 132  return false;
 133  }
 134 
 135  if (int(x.size()) != n) {
 136  std::cerr << "vnl_levenberg_marquardt: Input vector length ("<<x.size()<<") not equal to num unknowns ("<<n<<")\n";
 138  return false;
 139  }
 140 
 141  vnl_vector<double> fx(m, 0.0); // W m Storage for target vector
 142  vnl_vector<double> diag(n, 0); // I Multiplicative scale factors for variables
 143  long user_provided_scale_factors = 1; // 1 is no, 2 is yes
 144  double factor = 100;
 145  long nprint = 1;
 146 
 // Workspace vectors required by the netlib routine.
 147  vnl_vector<double> qtf(n, 0);
 148  vnl_vector<double> wa1(n, 0);
 149  vnl_vector<double> wa2(n, 0);
 150  vnl_vector<double> wa3(n, 0);
 151  vnl_vector<double> wa4(m, 0);
 152 
 153 #ifdef DEBUG
 154  std::cerr << "STATUS: " << failure_code_ << '\n';
 155 #endif
 156 
 157  num_iterations_ = 0;
 158  set_covariance_ = false;
 159  long info;
 160  start_error_ = 0; // Set to 0 so first call to lmdif_lsqfun will know to set it.
 161  v3p_netlib_lmdif_(
 162  lmdif_lsqfun, &m, &n,
 163  x.data_block(),
 164  fx.data_block(),
 165  &ftol, &xtol, &gtol, &maxfev, &epsfcn,
 166  &diag[0],
 167  &user_provided_scale_factors, &factor, &nprint,
 168  &info, &num_evaluations_,
 170  &qtf[0],
 171  &wa1[0], &wa2[0], &wa3[0], &wa4[0],
 172  this);
 173  failure_code_ = (ReturnCodes) info;
 174 
 175  // One more call to compute final error.
 176  lmdif_lsqfun(&m, // I Number of residuals
 177  &n, // I Number of unknowns
 178  x.data_block(), // I Solution vector, size n
 179  fx.data_block(), // O Residual vector f(x)
 180  &info, this);
 181  end_error_ = fx.rms();
 182 
 // Translate status code: netlib codes 1..4 are the converged outcomes
 // (ftol / xtol / both / gtol); everything else is a failure.
 184  switch ((int)failure_code_) {
 185  case 1: // ftol
 186  case 2: // xtol
 187  case 3: // both
 188  case 4: // gtol
 189  return true;
 190  default:
 191  return false;
 192  }
 193 }
194 
195 //--------------------------------------------------------------------------------
197 void vnl_levenberg_marquardt::lmder_lsqfun(long* n, // I Number of residuals
198  long* p, // I Number of unknowns
199  double* x, // I Solution vector, size n
200  double* fx, // O Residual vector f(x)
201  double* fJ, // O m * n Jacobian f(x)
202  long*,
203  long* iflag, // I 1 -> calc fx, 2 -> calc fjac
204  void* userdata)
205 {
206  auto* self =
207  static_cast<vnl_levenberg_marquardt*>(userdata);
208  vnl_least_squares_function* f = self->f_;
209  assert(*p == (int)f->get_number_of_unknowns());
210  assert(*n == (int)f->get_number_of_residuals());
211  vnl_vector_ref<double> ref_x(*p, (double*)x); // const violation!
212  vnl_vector_ref<double> ref_fx(*n, fx);
213  vnl_matrix_ref<double> ref_fJ(*n, *p, fJ);
214 
215  if (*iflag == 0) {
216  if (self->trace)
217  std::cerr << "lmder: iter " << self->num_iterations_ << " err ["
218  << x[0] << ", " << x[1] << ", " << x[2] << ", " << x[3] << ", "
219  << x[4] << ", ... ] = " << ref_fx.magnitude() << '\n';
220  f->trace(self->num_iterations_, ref_x, ref_fx);
221  }
222  else if (*iflag == 1) {
223  f->f(ref_x, ref_fx);
224  if (self->start_error_ == 0)
225  self->start_error_ = ref_fx.rms();
226  ++(self->num_iterations_);
227  }
228  else if (*iflag == 2) {
229  f->gradf(ref_x, ref_fJ);
230  ref_fJ.inplace_transpose();
231 
232  // check derivative?
233  if ( self->check_derivatives_ > 0 )
234  {
235  self->check_derivatives_--;
236 
237  // use finite difference to compute Jacobian
238  vnl_vector<double> feval( *n );
239  vnl_matrix<double> finite_jac( *p, *n, 0.0 );
240  vnl_vector<double> wa1( *n );
241  long info=1;
242  double diff;
243  f->f( ref_x, feval );
244  v3p_netlib_fdjac2_(
245  lmdif_lsqfun, n, p, x,
246  feval.data_block(),
247  finite_jac.data_block(),
248  n,
249  &info,
250  &(self->epsfcn),
251  wa1.data_block(),
252  self);
253  // compute difference
254  for ( unsigned i=0; i<ref_fJ.cols(); ++i )
255  for ( unsigned j=0; j<ref_fJ.rows(); ++j ) {
256  diff = ref_fJ(j,i) - finite_jac(j,i);
257  diff = diff*diff;
258  if ( diff > self->epsfcn ) {
259  std::cout << "Jac(" << i << ", " << j << ") diff: " << ref_fJ(j,i)
260  << " " << finite_jac(j,i)
261  << " " << ref_fJ(j,i)-finite_jac(j,i) << '\n';
262  }
263  }
264  }
265  }
266 
267  if (f->failure) {
268  f->clear_failure();
269  *iflag = -1; // fsm
270  }
271 }
272 
273 
274 //
//: Run MINPACK lmder (analytic Jacobian via f_->gradf) starting from x.
// Returns true iff the netlib return code indicates convergence (1..4).
// NOTE(review): this dump is missing the signature line (original line 275,
// presumably `bool vnl_levenberg_marquardt::minimize_using_gradient(vnl_vector<double>& x)`)
// and interior line 288 (presumably `failure_code_ = ERROR_DODGY_INPUT;`) —
// confirm against the pristine source.
 276 {
 277  //fsm
 278  if (! f_->has_gradient()) {
 279  std::cerr << __FILE__ ": called method minimize_using_gradient(), but f_ has no gradient.\n";
 280  return false;
 281  }
 282 
 283  long m = f_->get_number_of_residuals(); // I Number of residuals, must be > #unknowns
 284  long n = f_->get_number_of_unknowns(); // I Number of unknowns
 285 
 // Over-determined systems only: lmder requires at least as many residuals as unknowns.
 286  if (m < n) {
 287  std::cerr << __FILE__ ": Number of unknowns("<<n<<") greater than number of data ("<<m<<")\n";
 289  return false;
 290  }
 291 
 292  vnl_vector<double> fx(m, 0.0); // W m Explicitly set target to 0.0
 293 
 294  num_iterations_ = 0;
 295  set_covariance_ = false;
 296  long info;
 297  start_error_ = 0; // Set to 0 so first call to lmder_lsqfun will know to set it.
 298 
 299 
 300  double factor = 100;
 301  long nprint = 1;
 302  long mode=1, nfev, njev; // mode 1: variables scaled internally
 303 
 // Workspace vectors required by the netlib routine.
 304  vnl_vector<double> diag(n, 0);
 305  vnl_vector<double> qtf(n, 0);
 306  vnl_vector<double> wa1(n, 0);
 307  vnl_vector<double> wa2(n, 0);
 308  vnl_vector<double> wa3(n, 0);
 309  vnl_vector<double> wa4(m, 0);
 310 
 311 
 // fdjac_ receives the factored Jacobian (leading dimension m); ipvt_ the
 // column permutation — both are later consumed by get_JtJ().
 312  v3p_netlib_lmder_(
 313  lmder_lsqfun, &m, &n,
 314  x.data_block(),
 315  fx.data_block(),
 316  fdjac_.data_block(), &m,
 317  &ftol,
 318  &xtol,
 319  &gtol,
 320  &maxfev,
 321  diag.data_block(),
 322  &mode,
 323  &factor,
 324  &nprint,
 325  &info,
 326  &nfev, &njev,
 327  ipvt_.data_block(),
 328  qtf.data_block(),
 329  wa1.data_block(),
 330  wa2.data_block(),
 331  wa3.data_block(),
 332  wa4.data_block(),
 333  this);
 334 
 335 
 336 
 337 
 338  num_evaluations_ = num_iterations_; // for lmder, these are the same.
 339  if (info<0)
 340  info = ERROR_FAILURE;
 341  failure_code_ = (ReturnCodes) info;
 342  end_error_ = fx.rms();
 343 
 // Translate status code: netlib codes 1..4 are the converged outcomes
 // (ftol / xtol / both / gtol); everything else is a failure.
 345  switch (failure_code_) {
 346  case 1: // ftol
 347  case 2: // xtol
 348  case 3: // both
 349  case 4: // gtol
 350  return true;
 351  default:
 352  return false;
 353  }
 354 }
355 
356 //--------------------------------------------------------------------------------
//: Print a diagnosis of the last minimization to std::cerr.
// NOTE(review): the signature line (original line 358) is missing from this
// dump — presumably `void vnl_levenberg_marquardt::diagnose_outcome() const`.
 359 {
 360  diagnose_outcome(std::cerr);
 361 }
362 
363 // fsm: should this function be a method on vnl_nonlinear_minimizer?
364 // if not, the return codes should be moved into LM.
// fsm: should this function be a method on vnl_nonlinear_minimizer?
// if not, the return codes should be moved into LM.
//: Write a human-readable account of the last minimization (status code,
// iteration/evaluation counts, start and end RMS error) to stream `s`.
// NOTE(review): this dump is missing the case labels at original lines 393,
// 397 and 400 (presumably FAILED_FTOL_TOO_SMALL, FAILED_XTOL_TOO_SMALL and
// FAILED_GTOL_TOO_SMALL, matching the messages that follow) — confirm
// against the pristine source.  Also note the emitted string "unkown" is a
// typo for "unknown" (left untouched here: runtime text).
void vnl_levenberg_marquardt::diagnose_outcome(std::ostream& s) const
 366 {
 367 #define whoami "vnl_levenberg_marquardt"
 368  //if (!verbose_) return;
 369  switch (failure_code_) {
 370  // case -1:
 371  // have already warned.
 372  // return;
 373  case ERROR_FAILURE:
 374  s << (whoami ": OIOIOI -- failure in leastsquares function\n");
 375  break;
 376  case ERROR_DODGY_INPUT:
 377  s << (whoami ": OIOIOI -- lmdif dodgy input\n");
 378  break;
 379  case CONVERGED_FTOL: // ftol
 380  s << (whoami ": converged to ftol\n");
 381  break;
 382  case CONVERGED_XTOL: // xtol
 383  s << (whoami ": converged to xtol\n");
 384  break;
 385  case CONVERGED_XFTOL: // both
 386  s << (whoami ": converged nicely\n");
 387  break;
 388  case CONVERGED_GTOL:
 389  s << (whoami ": converged via gtol\n");
 390  break;
 391  case TOO_MANY_ITERATIONS:
 392  s << (whoami ": too many iterations\n");
 393  break;
 395  s << (whoami ": ftol is too small. no further reduction in the sum of squares is possible.\n");
 396  break;
 398  s << (whoami ": xtol is too small. no further improvement in the approximate solution x is possible.\n");
 399  break;
 401  s << (whoami ": gtol is too small. Fx is orthogonal to the columns of the jacobian to machine precision.\n");
 402  break;
 403  default:
 404  s << (whoami ": OIOIOI: unkown info code from lmder.\n");
 405  break;
 406  }
 407  unsigned int m = f_->get_number_of_residuals();
 408  s << whoami ": " << num_iterations_ << " iterations, "
 409  << num_evaluations_ << " evaluations, "<< m <<" residuals. RMS error start/end "
 410  << get_start_error() << '/' << get_end_error() << std::endl;
 411 #undef whoami
 412 }
413 
414 // fjac is an output m by n array. the upper n by n submatrix
415 // of fjac contains an upper triangular matrix r with
416 // diagonal elements of nonincreasing magnitude such that
417 //
 418 // p^T * (jac^T * jac) * p = r^T * r,
420 //
421 // where p is a permutation matrix and jac is the final
422 // calculated jacobian. column j of p is column ipvt(j)
423 // (see below) of the identity matrix. the lower trapezoidal
424 // part of fjac contains information generated during
425 // the computation of r.
426 
427 // fdjac is target m*n
428 
 429 //: Get INVERSE of covariance at last minimum.
 430 // Code thanks to Joss Knight (joss@robots.ox.ac.uk)
//: Reconstructs J'*J from the permuted QR factorization that lmder left in
// fdjac_/ipvt_, caching the result in inv_covar_ until the next minimization.
// NOTE(review): this dump is missing the signature line (original line 431,
// presumably `vnl_matrix<double> const& vnl_levenberg_marquardt::get_JtJ()`)
// and line 440, which must declare the local matrix `r` used below —
// presumably the upper n x n submatrix of fdjac_, transposed to C order.
// Confirm against the pristine source.
 432 {
 433  if (!set_covariance_)
 434  {
 435  std::cerr << __FILE__ ": get_covariance() not confirmed tested yet\n";
 436  unsigned int n = fdjac_.rows();
 437 
 438  // matrix in FORTRAN is column-wise.
 439  // transpose it to get C style order
 441  // r is upper triangular matrix according to documentation.
 442  // But the lower part has non-zeros somehow.
 443  // clear the lower part
 444  for (unsigned int i=0; i<n; ++i)
 445  for (unsigned int j=0; j<i; ++j)
 446  r(i,j) = 0.0;
 447 
 448  // compute r^T * r
 449  vnl_matrix<double> rtr;
 450  vnl_fastops::AtA(rtr, r);
 451  vnl_matrix<double> rtrpt (n, n);
 452 
 453  // Permute. First order columns.
 454  // Note, *ipvt_ contains 1 to n, not 0 to n-1
 // jpvt(j) records which factorized column corresponds to original column j.
 455  vnl_vector<int> jpvt(n);
 456  for (unsigned int j = 0; j < n; ++j) {
 457  unsigned int i = 0;
 458  for (; i < n; i++) {
 459  if (ipvt_[i] == (int)j+1) {
 460  jpvt (j) = i;
 461  break;
 462  }
 463  }
 464  rtrpt.set_column(j, rtr.get_column(i));
 465  }
 466 
 467  // Now order rows
 468  for (unsigned int j = 0; j < n; ++j) {
 469  inv_covar_.set_row (j, rtrpt.get_row (jpvt(j)));
 470  }
 471 
 472  set_covariance_ = true;
 473  }
 474  return inv_covar_;
 475 }
#define whoami
unsigned int cols() const
Return the number of columns.
Definition: vnl_matrix.h:183
vnl_matrix< double > const & get_JtJ()
Return J'*J computed at last minimum.
abs_t rms() const
Root Mean Squares of values.
Definition: vnl_vector.h:300
void init(vnl_least_squares_function *f)
vnl_matrix< T > transpose() const
Return transpose.
Definition: vnl_matrix.hxx:682
bool set_size(size_t n)
Resize to n elements.
Definition: vnl_vector.hxx:250
Abstract base for minimising functions.
vnl_vector using user-supplied storage.
Definition: vnl_fwd.h:17
vnl_matrix< T > extract(unsigned r, unsigned c, unsigned top=0, unsigned left=0) const
Extract a sub-matrix of size r x c, starting at (top,left).
Definition: vnl_matrix.hxx:728
#define m
Definition: vnl_vector.h:43
double get_end_error() const
Return the best error that was achieved by the last minimization, corresponding to the returned x.
size_t size() const
Return the length, number of elements, dimension of this vector.
Definition: vnl_vector.h:126
T const * data_block() const
Access the contiguous block storing the elements in the vector. O(1).
Definition: vnl_vector.h:230
unsigned int get_number_of_unknowns() const
Return the number of unknowns.
Abstract base for minimising functions.
abs_t magnitude() const
Return magnitude (length) of vector.
Definition: vnl_vector.h:282
virtual void gradf(vnl_vector< double > const &x, vnl_matrix< double > &jacobian)
Calculate the Jacobian, given the parameter vector x.
double epsfcn
Step length for FD Jacobian.
static void AtA(vnl_matrix< double > &out, const vnl_matrix< double > &A)
Compute $A^\top A$.
Definition: vnl_fastops.cxx:14
Levenberg Marquardt nonlinear least squares.
double ftol
Termination tolerance on F (sum of squared residuals)
vnl_matrix & inplace_transpose()
Transposes this matrix efficiently, and returns it.
Collection of C-style matrix functions.
Levenberg Marquardt nonlinear least squares.
vnl_matrix & set_column(unsigned i, T const *v)
Set the elements of the i'th column to v[i] (No bounds checking).
virtual void trace(int iteration, vnl_vector< double > const &x, vnl_vector< double > const &fx)
Called after each LM iteration to print debugging etc.
double gtol
Termination tolerance on Grad(F)' * F = 0.
bool minimize_without_gradient(vnl_vector< double > &x)
Minimize the function supplied in the constructor until convergence or failure.
Declare in a central place the list of symbols from netlib.
vnl_matrix & fill(T const &)
Sets all elements of matrix to specified value, and returns "*this".
Definition: vnl_matrix.hxx:419
static void lmdif_lsqfun(long *m, long *n, double *x, double *fx, long *iflag, void *userdata)
vnl_vector & fill(T const &v)
Set all values to v.
Definition: vnl_vector.hxx:314
unsigned int get_number_of_residuals() const
Return the number of residuals.
T const * data_block() const
Access the contiguous block storing the elements in the matrix row-wise. O(1).
Definition: vnl_matrix.h:601
vnl_vector< T > get_column(unsigned c) const
Get a vector equal to the given column.
Definition: vnl_matrix.hxx:977
vnl_vector< double > vnl_levenberg_marquardt_minimize(vnl_least_squares_function &f, vnl_vector< double > const &initial_estimate)
Find minimum of "f", starting at "initial_estimate", and return.
vnl_vector< T > get_row(unsigned r) const
Get a vector equal to the given row.
Definition: vnl_matrix.hxx:962
vnl_matrix< double > inv_covar_
ReturnCodes
Some generic return codes that apply to all minimizers.
double xtol
Termination tolerance on X (solution vector)
vnl_matrix & set_row(unsigned i, T const *v)
Set the elements of the i'th row to v[i] (No bounds checking).
unsigned int rows() const
Return the number of rows.
Definition: vnl_matrix.h:179
double get_start_error() const
Return the error of the function when it was evaluated at the start point of the last minimization.
long maxfev
Termination maximum number of iterations.
void diagnose_outcome() const
Provide an ASCII diagnosis of the last minimization on std::ostream.
vnl_least_squares_function * f_
static void lmder_lsqfun(long *m, long *n, double *x, double *fx, double *fJ, long *, long *iflag, void *userdata)
bool minimize_using_gradient(vnl_vector< double > &x)
Minimize the function supplied in the constructor until convergence or failure.
bool has_gradient() const
Return true if the derived class has indicated that gradf has been implemented.
bool set_size(unsigned r, unsigned c)
Resize to r rows by c columns. Old data lost.
Definition: vnl_matrix.hxx:390
vnl_matrix reference to user-supplied storage.
Definition: vnl_fwd.h:20
virtual void f(vnl_vector< double > const &x, vnl_vector< double > &fx)=0
The main function.
bool minimize(vnl_vector< double > &x)
Calls minimize_using_gradient() or minimize_without_gradient(),.
vnl_matrix reference to user-supplied storage.