BFGSFitter.cxx
Go to the documentation of this file.
1 
12 #ifdef _MSC_VER
13 #include "msdevstudio/MSconfig.h"
14 #endif
15 
16 #ifdef _MSC_VER
17 #define isnan _isnan
18 #endif
19 
20 //To have isnan.
21 #ifdef __APPLE__
22 #include <cstdlib>
23 #define _GLIBCPP_USE_C99 1
24 #endif
25 
26 #include "BFGSFitter.h"
27 
28 #include "NumLinAlg.h"
29 #include "StatedFCN.h"
30 
31 // see todo
32 #include <iostream>
33 using std::cout;
34 using std::endl;
35 
36 #include <cfloat>
37 #include <cstdlib>
38 #include <cassert>
39 #include <cmath>
40 #include <ctime>
41 
42 #ifdef __APPLE__
43 using std::isnan;
44 #endif
45 
46 
47 using std::pow;
48 using std::swap;
49 using std::min;
50 using std::abs;
51 using std::vector;
52 using std::vector;
53 using std::string;
54 using std::map;
55 
56 using namespace hippodraw;
57 
58 using namespace Numeric;
59 
60 BFGSFitter::BFGSFitter( const char * name )
61  : Fitter ( name ),
62  m_xinit( 1 ),
63  m_grad_cutoff( 1e-6 ),
64  m_step_cutoff( 1e-6 ),
65  m_proj_cutoff( 1e-6 ),
66  m_c1( 1e-4 ),
67  m_c2( 0.9 ),
68  m_alpha_max( 4 ),
69  m_alpha1( 1 )
70 {
71  m_iter_params[ "grad_cutoff" ] = & m_grad_cutoff;
72  m_iter_params[ "step_cutoff" ] = & m_step_cutoff;
73  m_iter_params[ "proj_cutoff" ] = & m_proj_cutoff;
74  m_iter_params[ "c1" ] = & m_c1;
75  m_iter_params[ "c2" ] = & m_c2;
76  m_iter_params[ "alpha_max" ] = & m_alpha_max;
77  m_iter_params[ "alpha1" ] = & m_alpha1;
78 }
79 
81 {
82  return new BFGSFitter ( *this ); // uses compiler generated copy constructor
83 }
84 
87 bool
90 {
91  double Alpha_star;
92 
93  // Initialization
94  vector < double > init_params;
95  m_fcn -> fillFreeParameters ( init_params );
96  setInitIter( init_params );
97 
98  // Allocate space for various matrices / vectors needed
99  vector< double > xold = m_xinit;
100  vector< double > xnew = m_xinit;
101  vector< double > gk = gradient( xold );
102  vector< double > gkPlusUn = gradient( xnew );
103 
104  vector< double > pk( m_xinit.size() );
105  vector< double > s( m_xinit.size() );
106  vector< double > y( m_xinit.size() );
107 
108  // The standard quasi newton initialization
109  eye ( m_M, m_xinit.size() );
110  m_M = ( 1.0 / norm( gk ) ) * m_M;
111 
112  vector< vector< double > > t1 , t2;
113  eye( t1, m_xinit.size() );
114  eye( t2, m_xinit.size() );
115 
116  double fx = function( xnew );
117  double fxold = fx;
118  for( int k = 1; k <= m_max_iterations; k++ )
119  {
120  // Update the iterate.
121  gk = gkPlusUn;
122  pk = m_M * (-gk);
123  Alpha_star = wolfeStep( xold, pk );
124 
125  do
126  {
127  xnew = xold + Alpha_star * pk;
128  fx = function( xnew );
129  Alpha_star /= 3.0;
130  }
131  while( isnan( fx ) );
132 
133  gkPlusUn = gradient( xnew );
134 
135  s = xnew - xold;
136  y = gkPlusUn - gk;
137 
138  double ys = innerProduct( y, s );
139  double yy = norm( y );
140  double ss = norm( s );
141 
142  // Termination criteria
143  if( ( abs( ys ) < m_proj_cutoff ) ||
144  ( abs( Alpha_star ) < DBL_EPSILON ) ||
145  ( ss < m_step_cutoff ) ||
146  ( yy < m_grad_cutoff ) ||
147  ( fx >= fxold ) )
148  break;
149 
150  // DFP Update of inverse of approximate hessian.
151  // M = M-(s*y'*M+M*y*s')/(y'*s)+(1+(y'*M*y)/(y'*s))*(s*s')/(y'*s);
152  double temp = ( 1 + innerProduct( y, m_M * y ) / ys ) / ys;
153 
154  t1 = ( outerProduct(s, y) * m_M)/ys + ( m_M * outerProduct(y , s) ) / ys;
155  t2 = temp * outerProduct( s, s );
156  m_M = m_M - t1 + t2;
157 
158  // one pass of the loop is over so refresh
159  xold = xnew;
160  fxold = fx;
161  }
162 
163  m_fcn -> setFreeParameters ( xold );
164  //write( xold );
165 
166  return true;
167 }
168 
171 double BFGSFitter::wolfeStep( const std::vector< double >& x0,
172  const std::vector< double >& p ) const
173 {
174  double step_fac = sqrt(2.0); // Geometric step factor; must be > 1
175 
176  double phi0 = function( x0 ); // Function value at the initial point
177  double dphi0 = gradp( x0, p ); // innner product of gradient and p
178 
179  // dphi0 >= 0 means you are ascending. To avoid it set step = 0 and
180  // terminate the iterations ( maybe after giving out a warning )
181  // Algorithm ensures that such case will not arise but curse of finite
182  // precision mathematics lead to some very small positive dphi0.
183  if ( dphi0 >= 0 )
184  return 0.0;
185 
186  double Alpha0 = 0;
187  double Alphai = m_alpha1;
188  double Alphaim = Alpha0;
189  double phiim = phi0;
190  int i = 1;
191  int done = 0;
192  int cnt = 0;
193 
194  double phii, dphii;
195 
196  while ( !done )
197  {
198  phii = function( x0 + Alphai * p );
199 
200  if ( (phii > (phi0 + m_c1 * Alphai * dphi0)) ||
201  ( (phii >= phiim) && ( i > 1)) )
202  return zoom( x0, p, phi0, dphi0, Alphaim, Alphai );
203 
204  dphii = gradp( x0 + Alphai * p , p );
205 
206  if ( abs( dphii ) <= -m_c2 * dphi0 )
207  return Alphai;
208 
209  if (dphii >= 0)
210  return zoom( x0, p, phi0, dphi0, Alphai, Alphaim );
211 
212  // Choose new Alphai in (Alphai, Alpha_max)
213  Alphaim = Alphai;
214  phiim = phii;
215  Alphai = min( step_fac * Alphai, m_alpha_max );
216 
217  if ( Alphai == m_alpha_max )
218  cnt += 1;
219 
220  if (cnt > 1)
221  {
222 // cout << "WARNING: Unable to bracket a strong Wolfe pt. in [ "
223 // << m_alpha1 << ", " << m_alpha_max << " ]" << endl;
224  return m_alpha_max;
225  }
226 
227  i += 1;
228  //cout << " Wolfe Iteration: " << i << endl;
229  }
230 
231  return 0.0;
232 }
233 
234 double BFGSFitter::zoom( const std::vector< double >& x0,
235  const std::vector< double >& p,
236  double phi0, double dphi0,
237  double Alphalo, double Alphahi ) const
238 {
239  int MaxIter = 20;
240  int iter = 0;
241  int done = 0;
242 
243  double philo, phij;
244  double dphij;
245  double Alphaj;
246  double Alpha_star = 0.0;
247 
248  while ( !done && iter < MaxIter )
249  {
250  iter += 1;
251 
252  philo = function( x0 + Alphalo * p );
253 
254  // Find a trial step length Alphaj between Alphalo and Alphahi
255  Alphaj = interpolate( x0, p, Alphalo, Alphahi );
256 
257  // Evalaute phi( Alphaj )
258  phij = function( x0 + Alphaj * p );
259 
260  if( (phij > phi0 + m_c1 * Alphaj * dphi0) || (phij >= philo) )
261  Alphahi = Alphaj;
262  else
263  {
264  dphij = gradp( x0 + Alphaj * p , p );
265 
266  if ( abs( dphij ) <= -m_c2 * dphi0 )
267  Alpha_star = Alphaj;
268  return Alpha_star;
269 
270  if ( dphij * ( Alphahi - Alphalo ) >= 0 )
271  Alphahi = Alphalo;
272 
273  Alphalo = Alphaj;
274  }
275 
276  }
277 
278  // If above loop fails take the mid-point
279  if (iter == MaxIter)
280  Alpha_star = 0.5 * ( Alphahi + Alphalo );
281 
282  return Alpha_star;
283 
284 }
285 
286 
287 double BFGSFitter::interpolate( const std::vector< double >& x0,
288  const std::vector< double >& p,
289  double Alphaim,
290  double Alphai ) const
291 {
292 
293  if ( Alphaim > Alphai )
294  swap( Alphaim, Alphai);
295 
296  double phiim = function( x0 + Alphaim * p );
297  double phii = function( x0 + Alphai * p );
298 
299  double dphiim = gradp( x0 + Alphaim * p, p );
300  double dphii = gradp( x0 + Alphai * p, p );
301 
302  double d1 = dphiim + dphii - 3 * ( (phiim - phii)/(Alphaim - Alphai) );
303  double d2 = sqrt( d1 * d1 - dphiim * dphii);
304 
305  double Alphaip = Alphai - (Alphai - Alphaim) *
306  ( (dphiim + d2 - d1) / (dphii - dphiim + 2 * d2) );
307 
308  double lth = abs(Alphai - Alphaim);
309 
310  if( abs(Alphaip - Alphai) < 0.05 * lth ||
311  abs(Alphaip - Alphaim) < 0.05 * lth ||
312  Alphaip < Alphaim ||
313  Alphaip > Alphai )
314  Alphaip = 0.5 * (Alphai + Alphaim);
315 
316  return Alphaip;
317 }
318 
319 double BFGSFitter::function( const std::vector< double > & u ) const
320 {
321  // Gets the value of the objective function from pvfcn
322  // first you have to convert the vector to vector
323  vector< double > x( u.size() );
324 
325  for( unsigned int i = 0; i < u.size(); i++ )
326  x[i] = u[i];
327 
328  m_fcn -> setFreeParameters ( x );
329 
330  return objectiveValue();
331 
332  // Following are a few standard test functions
333  // which were used to test this minimizer.
334 
335  //double x = u[0]; double y = u[1];
336 
337  // Rosenbrock's function
338  //return 100 * (y - x * x) * (y - x * x) + (1 - x) * (1 - x);
339 
340  // Freudenstein and Roth's Function
341  //return pow(-13 + x + ((5 - y)*y - 2 )*y, 2) +
342  // pow(-29 + x + ((y + 1)*y - 14)*y, 2);
343 
344  // Beale Function
345  //return pow(1.5 - x*(1 - y ), 2) +
346  // pow(2.25 - x*(1 - y*y ), 2) +
347  // pow(2.625 - x*(1 - y*y*y ), 2);
348 }
349 
350 vector< double >
351 BFGSFitter::gradient( const std::vector< double > & u ) const
352 {
353  double h = 1e-5;
354  vector< double > x( u.size(), 0.0 );
355  vector< double > xph( u.size(), 0.0 );
356 
357  vector< double > g( u.size() );
358 
359  for( unsigned int i = 0; i < u.size(); i++ )
360  x[i] = u[i];
361 
362  // Calculating the gradient by finite differencing
363  m_fcn -> setFreeParameters ( x );
364  double fx = m_fcn -> objectiveValue ();
365  double fxph = 0.0;
366  for( unsigned int i = 0; i < u.size(); i++ )
367  {
368  for( unsigned int j = 0; j < u.size(); j++ ) {
369  xph[j] = ( i == j ) ? ( x[j] + h ) : x[j];
370  }
371  m_fcn -> setFreeParameters ( xph );
372  fxph = m_fcn -> objectiveValue ();
373  g[i] = ( fxph - fx ) / h;
374  }
375 
376  // Following are a few gradients of standard test functions
377  // which were used to test this minimizer.
378 
379  /* double x = u[0]; double y = u[1];
380  vector< double > g(2);*/
381 
382  // Gradient of Rosenbrock's function
383  /* g[0] = -400 * x * ( y - x * x ) - 2 * ( 1 - x );
384  g[1] = 200 * ( y - x * x );*/
385 
386  // Gradient of Freudenstein and Roth's Function
387  //g[0] = 2*(y*(y*(y+1)-14)+ x - 29) +
388  // 2*(y*((5-y)*y-2) + x - 13);
389  //g[1] = 2*(y*(2*y+1) + y*(y+1) - 14) * (y*(y*(y+1) - 14) + x - 29) +
390  // 2*((5-y)*y + (5-2*y)*y - 2) * (y*((5-y)*y - 2) + x - 13);
391 
392  // Gradient of Beale Function
393  //g[0]= 2*(2.625 - x * (1 - y*y*y) ) * (y*y*y - 1)+
394  // 2*(2.25 - x * (1 - y*y) ) * (y*y - 1)+
395  // 2*(1.5 - x * (1 - y ) ) * (y - 1);
396  //g[1]= 6*x*y*y*(2.625 - x * (1 - y*y*y)) +
397  // 4*x*y *(2.25 - x * (1 - y*y)) +
398  // 2*x *(1.5 - x * (1 - y ));
399 
400  return g;
401 }
402 
403 
404 double BFGSFitter::gradp( const std::vector< double > & u,
405  const std::vector< double > & p ) const
406 {
407  double h = 1e-5;
408  vector< double > x( u.size() );
409 
410  // Calculating the gradient in direction of p by finite differencing
411  for ( unsigned int i = 0; i < u.size(); i++ ) {
412  x[i] = u[i];
413  }
414 // double fx = m_fcn -> operator()( x );
415  m_fcn -> setFreeParameters ( x );
416  double fx = m_fcn -> objectiveValue ( );
417 
418  for ( unsigned int i = 0; i < u.size(); i++ ) {
419  x[i] += h * p[i] ;
420  }
421  m_fcn -> setFreeParameters ( x );
422  double fxph = m_fcn -> objectiveValue ();
423 
424  return ( fxph - fx ) / h;
425 
426  // Following are a directional derivative of standard test functions
427  // which were used to test this minimizer.
428 
429  /* double x = u[0]; double y = u[1];
430  vector< double > g(2);*/
431 
432  // Gradient of Rosenbrock's function
433  /* g[0] = -400 * x * ( y - x * x ) - 2 * ( 1 - x );
434  g[1] = 200 * ( y - x * x );*/
435 
436  // Gradient of Freudenstein and Roth's Function
437  //g[0] = 2*(y*(y*(y+1)-14)+ x - 29) +
438  // 2*(y*((5-y)*y-2) + x - 13);
439  //g[1] = 2*(y*(2*y+1) + y*(y+1) - 14) * (y*(y*(y+1) - 14) + x - 29) +
440  // 2*((5-y)*y + (5-2*y)*y - 2) * (y*((5-y)*y - 2) + x - 13);
441 
442  // Gradient of Beale Function
443  //g[0]= 2*(2.625 - x * (1 - y*y*y) ) * (y*y*y - 1)+
444  // 2*(2.25 - x * (1 - y*y) ) * (y*y - 1)+
445  // 2*(1.5 - x * (1 - y ) ) * (y - 1);
446  //g[1]= 6*x*y*y*(2.625 - x * (1 - y*y*y)) +
447  // 4*x*y *(2.25 - x * (1 - y*y)) +
448  // 2*x *(1.5 - x * (1 - y ));
449 
450  // return g[0] * p[0] + g[1] * p[1];
451 }
452 
453 const vector< double > & BFGSFitter::initIter() const
454 {
455  return m_xinit;
456 }
457 
458 int BFGSFitter::setInitIter( const std::vector< double > & xinit )
459 {
460  m_xinit.resize( xinit.size() );
461 
462  // Provide a random perturbations to the initial value
463  //srand( (unsigned) time( NULL ) );
464  //for( unsigned int i = 0; i < xinit.size(); i++ )
465  //m_xinit[i] = xinit[i] * (1 + 0.025 * ( 0.5 - rand() / ( RAND_MAX + 1.0 )));
466 
467  m_xinit = xinit;
468 
469  return EXIT_SUCCESS;
470 }
471 
472 int BFGSFitter::calcCovariance ( std::vector < std::vector < double > >& cov )
473 {
474  cov.resize( m_xinit.size() );
475  for( unsigned int i = 0; i < m_xinit.size(); i++ )
476  cov[i].resize( m_xinit.size(), 0.0 );
477 
478  for( unsigned int i = 0; i < m_xinit.size(); i++ )
479  for( unsigned int j = 0; j < m_xinit.size(); j++ )
480  cov[i][j] = m_M[i][j];
481 
482  // set return flag as EXIT_SUCCESS if cov is Positive Definite,
483  // and to EXIT_FAILURE otherwise.
484  int flag = cholFactor( cov );
485 
486  for( unsigned int i = 0; i < m_xinit.size(); i++ )
487  for( unsigned int j = 0; j < m_xinit.size(); j++ )
488  cov[i][j] = m_M[i][j];
489 
490  return flag;
491 }
492 
493 
494 double BFGSFitter::iterParam ( std::string name )
495 {
496  // First check if user is attempting to access the max_iterations
497  // This is a hack, but it was necessary to ensure a uniform interface
498  if( name == "max_iterations" )
499  return m_max_iterations;
500 
501  // Don't use map::operator[]() to find the name and its
502  // associated value, as it will create one if it doesn't exist.
503  map< string, double * >::const_iterator it
504  = m_iter_params.find ( name );
505 
506  if ( it == m_iter_params.end () )
507  cout << name << " is not a valid iteration parameter name" << endl;
508  else
509  return *m_iter_params[name];
510 
511  return 0.0;
512 }
513 
514 int BFGSFitter::setIterParam ( std::string name, double value )
515 
516 {
517 
518  // First check if user is attempting to modify the max_iterations
519  // This is a hack, but it was necessary to ensure a uniform interface
520  if( name == "max_iterations" )
521  {
522  m_max_iterations = ( int ) value;
523  return EXIT_SUCCESS;
524  }
525 
526  // Now start worrying about the other parameters.
527  // Don't diretly use map::operator[]() to find the name and its
528  // associated value, as it will create one if it doesn't exist.
529  map< string, double * >::const_iterator it
530  = m_iter_params.find ( name );
531 
532  if ( it == m_iter_params.end () )
533  {
534  cout << name << " is not a valid iteration parameter name" << endl;
535  return EXIT_FAILURE;
536  }
537  else
538  {
539  *m_iter_params[name] = value;
540  return EXIT_SUCCESS;
541  }
542 
543  return EXIT_FAILURE;
544 }

Generated for HippoDraw Class Library by doxygen