You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1392 lines
47 KiB

// modified version of:
/*
* Project: LevenbergMarquardtLeastSquaresFitting
*
* File: lmmin.c
*
* Contents: Levenberg-Marquardt core implementation,
* and simplified user interface.
*
* Author: Joachim Wuttke <j.wuttke@fz-juelich.de>
*
* Acknowledgments:
* This software has been build on top of public domain work
* by Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. Moré,
* Steve Moshier, and the authors of lapack.
* Over the years, it has been much improved thanks to
* feedback from numerous users. See ../CHANGELOG.
*
* Licence: see ../COPYING (FreeBSD)
*
* Homepage: joachimwuttke.de/lmfit
*/
#include "Common.h"
#include "lmmin.h"
using namespace SEACAVE;
/*****************************************************************************/
/* set numeric constants */
/*****************************************************************************/
/* machine-dependent constants from float.h */
#define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
#define LM_DWARF DBL_MIN /* smallest nonzero number */
#define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
#define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
#define LM_USERTOL 30*LM_MACHEP /* users are recommended to require this */
/* If the above values do not work, the following seem good for an x86:
LM_MACHEP .555e-16
LM_DWARF 9.9e-324
LM_SQRT_DWARF 1.e-160
LM_SQRT_GIANT 1.e150
LM_USER_TOL 1.e-14
The following values should work on any machine:
LM_MACHEP 1.2e-16
LM_DWARF 1.0e-38
LM_SQRT_DWARF 3.834e-20
LM_SQRT_GIANT 1.304e19
LM_USER_TOL 1.e-14
*/
const lm_control_struct lm_control_double = {
LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL, 100., 100, 1
#ifdef LMFIT_PRINTOUT
, 0
#endif
};
const lm_control_struct lm_control_float = {
1.e-7, 1.e-7, 1.e-7, 1.e-7, 100., 100, 0
#ifdef LMFIT_PRINTOUT
, 0
#endif
};
/*****************************************************************************/
/* set message texts (indexed by status.info) */
/*****************************************************************************/
const char *lm_infmsg[] = {
"success (sum of squares below underflow limit)",
"success (the relative error in the sum of squares is at most tol)",
"success (the relative error between x and the solution is at most tol)",
"success (both errors are at most tol)",
"trapped by degeneracy (increasing epsilon might help)",
"timeout (number of calls to fcn has reached maxcall*(n+1))",
"failure (ftol<tol: cannot reduce sum of squares any further)",
"failure (xtol<tol: cannot improve approximate solution any further)",
"failure (gtol<tol: cannot improve approximate solution any further)",
"exception (not enough memory)",
"fatal coding error (improper input parameters)",
"exception (break requested within function evaluation)"
};
const char *lm_shortmsg[] = {
"success (0)",
"success (f)",
"success (p)",
"success (f,p)",
"degenerate",
"call limit",
"failed (f)",
"failed (p)",
"failed (o)",
"no memory",
"invalid input",
"user break"
};
#ifdef LMFIT_PRINTOUT
/*****************************************************************************/
/* lm_printout_std (default monitoring routine) */
/*****************************************************************************/
void lm_printout_std( int n_par, const double *par, int m_dat,
const void *data, const double *fvec,
int printflags, int iflag, int iter, int nfev)
/*
* data : for soft control of printout behaviour, add control
* variables to the data struct
* iflag : 0 (init) 1 (outer loop) 2(inner loop) -1(terminated)
* iter : outer loop counter
* nfev : number of calls to *evaluate
*/
{
int i;
if( !printflags )
return;
if( printflags & 1 ){
/* location of printout call within lmdif */
if (iflag == 2) {
printf("trying step in gradient direction ");
} else if (iflag == 1) {
printf("determining gradient (iteration %2d)", iter);
} else if (iflag == 0) {
printf("starting minimization ");
} else if (iflag == -1) {
printf("terminated after %3d evaluations ", nfev);
}
}
if( printflags & 2 ){
printf(" par: ");
for (i = 0; i < n_par; ++i)
printf(" %18.11g", par[i]);
printf(" => norm: %18.11g", lm_enorm(m_dat, fvec));
}
if( printflags & 3 )
printf( "\n" );
if ( (printflags & 8) || ((printflags & 4) && iflag == -1) ) {
printf(" residuals:\n");
for (i = 0; i < m_dat; ++i)
printf(" fvec[%2d]=%12g\n", i, fvec[i] );
}
}
#endif
/*****************************************************************************/
/* lm_minimize (intermediate-level interface) */
/*****************************************************************************/
void lmmin( int n_par, double *par, int m_dat, const void *data,
void (*evaluate) (const double *par, int m_dat, const void *data,
double *fvec, double *fjac, int *info),
const lm_control_struct *control, lm_status_struct *status
#ifdef LMFIT_PRINTOUT
, void (*printout) (int n_par, const double *par, int m_dat,
const void *data, const double *fvec,
int printflags, int iflag, int iter, int nfev)
#endif
)
{
/*** allocate work space. ***/
double *fvec, *diag, *fjac, *qtf, *wa1, *wa2, *wa3, *wa4;
int *ipvt;
int n = n_par;
int m = m_dat;
/* One malloc call to allocate several arrays (Frank Polchow, 2013) */
fvec = (double*)malloc((2*m+5*n+n*m)*sizeof(double) + n*sizeof(int));
if (NULL==fvec) {//fail in allocation
status->info = 9;
return;
}
diag = (double *) &fvec[m];
qtf = (double *) &diag[n];
fjac = (double *) &qtf[n];
wa1 = (double *) &fjac[n*m];
wa2 = (double *) &wa1[n];
wa3 = (double *) &wa2[n];
wa4 = (double *) &wa3[n];
ipvt = (int *) &wa4[m];
/*** perform fit. ***/
status->info = 0;
/* this goes through the modified legacy interface: */
lm_lmdif(
m, n, par, fvec, control->ftol, control->xtol, control->gtol,
control->maxcall, control->epsilon, (control->scale_diag ? diag : NULL),
(control->scale_diag ? 1 : 2), control->stepbound, status->info,
status->nfev, fjac, ipvt, qtf, wa1, wa2, wa3, wa4,
evaluate, data
#ifdef LMFIT_PRINTOUT
, printout, control->printflags
#endif
);
#ifdef LMFIT_PRINTOUT
if ( printout )
(*printout)( n, par, m, data, fvec, control->printflags, -1, 0, status->nfev );
#endif
status->fnorm = lm_enorm(m, fvec);
if ( status->info < 0 )
status->info = 11;
/*** clean up. ***/
free(fvec);
} /*** lmmin. ***/
/*****************************************************************************/
/* lm_lmdif (low-level, modified legacy interface for full control) */
/*****************************************************************************/
void lm_lmpar( int n, double *r, int ldr, int *ipvt, double *diag,
double *qtb, double delta, double *par, double *x,
double *sdiag, double *aux, double *xdi );
void lm_qrfac( int m, int n, double *a, int pivot, int *ipvt,
double *rdiag, double *acnorm, double *wa );
void lm_qrsolv( int n, double *r, int ldr, int *ipvt, double *diag,
double *qtb, double *x, double *sdiag, double *wa );
void lm_lmdif( int m, int n, double *x, double *fvec, double ftol,
double xtol, double gtol, int maxfev, double epsfcn,
double *diag, int mode, double factor, int& info, int& nfev,
double *fjac, int *ipvt, double *qtf, double *wa1,
double *wa2, double *wa3, double *wa4,
void (*evaluate) (const double *par, int m_dat, const void *data,
double *fvec, double *fjac, int *info),
const void *data
#ifdef LMFIT_PRINTOUT
, void (*printout) (int n_par, const double *par, int m_dat,
const void *data, const double *fvec,
int printflags, int iflag, int iter, int nfev),
int printflags
#endif
)
{
/*
* The purpose of lmdif is to minimize the sum of the squares of
* m nonlinear functions in n variables by a modification of
* the levenberg-marquardt algorithm. The user must provide a
* subroutine evaluate which calculates the functions. The jacobian
* is then calculated by a forward-difference approximation.
*
* The multi-parameter interface lm_lmdif is for users who want
* full control and flexibility. Most users will be better off using
* the simpler interface lmmin provided above.
*
* Parameters:
*
* m is a positive integer input variable set to the number
* of functions.
*
* n is a positive integer input variable set to the number
* of variables; n must not exceed m.
*
* x is an array of length n. On input x must contain an initial
* estimate of the solution vector. On OUTPUT x contains the
* final estimate of the solution vector.
*
* fvec is an OUTPUT array of length m which contains
* the functions evaluated at the output x.
*
* ftol is a nonnegative input variable. Termination occurs when
* both the actual and predicted relative reductions in the sum
* of squares are at most ftol. Therefore, ftol measures the
* relative error desired in the sum of squares.
*
* xtol is a nonnegative input variable. Termination occurs when
* the relative error between two consecutive iterates is at
* most xtol. Therefore, xtol measures the relative error desired
* in the approximate solution.
*
* gtol is a nonnegative input variable. Termination occurs when
* the cosine of the angle between fvec and any column of the
* jacobian is at most gtol in absolute value. Therefore, gtol
* measures the orthogonality desired between the function vector
* and the columns of the jacobian.
*
* maxfev is a positive integer input variable. Termination
* occurs when the number of calls to lm_fcn is at least
* maxfev by the end of an iteration.
*
* epsfcn is an input variable used in choosing a step length for
* the forward-difference approximation. The relative errors in
* the functions are assumed to be of the order of epsfcn.
*
* diag is an array of length n. If mode = 1 (see below), diag is
* internally set. If mode = 2, diag must contain positive entries
* that serve as multiplicative scale factors for the variables.
*
* mode is an integer input variable. If mode = 1, the
* variables will be scaled internally. If mode = 2,
* the scaling is specified by the input diag.
*
* factor is a positive input variable used in determining the
* initial step bound. This bound is set to the product of
* factor and the euclidean norm of diag*x if nonzero, or else
* to factor itself. In most cases factor should lie in the
* interval (0.1,100.0). Generally, the value 100.0 is recommended.
*
* info is an integer OUTPUT variable that indicates the termination
* status of lm_lmdif as follows:
*
* info < 0 termination requested by user-supplied routine *evaluate;
*
* info = 0 fnorm almost vanishing;
*
* info = 1 both actual and predicted relative reductions
* in the sum of squares are at most ftol;
*
* info = 2 relative error between two consecutive iterates
* is at most xtol;
*
* info = 3 conditions for info = 1 and info = 2 both hold;
*
* info = 4 the cosine of the angle between fvec and any
* column of the jacobian is at most gtol in
* absolute value;
*
* info = 5 number of calls to lm_fcn has reached or
* exceeded maxfev;
*
* info = 6 ftol is too small: no further reduction in
* the sum of squares is possible;
*
* info = 7 xtol is too small: no further improvement in
* the approximate solution x is possible;
*
* info = 8 gtol is too small: fvec is orthogonal to the
* columns of the jacobian to machine precision;
*
* info =10 improper input parameters;
*
* nfev is an OUTPUT variable set to the number of calls to the
* user-supplied routine *evaluate.
*
* fjac is an OUTPUT m by n array. The upper n by n submatrix
* of fjac contains an upper triangular matrix r with
* diagonal elements of nonincreasing magnitude such that
*
* pT*(jacT*jac)*p = rT*r
*
* (NOTE: T stands for matrix transposition),
*
* where p is a permutation matrix and jac is the final
* calculated jacobian. Column j of p is column ipvt(j)
* (see below) of the identity matrix. The lower trapezoidal
* part of fjac contains information generated during
* the computation of r.
*
* ipvt is an integer OUTPUT array of length n. It defines a
* permutation matrix p such that jac*p = q*r, where jac is
* the final calculated jacobian, q is orthogonal (not stored),
* and r is upper triangular with diagonal elements of
* nonincreasing magnitude. Column j of p is column ipvt(j)
* of the identity matrix.
*
* qtf is an OUTPUT array of length n which contains
* the first n elements of the vector (q transpose)*fvec.
*
* wa1, wa2, and wa3 are work arrays of length n.
*
* wa4 is a work array of length m, used among others to hold
* residuals from evaluate.
*
* evaluate points to the subroutine which calculates the
* m nonlinear functions. Implementations should be written as follows:
*
* void evaluate( double* par, int m_dat, void *data,
* double* fvec, double *fjac, int *info )
* {
* // for ( i=0; i<m_dat; ++i )
* // calculate fvec[i] for given parameters par;
* // to stop the minimization,
* // set *info to a negative integer.
* }
*
* printout points to the subroutine which informs about fit progress.
* Call with printout=0 if no printout is desired.
* Call with printout=lm_printout_std to use the default implementation.
*
* printflags is passed to printout.
*
* data is an input pointer to an arbitrary structure that is passed to
* evaluate. Typically, it contains experimental data to be fitted.
*
*/
int i, j;
double actred, delta, dirder, eps, fnorm, fnorm1, gnorm, par, pnorm,
prered, ratio, step, sum, temp1, temp2, temp3, xnorm;
static const double p1 = 0.1;
static const double p0001 = 1.0e-4;
#ifdef LMFIT_PRINTOUT
int iter = 0;/* outer loop counter */
#endif
nfev = 0; /* function evaluation counter */
par = 0; /* Levenberg-Marquardt parameter */
delta = 0; /* to prevent a warning (initialization within if-clause) */
xnorm = 0; /* ditto */
/*** lmdif: check input parameters for errors. ***/
if ((n <= 0) || (m < n) || (ftol < 0.) || (xtol < 0.) || (gtol < 0.) || (maxfev <= 0) || (factor <= 0.)) {
info = 10; // invalid parameter
return;
}
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmdif\n");
#endif
/*** lmdif: the outer loop. ***/
do {
info = 0;
if (epsfcn == 0) {
if (nfev == 0) {
/*** outer: evaluate function at starting point, calculate norm, and get user provided Jacobian. ***/
(*evaluate) (x, m, data, fvec, fjac, &info);
fnorm = lm_enorm(m, fvec);
if (fnorm <= LM_DWARF) {
info = 0;
return;
}
} else {
/*** outer: get user provided Jacobian. ***/
(*evaluate) (x, m, data, NULL, fjac, &info);
}
#ifdef LMFIT_PRINTOUT
if( printout )
(*printout) (n, x, m, data, fvec, printflags, 1, iter, nfev);
#endif
if (info < 0)
return; /* user requested break */
} else {
if (nfev == 0) {
/*** outer: evaluate function at starting point and calculate norm. ***/
(*evaluate) (x, m, data, fvec, NULL, &info);
#ifdef LMFIT_PRINTOUT
if( printout )
(*printout) (n, x, m, data, fvec, printflags, 0, 0, nfev);
#endif
if (info < 0)
return;
fnorm = lm_enorm(m, fvec);
if( fnorm <= LM_DWARF ){
info = 0;
return;
}
eps = sqrt(MAXF(epsfcn,LM_MACHEP)); /* for calculating the Jacobian by forward differences */
}
/*** outer: calculate the Jacobian. ***/
for (j = 0; j < n; ++j) {
const double temp = x[j];
step = eps*MAXF(eps,ABS(temp));
x[j] = temp + step; /* replace temporarily */
info = 0;
(*evaluate) (x, m, data, wa4, NULL, &info);
#ifdef LMFIT_PRINTOUT
if( printout )
(*printout) (n, x, m, data, wa4, printflags, 1, iter, nfev);
#endif
if (info < 0)
return; /* user requested break */
step = x[j] - temp; /* due to float point arithmetic errors most of the time this is not equal to step */
for (i = 0; i < m; ++i)
fjac[j*m+i] = (wa4[i] - fvec[i]) / step;
x[j] = temp; /* restore */
}
}
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmdif/ outer loop iter=%d nfev=%d fnorm=%.10e\n", iter, nfev, fnorm);
#endif
#ifdef LMFIT_DEBUG_MATRIX
/* print the entire matrix */
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++)
printf("%.5e ", fjac[j*m+i]);
printf("\n");
}
#endif
/*** outer: compute the qr factorization of the Jacobian. ***/
lm_qrfac(m, n, fjac, 1, ipvt, wa1, wa2, wa3);
/* return values are ipvt, wa1=rdiag, wa2=acnorm */
if (nfev == 0) {
/* first iteration only */
if (mode != 2) {
/* diag := norms of the columns of the initial Jacobian */
for (j = 0; j < n; j++) {
diag[j] = wa2[j];
if (wa2[j] == 0.)
diag[j] = 1.;
}
/* use diag to scale x, then calculate the norm */
for (j = 0; j < n; j++)
wa3[j] = diag[j] * x[j];
xnorm = lm_enorm(n, wa3);
} else {
/* calculate the norm */
xnorm = lm_enorm(n, x);
}
/* initialize the step bound delta. */
delta = factor * xnorm;
if (delta == 0.)
delta = factor;
} else {
if (mode != 2) {
for (j = 0; j < n; j++)
diag[j] = MAXF( diag[j], wa2[j] );
}
}
/*** outer: form (q transpose)*fvec and store first n components in qtf. ***/
memcpy(wa4, fvec, sizeof(double)*m);
for (j = 0; j < n; j++) {
temp3 = fjac[j*m+j];
if (temp3 != 0.) {
sum = 0;
for (i = j; i < m; i++)
sum += fjac[j*m+i] * wa4[i];
double temp = -sum / temp3;
for (i = j; i < m; i++)
wa4[i] += fjac[j*m+i] * temp;
}
fjac[j*m+j] = wa1[j];
qtf[j] = wa4[j];
}
/*** outer: compute norm of scaled gradient and test for convergence. ***/
gnorm = 0;
for (j = 0; j < n; j++) {
if (wa2[ipvt[j]] == 0)
continue;
sum = 0.;
for (i = 0; i <= j; i++)
sum += fjac[j*m+i] * qtf[i];
gnorm = MAXF( gnorm, fabs( sum / wa2[ipvt[j]] / fnorm ) );
}
if (gnorm <= gtol) {
info = 4;
return;
}
/*** the inner loop. ***/
do {
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmdif/ inner loop iter=%d nfev=%d\n", iter, nfev);
#endif
/*** inner: determine the levenberg-marquardt parameter. ***/
lm_lmpar( n, fjac, m, ipvt, diag, qtf, delta, &par, wa1, wa2, wa4, wa3 );
/* used return values are fjac (partly), par, wa1=x, wa3=diag*x */
for (j = 0; j < n; j++)
wa2[j] = x[j] - wa1[j]; /* new parameter vector ? */
pnorm = lm_enorm(n, wa3);
/* at first call, adjust the initial step bound. */
if (nfev++ == 0)
delta = MINF(delta, pnorm);
/*** inner: evaluate the function at x + p and calculate its norm. ***/
info = 0;
(*evaluate) (wa2, m, data, wa4, NULL, &info);
#ifdef LMFIT_PRINTOUT
if( printout )
(*printout) (n, wa2, m, data, wa4, printflags, 2, iter, nfev);
#endif
if (info < 0)
return; /* user requested break. */
fnorm1 = lm_enorm(m, wa4);
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmdif/ pnorm %.10e fnorm1 %.10e fnorm %.10e"
" delta=%.10e par=%.10e\n",
pnorm, fnorm1, fnorm, delta, par);
#endif
/*** inner: compute the scaled actual reduction. ***/
if (p1 * fnorm1 < fnorm)
actred = 1 - SQUARE(fnorm1 / fnorm);
else
actred = -1;
/*** inner: compute the scaled predicted reduction and
the scaled directional derivative. ***/
for (j = 0; j < n; j++) {
wa3[j] = 0;
for (i = 0; i <= j; i++)
wa3[i] -= fjac[j*m+i] * wa1[ipvt[j]];
}
temp1 = lm_enorm(n, wa3) / fnorm;
temp2 = sqrt(par) * pnorm / fnorm;
prered = SQUARE(temp1) + 2 * SQUARE(temp2);
dirder = -(SQUARE(temp1) + SQUARE(temp2));
/*** inner: compute the ratio of the actual to the predicted reduction. ***/
ratio = prered != 0 ? actred / prered : 0;
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmdif/ actred=%.10e prered=%.10e ratio=%.10e"
" sq(1)=%.10e sq(2)=%.10e dd=%.10e\n",
actred, prered, prered != 0 ? ratio : 0.,
SQUARE(temp1), SQUARE(temp2), dirder);
#endif
/*** inner: update the step bound. ***/
if (ratio <= 0.25) {
double temp = (actred >= 0 ? 0.5 : 0.5 * dirder / (dirder + 0.5 * actred));
if (p1 * fnorm1 >= fnorm || temp < p1)
temp = p1;
delta = temp * MINF(delta, pnorm / p1);
par /= temp;
} else if (par == 0. || ratio >= 0.75) {
delta = pnorm / 0.5;
par *= 0.5;
}
/*** inner: test for successful iteration. ***/
if (ratio >= p0001) {
/* yes, success: update x, fvec, and their norms. */
if (mode != 2) {
for (j = 0; j < n; j++) {
x[j] = wa2[j];
wa2[j] = diag[j] * x[j];
}
} else {
memcpy(x, wa2, sizeof(double)*n);
}
std::swap(fvec, wa4);
xnorm = lm_enorm(n, wa2);
fnorm = fnorm1;
#ifdef LMFIT_PRINTOUT
++iter;
#endif
}
#ifdef LMFIT_DEBUG_MESSAGES
else {
printf("ATTN: iteration considered unsuccessful\n");
}
#endif
/*** inner: test for convergence. ***/
if( fnorm<=LM_DWARF ){
info = 0;
return;
}
if (fabs(actred) <= ftol && prered <= ftol && 0.5 * ratio <= 1)
info = 1;
if (delta <= xtol * xnorm)
info += 2;
if (info != 0)
return;
/*** inner: tests for termination and stringent tolerances. ***/
if (nfev >= maxfev){
info = 5;
return;
}
if (fabs(actred) <= LM_MACHEP &&
prered <= LM_MACHEP && 0.5 * ratio <= 1){
info = 6;
return;
}
if (delta <= LM_MACHEP * xnorm){
info = 7;
return;
}
if (gnorm <= LM_MACHEP){
info = 8;
return;
}
/*** inner: end of the loop. repeat if iteration unsuccessful. ***/
} while (ratio < p0001);
/*** outer: end of the loop. ***/
} while (1);
} /*** lm_lmdif. ***/
/*****************************************************************************/
/* lm_lmpar (determine Levenberg-Marquardt parameter) */
/*****************************************************************************/
void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
double *qtb, double delta, double *par, double *x,
double *sdiag, double *aux, double *xdi)
{
/* Given an m by n matrix a, an n by n nonsingular diagonal
* matrix d, an m-vector b, and a positive number delta,
* the problem is to determine a value for the parameter
* par such that if x solves the system
*
* a*x = b and sqrt(par)*d*x = 0
*
* in the least squares sense, and dxnorm is the euclidean
* norm of d*x, then either par=0 and (dxnorm-delta) < 0.1*delta,
* or par>0 and abs(dxnorm-delta) < 0.1*delta.
*
* Using lm_qrsolv, this subroutine completes the solution of the problem
* if it is provided with the necessary information from the
* qr factorization, with column pivoting, of a. That is, if
* a*p = q*r, where p is a permutation matrix, q has orthogonal
* columns, and r is an upper triangular matrix with diagonal
* elements of nonincreasing magnitude, then lmpar expects
* the full upper triangle of r, the permutation matrix p,
* and the first n components of qT*b. On output
* lmpar also provides an upper triangular matrix s such that
*
* pT*(aT*a + par*d*d)*p = sT*s.
*
* s is employed within lmpar and may be of separate interest.
*
* Only a few iterations are generally needed for convergence
* of the algorithm. If, however, the limit of 10 iterations
* is reached, then the output par will contain the best
* value obtained so far.
*
* parameters:
*
* n is a positive integer input variable set to the order of r.
*
* r is an n by n array. on input the full upper triangle
* must contain the full upper triangle of the matrix r.
* on OUTPUT the full upper triangle is unaltered, and the
* strict lower triangle contains the strict upper triangle
* (transposed) of the upper triangular matrix s.
*
* ldr is a positive integer input variable not less than n
* which specifies the leading dimension of the array r.
*
* ipvt is an integer input array of length n which defines the
* permutation matrix p such that a*p = q*r. column j of p
* is column ipvt(j) of the identity matrix.
*
* diag is an input array of length n which must contain the
* diagonal elements of the matrix d.
*
* qtb is an input array of length n which must contain the first
* n elements of the vector (q transpose)*b.
*
* delta is a positive input variable which specifies an upper
* bound on the euclidean norm of d*x.
*
* par is a nonnegative variable. on input par contains an
* initial estimate of the levenberg-marquardt parameter.
* on OUTPUT par contains the final estimate.
*
* x is an OUTPUT array of length n which contains the least
* squares solution of the system a*x = b, sqrt(par)*d*x = 0,
* for the output par.
*
* sdiag is an array of length n which contains the
* diagonal elements of the upper triangular matrix s.
*
* aux is a multi-purpose work array of length n.
*
* xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
*
*/
int i, j, nsing;
double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
static const double p1 = 0.1;
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmpar\n");
#endif
/*** lmpar: compute and store in x the gauss-newton direction. if the
jacobian is rank-deficient, obtain a least squares solution. ***/
nsing = n;
for (j = 0; j < n; j++) {
aux[j] = qtb[j];
if (r[j * ldr + j] == 0 && nsing == n)
nsing = j;
if (nsing < n)
aux[j] = 0;
}
#ifdef LMFIT_DEBUG_MESSAGES
printf("nsing %d ", nsing);
#endif
for (j = nsing - 1; j >= 0; j--) {
aux[j] = aux[j] / r[j + ldr * j];
double temp = aux[j];
for (i = 0; i < j; i++)
aux[i] -= r[j * ldr + i] * temp;
}
for (j = 0; j < n; j++)
x[ipvt[j]] = aux[j];
/*** lmpar: initialize the iteration counter, evaluate the function at the
origin, and test for acceptance of the gauss-newton direction. ***/
if (diag) {
for (j = 0; j < n; j++)
xdi[j] = diag[j] * x[j];
dxnorm = lm_enorm(n, xdi);
} else {
dxnorm = lm_enorm(n, x);
}
fp = dxnorm - delta;
if (fp <= p1 * delta) {
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmpar/ terminate (fp<p1*delta)\n");
#endif
*par = 0;
return;
}
/*** lmpar: if the jacobian is not rank deficient, the newton
step provides a lower bound, parl, for the 0. of
the function. otherwise set this bound to 0.. ***/
parl = 0;
if (nsing >= n) {
if (diag) {
for (j = 0; j < n; j++)
aux[j] = diag[ipvt[j]] * xdi[ipvt[j]] / dxnorm;
} else {
for (j = 0; j < n; j++)
aux[j] = x[ipvt[j]] / dxnorm;
}
for (j = 0; j < n; j++) {
double sum = 0;
for (i = 0; i < j; i++)
sum += r[j * ldr + i] * aux[i];
aux[j] = (aux[j] - sum) / r[j + ldr * j];
}
double temp = lm_enorm(n, aux);
parl = fp / delta / temp / temp;
}
/*** lmpar: calculate an upper bound, paru, for the 0. of the function. ***/
for (j = 0; j < n; j++) {
double sum = 0;
for (i = 0; i <= j; i++)
sum += r[j * ldr + i] * qtb[i];
if (diag) {
aux[j] = sum / diag[ipvt[j]];
} else {
aux[j] = sum;
}
}
gnorm = lm_enorm(n, aux);
paru = gnorm / delta;
if (paru == 0.)
paru = LM_DWARF / MINF(delta, p1);
/*** lmpar: if the input par lies outside of the interval (parl,paru),
set par to the closer endpoint. ***/
*par = MAXF(*par, parl);
*par = MINF(*par, paru);
if (*par == 0.)
*par = gnorm / dxnorm;
#ifdef LMFIT_DEBUG_MESSAGES
printf("lmpar/ parl %.4e par %.4e paru %.4e\n", parl, *par, paru);
#endif
/*** lmpar: iterate. ***/
for (int iter = 0; ; iter++) {
/** evaluate the function at the current value of par. **/
if (*par == 0.)
*par = MAXF(LM_DWARF, 0.001 * paru);
double temp = sqrt(*par);
if (diag) {
for (j = 0; j < n; j++)
aux[j] = temp * diag[j];
} else {
for (j = 0; j < n; j++)
aux[j] = temp;
}
lm_qrsolv( n, r, ldr, ipvt, aux, qtb, x, sdiag, xdi );
/* return values are r, x, sdiag */
if (diag) {
for (j = 0; j < n; j++)
xdi[j] = diag[j] * x[j]; /* used as output */
dxnorm = lm_enorm(n, xdi);
} else {
dxnorm = lm_enorm(n, x);
}
fp_old = fp;
fp = dxnorm - delta;
/** if the function is small enough, accept the current value
of par. Also test for the exceptional cases where parl
is zero or the number of iterations has reached 10. **/
if (fabs(fp) <= p1 * delta
|| (parl == 0. && fp <= fp_old && fp_old < 0.)
|| iter == 10)
break; /* the only exit from the iteration. */
/** compute the Newton correction. **/
if (diag) {
for (j = 0; j < n; j++)
aux[j] = diag[ipvt[j]] * xdi[ipvt[j]] / dxnorm;
} else {
for (j = 0; j < n; j++)
aux[j] = x[ipvt[j]] / dxnorm;
}
for (j = 0; j < n; j++) {
aux[j] = aux[j] / sdiag[j];
for (i = j + 1; i < n; i++)
aux[i] -= r[j * ldr + i] * aux[j];
}
temp = lm_enorm(n, aux);
parc = fp / delta / temp / temp;
/** depending on the sign of the function, update parl or paru. **/
if (fp > 0)
parl = MAXF(parl, *par);
else if (fp < 0)
paru = MINF(paru, *par);
/* the case fp==0 is precluded by the break condition */
/** compute an improved estimate for par. **/
*par = MAXF(parl, *par + parc);
}
} /*** lm_lmpar. ***/
/*****************************************************************************/
/* lm_qrfac (QR factorisation, from lapack) */
/*****************************************************************************/
void lm_qrfac(int m, int n, double *a, int pivot, int *ipvt,
double *rdiag, double *acnorm, double *wa)
{
/*
* This subroutine uses householder transformations with column
* pivoting (optional) to compute a qr factorization of the
* m by n matrix a. That is, qrfac determines an orthogonal
* matrix q, a permutation matrix p, and an upper trapezoidal
* matrix r with diagonal elements of nonincreasing magnitude,
* such that a*p = q*r. The householder transformation for
* column k, k = 1,2,...,min(m,n), is of the form
*
* i - (1/u(k))*u*uT
*
* where u has zeroes in the first k-1 positions. The form of
* this transformation and the method of pivoting first
* appeared in the corresponding linpack subroutine.
*
* Parameters:
*
* m is a positive integer input variable set to the number
* of rows of a.
*
* n is a positive integer input variable set to the number
* of columns of a.
*
* a is an m by n array. On input a contains the matrix for
* which the qr factorization is to be computed. On OUTPUT
* the strict upper trapezoidal part of a contains the strict
* upper trapezoidal part of r, and the lower trapezoidal
* part of a contains a factored form of q (the non-trivial
* elements of the u vectors described above).
*
* pivot is a logical input variable. If pivot is set true,
* then column pivoting is enforced. If pivot is set false,
* then no column pivoting is done.
*
* ipvt is an integer OUTPUT array of length lipvt. This array
* defines the permutation matrix p such that a*p = q*r.
* Column j of p is column ipvt(j) of the identity matrix.
* If pivot is false, ipvt is not referenced.
*
* rdiag is an OUTPUT array of length n which contains the
* diagonal elements of r.
*
* acnorm is an OUTPUT array of length n which contains the
* norms of the corresponding columns of the input matrix a.
* If this information is not needed, then acnorm can coincide
* with rdiag.
*
* wa is a work array of length n. If pivot is false, then wa
* can coincide with rdiag.
*
*/
int i, j, k, kmax, minmn;
double ajnorm;
/*** qrfac: compute initial column norms and initialize several arrays. ***/
for (j = 0; j < n; j++) {
acnorm[j] = lm_enorm(m, &a[j*m]);
rdiag[j] = acnorm[j];
wa[j] = rdiag[j];
if (pivot)
ipvt[j] = j;
}
#ifdef LMFIT_DEBUG_MESSAGES
printf("qrfac\n");
#endif
/*** qrfac: reduce a to r with householder transformations. ***/
minmn = MINF(m, n);
for (j = 0; j < minmn; j++) {
if (!pivot)
goto pivot_ok;
/** bring the column of largest norm into the pivot position. **/
kmax = j;
for (k = j + 1; k < n; k++)
if (rdiag[k] > rdiag[kmax])
kmax = k;
if (kmax == j)
goto pivot_ok;
for (i = 0; i < m; i++) {
double temp = a[j*m+i];
a[j*m+i] = a[kmax*m+i];
a[kmax*m+i] = temp;
}
rdiag[kmax] = rdiag[j];
wa[kmax] = wa[j];
k = ipvt[j];
ipvt[j] = ipvt[kmax];
ipvt[kmax] = k;
pivot_ok:
/** compute the Householder transformation to reduce the
j-th column of a to a multiple of the j-th unit vector. **/
ajnorm = lm_enorm(m-j, &a[j*m+j]);
if (ajnorm == 0.) {
rdiag[j] = 0;
continue;
}
if (a[j*m+j] < 0.)
ajnorm = -ajnorm;
for (i = j; i < m; i++)
a[j*m+i] /= ajnorm;
a[j*m+j] += 1;
/** apply the transformation to the remaining columns
and update the norms. **/
for (k = j + 1; k < n; k++) {
double sum = 0;
for (i = j; i < m; i++)
sum += a[j*m+i] * a[k*m+i];
double temp = sum / a[j + m * j];
for (i = j; i < m; i++)
a[k*m+i] -= temp * a[j*m+i];
if (pivot && rdiag[k] != 0.) {
temp = a[m * k + j] / rdiag[k];
temp = MAXF(0., 1 - temp * temp);
rdiag[k] *= sqrt(temp);
temp = rdiag[k] / wa[k];
if ( 0.05 * SQUARE(temp) <= LM_MACHEP ) {
rdiag[k] = lm_enorm(m-j-1, &a[m*k+j+1]);
wa[k] = rdiag[k];
}
}
}
rdiag[j] = -ajnorm;
}
}
/*****************************************************************************/
/* lm_qrsolv (linear least-squares) */
/*****************************************************************************/
void lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
double *qtb, double *x, double *sdiag, double *wa)
{
/*
* Given an m by n matrix a, an n by n diagonal matrix d,
* and an m-vector b, the problem is to determine an x which
* solves the system
*
* a*x = b and d*x = 0
*
* in the least squares sense.
*
* This subroutine completes the solution of the problem
* if it is provided with the necessary information from the
* qr factorization, with column pivoting, of a. That is, if
* a*p = q*r, where p is a permutation matrix, q has orthogonal
* columns, and r is an upper triangular matrix with diagonal
* elements of nonincreasing magnitude, then qrsolv expects
* the full upper triangle of r, the permutation matrix p,
* and the first n components of (q transpose)*b. The system
* a*x = b, d*x = 0, is then equivalent to
*
* r*z = qT*b, pT*d*p*z = 0,
*
* where x = p*z. If this system does not have full rank,
* then a least squares solution is obtained. On output qrsolv
* also provides an upper triangular matrix s such that
*
* pT *(aT *a + d*d)*p = sT *s.
*
* s is computed within qrsolv and may be of separate interest.
*
* Parameters
*
* n is a positive integer input variable set to the order of r.
*
* r is an n by n array. On input the full upper triangle
* must contain the full upper triangle of the matrix r.
* On OUTPUT the full upper triangle is unaltered, and the
* strict lower triangle contains the strict upper triangle
* (transposed) of the upper triangular matrix s.
*
* ldr is a positive integer input variable not less than n
* which specifies the leading dimension of the array r.
*
* ipvt is an integer input array of length n which defines the
* permutation matrix p such that a*p = q*r. Column j of p
* is column ipvt(j) of the identity matrix.
*
* diag is an input array of length n which must contain the
* diagonal elements of the matrix d.
*
* qtb is an input array of length n which must contain the first
* n elements of the vector (q transpose)*b.
*
* x is an OUTPUT array of length n which contains the least
* squares solution of the system a*x = b, d*x = 0.
*
* sdiag is an OUTPUT array of length n which contains the
* diagonal elements of the upper triangular matrix s.
*
* wa is a work array of length n.
*
*/
int i, kk, j, k, nsing;
double qtbpj;
double _sin, _cos, _tan, _cot; /* local variables, not functions */
/*** qrsolv: copy r and (q transpose)*b to preserve input and initialize s.
in particular, save the diagonal elements of r in x. ***/
for (j = 0; j < n; j++) {
for (i = j; i < n; i++)
r[j * ldr + i] = r[i * ldr + j];
x[j] = r[j * ldr + j];
wa[j] = qtb[j];
}
#ifdef LMFIT_DEBUG_MESSAGES
printf("qrsolv\n");
#endif
/*** qrsolv: eliminate the diagonal matrix d using a Givens rotation. ***/
for (j = 0; j < n; j++) {
/*** qrsolv: prepare the row of d to be eliminated, locating the
diagonal element using p from the qr factorization. ***/
if (diag) {
if (diag[ipvt[j]] == 0.)
goto L90;
for (k = j+1; k < n; k++)
sdiag[k] = 0.;
sdiag[j] = diag[ipvt[j]];
} else {
for (k = j+1; k < n; k++)
sdiag[k] = 0.;
sdiag[j] = 1.;
}
/*** qrsolv: the transformations to eliminate the row of d modify only
a single element of qT*b beyond the first n, which is initially 0. ***/
qtbpj = 0.;
for (k = j; k < n; k++) {
/** determine a Givens rotation which eliminates the
appropriate element in the current row of d. **/
if (sdiag[k] == 0.)
continue;
kk = k + ldr * k;
if (fabs(r[kk]) < fabs(sdiag[k])) {
_cot = r[kk] / sdiag[k];
_sin = 1 / sqrt(1 + SQUARE(_cot));
_cos = _sin * _cot;
} else {
_tan = sdiag[k] / r[kk];
_cos = 1 / sqrt(1 + SQUARE(_tan));
_sin = _cos * _tan;
}
/** compute the modified diagonal element of r and
the modified element of ((q transpose)*b,0). **/
r[kk] = _cos * r[kk] + _sin * sdiag[k];
double temp = _cos * wa[k] + _sin * qtbpj;
qtbpj = -_sin * wa[k] + _cos * qtbpj;
wa[k] = temp;
/** accumulate the transformation in the row of s. **/
for (i = k + 1; i < n; i++) {
temp = _cos * r[k * ldr + i] + _sin * sdiag[i];
sdiag[i] = -_sin * r[k * ldr + i] + _cos * sdiag[i];
r[k * ldr + i] = temp;
}
}
L90:
/** store the diagonal element of s and restore
the corresponding diagonal element of r. **/
sdiag[j] = r[j * ldr + j];
r[j * ldr + j] = x[j];
}
/*** qrsolv: solve the triangular system for z. if the system is
singular, then obtain a least squares solution. ***/
nsing = n;
for (j = 0; j < n; j++) {
if (sdiag[j] == 0. && nsing == n)
nsing = j;
if (nsing < n)
wa[j] = 0;
}
for (j = nsing - 1; j >= 0; j--) {
double sum = 0;
for (i = j + 1; i < nsing; i++)
sum += r[j * ldr + i] * wa[i];
wa[j] = (wa[j] - sum) / sdiag[j];
}
/*** qrsolv: permute the components of z back to components of x. ***/
for (j = 0; j < n; j++)
x[ipvt[j]] = wa[j];
} /*** lm_qrsolv. ***/
/*****************************************************************************/
/* lm_enorm (Euclidean norm) */
/*****************************************************************************/
double lm_enorm(int n, const double *x)
{
/* Given an n-vector x, this function calculates the
* euclidean norm of x.
*
* The euclidean norm is computed by accumulating the sum of
* squares in three different sums. The sums of squares for the
* small and large components are scaled so that no overflows
* occur. Non-destructive underflows are permitted. Underflows
* and overflows do not occur in the computation of the unscaled
* sum of squares for the intermediate components.
* The definitions of small, intermediate and large components
* depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
* restrictions on these constants are that LM_SQRT_DWARF**2 not
* underflow and LM_SQRT_GIANT**2 not overflow.
*
* Parameters
*
* n is a positive integer input variable.
*
* x is an input array of length n.
*/
int i;
double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
s1 = 0;
s2 = 0;
s3 = 0;
x1max = 0;
x3max = 0;
agiant = LM_SQRT_GIANT / n;
/** sum squares. **/
for (i = 0; i < n; i++) {
xabs = fabs(x[i]);
if (xabs > LM_SQRT_DWARF) {
if ( xabs < agiant ) {
s2 += xabs * xabs;
} else if ( xabs > x1max ) {
temp = x1max / xabs;
s1 = 1 + s1 * SQUARE(temp);
x1max = xabs;
} else {
temp = xabs / x1max;
s1 += SQUARE(temp);
}
} else if ( xabs > x3max ) {
temp = x3max / xabs;
s3 = 1 + s3 * SQUARE(temp);
x3max = xabs;
} else if (xabs != 0.) {
temp = xabs / x3max;
s3 += SQUARE(temp);
}
}
/** calculation of norm. **/
if (s1 != 0)
return x1max * sqrt(s1 + (s2 / x1max) / x1max);
else if (s2 != 0)
if (s2 >= x3max)
return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
else
return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
else
return x3max * sqrt(s3);
} /*** lm_enorm. ***/