This repository has been archived by the owner on Nov 23, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
newton.go
147 lines (130 loc) · 4.5 KB
/
newton.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Copyright ©2015 The gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package optimize
import (
"math"
"github.com/gonum/matrix/mat64"
)
const maxNewtonModifications = 20
// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model to the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
// solve H_k d_k = -∇f_k for d_k,
// x_{k+1} = x_k + α_k d_k,
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly or if the computational
// cost of its factorization is prohibitive, BFGS or L-BFGS quasi-Newton method
// can be used instead.
type Newton struct {
// Linesearcher is used for selecting suitable steps along the descent
// direction d. Accepted steps should satisfy at least one of the Wolfe,
// Goldstein or Armijo conditions.
// If Linesearcher == nil, an appropriate default is chosen.
Linesearcher Linesearcher
// Increase is the factor by which a scalar tau is successively increased
// so that (H + tau*I) is positive definite. Larger values reduce the
// number of trial Hessian factorizations, but also reduce the second-order
// information in H.
// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
Increase float64
ls *LinesearchMethod
hess *mat64.SymDense // Storage for a copy of the Hessian matrix.
chol mat64.Cholesky // Storage for the Cholesky factorization.
tau float64
}
func (n *Newton) Init(loc *Location) (Operation, error) {
if n.Increase == 0 {
n.Increase = 5
}
if n.Increase <= 1 {
panic("optimize: Newton.Increase must be greater than 1")
}
if n.Linesearcher == nil {
n.Linesearcher = &Bisection{}
}
if n.ls == nil {
n.ls = &LinesearchMethod{}
}
n.ls.Linesearcher = n.Linesearcher
n.ls.NextDirectioner = n
return n.ls.Init(loc)
}
func (n *Newton) Iterate(loc *Location) (Operation, error) {
return n.ls.Iterate(loc)
}
func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
dim := len(loc.X)
n.hess = resizeSymDense(n.hess, dim)
n.tau = 0
return n.NextDirection(loc, dir)
}
func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
// the Identity) from Nocedal, Wright (2006), 2nd edition.
dim := len(loc.X)
d := mat64.NewVector(dim, dir)
grad := mat64.NewVector(dim, loc.Gradient)
n.hess.CopySym(loc.Hessian)
// Find the smallest diagonal entry of the Hessian.
minA := n.hess.At(0, 0)
for i := 1; i < dim; i++ {
a := n.hess.At(i, i)
if a < minA {
minA = a
}
}
// If the smallest diagonal entry is positive, the Hessian may be positive
// definite, and so first attempt to apply the Cholesky factorization to
// the un-modified Hessian. If the smallest entry is negative, use the
// final tau from the last iteration if regularization was needed,
// otherwise guess an appropriate value for tau.
if minA > 0 {
n.tau = 0
} else if n.tau == 0 {
n.tau = -minA + 0.001
}
for k := 0; k < maxNewtonModifications; k++ {
if n.tau != 0 {
// Add a multiple of identity to the Hessian.
for i := 0; i < dim; i++ {
n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
}
}
// Try to apply the Cholesky factorization.
pd := n.chol.Factorize(n.hess)
if pd {
// Store the solution in d's backing array, dir.
d.SolveCholeskyVec(&n.chol, grad)
d.ScaleVec(-1, d)
return 1
}
// Modified Hessian is not PD, so increase tau.
n.tau = math.Max(n.Increase*n.tau, 0.001)
}
// Hessian modification failed to get a PD matrix. Return the negative
// gradient as the descent direction.
d.ScaleVec(-1, grad)
return 1
}
func (n *Newton) Needs() struct {
Gradient bool
Hessian bool
} {
return struct {
Gradient bool
Hessian bool
}{true, true}
}