-
Notifications
You must be signed in to change notification settings - Fork 3
/
Steepest Descent with Numerical Gradient.py
138 lines (119 loc) · 6.66 KB
/
Steepest Descent with Numerical Gradient.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import numpy as np
from numpy import linalg as LA
import sys
from numpy import log, sin, cos, exp, pi
np.set_printoptions(suppress=True)
# Steepest Descent for solving a non-linear system F(x) = 0, where F is a function from R^n to R^n,
# x is a vector of R^n, and 0 is the zero vector of R^n, and n = 2 or n = 3.
# The method approximates a solution to minimize the function g = sum_(i=1)^(n) F(x)[i]**2
# The gradiant of g is evaluated by a 5-point midpoint derivative formula
# The method returns a vector approximation to the system which has an infinity norm less than the tolerance
# and the value of g(x) on the approximation
# The method is based on algorithm 10.3 from the book "Numerical Analysis" by Burden, 10th Ed.
# PARAMETERS:
# F -> The function which creates the non-linear system. F must be of length 2 or 3.
# g -> The function g = sum_(i=1)^(n) F(x)[i]**2
# vec -> The initial approximation vector
# tol -> the tolerance for the method, such that an approximation returns when ||x||_(inf) < tol
def steepest_descent_numerical_grad(F: np.array, g, vec: np.array, tol: float):
for k in range(1, sys.maxsize):
g_1 = g(vec, F) # g_1 = g(x^((k)))
z = __midpoint_rule(F, vec) # z = grad(g(x^(k)))
z_0 = LA.norm(z)
if z_0 == 0:
print("zero gradient")
print(vec, g_1) # procedure completed, might have minimum
return
z = z / z_0 # make z a vector unit
a_3 = 1
g_3 = g(vec - a_3 * z, F)
while g_3 >= g_1:
a_3 = a_3 / 2
g_3 = g(vec - a_3 * z, F)
if a_3 < tol / 2:
print("No likely improvement") # procedure completed, might have minimum
print(vec, g_1)
return
a_2 = a_3 / 2
g_2 = g(vec - a_2 * z, F)
# calculating Newton's interpolating polynomial P(a) = g1 + h1*a + h3*a*(a-a2)
# that interpolates h(a) at a = 0, a = a2 and a = a3
h_1 = (g_2 - g_1) / a_2
h_2 = (g_3 - g_2) / (a_3 - a_2)
h_3 = (h_2 - h_1) / a_3
a_0 = 0.5 * (a_2 - (h_1 / h_3)) # critical point of P occurs at a0
g_0 = g(vec - a_0 * z, F)
if g_0 == min(g_0, g_3):
a = a_0
else:
a = a_3
vec = vec - a * z
if np.abs(g(vec, F) - g_1) < tol:
print("Steepest Descent with numerical gradiant: the number of iterations:", k)
print(vec, g(vec, F)) # procedure is successful
return
# the command below can be removed if you want to see only the final result
print("iteration", k, ":", vec, g(vec, F))
print('Maximum iterations exceeded') # procedure is unsuccessful
return
# Private method for calculating the gradiant of g using 5-point midpoint rule
# The method calculates the approximating derivatives of F, and returns the approximate gradiant
# The method works only on F of length 2 or 3.
# PARAMETERS:
# F -> The function F of the non-linear system
# vec -> a vector in which the approximation is calculated
def __midpoint_rule(F, vec: list):
x = vec[0]
y = vec[1]
if len(vec) == 2:
return np.array([2 * F(x, y, 0)[0] * (5 / 6) *
(F(x - 0.2, y, 0)[0] - 8 * F(x - 0.1, y, 0)[0] + 8 * F(x + 0.1, y, 0)[0] - F(x + 0.2, y, 0)[0])
+ 2 * F(x, y, 0)[1] * (5 / 6) * (
F(x - 0.2, y, 0)[1] - 8 * F(x - 0.1, y, 0)[1] + 8 * F(x + 0.1, y, 0)[1] -
F(x + 0.2, y, 0)[1]),
2 * F(x, y, 0)[0] * (5 / 6) *
(F(x, y - 0.2, 0)[0] - 8 * F(x, y - 0.1, 0)[0] + 8 * F(x, y + 0.1, 0)[0] - F(x, y + 0.2, 0)[0])
+ 2 * F(x, y, 0)[1] * (5 / 6) *
(F(x, y - 0.2, 0)[1] - 8 * F(x, y - 0.1, 0)[1] + 8 * F(x, y + 0.1, 0)[1] - F(x, y + 0.2, 0)[
1])], dtype=float)
else:
z = vec[2]
return np.array([2 * F(x, y, z)[0] * (5 / 6) * (
F(x - 0.2, y, z)[0] - 8 * F(x - 0.1, y, z)[0] + 8 * F(x + 0.1, y, z)[0] - F(x + 0.2, y, z)[0])
+ 2 * F(x, y, z)[1] * (5 / 6) * (
F(x - 0.2, y, z)[1] - 8 * F(x - 0.1, y, z)[1] + 8 * F(x + 0.1, y, z)[1] -
F(x + 0.2, y, z)[1])
+ 2 * F(x, y, z)[2] * (5 / 6) * (
F(x - 0.2, y, z)[2] - 8 * F(x - 0.1, y, z)[2] + 8 * F(x + 0.1, y, z)[2] -
F(x + 0.2, y, z)[2]),
2 * F(x, y, z)[0] * (5 / 6) * (
F(x, y - 0.2, z)[0] - 8 * F(x, y - 0.1, z)[0] + 8 * F(x, y + 0.1, z)[0] -
F(x, y + 0.2, z)[0])
+ 2 * F(x, y, z)[1] * (5 / 6) * (
F(x, y - 0.2, z)[1] - 8 * F(x, y - 0.1, z)[1] + 8 * F(x, y + 0.1, z)[1] -
F(x, y + 0.2, z)[1])
+ 2 * F(x, y, z)[2] * (5 / 6) * (
F(x, y - 0.2, z)[2] - 8 * F(x, y - 0.1, z)[2] + 8 * F(x, y + 0.1, z)[2] -
F(x, y + 0.2, z)[2]),
2 * F(x, y, z)[0] * (5 / 6) * (
F(x, y, z - 0.2)[0] - 8 * F(x, y, z - 0.1)[0] + 8 * F(x, y, z + 0.1)[0] -
F(x, y, z + 0.2)[0])
+ 2 * F(x, y, z)[1] * (5 / 6) * (
F(x, y, z - 0.2)[1] - 8 * F(x, y, z - 0.1)[1] + 8 * F(x, y, z + 0.1)[1] -
F(x, y, z + 0.2)[1])
+ 2 * F(x, y, z)[2] * (5 / 6) * (
F(x, y, z - 0.2)[2] - 8 * F(x, y, z - 0.1)[2] + 8 * F(x, y, z + 0.1)[2] -
F(x, y, z + 0.2)[2])
], dtype=float)
# Input functions:
def function_a(x: float, y: float, z: float): # write the desired function
return np.array([log(x ** 2 + y ** 2) - sin(x * y) - log(2) - log(pi), exp(x - y) + cos(x * y)])
def function_b(x: float, y: float, z: float):
return np.array([3 * x - cos(y * z) - 0.5, x ** 2 - 625 * y ** 2 - 0.25, exp(-x * y) + 20 * z + (10 * pi - 3) / 3])
def g(vec: np.array, f):
if len(vec) == 2:
return f(vec[0], vec[1], 0)[0] ** 2 + f(vec[0], vec[1], 0)[1] ** 2
else:
return f(vec[0], vec[1], vec[2])[0] ** 2 + f(vec[0], vec[1], vec[2])[1] ** 2 + \
f(vec[0], vec[1], vec[2])[2] ** 2
steepest_descent_numerical_grad(function_a, g, np.array([2.0, 2.0]), 10 ** -6)