# Define the Rosenbrock function
def f(x1, x2):
    '''
    Rosenbrock function.
    :param x1: value for x1.
    :param x2: value for x2.
    :return: Rosenbrock value for the given x1, x2.
    '''
    return (1 - x1)**2 + 100 * (x2 - x1**2)**2
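The objective being minimized is the Rosenbrock function, whose global minimum is $f(1, 1) = 0$ at $(x_1, x_2) = (1, 1)$:

$$
f(x_1, x_2) = (1 - x_1)^2 + 100\,(x_2 - x_1^2)^2
$$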
def y_per_steps(list_x1, list_x2, opt_func):
    '''
    Function to plot the values of y for each step.
    :param list_x1: list of all x1 values in the path.
    :param list_x2: list of all x2 values in the path.
    :param opt_func: the function being analysed.
    '''
    steps = list(range(len(list_x1)))
    # evaluate the objective at every point of the optimization path
    y_values = opt_func(np.array(list_x1), np.array(list_x2))
    plt.plot(steps, y_values)
    plt.xlabel('Number of updates')
    plt.ylabel('f(x1, x2)')
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
def plot_function(list_x1, list_x2, opt_func):
    '''
    Function to plot the optimization path of a function.
    :param list_x1: list of all x1 values in the path.
    :param list_x2: list of all x2 values in the path.
    :param opt_func: the function being analysed.
    '''
    xmin, xmax, xstep = -2., 2., .1
    ymin, ymax, ystep = -2., 2., .1
    x1, x2 = np.meshgrid(np.arange(xmin, xmax + xstep, xstep), np.arange(ymin, ymax + ystep, ystep))
    y = opt_func(x1, x2)
    minima = np.array([1., 1.]).reshape(-1, 1)  # global minimum of the Rosenbrock function
    list_x1 = np.array(list_x1).reshape(-1, 1)
    list_x2 = np.array(list_x2).reshape(-1, 1)
    fig, ax = plt.subplots(figsize=(15, 9))
    # log-spaced contour levels of the objective
    ax.contour(x1, x2, y, levels=np.logspace(0, 5, 10), norm=LogNorm(), cmap=plt.cm.jet)
    # arrows connecting consecutive (x1, x2) iterates along the optimization path
    ax.quiver(list_x1[:-1], list_x2[:-1], list_x1[1:] - list_x1[:-1], list_x2[1:] - list_x2[:-1],
              scale_units='xy', angles='xy', scale=1, color='k')
    ax.plot(*minima, 'r*', markersize=18)  # mark the minimum with a red star
    ax.set_xlabel('$x_1$')
    ax.set_ylabel('$x_2$')
    ax.set_xlim((xmin, xmax))
    ax.set_ylim((ymin, ymax))
def derivative_function(x1, x2):
    '''
    Analytic gradient of the Rosenbrock function.
    :return: the partial derivatives of f with respect to x1 and x2.
    '''
    grad_x1 = -2 * (1 - x1) - 400 * x1 * (x2 - x1**2)
    grad_x2 = 200 * (x2 - x1**2)
    return grad_x1, grad_x2
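The partial derivatives implemented above follow from differentiating the Rosenbrock function:

$$
\frac{\partial f}{\partial x_1} = -2\,(1 - x_1) - 400\,x_1\,(x_2 - x_1^2),
\qquad
\frac{\partial f}{\partial x_2} = 200\,(x_2 - x_1^2)
$$

A quick central-difference check can confirm the analytic gradient at an arbitrary point (here $(0.5, 0.5)$, chosen only for illustration):

eps = 1e-6
num_grad_x1 = (f(0.5 + eps, 0.5) - f(0.5 - eps, 0.5)) / (2 * eps)
num_grad_x2 = (f(0.5, 0.5 + eps) - f(0.5, 0.5 - eps)) / (2 * eps)
print(num_grad_x1, num_grad_x2)        # numerical estimate
print(derivative_function(0.5, 0.5))   # analytic gradient, should agree closely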
def gradient_descent(initial_x1, initial_x2, convergence_test, lr, lr_red=False):
    '''
    Function to compute the gradient descent.
    :param initial_x1: initial value for x1.
    :param initial_x2: initial value for x2.
    :param convergence_test: tolerance value.
    :param lr: learning rate.
    :param lr_red: flag indicating whether the learning rate decays across steps.
    :return: the final values of x1 and x2, and the lists of all x1 and x2 values along the optimization path.
    '''
    x1 = initial_x1
    x2 = initial_x2
    x1_list = [x1]
    x2_list = [x2]
    diff_x = 1e+10
    step = 0
    # print(f'Step: {step} - f(x1, x2): {f(x1, x2)}')
    while diff_x > convergence_test and step <= 50000:
        try:
            grad_x1, grad_x2 = derivative_function(x1, x2)
            x1_prev = x1
            x2_prev = x2
            # gradient descent update: x <- x - lr * grad
            x1 = x1_prev - (grad_x1 * lr)
            x2 = x2_prev - (grad_x2 * lr)
            f_x_old = f(x1_prev, x2_prev)
            f_x_new = f(x1, x2)
            diff_x = abs(f_x_new - f_x_old)
            step += 1
            # print(f'Step: {step} - f(x1, x2): {f(x1, x2)}')
            if lr_red:
                lr = lr * 0.999  # multiplicative learning-rate decay
            x1_list.append(x1)
            x2_list.append(x2)
        except OverflowError:
            # the iterates diverged and overflowed; stop and report the last values reached
            print(f'Number of updates: {step}')
            print(f'Final (x1, x2) pair: ({x1}, {x2})')
            return x1, x2, x1_list, x2_list
    print(f'Number of updates: {step}')
    print(f'Final (x1, x2) pair: ({x1}, {x2})')
    print(f'Final value of f(x1, x2): ({f(x1, x2)})')
    return x1, x2, x1_list, x2_list
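Each iteration applies the standard gradient-descent update and stops once successive objective values differ by less than the tolerance, or after 50,000 updates:

$$
(x_1, x_2)_{t+1} = (x_1, x_2)_t - \eta\,\nabla f\big((x_1, x_2)_t\big),
\qquad
\text{stop when } \left| f_{t+1} - f_t \right| < \text{tol}
$$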
x1, x2, list_x1, list_x2 = gradient_descent(0, 0, 1e-5, 1e-3)
Number of updates: 3096
Final (x1, x2) pair: (0.8973659319527174, 0.8048289691500675)
Final value of f(x1, x2): (0.01055281795618874)
y_per_steps(list_x1, list_x2, f)
plot_function(list_x1, list_x2, f)
x1, x2, list_x1, list_x2 = gradient_descent(0, 0, 1e-5, 1e-4)
Number of updates: 12384
Final (x1, x2) pair: (0.7222519131907612, 0.520346429814203)
Final value of f(x1, x2): (0.07731336295746909)
y_per_steps(list_x1, list_x2, f)
plot_function(list_x1, list_x2, f)
x1, x2, list_x1, list_x2 = gradient_descent(0, 0, 1e-5, 1e-2)
Number of updates: 39
Final (x1, x2) pair: (1.4533287241727849e+178, 4.726785626727598e+118)
y_per_steps(list_x1, list_x2, f)
plot_function(list_x1, list_x2, f)
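A rough intuition for the blow-up: along the $x_2$ direction the objective is a quadratic with curvature $\partial^2 f / \partial x_2^2 = 200$, and gradient descent on a quadratic of curvature $L$ is stable only for step sizes below $2/L$:

$$
\eta < \frac{2}{200} = 0.01
$$

With $\eta = 10^{-2}$ the iterates sit right at this stability limit, and the coupling with the quartic term pushes them to overflow within a few dozen updates.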
x1, x2, list_x1, list_x2 = gradient_descent(0, 0, 1e-5, 5e-3, lr_red=True)
Number of updates: 1467
Final (x1, x2) pair: (0.9038989188815177, 0.8166259741242391)
Final value of f(x1, x2): (0.009252005608539756)
y_per_steps(list_x1, list_x2, f)
plot_function(list_x1, list_x2, f)
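With lr_red=True the learning rate is multiplied by 0.999 after every update, so the run can start with a larger step ($\eta_0 = 5 \times 10^{-3}$) and still settle down:

$$
\eta_t = 0.999^{\,t}\,\eta_0
$$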
import tensorflow as tf
def reset(initial_value=0.0):
    '''
    Function to reset the variables to an initial value.
    :param initial_value: value used to initialize the variables.
    :return: x1 and x2 as tf.Variable objects.
    '''
    x1 = tf.Variable(initial_value)
    x2 = tf.Variable(initial_value)
    return x1, x2
def gradient_descent_tf(convergence_test, lr):
    '''
    Function to compute the gradient descent using TensorFlow.
    :param convergence_test: tolerance value.
    :param lr: learning rate.
    :return: the lists of all x1 and x2 values along the optimization path.
    '''
    lr = tf.Variable(lr)
    x1, x2 = reset()
    opt = tf.keras.optimizers.SGD(learning_rate=lr)  # note: not used below; the updates are applied manually
    list_x1 = [x1.numpy()]
    list_x2 = [x2.numpy()]
    diff_x = 1e+10
    step = 0
    f_x_new = 1e+10
    while diff_x > convergence_test and step <= 50000:
        f_x_old = f(x1, x2)
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(x1)
            tape.watch(x2)
            y = f(x1, x2)
        # automatic differentiation instead of the hand-coded derivative_function
        grads = tape.gradient(y, [x1, x2])
        processed_grads = [g for g in grads]
        x1 = x1 - processed_grads[0] * lr
        x2 = x2 - processed_grads[1] * lr
        list_x1.append(x1.numpy())
        list_x2.append(x2.numpy())
        f_x_new = f(x1, x2)
        diff_x = abs(f_x_new - f_x_old)
        step += 1
    print(f'Number of updates: {step}')
    print(f'Final (x1, x2) pair: ({x1.numpy()}, {x2.numpy()})')
    return list_x1, list_x2
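Note that the SGD optimizer instantiated inside gradient_descent_tf is never asked to perform the updates; the loop subtracts the gradients manually. For reference, a minimal sketch (assuming TensorFlow 2.x eager execution, with a fixed iteration count standing in for the convergence test) of letting opt.apply_gradients update the tf.Variable objects in place:

x1, x2 = reset()
opt = tf.keras.optimizers.SGD(learning_rate=1e-3)
for _ in range(3096):                          # same number of updates as the run below
    with tf.GradientTape() as tape:
        y = f(x1, x2)                          # tf.Variable objects are watched automatically
    grads = tape.gradient(y, [x1, x2])
    opt.apply_gradients(zip(grads, [x1, x2]))  # updates x1 and x2 in place
print(x1.numpy(), x2.numpy())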
list_x1, list_x2 = gradient_descent_tf(convergence_test=1e-5, lr=1e-3)
Number of updates: 3096
Final (x1, x2) pair: (0.8973656296730042, 0.8048283457756042)
y_per_steps(list_x1, list_x2, f)
plot_function(list_x1, list_x2, f)