import random
import numpy as np
import matplotlib.pyplot as plt
import sobol_seq
from scipy.optimize import minimize,Bounds
from CartPole import CartPole
3.1 Linear Policy, Non-Linear Model Observation Noise
#Kernel Function
def kernel_func(x0,x1,sig):
delta_x = x0-x1
delta_x_2 = np.zeros([4,1])
for i in range(4):
delta_x_2[i]=(delta_x[i])**2
delta_x_2[2]=np.sin(0.5*(delta_x[2]))**2
expo = (-0.5)*float(np.dot(sig, delta_x_2))
kernel = np.e**expo
return kernel
#range of input for creating data
def r_range(i, minimum,maximum):
d = i*(maximum-minimum)
output = np.full(np.shape(i),minimum)+d
return output
#generate datapoints using sobol_seq
def generate_x(n):
x_data = sobol_seq.i4_sobol_generate(4,n)
d_0=r_range(x_data[:,0],-50,50)
d_1=r_range(x_data[:,1],-10,10)
d_2=r_range(x_data[:,2],-(np.pi),np.pi)
d_3=r_range(x_data[:,3],-15,15)
output = np.column_stack((d_0,d_1,d_2,d_3))
return output
def K_matrix(a,b,sig_invert):
A = np.shape(a)[0]
B = np.shape(b)[0]
K = np.zeros(shape=(A,B))
for i in range(A):
for j in range(B):
K[i][j] =kernel_func(a[i],b[j],sig_invert)
return K
x= CartPole()
N =500
M = 200
sigma =[2.63690e-16,1.6274e-06, 2.6313, 1.3831e-02, 2.464e-04]
X = generate_x(N)
X_M = generate_x(M)
lamda = 1e-4
# generate a Nx4 matrix
# generate corresponding y
Y = np.zeros(shape=(N,4))
for i in range(N):
x.setState(X[i,:4])
x.performAction(X[i,-1])
Y[i] = x.getState() - X[i,:4]
KNN = K_matrix(X,X, sigma)
KMN = K_matrix(X_M, X, sigma)
KNM = K_matrix(X, X_M, sigma)
KMM = K_matrix(X_M, X_M, sigma)
#alpha = (KMN@KNM + lamda*KMM)^-1(KMN@Y)
#alpha = (c)^-1*(d)
c = KMN @ KNM +lamda*KMM
d = np.zeros([4,M])
matrix_alphas = np.zeros([4,M])
for i in range(4):
d[i] = KMN @ Y_change_State[:,i]
matrix_alphas[i]=np.linalg.lstsq(c,d[i],rcond=-1)[0]
def model_pred_control(p):
CartpoleState.setState([0, 0, 0.1, 0])
pred = np.zeros(shape=(15,5))
l = 0
for i in range(15):
l += CartpoleState.loss()
noise = 0.05 * np.random.normal(0, 1, (4,))
f = np.dot(p, CartpoleState.getState())
pred[i, :4] = CartpoleState.getState()+noise
pred[i, -1] = f
K1M_pred = np.array([kernel_gauss(pred[i], s,sigma) for s in X_M])
c = K1M_pred @ alphas.transpose()
new_pred = c + pred[i,:4]
CartpoleState.setState(new_pred)
CartpoleState.remap_angle()
return l
Optimizing Policy Parameters
p_1 = [ 8.93643704, 12.10589224, 122.87328005, 21.14929645]
# 0.1
# x0 = np.array([ 2.02341039, 3.2269465 , 74.44066079, 11.26992476])
# [-3.35802526, 4.51585084, 75.99296052, 13.08667829]
# [-27.31719334, 20.01673045, 27.35351323, 29.13319048]
# [ -0.56636693, 13.40674337, 102.23761576, 24.65008773]
# [ 8.86874351, 12.5770018 , 118.85806506, 22.32026644]
# 0.2
# [ 5.87200144, 6.43440688, 98.36793522, 15.06893969]
# [ 8.93643704, 12.10589224, 122.87328005, 21.14929645]
# [-3.35802526, 4.51585084, 75.99296052, 13.08667829]
# [ 2.02341039, 3.2269465 , 74.44066079, 11.26992476]
res = minimize(model_pred_control,[64.55803217, 8.25290263,1.10774183, 16.46884571], method='nelder-mead',
options={'maxiter':3000,'xatol': 1e-5, 'disp': True})
print(res.x)
Warning: Maximum number of iterations has been exceeded.
[66.97895838 8.56238648 1.06620151 17.08642742]
Time Evolution/ Rollouts
def plot_evolution_policy(n, p):
init_state_array = np.array([[0, 0, 0.2, 0],[0, 0, 0.12, 0], [0, 0, 0.1, 0], [0, 0, 0.12, 0]])
x0 = np.zeros([4,n])
x1 = np.zeros([4,n])
x2 = np.zeros([4,n])
x3 = np.zeros([4,n])
x4 = np.zeros([4,n])
x_0 = np.zeros([4,n])
x_1 = np.zeros([4,n])
x_2 = np.zeros([4,n])
x_3 = np.zeros([4,n])
x_4 = np.zeros([4,n])
ind = 0
for init in init_state_array:
CartpoleState.setState(init)
l = 0
for i in range(n):
cur_state = CartpoleState.getState()
noise = 0.05 * np.random.normal(0, 1, (4,))
cur_state+=noise
x0[ind][i], x1[ind][i], x2[ind][i], x3[ind][i] = cur_state[0], cur_state[1], cur_state[2], cur_state[3]
l += CartpoleState.loss()
f = np.dot(p, CartpoleState.getState())
x4[ind][i] = f
K1M_pred = np.array([kernel_gauss(np.array([x0[ind][i], x1[ind][i], x2[ind][i], x3[ind][i],x4[ind][i]]), s, sigma) for s in X_M])
c = K1M_pred @ alphas.transpose()
new_pred = c + cur_state
CartpoleState.setState(new_pred)
CartpoleState.remap_angle()
# TrainingCartPole.remap_angle()
ind += 1
X = np.array([x0, x1, x2, x3, x4])
ind=0
for init in init_state_array:
CartpoleState.setState(init)
l = 0
for i in range(n):
cur_state = CartpoleState.getState()
x_0[ind][i], x_1[ind][i], x_2[ind][i], x_3[ind][i] = cur_state[0], cur_state[1], cur_state[2], cur_state[3]
l += CartpoleState.loss()
f = np.dot(p, CartpoleState.getState())
x_4[ind][i] = f
K1M_pred = np.array([kernel_gauss(np.array([x_0[ind][i], x_1[ind][i], x_2[ind][i], x_3[ind][i],x_4[ind][i]]), s, sigma) for s in X_M])
c = K1M_pred @ alphas.transpose()
new_pred = c + cur_state
CartpoleState.setState(new_pred)
CartpoleState.remap_angle()
# TrainingCartPole.remap_angle()
ind += 1
X_no = np.array([x_0,x_1,x_2,x_3,x_4])
fig1, axs1 = plt.subplots(4,figsize=(10,5))
fig1.suptitle('Time evolution of variables With Observation noise')
time = np.arange(0, 0.2*n, 0.2)
states = ["location", "velocity", "pole angle", "pole velocity"]
index = 0
for i in range(4):
axs1[i].plot(time, X[index][0],'--', color='royalblue',label = 'Noise ' +f"X = {init_state_array[0]}")
axs1[i].plot(time, X[index][3],'--',color = 'black', label = 'Noise ' + f"X = {init_state_array[3]}")
axs1[i].plot(time, X_no[index][0], color='royalblue',label = f"X = {init_state_array[0]}")
axs1[i].plot(time, X_no[index][3],color = 'black', label = f"X = {init_state_array[3]}")
axs1[i].set(xlabel="Time")
axs1[i].set(ylabel=states[index])
index += 1
axs1[0].legend(bbox_to_anchor=(1.1, 1.3), loc='upper right')
plt.show()
plot_evolution_policy(30,[2,3,78,11])
plot_evolution_policy(30,[65.89589719, 8.3084518 , 1.09210506, 17.29356596])
plot_evolution_policy(30,[64.55803217, 8.25290263,1.10774183, 16.46884571])
#[2,3,78,11]
#[24.1498626 18.42980936 1.10772824 26.2491273 ]