Assignment 5

# Value iteration on a small grid world.
#
# Every cell holds an immediate reward.  From a cell the agent tries to move
# to one of its four neighbours; the move succeeds with probability `action`
# and otherwise (probability `no_action`) the agent stays where it is.  The
# script repeatedly applies this update until the value matrix stops
# changing, then prints the converged values row by row.

import numpy as np

# Create reward matrix and initial (all-zero) value matrix
reward_matrix = np.asarray([[0, 0, 0], [0, 10, 0], [0, 0, 0]]).astype(float)
initial_matrix = np.asarray([[0, 0, 0], [0, 0, 0], [0, 0, 0]]).astype(float)

# Dimensions of the matrix, taken from the data so they cannot drift apart
nr_of_rows, nr_of_columns = reward_matrix.shape

# Choose an epsilon close to 0; iteration stops once the summed absolute
# change between two sweeps is no larger than this
eps = 0.0001

# The discount (gamma) value
discount = 0.9

# Probability that the chosen move succeeds / that the agent stays put
action = 0.8
no_action = 1 - action

# Start value for the convergence measure (anything larger than eps)
value = 10


def calculate_reward(i, j, reward_matrix, initial_matrix, discount):
    """Return the best expected value reachable from cell ``(i, j)``.

    For every in-bounds neighbour the expected value is
    ``action * (reward[n] + values[n] * discount)
    + no_action * (reward[i, j] + values[i, j] * discount)``
    and the maximum over the neighbours is returned.

    Args:
        i: Row index of the cell (first array axis).
        j: Column index of the cell (second array axis).
        reward_matrix: 2-D array of immediate rewards.
        initial_matrix: 2-D array with the value estimates of the previous
            sweep; same shape as ``reward_matrix``.
        discount: Discount factor applied to the previous value estimates.

    Returns:
        The highest expected value over all valid moves.  If the cell has no
        in-bounds neighbour (1x1 grid) the value of staying in place is
        returned.

    Note:
        Reads the module-level ``action`` and ``no_action`` probabilities.
    """
    n_rows, n_cols = reward_matrix.shape

    # Value collected when the move fails and the agent stays in (i, j)
    stay_value = reward_matrix[i, j] + initial_matrix[i, j] * discount

    # Neighbours named after their position in the printed matrix
    north = (i - 1, j)
    south = (i + 1, j)
    east = (i, j + 1)
    west = (i, j - 1)

    # Bounds are checked explicitly: relying on IndexError is not enough,
    # because NumPy interprets -1 as "last row/column" and would silently
    # wrap around to the opposite side of the grid.
    reward_for_each_direction = [
        action * (reward_matrix[r, c] + initial_matrix[r, c] * discount)
        + no_action * stay_value
        for r, c in (north, east, west, south)
        if 0 <= r < n_rows and 0 <= c < n_cols
    ]

    if not reward_for_each_direction:
        # No move is possible, so the agent stays with certainty
        return stay_value

    # Pick the highest expected value among the valid moves
    return max(reward_for_each_direction)


while value > eps:
    # Work on a copy so that every cell of this sweep is computed from the
    # values of the previous sweep
    new_matrix = initial_matrix.copy()

    for i in range(nr_of_rows):
        for j in range(nr_of_columns):
            # Store the highest expected value for the current coordinate
            new_matrix[i, j] = calculate_reward(
                i, j, reward_matrix, initial_matrix, discount
            )

    # Total absolute change between the current and the previous sweep
    value = np.abs(new_matrix - initial_matrix).sum()
    initial_matrix = new_matrix

# Print the results
for row in initial_matrix:
    print(row)

# From this we can find the optimal policy for each coordinate.
# |E/S|S      |S/W|
# |E  |N/E/S/W|W  |
# |N/E|N      |N/W|