# Generate Gridworld Function
from enum import Enum
from random import random, randint, choice
from collections import defaultdict
from typing import List, Callable, Tuple
from copy import deepcopy
# Heuristics
def manhattan_heuristic(x1: int, y1: int, x2: int, y2: int):
return abs(x1 - x2) + abs(y1 - y2)
# A*
from collections import defaultdict
import heapq
from typing import Tuple, Callable, List, Dict
import numpy as np
def a_star(
grid: List[List[float]],
source: Tuple[int, int] = (0, 0),
goal: Tuple[int, int] = None,
h: Callable = manhattan_heuristic,
W: int = 1,
) -> (List[Tuple[int, int]], int, List[Tuple[int,int]], int, int):
"""A* Implementation
Used to find optimal shortest path from source to goal on a full gridworld (i.e with complete information of the block cells).
Args:
grid (2d int list): A square grid with 0 representing empty square and 1 representing a blocked square
source (tuple) Optional: control where to start the A* search from
goal (tuple): control where the target of the A* search algorithm
h (func) Optional: heuristic function and the default heuristic function is the manhattan distance
W (int) Optional: weight on the heuristic function
Returns:
list(tuple(int, int)): the path from the source node to the goal node which consists of coordinate points (i.e. x,y) in the form of tuples.
int: the number of steps that need to taken from soure to goal (i.e. len(path) - 1 or 2(dim - 1))
list(tuple(int,int)): all visited squares from the source point. Contains int tuples in the form of (x,y).
int: trajectory - num of times a node was popped from the fringe
int: cells processed - num of times we check if a particular grid square was block or not
"""
if source == goal:
return [source], 0, [source], 0, 0
g = defaultdict(lambda: float("inf")) # dictionary with key: (x,y) tuple and value: length of the shortest path from (0,0) to (x,y)
g[source] = 0 # set distance to source 0
parent = {
source: None
} # dictionary with key: (x,y) tuple and value is a (x,y) tuple where the value point is the parent point of the key point in terms of path traversal
pq = [(0, -g[source], source)]
trajectory = cells_processed = 0 # statistics to keep track while A* runs
while pq:
_, _, curr = heapq.heappop(pq) # pop from priority queue
# pretty_print_explored_region(grid, list(g.keys())) # uncomment if you want to see each step of A*
trajectory += 1
if curr == goal: # check if we found our goal point
break
# generate children
for dx, dy in [(0, 1), (-1, 0), (0, -1), (1, 0)]: # right down left up
new_x, new_y = (curr[0] + dx, curr[1] + dy) # compute children
# skip out of bounds or blocked children
if not (0 <= new_x < len(grid) and 0 <= new_y < len(grid)) or grid[new_x][new_y] == 1.0:
continue
children = (new_x, new_y)
new_g= g[curr] + 1 # add + 1 to the real cost of children
if (
children not in g or new_g < g[children]
): # only care about new undiscovered children or children with lower cost
cells_processed += 1
g[children] = new_g # update cost
parent[children] = curr # update parent of children to current
h_value = h(*children, *goal) # calculate h value
f = new_g + W * h_value # generate f(n')
heapq.heappush(
pq, (f, -new_g, children)
) # add children to priority queue | -new_g is added to break ties
else:
return [], float("inf"), list(g.keys()), trajectory, cells_processed
# generate path traversal using parent dict
path = [curr]
while curr != source:
curr = parent[curr]
path.append(curr)
path.reverse() # reverse the path so that it is the right order
return path, g[goal], list(g.keys()), trajectory, cells_processed
class Terrain(Enum):
FLAT = 2
HILLY = 3
FOREST = 4
def gen_gridworld(dim:int, prob:float, solvable: bool = None, is_terrain_enabled: bool = True) -> (List[List[float]], Tuple[int, int]):
"""generate gridworld implemenation
generates a dim by dim gridworld where the (0,0) is source
every cell besides the source cell has probability of "prob" to be blocked
if a cell is not blocked then it is randomly assigned a terrain type out of flat, hilly, and forest
at the end a random not blocked cell will be chosen as the target
NOTE: take of note of what is stored at each cell. The first value indicates what cell type it is and the second value indicates if the cell contains the target
Args:
dim (int) : the dimensions of the square grid
prob (float): the probability of a square being blocked
solvability (bool): returns a grid world that is either solvable or unsolvable depending on this param
Returns:
list[list[int]]: a dim by dim square grid with blocked and unblock squares
Tuple[int, int]: a tuple containing the coordinates of target cell
"""
grid = [[0.0] * dim for _ in range(dim)]
while True:
for i in range(dim):
for j in range(dim):
p = random()
if p >= prob:
if is_terrain_enabled:
t = randint(1,3)
if t == 1:
grid[i][j] = 0.8 # Terrain.FLAT
elif t == 2:
grid[i][j] = 0.5 # Terrain.HILLY
elif t == 3:
grid[i][j] = 0.2 # Terrain.FOREST
else:
grid[i][j] = 1.0 # blocked cell
# Place the source and target
unblocked = [(i,j) for i in range(dim) for j in range(dim) if grid[i][j] != 1]
# no unblocked cells
if not unblocked:
continue
# choose q random source and target, note source and target can be the same cell
source, target = choice(unblocked), choice(unblocked)
path, *_ = a_star(grid, source, target)
if solvable is None or solvable == bool(path):
break
return grid, source, target
# Nicely prints out the grid
# Testing use only.
class colors: #
HEADER = '\033[95m'
RED = '\033[91m'
BLUE = '\033[94m'
BOLD = '\033[1m'
ENDC = '\033[0m'
def pretty_print_explored_region(grid, visited=[], path=[]):
import math
grid_copy = deepcopy(grid)
for x,y in visited:
grid_copy[x][y] = colors.RED + str(grid[x][y]) + colors.ENDC
for x,y in path:
if type(grid_copy[x][y]) == str:
grid_copy[x][y] = colors.HEADER + str(grid[x][y]) + colors.ENDC
else:
grid_copy[x][y] = colors.BLUE + str(grid[x][y]) + colors.ENDC
for i in range(len(grid)):
for j in range(len(grid)):
if type(grid_copy[i][j]) != str:
if grid_copy[i][j] == 0.8:
color = '\33[43m'
elif grid_copy[i][j] == 0.5:
color = '\33[102m'
elif grid_copy[i][j] == 0.2:
color = '\33[42m'
elif grid_copy[i][j] == 1:
color = '\33[100m'
elif grid_copy[i][j] == 0:
color = '\033[94m'
grid_copy[i][j] = color + str(grid_copy[i][j]) + colors.ENDC
for row in grid_copy:
for col in row:
print(col, end=" ") # print each element separated by space
print() # Add newline
print("-------")
# RED == visited
# Purple == path that was also visited
# Blue == path but was not visited (i.e. if agent knew the complete grid then it was have used the blue path)

# Agent 6 Implementation
class CellState(Enum):
BLOCKED = 1
OPEN = 2
UNKNOWN = 3
class Intel:
def __init__(self, i:int, j:int, p: float, q: float):
self.i = i
self.j = j
self.searched = False # if we have examined this cell
self.status = CellState.UNKNOWN
self.p = p # probability that target is in current cell
self.q = q # Starting search success rate # TODO (0.5 + 0.2 + 0.8)/3 not sure if this is right
self.utility = None # Can be any type
def __str__(self):
return f"status: {self.status}, coord: {self.i, self.j}, p: {self.p}, q: {self.q}, utility: {self.utility}, searched: {self.searched}"
def __repr__(self):
return self.__str__()
# initialize knowledge base
def create_initial_kb(grid_len: int, grid: List[List[int]]) -> List[List[Intel]]:
kb = []
for row_idx, row in enumerate(grid):
kb.append([])
for col_idx, _ in enumerate(row):
kb[row_idx].append(Intel(row_idx, col_idx, 1/grid_len**2, 7/20))
return kb
# Create current grid based on current knowledge base
def generate_current_grid_from_kb(kb: List[List[Intel]]) -> List[List[float]]:
current_grid = []
for row_idx, row in enumerate(kb):
current_grid.append([])
for col_idx, _ in enumerate(row):
blocked = 0 if kb[row_idx][col_idx].status != CellState.BLOCKED else 1
current_grid[row_idx].append(blocked)
return current_grid
def _run_agent_algo(grid_len: int, kb: List[List[Intel]], source: Tuple[int, int], _agent_utility_function: Callable) -> Tuple[int, int]:
x,y = source
if not (0 <= x < grid_len and 0 <= y < grid_len):
raise Exception("source must reside within the knowledge base dim")
# calculates utility for each cell
for i in range(grid_len):
for j in range(grid_len):
kb[i][j].utility = _agent_utility_function(kb[i][j], source)
#sorts the utility by increasing order and manhattan distance by decreasing order
sorted_intel = sorted([kb[i][j] for i in range(grid_len) for j in range(grid_len)], key= lambda intel: intel.utility)
#pick the best one
res = [sorted_intel[0]]
#if there are multiple "best" ones then get them as well
for i in range(1, len(sorted_intel)):
if res[0].utility == sorted_intel[i].utility:
res.append(sorted_intel[i])
else:
break
#randomly pick from the "best" list which is our next goal cell
goal = choice(res)
return goal

def _agent6_utility_function(intel: Intel, source: Tuple[int, int]):
return (-intel.p, manhattan_heuristic(*source, intel.i, intel.j))
def agent6(grid, source, target):
return agent_template(grid, source, target, _agent6_utility_function)

grid2, source2, target2 = gen_gridworld(10, 0.3, True)
pretty_print_explored_region(grid2, [], [source2, target2])
full_path_6, len_full_path_6, num_bumps_6, movements_6, examinations_6 = agent6(grid2, source2, target2)
pretty_print_explored_region(grid2, full_path_6, [source2, target2])
print(len_full_path_6, num_bumps_6, movements_6, examinations_6)
print(full_path_6)

# Agent 7 Implementation
def _agent7_utility_function(intel: Intel, source: Tuple[int, int]):
return (-(intel.p * intel.q), manhattan_heuristic(*source, intel.i, intel.j))
def agent7(grid, source, target):
return agent_template(grid, source, target, _agent7_utility_function)

grid3, source3, target3 = gen_gridworld(10, 0.3, True)
pretty_print_explored_region(grid3, [], [source3, target3])
full_path_7, len_full_path_7, num_bumps_7, movements_7, examinations_7 = agent7(grid3, source3, target3)
pretty_print_explored_region(grid3, full_path_7, [source3, target3])
print(len_full_path_7, num_bumps_7, movements_7, examinations_7)
print(full_path_7)

# Agent 8 Implementation
def _agent8_utility_function(intel: Intel, source: Tuple[int, int]): # This is the best one
prob_finding_target_in_square = intel.p * intel.q
# Return a value to minimize
dist = manhattan_heuristic(*source, intel.i, intel.j) + 1 # Add 1 to avoid 0 distances
return (-prob_finding_target_in_square / dist, -prob_finding_target_in_square, dist)
def agent8(grid, source, target):
return agent_template(grid, source, target, _agent8_utility_function)

grid4, source4, target4 = gen_gridworld(10, 0.3, True)
pretty_print_explored_region(grid4, [], [source4, target4])
full_path_8, len_full_path_8, num_bumps_8, movements_8, examinations_8 = agent8(grid4, source4, target4)
pretty_print_explored_region(grid4, full_path_8, [source4, target4])
print(len_full_path_8, num_bumps_8, movements_8, examinations_8)
print(full_path_8)

import numpy as np
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
from timeit import default_timer as timer
agent6_average_bumps = 0
agent7_average_bumps = 0
agent8_average_bumps = 0
agent6_average_movements = 0
agent7_average_movements = 0
agent8_average_movements = 0
agent6_average_examinations = 0
agent7_average_examinations = 0
agent8_average_examinations = 0
agent6_average_runtime = 0
agent7_average_runtime = 0
agent8_average_runtime = 0
agent6_average_ratios = 0
agent7_average_ratios = 0
agent8_average_ratios = 0
agent6_average_actions = 0
agent7_average_actions = 0
agent8_average_actions = 0
n = 30
cnt = agent6_times = agent7_times = agent8_times = 0
agent6_bumps = agent7_bumps = agent8_bumps = 0
agent6_movements = agent7_movements = agent8_movements = 0
agent6_examinations = agent7_examinations = agent8_examinations = 0
agent6_ratios = agent7_ratios = agent8_ratios = 0
agent6_actions = agent7_actions = agent8_actions = 0
cnt = 0
while cnt != n:
grid, source, target = gen_gridworld(50, .3, True)
start = timer()
_, _, agent6_bump, agent6_movement, agent6_examination = agent6(grid, source, target)
stop = timer()
agent6_times += (stop-start)
start = timer()
_,_, agent7_bump, agent7_movement, agent7_examination = agent7(grid, source, target)
stop = timer()
agent7_times += (stop-start)
start = timer()
_,_, agent8_bump, agent8_movement, agent8_examination = agent8(grid, source, target)
stop = timer()
agent8_times += (stop-start)
agent6_bumps += agent6_bump
agent7_bumps += agent7_bump
agent8_bumps += agent8_bump
agent6_movements += agent6_movement
agent7_movements += agent7_movement
agent8_movements += agent8_movement
agent6_examinations += agent6_examination
agent7_examinations += agent7_examination
agent8_examinations += agent8_examination
agent6_ratios += (agent6_movement/agent6_examination)
agent7_ratios += (agent7_movement/agent7_examination)
agent8_ratios += (agent8_movement/agent8_examination)
agent6_actions += (agent6_movement + agent6_examination)
agent7_actions += (agent7_movement + agent7_examination)
agent8_actions += (agent8_movement + agent8_examination)
cnt+= 1
agent6_average_bumps = (agent6_bumps/n)
agent7_average_bumps = (agent7_bumps/n)
agent8_average_bumps = (agent8_bumps/n)
agent6_average_movements = (agent6_movements/n)
agent7_average_movements = (agent7_movements/n)
agent8_average_movements = (agent8_movements/n)
agent6_average_examinations = (agent6_examinations/n)
agent7_average_examinations = (agent7_examinations/n)
agent8_average_examinations = (agent8_examinations/n)
agent6_average_runtime = (agent6_times/n)
agent7_average_runtime = (agent7_times/n)
agent8_average_runtime = (agent8_times/n)
agent6_average_ratios = (agent6_ratios/n)
agent7_average_ratios = (agent7_ratios/n)
agent8_average_ratios = (agent8_ratios/n)
agent6_average_actions = (agent6_actions/n)
agent7_average_actions = (agent7_actions/n)
agent8_average_actions = (agent8_actions/n)
# a) Average Movements
plt.bar(["Agent 6", "Agent 7", "Agent 8"], [agent6_average_movements, agent7_average_movements, agent8_average_movements])
plt.ylabel('Average Movements')
plt.title('Agent vs Average Movements')
plt.show()
# b) Average Examinations
plt.bar(["Agent 6", "Agent 7", "Agent 8"], [agent6_average_examinations, agent7_average_examinations, agent8_average_examinations])
plt.ylabel('Average Examinations')
plt.title('Agent vs Average Examinations')
plt.show()
# c) Average Runtime
plt.bar(["Agent 6", "Agent 7", "Agent 8"], [agent6_average_runtime, agent7_average_runtime, agent8_average_runtime])
plt.ylabel('Average Runtime')
plt.title('Agent vs Average Runtime')
plt.show()
# d) Average Bumps
plt.bar(["Agent 6", "Agent 7", "Agent 8"], [agent6_average_bumps, agent7_average_bumps, agent8_average_bumps])
plt.ylabel('Average Bumps')
plt.title('Agent vs Average Bumps')
plt.show()
# e) Average Ratios
plt.bar(["Agent 6", "Agent 7", "Agent 8"], [agent6_average_ratios, agent7_average_ratios, agent8_average_ratios])
plt.ylabel('Average Ratios')
plt.title('Agent vs Average Movements/Examinations')
plt.show()
# f) Average Actions
plt.bar(["Agent 6", "Agent 7", "Agent 8"], [agent6_average_actions, agent7_average_actions, agent8_average_actions])
plt.ylabel('Average Actions')
plt.title('Agent vs Average Actions (Movements + Examinations)')
plt.show()

# Agent 9 Implementatio
def agent9(grid: List[List[float]], source: Tuple[int, int], target: Tuple[int, int]):
if not grid or not grid[0]:
raise Exception("grid is not valid")
full_path = [source]
grid_len = len(grid)
kb = create_initial_kb(grid_len, grid)
initial_source, initial_goal = source, target
row_idx, col_idx = source
if source == target:
return [source], 1, 0, 0, 1
kb[row_idx][col_idx].q = grid[row_idx][col_idx]
kb[row_idx][col_idx].status = CellState.OPEN
# initial sense at time step 0
_update_probabilities_target_detection(
kb,
source[0],
source[1],
check_target_in_neighbors(source, target),
)
target_unfound = True
num_bumps = movements = examinations = 0
while target_unfound:
# print(f"(1) Target pos: {target}")
# probs = [[x.p for x in row] for row in kb]
# sum1 = 0
# for row in probs:
# print(row)
# sum1 += sum(row)
# print(f"\n{sum1}\n")
# sense -> plan next move -> agent moves -> examine -> target moves
# Plan next move based on probabilities
goal_intel = _run_agent_algo(grid_len, kb, source, _agent9_utility_function)
goal = (goal_intel.i, goal_intel.j)
# Plan a path to s using knowledge base from current cell
path, _, _, _, _ = a_star(generate_current_grid_from_kb(kb), source, goal)
completed_path = True
for idx, (row_idx, col_idx) in enumerate(path[1:]):
# print(f"(2) Target pos: {target}")
# probs = [[x.p for x in row] for row in kb]
# sum1 = 0
# for row in probs:
# print(row)
# sum1 += sum(row)
# print(f"\n{sum1}\n")
movements += 1
# Observe the cell type and if not blocked move agent to new cell
if grid[row_idx][col_idx] != 1:
if kb[row_idx][col_idx].status == CellState.UNKNOWN:
kb[row_idx][col_idx].q = grid[row_idx][col_idx]
kb[row_idx][col_idx].status = CellState.OPEN
source = (row_idx, col_idx)
full_path.append(source)
else: # Hit a block - update probabilities and break to find new path
num_bumps += 1
# Update blocked cell and probabilities
set_cell_blocked_or_not_target(kb, row_idx, col_idx)
source = full_path[-1]
completed_path = False
target = _target_agent_next(grid, target)
update_probabilities(
kb,
source[0],
source[1],
check_target_in_neighbors(source, target),
)
break
# If we are not at the goal, move target and update probabilities accordinly
# If we find a better goal, break
if (row_idx, col_idx) != goal:
target = _target_agent_next(grid, target)
# Sense 4 neighbors and update probabilities respectively
update_probabilities(
kb,
source[0],
source[1],
check_target_in_neighbors(source, target),
)
new_goal_intel = _run_agent_algo(grid_len, kb, source, _agent9_utility_function)
if new_goal_intel != goal_intel:
completed_path = False
break
if not path and source != goal: # Found a node that has no path to it - consider it blocked
set_cell_blocked_or_not_target(kb, goal[0], goal[1])
completed_path = False
# If we hit the goal node, examine
if completed_path:
examinations += 1
goal_intel.searched = True
if (source[0], source[1]) == target:
# print(source, goal)
target_unfound = False
else:
set_cell_blocked_or_not_target(kb, row_idx, col_idx, False)
target = _target_agent_next(grid, target)
update_probabilities(
kb,
source[0],
source[1],
check_target_in_neighbors(source, target),
)
print(f"End target pos: {target}")
return full_path, len(full_path), num_bumps, movements, examinations # all grids assume to be solvable
def _agent9_utility_function(intel: Intel, source: Tuple[int, int]):
dist = manhattan_heuristic(*source, intel.i, intel.j) + 1
return (-intel.p / dist, -intel.p, dist)
def _target_agent_next(grid: List[List[float]], pos: Tuple[int, int]) -> Tuple[int, int]:
return choice([(x, y) for (x,y) in get_neighbors(pos[0], pos[1], len(grid)) if grid[x][y] != 1])
def check_target_in_neighbors(src: Tuple[int, int], target: Tuple[int, int]) -> bool:
return (abs(target[1] - src[1]) + abs(target[0] - src[0])) == 1
def update_probabilities(
kb: List[List[Intel]],
src_row: int,
src_col: int,
target_detected: bool
) -> None:
_update_probabilities_time(kb, src_row, src_col, target_detected)
_update_probabilities_target_detection(kb, src_row, src_col, target_detected)
def _update_probabilities_time(
kb: List[List[Intel]],
src_row: int,
src_col: int,
target_detected: bool
) -> None:
# To save time, this does not compute the whole grid in some cases.
# Thus, this should be used in conjunction with _update_probabilities_target_detection.
# Otherwise, probabilities may not sum to 1
old_kb = deepcopy(kb)
neighbors_src = get_neighbors(src_row, src_col, len(kb))
for curr_row, kb_row in enumerate(kb):
for curr_col, kb_cell in enumerate(kb_row):
if (curr_row, curr_col) not in neighbors_src and target_detected:
kb_cell.p = 0
else:
prob_target_in_current = 0
if not kb[curr_row][curr_col].status == CellState.BLOCKED:
# unblocked_neighbors = [(x,y) for x,y in get_neighbors(neighbor_row, neighbor_col, len(kb)) if kb[x][y].status != CellState.BLOCKED]
for (neighbor_row, neighbor_col) in get_neighbors(curr_row, curr_col, len(kb)):
unblocked_neighbors = [(x,y) for x,y in get_neighbors(neighbor_row, neighbor_col, len(kb)) if kb[x][y].status != CellState.BLOCKED]
# prob_target_in_current += (old_kb[neighbor_row][neighbor_col].p / len(get_neighbors(neighbor_row, neighbor_col, len(kb))))
prob_target_in_current += (old_kb[neighbor_row][neighbor_col].p / len(unblocked_neighbors))
kb_cell.p = prob_target_in_current
def _update_probabilities_target_detection(
kb: List[List[Intel]],
src_row: int,
src_col: int,
target_detected: bool
) -> None:
# This method does not update the time of the probabilities. It only updates the probabilities based on whether a target is detected
# This is effectively a scaling portion of calculating the probability, and it assumes we have the probabilities
# unscaled at the current time
neighbors_src = get_neighbors(src_row, src_col, len(kb))
prob_target_in_neighbors_src = 0
for row, col in neighbors_src:
prob_target_in_neighbors_src += kb[row][col].p
# prob_target_not_in_neighbors_or_src = 1 - (prob_target_in_neighbors_src + kb[src_row][src_col].p)
for curr_row, kb_row in enumerate(kb):
for curr_col, kb_cell in enumerate(kb_row):
# if (curr_row == src_row and curr_col == src_col): # Not sure ab this
# kb_cell.p = 0
if (curr_row, curr_col) in neighbors_src:
# Note: prob_target_in_neighbors_src cannot be 0 if we detect target in neighbors
# if target_detected and prob_target_in_neighbors_src <= 0:
# raise Exception(f"Target detected at {(curr_row, curr_col)} but probabilities don't match - 0 error. Diff: {prob_target_in_neighbors_src}")
# print(prob_target_in_neighbors_src, target_detected)
kb_cell.p = kb_cell.p / prob_target_in_neighbors_src if target_detected else 0
else:
# if not target_detected and 1 - prob_target_in_neighbors_src <= 0:
# raise Exception(f"Target not detected but probabilities don't match - 0 error. Diff: {1 - prob_target_in_neighbors_src}")
kb_cell.p = 0 if target_detected else kb_cell.p / (1 - prob_target_in_neighbors_src)
def get_neighbors(row_idx, col_idx, grid_len):
neighbors = []
for (x,y) in [(row_idx + 1, col_idx), (row_idx, col_idx + 1), (row_idx - 1, col_idx), (row_idx, col_idx - 1)]:
if 0 <= x < grid_len and 0 <= y < grid_len:
neighbors.append((x,y))
return neighbors
def update_current_cell(
kb: List[List[Intel]],
row_idx: int,
col_idx: int,
new_p: float) -> None:
kb[row_idx][col_idx].p *= new_p #NOTE the * symbol here
def update_other_cell(
kb: List[List[Intel]],
row_idx: int,
col_idx: int,
new_p: float) -> None:
for i in range(len(kb)):
for j in range(len(kb[i])):
if i == row_idx and j == col_idx or kb[i][j].status == CellState.BLOCKED:
continue
kb[i][j].p *= new_p #NOTE the * symbol here
def set_cell_blocked_or_not_target(
kb: List[List[Intel]],
row_idx: int,
col_idx: int,
is_blocked: bool = True
) -> None:
if is_blocked:
kb[row_idx][col_idx].status = CellState.BLOCKED
kb[row_idx][col_idx].q = 0 #float('NaN') this will raise an exception if we use q again #FIXME: Changed to 0 to check
if kb[row_idx][col_idx].p != 1:
update_other_cell(kb, row_idx, col_idx, 1/(1 - kb[row_idx][col_idx].p))
update_current_cell(kb, row_idx, col_idx, 0)

grid, source, target = gen_gridworld(10, 0.3, True)
pretty_print_explored_region(grid, [], [source, target])
full_path_9, len_full_path_9, num_bumps_9, movements_9, examinations_9 = agent9(grid, source, target)
pretty_print_explored_region(grid, full_path_9, [source, target])
print(len_full_path_9, num_bumps_9, movements_9, examinations_9)
print(full_path_9)

import numpy as np
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt
from timeit import default_timer as timer
agent6_average_bumps = 0
agent7_average_bumps = 0
agent8_average_bumps = 0
agent9_average_bumps = 0
agent6_average_movements = 0
agent7_average_movements = 0
agent8_average_movements = 0
agent9_average_movements = 0
agent6_average_examinations = 0
agent7_average_examinations = 0
agent8_average_examinations = 0
agent9_average_examinations = 0
agent6_average_runtime = 0
agent7_average_runtime = 0
agent8_average_runtime = 0
agent9_average_runtime = 0
agent6_average_ratios = 0
agent7_average_ratios = 0
agent8_average_ratios = 0
agent9_average_ratios = 0
agent6_average_actions = 0
agent7_average_actions = 0
agent8_average_actions = 0
agent9_average_actions = 0
n = 10
cnt = agent6_times = agent7_times = agent8_times = agent9_times = 0
agent6_bumps = agent7_bumps = agent8_bumps = agent9_bumps = 0
agent6_movements = agent7_movements = agent8_movements = agent9_movements = 0
agent6_examinations = agent7_examinations = agent8_examinations = agent9_examinations = 0
agent6_ratios = agent7_ratios = agent8_ratios = agent9_ratios = 0
agent6_actions = agent7_actions = agent8_actions = agent9_actions = 0
cnt = 0
while cnt != n:
grid, source, target = gen_gridworld(10, .3, True)
start = timer()
_, _, agent6_bump, agent6_movement, agent6_examination = agent6(grid, source, target)
stop = timer()
agent6_times += (stop-start)
start = timer()
_,_, agent7_bump, agent7_movement, agent7_examination = agent7(grid, source, target)
stop = timer()
agent7_times += (stop-start)
start = timer()
_,_, agent8_bump, agent8_movement, agent8_examination = agent8(grid, source, target)
stop = timer()
agent8_times += (stop-start)
start = timer()
_,_, agent9_bump, agent9_movement, agent9_examination = agent9(grid, source, target)
stop = timer()
agent9_times += (stop-start)
agent6_bumps += agent6_bump
agent7_bumps += agent7_bump
agent8_bumps += agent8_bump
agent9_bumps += agent9_bump
agent6_movements += agent6_movement
agent7_movements += agent7_movement
agent8_movements += agent8_movement
agent9_movements += agent9_movement
agent6_examinations += agent6_examination
agent7_examinations += agent7_examination
agent8_examinations += agent8_examination
agent9_examinations += agent9_examination
agent6_ratios += (agent6_movement/agent6_examination)
agent7_ratios += (agent7_movement/agent7_examination)
agent8_ratios += (agent8_movement/agent8_examination)
agent9_ratios += (agent9_movement/agent9_examination)
agent6_actions += (agent6_movement + agent6_examination)
agent7_actions += (agent7_movement + agent7_examination)
agent8_actions += (agent8_movement + agent8_examination)
agent9_actions += (agent9_movement + agent9_examination)
cnt+= 1
agent6_average_bumps = (agent6_bumps/n)
agent7_average_bumps = (agent7_bumps/n)
agent8_average_bumps = (agent8_bumps/n)
agent9_average_bumps = (agent9_bumps/n)
agent6_average_movements = (agent6_movements/n)
agent7_average_movements = (agent7_movements/n)
agent8_average_movements = (agent8_movements/n)
agent9_average_movements = (agent9_movements/n)
agent6_average_examinations = (agent6_examinations/n)
agent7_average_examinations = (agent7_examinations/n)
agent8_average_examinations = (agent8_examinations/n)
agent9_average_examinations = (agent9_examinations/n)
agent6_average_runtime = (agent6_times/n)
agent7_average_runtime = (agent7_times/n)
agent8_average_runtime = (agent8_times/n)
agent9_average_runtime = (agent9_times/n)
agent6_average_ratios = (agent6_ratios/n)
agent7_average_ratios = (agent7_ratios/n)
agent8_average_ratios = (agent8_ratios/n)
agent9_average_ratios = (agent9_ratios/n)
agent6_average_actions = (agent6_actions/n)
agent7_average_actions = (agent7_actions/n)
agent8_average_actions = (agent8_actions/n)
agent9_average_actions = (agent9_actions/n)
# a) Average Movements
plt.bar(["Agent 6", "Agent 7", "Agent 8", "Agent 9"], [agent6_average_movements, agent7_average_movements, agent8_average_movements, agent9_average_movements])
plt.ylabel('Average Movements')
plt.title('Agent vs Average Movements')
plt.show()
# b) Average Examinations
plt.bar(["Agent 6", "Agent 7", "Agent 8", "Agent 9"], [agent6_average_examinations, agent7_average_examinations, agent8_average_examinations, agent9_average_examinations])
plt.ylabel('Average Examinations')
plt.title('Agent vs Average Examinations')
plt.show()
# c) Average Runtime
plt.bar(["Agent 6", "Agent 7", "Agent 8", "Agent 9"], [agent6_average_runtime, agent7_average_runtime, agent8_average_runtime, agent9_average_runtime])
plt.ylabel('Average Runtime')
plt.title('Agent vs Average Runtime')
plt.show()
# d) Average Bumps
plt.bar(["Agent 6", "Agent 7", "Agent 8", "Agent 9"], [agent6_average_bumps, agent7_average_bumps, agent8_average_bumps, agent9_average_bumps])
plt.ylabel('Average Bumps')
plt.title('Agent vs Average Bumps')
plt.show()
# e) Average Ratios
plt.bar(["Agent 6", "Agent 7", "Agent 8", "Agent 9"], [agent6_average_ratios, agent7_average_ratios, agent8_average_ratios, agent9_average_ratios])
plt.ylabel('Average Ratios')
plt.title('Agent vs Average Movements/Examinations')
plt.show()
# f) Average Actions
plt.bar(["Agent 6", "Agent 7", "Agent 8", "Agent 9"], [agent6_average_actions, agent7_average_actions, agent8_average_actions, agent9_average_actions])
plt.ylabel('Average Actions')
plt.title('Agent vs Average Actions (Movements + Examinations)')
plt.show()