# Ella Has s1051869
# Tsvetomira Krikoryan s1051145
import random
def action_x(s):
    """Sample the state reached by taking action x in state ``s``.

    Transition probabilities:
        from 'A': 'A' with probability 0.6, 'B' with probability 0.4
        from 'B': 'A' with probability 0.3, 'B' with probability 0.7

    Args:
        s: Current state, either 'A' or 'B'.

    Returns:
        The next state, 'A' or 'B'.

    Raises:
        ValueError: If ``s`` is not one of the known states.
    """
    # Probability of landing in 'A', keyed by the current state.
    prob_of_a = {'A': 0.6, 'B': 0.3}
    try:
        threshold = prob_of_a[s]
    except (KeyError, TypeError):
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError("unknown state: {!r}".format(s)) from None
    # Exactly one random draw per call.
    return 'A' if random.random() < threshold else 'B'
def action_y(s):
    """Sample the state reached by taking action y in state ``s``.

    Transition probabilities:
        from 'A': 'A' with probability 0.5, 'B' with probability 0.5
        from 'B': 'A' with probability 0.8, 'B' with probability 0.2

    Args:
        s: Current state, either 'A' or 'B'.

    Returns:
        The next state, 'A' or 'B'.

    Raises:
        ValueError: If ``s`` is not one of the known states.
    """
    # Probability of landing in 'A', keyed by the current state.
    prob_of_a = {'A': 0.5, 'B': 0.8}
    try:
        threshold = prob_of_a[s]
    except (KeyError, TypeError):
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError("unknown state: {!r}".format(s)) from None
    # Exactly one random draw per call.
    return 'A' if random.random() < threshold else 'B'
# Call each transition sampler once; the returned states are not stored.
action_x('A')
action_y('B')
def step(state, action):
    """Apply ``action`` to ``state`` and score the state that results.

    Args:
        state: The current state; passed unchanged to ``action``.
        action: Callable taking the current state and returning the next one.

    Returns:
        A ``(new_state, reward)`` tuple, where the reward is 1 when the new
        state equals 'A' and 0 otherwise.
    """
    successor = action(state)
    payoff = 1 if successor == 'A' else 0
    return successor, payoff
# Policy "always x": start in 'A', take action x for 100 steps and
# accumulate the reward returned by each step.
state, total_reward_x = 'A', 0
for i in range(100):
    state, reward = step(state, action_x)
    total_reward_x = total_reward_x + reward
print(total_reward_x)
# Output of one sample run: 36
# Policy "always y": start in 'A', take action y for 100 steps and
# accumulate the reward returned by each step.
state, total_reward_y = 'A', 0
for i in range(100):
    state, reward = step(state, action_y)
    total_reward_y = total_reward_y + reward
print(total_reward_y)
# Output of one sample run: 49
# Random policy: start in 'A' and, for 100 steps, pick action x or action y
# with equal probability before stepping.
state, total_reward_random = 'A', 0
for i in range(100):
    # The policy's coin flip is drawn before the chosen action draws its own
    # random number inside step().
    rnd_number = random.random()
    state, reward = step(state, action_x if rnd_number < 0.5 else action_y)
    total_reward_random = total_reward_random + reward
print(total_reward_random)
# Output of one sample run: 40
import matplotlib.pyplot as plt
import numpy as np
# Bar chart of the total reward collected by each of the three policies.
bars = ('x', 'y', 'random')
height = [total_reward_x, total_reward_y, total_reward_random]
y_pos = np.arange(len(bars))
plt.xticks(y_pos, bars)  # one tick per policy, labelled with its name
plt.bar(y_pos, height)
plt.ylabel('Reward')
plt.title('Total reward per policy')
plt.show()
# States: hungry, not hungry, ate too much
# Actions: eat, don't eat
# Rewards: r(hungry)=-10, r(not_hungry)=10, r(ate too much)=-8
# Dynamics: see drawing
import gym
from gym import spaces
import numpy as np
class HungerEnv(gym.Env):
    """Three-state "hunger" MDP exposed through the gym ``Env`` interface.

    States (stored in ``agent_pos``): 0 = hungry, 1 = not hungry,
    2 = ate too much.
    Actions: 0 = don't eat, 1 = eat.
    The reward of a step depends only on the state the agent lands in:
    -10 for hungry, 10 for not hungry, -8 for ate too much.
    Episodes never terminate; ``step`` always reports ``done=False``.
    """

    metadata = {'render.modes': ['human']}

    # Reward received on entering each state.
    _STATE_REWARDS = {0: -10, 1: 10, 2: -8}

    # _TRANSITIONS[state][action] -> (candidate next states, probabilities).
    # Built once at class level instead of on every call to step().
    _TRANSITIONS = {
        0: {  # hungry
            0: ((0,), (1.0,)),
            1: ((0, 1, 2), (0.1, 0.8, 0.1)),
        },
        1: {  # not hungry
            0: ((1, 0), (0.6, 0.4)),
            1: ((1, 2), (0.2, 0.8)),
        },
        2: {  # ate too much
            0: ((1, 2), (0.5, 0.5)),
            1: ((2,), (1.0,)),
        },
    }

    # Human-readable state names used by render().
    _STATE_NAMES = {
        0: "Hungry",
        1: "Not hungry",
        2: "Ate too much",
    }

    def __init__(self):
        """Create the environment and place the agent in a random state."""
        super().__init__()
        # Three states exist (0, 1, 2); the space must cover all of them.
        # Observations are returned wrapped in a dict under the key 'state'.
        self.observation_space = spaces.Discrete(3)
        self.action_space = spaces.Discrete(2)
        self.agent_pos = np.random.randint(3)

    def _next_observation(self):
        """Return the current state wrapped as ``{'state': agent_pos}``."""
        return {'state': self.agent_pos}

    def step(self, action):
        """Sample a transition for ``action`` from the current state.

        Args:
            action: 0 (don't eat) or 1 (eat).

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is
            ``{'state': next_state}``, ``done`` is always ``False`` and
            ``info`` is an empty dict.

        Raises:
            KeyError: If ``action`` is not 0 or 1.
        """
        next_states, probabilities = self._TRANSITIONS[self.agent_pos][action]
        # int() keeps agent_pos a plain Python int rather than a numpy scalar.
        next_state = int(np.random.choice(next_states, p=probabilities))
        reward = self._STATE_REWARDS[next_state]
        # Transition the agent to the next state
        self.agent_pos = next_state
        # Return the data as defined by the gym interface
        return self._next_observation(), reward, False, {}

    def reset(self):
        """Move the agent to a uniformly random state and return the observation."""
        self.agent_pos = np.random.randint(3)
        return self._next_observation()

    def render(self, mode='human'):
        """Print the current state by name.

        Raises:
            NotImplementedError: For any ``mode`` other than ``'human'``.
        """
        if mode != 'human':
            raise NotImplementedError()
        print('Current State: {}'.format(self._STATE_NAMES[self.agent_pos]))
# Roll out a uniformly random policy in HungerEnv for a fixed number of steps,
# log every transition and plot the running total of the reward.
env = HungerEnv()
curr_state = env.agent_pos
steps = 100
pretty_print_a = {
    0: "Don't eat",
    1: "Eat"
}
rewards = [0]  # rewards[k] is the cumulative reward after k steps
cummulative_reward = 0  # (sic) original variable name kept
for _ in range(steps):
    env.render()
    # Sample uniformly from the action space (independent of the state).
    action = env.action_space.sample()
    print_a = pretty_print_a[action]
    print("Action taken: {}".format(print_a))
    # Execute a step; the done flag and info dict are not used here.
    obs, reward, _done, _info = env.step(action)
    curr_state = obs['state']
    print("Reward obtained: {}".format(reward))
    print("===========================")
    cummulative_reward += reward
    rewards.append(cummulative_reward)
plt.plot(range(steps + 1), rewards)
plt.title("Cumulative reward")
plt.xlabel("Step")
plt.ylabel("Reward")
plt.show()
Current State: Hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Don't eat
Reward obtained: -10
===========================
Current State: Hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: -8
===========================
Current State: Ate too much
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================
Current State: Not hungry
Action taken: Don't eat
Reward obtained: 10
===========================