import random
def action_x(s):
    """Sample the successor state reached by taking action X from state *s*.

    A single integer is drawn uniformly from 1..100 and compared against a
    state-dependent cutoff:

    * from ``'A'``: 60% chance of ``'A'``, 40% chance of ``'B'``
    * from any other state (treated as ``'B'``): 30% chance of ``'A'``,
      70% chance of ``'B'``

    Returns the successor state as the string ``'A'`` or ``'B'``.
    """
    roll = random.randint(1, 100)
    # Highest roll (inclusive) that still lands in state 'A'.
    cutoff_for_a = 60 if s == 'A' else 30
    return 'A' if roll <= cutoff_for_a else 'B'
def action_y(s):
    """Sample the successor state reached by taking action Y from state *s*.

    A single integer is drawn uniformly from 1..100 and compared against a
    state-dependent cutoff:

    * from ``'A'``: 50% to ``'A'``, 50% to ``'B'``
    * from any other state (treated as ``'B'``): 80% to ``'A'``, 20% to ``'B'``

    Returns the successor state as the string ``'A'`` or ``'B'``.
    """
    roll = random.randint(1, 100)
    # Highest roll (inclusive) that still lands in state 'A'.
    cutoff_for_a = 50 if s == 'A' else 80
    return 'A' if roll <= cutoff_for_a else 'B'
def step(state, action):
    """Turn an already-sampled transition into a ``(new_state, reward)`` pair.

    Parameters:
        state: the state the agent was in; accepted but not used here.
        action: despite the name, this is the successor state itself (the
            callers in this file pass the value returned by ``action_x`` /
            ``action_y``).

    Returns:
        A tuple ``(new_state, reward)`` where ``new_state`` is ``action``
        unchanged and ``reward`` is 1 when that state is ``'A'``, else 0.
    """
    return action, (1 if action == 'A' else 0)
# Roll out 100 steps always taking action X, starting in state 'A'.
# The running total starts at 1 rather than 0.
state = 'A'
reward = 1
for _ in range(100):
    state, gained = step(state, action_x(state))
    reward += gained
print(reward)
51
# Roll out 100 steps always taking action Y, starting in state 'A'.
# The running total starts at 1 rather than 0.
state = 'A'
reward = 1
for _ in range(100):
    state, gained = step(state, action_y(state))
    reward += gained
print(reward)
66
# Roll out 100 steps, choosing between action X and action Y with a fair
# coin flip at every step, starting in state 'A'.
# The running total starts at 1 rather than 0.
state = 'A'
reward = 1
for _ in range(100):
    # The coin is flipped before the chosen action samples its own transition.
    chosen_action = action_x if random.randint(0, 1) == 0 else action_y
    state, gained = step(state, chosen_action(state))
    reward += gained
print(reward)
55
#!pip install gym
import gym
from gym import spaces
import numpy as np
class BedEnv(gym.Env):
    """
    Beds custom environment, built with the gym interface.

    States: 0 = asleep, 1 = awake in bed, 2 = out of bed. Every state offers
    exactly one action (index 0): "wake up", "turn over" and "get in bed"
    respectively. Episodes never terminate: ``step`` always reports
    ``done=False``.
    """
    metadata = {'render.modes': ['human']}

    # state -> action -> (candidate [reward, next_state] outcomes, probabilities)
    _TRANSITIONS = {
        0: {0: ([[30, 1], [50, 2]], [0.75, 0.25])},  # asleep: wake up
        1: {0: ([[5, 0], [20, 2]], [0.45, 0.55])},   # awake in bed: turn over
        2: {0: ([[0, 0], [10, 1]], [0.15, 0.85])},   # out of bed: get in bed
    }

    def __init__(self):
        super().__init__()
        # NOTE(review): the declared observation space is Discrete(3), but
        # observations are actually returned as {'state': <int>} dicts.
        self.observation_space = spaces.Discrete(3)
        # One single-action space per state, indexed by state number.
        self.action_space = spaces.Tuple(
            (spaces.Discrete(1), spaces.Discrete(1), spaces.Discrete(1))
        )
        # Start in a uniformly random state.
        self.agent_pos = np.random.randint(3)

    def _next_observation(self):
        """Wrap the current state in the observation dict handed to callers."""
        return {'state': self.agent_pos}

    def step(self, action):
        """Apply *action* in the current state and sample the outcome.

        Returns ``(observation, reward, done, info)`` where ``done`` is
        always ``False`` and ``info`` is an empty dict. An action that is not
        defined for the current state raises ``KeyError``.
        """
        # Any position other than 0 or 1 uses the "out of bed" transitions.
        state_key = self.agent_pos if self.agent_pos in (0, 1) else 2
        outcomes, probabilities = self._TRANSITIONS[state_key][action]
        # Pick one of the two candidate outcomes according to its probability.
        reward, next_state = outcomes[np.random.choice(2, p=probabilities)]
        # Transition the agent to the next state
        self.agent_pos = next_state
        # Return the data as defined by the gym interface
        return self._next_observation(), reward, False, {}

    def reset(self):
        """Move the agent to a uniformly random state and return the observation."""
        self.agent_pos = np.random.randint(3)
        return self._next_observation()

    def render(self, mode='human'):
        """Print the current state by name; only ``mode='human'`` is supported."""
        if mode != 'human':
            raise NotImplementedError()
        pretty_print_state = {
            0: "asleep",
            1: "awake in bed",
            2: "out of bed",
        }
        print('Current State: {}'.format(pretty_print_state[self.agent_pos]))
# Run the bed environment for a fixed number of steps, always sampling the
# action from the action space belonging to the current state, and print a
# running log of state, action and cumulative reward.
env = BedEnv()
curr_state = env.agent_pos
steps = 100

# Human-readable action names, one table per state.
pretty_print_a_0 = {0: "wake up"}
pretty_print_a_1 = {0: "turn over"}
pretty_print_a_2 = {0: "get in bed"}

reward = 0
for _ in range(steps):
    env.render()
    # Sample an action from the current state's action space
    action = env.action_space[curr_state].sample()
    # Any state other than 0 or 1 falls back to the "out of bed" names.
    labels_for_state = (
        pretty_print_a_0 if curr_state == 0
        else pretty_print_a_1 if curr_state == 1
        else pretty_print_a_2
    )
    print_a = labels_for_state[action]
    print("Action taken: {}".format(print_a))
    # Execute a step, get observation and reward
    obs, new_reward, _, _ = env.step(action)
    curr_state = obs['state']
    reward += new_reward
    print("Cumulative reward obtained: {}".format(reward))
    print("===========================")
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 50
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 60
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 65
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 95
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 100
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 130
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 135
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 165
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 170
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 200
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 205
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 255
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 265
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 285
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 285
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 315
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 335
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 345
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 365
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 375
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 380
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 410
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 430
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 440
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 445
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 475
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 495
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 505
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 525
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 525
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 555
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 575
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 585
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 605
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 615
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 635
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 635
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 665
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 685
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 685
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 715
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 720
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 770
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 770
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 820
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 830
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 835
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 885
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 895
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 915
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 925
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 945
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 955
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 960
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 990
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1010
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1020
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1040
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1050
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1070
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1080
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1085
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1115
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1135
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1145
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1165
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1175
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1195
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1205
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1210
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1240
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1260
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1260
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1310
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1320
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1325
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1375
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1385
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1405
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1415
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1435
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1445
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1450
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1480
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1500
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1510
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1530
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1540
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1560
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1570
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1575
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1605
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1610
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1660
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1670
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1675
===========================
Current State: asleep
Action taken: wake up
Cumulative reward obtained: 1705
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1725
===========================
Current State: out of bed
Action taken: get in bed
Cumulative reward obtained: 1735
===========================
Current State: awake in bed
Action taken: turn over
Cumulative reward obtained: 1740
===========================