import random

def action_x(s):
    prob = random.randint(1, 100)
    if s == 'A':
        # 60% stay in A, 40% move to B
        if prob <= 60:
            s_prime = 'A'
        else:
            s_prime = 'B'
    else:  # B
        # 30% move to A, 70% stay in B
        if prob <= 30:
            s_prime = 'A'
        else:
            s_prime = 'B'
    return s_prime
def action_y(s):
    prob = random.randint(1, 100)
    if s == 'A':
        # 50% stay in A, 50% move to B
        if prob <= 50:
            s_prime = 'A'
        else:
            s_prime = 'B'
    else:  # B
        # 80% move to A, 20% stay in B
        if prob <= 80:
            s_prime = 'A'
        else:
            s_prime = 'B'
    return s_prime
def step(state, action):
    # Here the "action" is the next state already chosen by action_x/action_y,
    # so the transition simply adopts it; being in state 'A' earns reward 1.
    new_state = action
    if new_state == 'A':
        reward = 1
    else:
        reward = 0
    return new_state, reward
# Roll out 100 steps always choosing action_x, starting in state 'A'
# (reward starts at 1 to count the reward for the initial state 'A').
state = 'A'
reward = 1
for i in range(100):
    result = step(state, action_x(state))
    state = result[0]
    reward += result[1]
print(reward)
# Roll out 100 steps always choosing action_y
state = 'A'
reward = 1
for i in range(100):
    result = step(state, action_y(state))
    state = result[0]
    reward += result[1]
print(reward)
# Roll out 100 steps, picking action_x or action_y uniformly at random
state = 'A'
reward = 1
for i in range(100):
    r = random.randint(0, 1)
    if r == 0:
        result = step(state, action_x(state))
    else:
        result = step(state, action_y(state))
    state = result[0]
    reward += result[1]
print(reward)
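
# Optional analytical check (an addition, not part of the original listing):
# each action defines a two-state Markov chain, so the long-run fraction of
# time spent in 'A' -- which equals the expected reward per step -- can be
# read off the chain's stationary distribution. The helper below is a small
# illustrative sketch using numpy.
import numpy as np

def stationary_distribution(P):
    """Solve pi = pi @ P with the entries of pi summing to 1."""
    n = P.shape[0]
    A = np.vstack([P.T - np.eye(n), np.ones(n)])
    b = np.append(np.zeros(n), 1.0)
    return np.linalg.lstsq(A, b, rcond=None)[0]

P_x = np.array([[0.6, 0.4],   # from A: 60% stay, 40% to B
                [0.3, 0.7]])  # from B: 30% to A, 70% stay
P_y = np.array([[0.5, 0.5],   # from A: 50% stay, 50% to B
                [0.8, 0.2]])  # from B: 80% to A, 20% stay

print(stationary_distribution(P_x)[0])  # ~0.43 reward per step under action_x
print(stationary_distribution(P_y)[0])  # ~0.62 reward per step under action_y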
#!pip install gym
import gym
from gym import spaces
import numpy as np

class BedEnv(gym.Env):
    """
    Beds custom environment, built with the gym interface
    """
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(BedEnv, self).__init__()
        # Three states: 0 = asleep, 1 = awake in bed, 2 = out of bed
        self.observation_space = spaces.Discrete(3)
        # Each state offers exactly one available action
        self.action_space = spaces.Tuple((spaces.Discrete(1), spaces.Discrete(1), spaces.Discrete(1)))
        self.agent_pos = np.random.randint(3)

    def _next_observation(self):
        return {'state': self.agent_pos}
    def step(self, action):
        # Dictionary of functions that return [reward, next_state] transitions for each action
        state_0_transitions = {  # asleep
            0: lambda: [[30, 1], [50, 2]][np.random.choice(2, p=[0.75, 0.25])]  # wake up
        }
        state_1_transitions = {  # awake in bed
            0: lambda: [[5, 0], [20, 2]][np.random.choice(2, p=[0.45, 0.55])]  # turn over
        }
        state_2_transitions = {  # out of bed
            0: lambda: [[0, 0], [10, 1]][np.random.choice(2, p=[0.15, 0.85])]  # get in bed
        }
        reward = None
        next_state = None
        if self.agent_pos == 0:
            # Agent is asleep. Use state 0 transitions
            reward, next_state = state_0_transitions[action]()
        elif self.agent_pos == 1:
            # Agent is awake in bed. Use state 1 transitions
            reward, next_state = state_1_transitions[action]()
        else:
            # Agent is out of bed. Use state 2 transitions
            reward, next_state = state_2_transitions[action]()
        # Transition the agent to the next state
        self.agent_pos = next_state
        # Return the data as defined by the gym interface
        return self._next_observation(), reward, False, {}
    def reset(self):
        # Reset the agent's position
        self.agent_pos = np.random.randint(3)
        return self._next_observation()

    def render(self, mode='human'):
        if mode == 'human':
            pretty_print_state = {
                0: "asleep",
                1: "awake in bed",
                2: "out of bed"
            }
            print('Current State: {}'.format(pretty_print_state[self.agent_pos]))
        else:
            raise NotImplementedError()
env = BedEnv()
curr_state = env.agent_pos
steps = 100

# Human-readable names for the single action available in each state
pretty_print_a_0 = {
    0: "wake up",
}
pretty_print_a_1 = {
    0: "turn over",
}
pretty_print_a_2 = {
    0: "get in bed",
}

reward = 0
for _ in range(steps):
    env.render()
    action = env.action_space[curr_state].sample()  # Sample an action from the current state
    print_a = None
    if curr_state == 0:
        print_a = pretty_print_a_0[action]
    elif curr_state == 1:
        print_a = pretty_print_a_1[action]
    else:
        print_a = pretty_print_a_2[action]
    print("Action taken: {}".format(print_a))
    obs, new_reward, _, _ = env.step(action)  # Execute a step, get observation and reward
    curr_state = obs['state']
    reward += new_reward
    print("Cumulative reward obtained: {}".format(reward))
    print("===========================")