import random
import numpy as np
from collections import defaultdict, deque
from gym import Env, spaces
from keras.models import Sequential
from keras.layers import Dense
from keras.losses import Huber
from keras.initializers import Zeros
from keras.optimizers import Adam
# Words
with open("words.txt", "r") as f:
    WORDS = f.read().splitlines()
WORDS_N = len(WORDS)
# Hyperparameters
ALPHA = 0.002            # learning rate for the Adam optimizer
EPSILON = 0.9            # initial exploration rate for the epsilon-greedy policy
EPSILON_DECAY = 0.99995  # multiplicative decay applied to epsilon after every action
EPSILON_MIN = 0.1        # lower bound on the exploration rate
GAMMA = 0.95             # discount factor for future rewards
# DQN Configuration
BATCH_SIZE = 64          # number of experiences sampled per replay update
MAX_MEMORY = 100000      # maximum number of transitions kept in replay memory
# Agent
class Agent:
def __init__(self, env):
# Wordle environment
self.env = env
# Wordle observation space dimensions
self.dimensions = env.observation_space.shape
# Hyperparameters
self.alpha = ALPHA
self.epsilon = EPSILON
self.epsilon_decay = EPSILON_DECAY
self.epsilon_min = EPSILON_MIN
self.gamma = GAMMA
# Batch replay size
self.batch_size = BATCH_SIZE
# Batch replay memory
self.memory = deque(maxlen=MAX_MEMORY)
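        # A bounded deque: once MAX_MEMORY transitions are stored, the oldest are discarded.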
# Training model
self.q = self.get_model()
# Prediction model
self.target_q = self.get_model()
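        # The target network is a periodically synced copy of the training network; predicting
        # from this slower-moving copy keeps the bootstrapped Q-targets from chasing an estimate
        # that changes on every gradient step.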
    # This builds the Q-network: a two-hidden-layer MLP mapping the 26-dimensional letter state to one Q-value per candidate word.
def get_model(self):
model = Sequential()
model.add(Dense(512, input_dim=self.dimensions[1], activation="relu", bias_initializer=Zeros()))
model.add(Dense(512, activation="relu", bias_initializer=Zeros()))
model.add(Dense(self.env.action_space.n, activation="linear"))
        model.compile(loss=Huber(), optimizer=Adam(learning_rate=self.alpha))
return model
# This returns a random action with epsilon probability.
def get_epsilon_action(self, state):
self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.target_q.predict(state.reshape(self.dimensions))[0])
# This records an experience.
def remember(self, state, action, reward, state_, done):
self.memory.append((state, action, reward, state_, done))
# This copies the weights from the trained model to the prediction model.
def target_train(self):
        self.target_q.set_weights(self.q.get_weights())
# This fits the training model according to a random subset of experience.
def replay(self):
if len(self.memory) >= self.batch_size:
samples = random.sample(self.memory, self.batch_size)
x = []
y = []
for sample in samples:
state, action, reward, state_, done = sample
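                # Bellman target: r for terminal transitions, otherwise r + gamma * max_a' Q_target(s', a').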
target = self.target_q.predict(state.reshape(self.dimensions))
if done:
target[0][action] = reward
else:
target[0][action] = reward + self.gamma * max(self.target_q.predict(state_.reshape(self.dimensions))[0])
x.append(state)
                y.append(target[0])
self.q.fit(np.array(x), np.array(y), batch_size=self.batch_size, verbose=0)
# This is the main training loop in which the agent plays multiple games of Wordle.
    def train(self, episodes):
        self.env.set_explore()
        step = 0
        for i in range(episodes):
            rewards = 0
            state = self.env.reset()
            done = False
            while not done:
                action = self.get_epsilon_action(state)
                state_, reward, done, info = self.env.step(action)
                self.remember(state, action, reward, state_, done)
                step += 1
                # The target (prediction) network is synced every 100 environment steps.
                if step % 100 == 0:
                    self.target_train()
                # The agent replays memory every 4 environment steps.
                if step % 4 == 0:
                    self.replay()
                rewards += reward
                state = state_
            print("Training episode {} | total reward: {}".format(i, rewards))
# This returns the optimal action.
def get_optimal_action(self, state):
return np.argmax(self.target_q.predict(state.reshape(self.dimensions))[0])
# This saves the weights of the neural networks.
def save(self, f, target_f):
self.q.save_weights(f)
self.target_q.save_weights(target_f)
# This loads the weights of the neural networks from a file.
def load(self, f, target_f):
self.q.load_weights(f)
self.target_q.load_weights(target_f)
# Environment
class Wordle(Env):
def __init__(self):
self.words = WORDS
self.action_space = spaces.Discrete(len(WORDS))
self.observation_space = spaces.Box(low=0, high=7, shape=(1, 26), dtype=np.int8)
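        # One discrete action per candidate word; the observation is a 26-slot vector recording
        # what is known about each letter (see the encoding legend in _step, values 0-7).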
self.test = False
self.prev_answer = self.words[np.random.randint(0, len(self.words))]
# This resets the environment, chooses a new Wordle solution, and returns the initial game state.
def reset(self):
answer = self.words[np.random.randint(0, len(self.words))]
        # In test mode, always use a newly drawn solution; in training mode, switch to a new
        # solution with probability 0.1 and otherwise keep the previous answer.
        self.answer = answer if self.test else answer if np.random.random() < 0.1 else self.prev_answer
self.prev_answer = self.answer
self.state = np.zeros(26, dtype=np.int8)
self.counter = 0
self.success = set()
self.used = set()
return np.copy(self.state)
# This takes one step in the Wordle game with guess `action`, and returns the new state, reward, done status, and information.
def step(self, action):
reward = self._get_reward(action)
self._step(action)
done = self._get_done()
info = self._get_info()
return np.copy(self.state), reward, done, info
    # Not implemented; closing the environment would tear down the game display.
    def close(self):
        pass
    # Not implemented; rendering would draw the current game board.
    def render(self):
        pass
# This is a helper function for taking a step in the game.
def _step(self, action):
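        # Letter-state encoding: 0 = letter not yet guessed, 1 = guessed but absent from the
        # answer, 2 = present in the answer at an unconfirmed position, i + 3 = confirmed at
        # position i (values 3 through 7).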
for i, c in enumerate(self.words[action]):
a = ord(c) - ord("a")
if self.state[a] == 0:
self.state[a] = 2 if c in self.answer else 1
if self.state[a] == 2 and self.answer[i] == c:
self.state[a] = i + 3
self.success.add(i)
self.counter += 1
self.used.add(action)
# This calculates the reward given guess `action`.
def _get_reward(self, action):
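        # Reward shaping: +1000 for guessing the answer, -500 for repeating a previous guess,
        # a -1000 base penalty on a failed final (6th) guess, plus per-letter bonuses and
        # penalties that depend on what the accumulated letter state already reveals.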
word = self.words[action]
if word == self.answer:
return 1000
        if action in self.used:
return -500
reward = -1000 if self.counter == 5 else 0
for i, c in enumerate(word):
a = ord(c) - ord("a")
if self.state[a] >= 3:
reward += 100 if self.answer[i] == c else -100
elif self.state[a] == 1:
reward += -100
else:
reward += 100 if self.answer[i] == c else 50 if c in self.answer else 0
return reward
# This determines whether the game is over.
def _get_done(self):
return self.counter == 6 or len(self.success) == 5
# This returns metadata about the current game.
def _get_info(self):
return {
"answer": self.answer,
"state": self.state,
"counter": self.counter,
"success": self.success,
"used": self.used
}
# This sets the environment to testing.
def set_test(self):
self.test = True
# This sets the environment to training.
def set_explore(self):
self.test = False
# Initialization
env = Wordle()
agent = Agent(env)
# Training
agent.train(10000)
# This simulates n games of Wordle in testing mode.
def simulate(agent, n):
agent.env.set_test()
for _ in range(n):
data = []
state = agent.env.reset()
done = False
while not done:
action = agent.get_optimal_action(state)
state_, reward, done, info = agent.env.step(action)
data.append((state, agent.env.words[action], reward, state_, done, info))
state = state_
print(data)
# Testing
simulate(agent, 1)
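# Optionally persist the trained weights for later reuse; the file names here are arbitrary
# placeholders.
# agent.save("wordle_q.h5", "wordle_target_q.h5")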