How to play Codenames
How spaCy Can Play Codenames Without Training or Context
import spacy
import numpy as np
from numpy import dot
from numpy.linalg import norm
import pandas as pd
import spacy
from pprint import pprint
import random as rd
import multiprocessing as mp
from functools import partial
import prettytable as pt
# Load the large English pipeline; every lookup below goes through it.
nlp = spacy.load('en_core_web_lg')

word_vector = nlp('word').vector
print(word_vector.shape)  # this model produces length-300 vectors
print(word_vector[:10])  # peek at the first 10 components

# Cosine similarity by hand: dot product divided by the product of the norms.
king_vec, queen_vec = nlp('king').vector, nlp('queen').vector
cos_sim = dot(king_vec, queen_vec) / (norm(king_vec) * norm(queen_vec))
print(f'King and Queen have a similarity score of: {cos_sim}')

raining_vec = nlp('raining').vector
cos_sim = dot(king_vec, raining_vec) / (norm(king_vec) * norm(raining_vec))
print(f'King and raining have a similarity score of: {cos_sim}')

# The same two comparisons using spaCy's built-in similarity method.
print(
    f"SpaCy's similarity value for Queen is {nlp('king').similarity(nlp('queen'))}"
)
print(
    f"SpaCy's similarity value for raining is {nlp('king').similarity(nlp('raining'))}"
)
# guessing based on hints
# note for simplicity I ignore casing
def guess_a_word(hint_word, hint_number, game_words):
    """Return the board words most similar to a hint, best match first.

    Both the hint and the board words are lower-cased before comparison, so
    casing is ignored. Similarity is whatever the module-level ``nlp``
    pipeline reports via ``.similarity``.

    Args:
        hint_word: The hint as a string.
        hint_number: How many guesses to return.
        game_words: Board words; must support integer indexing (e.g. a
            list), because the result is built by position.

    Returns:
        A list of at most ``hint_number`` entries of ``game_words``, ordered
        from most to least similar to the hint.
    """
    # Parse the hint once instead of once per board word.
    hint_doc = nlp(hint_word.lower())
    scores = [nlp(str(word).lower()).similarity(hint_doc) for word in game_words]
    # argsort is ascending, so reverse it to rank the best match first.
    ranked = np.argsort(scores)[::-1]
    return [game_words[position] for position in ranked[:hint_number]]
# Draw a reproducible sample of 25 words and show it as a 5x5 grid.
game_words = (
    pd.read_csv('words.txt', header=None)
    .squeeze()
    .sample(n=25, random_state=42)
)
pprint(game_words.values.reshape((5, 5)))

# Give the computer a hint and ask for its two best guesses.
hint_to_computer = 'ocean'
print(guess_a_word(hint_to_computer, 2, game_words.tolist()))
# # only run me if you must!
# # preprocess the hint/game words for efficiency
# hint_words = pd.read_csv('hint_words.csv', index_col=0).hints.tolist()
# guess_words = pd.read_csv('words.txt', skiprows=0).iloc[:, 0].tolist()
# data = pd.DataFrame(np.zeros((len(hint_words), len(guess_words))))
# for i in range(len(hint_words)):
# for j in range(len(guess_words)):
# data.iloc[i, j] = nlp(guess_words[j]).similarity(nlp(hint_words[i]))
# print(data.head())
# data.index = hint_words
# data.columns = guess_words
# data.index = data.index.str.lower()
# data.columns = data.columns.str.lower()
# data.to_csv('similarity_matrix.csv')
# Precomputed similarity scores, loaded with the first CSV column as the index.
similarity_matrix = pd.read_csv('similarity_matrix.csv', index_col=0).drop_duplicates()

# ANSI escape codes used to colour terminal output.
R = "\033[1;31m" # RED
G = '\033[1;32m' # GREEN
B = "\033[1;34m" # Blue
N = "\033[0m" # Reset
DARK_GREY='\033[1;30m'

# Reproducibly pick the 16 words for this board.
rd.seed(42)
word_bank = rd.sample(similarity_matrix.columns.tolist(), 16)

# One black (assassin) label plus five each of blue, red and neutral.
color_bank = [DARK_GREY + 'black' + N] + [B + 'blue' + N, R + 'red' + N, N + 'neutral' + N] * 5

# The labels are sorted as raw strings (escape codes included) before being
# paired with word_bank, so the colour of each word is decided by that sort
# order, not by fixed slices of word_bank.
translation_dict = pd.DataFrame(sorted(color_bank))
translation_dict.columns, translation_dict.index = ['color'], word_bank

# Read the team lists back from translation_dict so they always agree with the
# colours actually assigned above (slicing word_bank directly did not).
red_words = translation_dict.loc[translation_dict.color == R + 'red' + N].index.tolist()
blue_words = translation_dict.loc[translation_dict.color == B + 'blue' + N].index.tolist()
neutral_words = translation_dict.loc[translation_dict.color == N + 'neutral' + N].index.tolist()
assasin = translation_dict.loc[translation_dict.color == DARK_GREY + 'black' + N].index.tolist()[0]
print(translation_dict)
# Team word lists and a word -> colour-name lookup; hint_giver and
# print_board read these module-level names.
blue_words = translation_dict[translation_dict['color'] == B + 'blue' + N].index.tolist()
red_words = translation_dict[translation_dict['color'] == R + 'red' + N].index.tolist()
neutral_words = translation_dict[translation_dict['color'] == N + 'neutral' + N].index.tolist()
black_word = translation_dict[translation_dict['color'] == DARK_GREY + 'black' + N].index.tolist()[0]

# Any word that is not blue, red or neutral is labelled black.
color_dict = {}
for word in translation_dict.index.tolist():
    color_dict[word] = (
        'blue' if word in blue_words
        else 'red' if word in red_words
        else 'neutral' if word in neutral_words
        else 'black'
    )
def hint_giver(sim_mat, game_board, color='red'):
    """Pick the hint whose closest board words are an unbroken run of our colour.

    For every hint (row of ``sim_mat``) the board words are ranked by
    similarity, highest first. The hint's score is the number of leading
    words in that ranking whose ``color_dict`` entry equals ``color``; the
    count stops at the first word of any other colour. The highest-scoring
    hint wins. Ties are broken by choosing the hint with the smallest value
    of ``10 * similarity to the black word + highest similarity to any
    opposing-team word``.

    Reads the module-level ``color_dict``, ``red_words``, ``blue_words`` and
    ``black_word``.

    Args:
        sim_mat: DataFrame with one row per hint and a column for every
            board word.
        game_board: Object whose ``.index`` lists the words on the board.
        color: Team label to give hints for. ``'blue'`` makes the red words
            the opposing team; any other value makes the blue words the
            opposing team.

    Returns:
        ``(best_score, hint)``. ``hint`` is always a single row label of
        ``sim_mat`` (previously a one-element list was returned when there
        was no tie, which callers could not pass on as a hint string), or
        ``None`` if ``sim_mat`` has no rows.
    """
    # No need to look at words that are not in our game.
    game_similarity = sim_mat.loc[:, game_board.index.tolist()]
    bad_words = red_words if color == 'blue' else blue_words

    best_score = 0
    best_words = []
    for hint, similarities in game_similarity.iterrows():
        # Count how many of the most similar words are ours before the
        # first word of another colour appears.
        score = 0
        for game_word in similarities.sort_values(ascending=False).index:
            if color_dict[game_word] != color:
                break
            score += 1
        if score > best_score:  # strictly better: start a new candidate list
            best_score, best_words = score, [hint]
        elif score == best_score:  # tied: keep as a candidate
            best_words.append(hint)

    if not best_words:
        return best_score, None
    if len(best_words) == 1:
        return best_score, best_words[0]

    # Tie-breaker: penalise similarity to the assassin heavily (x10) and
    # similarity to the closest opposing-team word on top of that.
    bad_similarities = [
        game_similarity.loc[hint, black_word] * 10
        + game_similarity.loc[hint, bad_words].max()
        for hint in best_words
    ]
    return best_score, best_words[int(np.argmin(bad_similarities))]
# Show the computer's hint for the current board using hint_giver's default color.
print(hint_giver(sim_mat=similarity_matrix, game_board=translation_dict))
# let's play Codenames as the hint giver!
def print_board(game_words, show_color=True):
    """Print the board as a 4x4 table with numbered rows and columns A-D.

    Reads the module-level ``color_dict`` (word -> colour name) and the ANSI
    escape constants ``R``, ``B``, ``N`` and ``DARK_GREY``.

    Args:
        game_words: Exactly 16 words; they are reshaped into a 4x4 grid,
            filled row by row.
        show_color: When true, each word is wrapped in the escape code for
            its colour followed by the reset code; when false the words are
            printed plain.
    """
    color_dict2 = {'red': R, 'blue': B, 'neutral': N, 'black': DARK_GREY}
    tab = pt.PrettyTable()
    tab.field_names = [' ', 'A', 'B', 'C', 'D']
    board = np.asarray(game_words).reshape(4, 4)
    for row_number, row in enumerate(board, start=1):
        if show_color:
            cells = [color_dict2[color_dict[word]] + word + N for word in row]
        else:
            cells = list(row)
        tab.add_row([row_number, *cells])
    print(tab)
# Show the coloured board, then let the computer guess from our hint.
print_board(word_bank)
hint_to_computer = 'fruit'  # put your guess here
how_many_to_hint = 1  # ...and the number that goes with it
print(guess_a_word(hint_to_computer, how_many_to_hint, word_bank))
# Let's be the guesser
# reset to a new board (one we haven't seen yet)
rd.seed(56)
word_bank = rd.sample(similarity_matrix.columns.tolist(), 16)

# One black (assassin) label plus five each of blue, red and neutral.
color_bank = [DARK_GREY + 'black' + N] + [B + 'blue' + N, R + 'red' + N, N + 'neutral' + N] * 5

# The labels are sorted as raw strings (escape codes included) before being
# paired with word_bank, so the colour of each word is decided by that sort
# order, not by fixed slices of word_bank.
translation_dict = pd.DataFrame(sorted(color_bank))
translation_dict.columns, translation_dict.index = ['color'], word_bank

# Read the team lists back from translation_dict so they always agree with the
# colours actually assigned above (slicing word_bank directly did not).
red_words = translation_dict.loc[translation_dict.color == R + 'red' + N].index.tolist()
blue_words = translation_dict.loc[translation_dict.color == B + 'blue' + N].index.tolist()
neutral_words = translation_dict.loc[translation_dict.color == N + 'neutral' + N].index.tolist()
assasin = translation_dict.loc[translation_dict.color == DARK_GREY + 'black' + N].index.tolist()[0]
# Rebuild the team word lists and the word -> colour-name lookup for the new
# board; hint_giver and print_board read these module-level names.
blue_words = translation_dict[translation_dict['color'] == B + 'blue' + N].index.tolist()
red_words = translation_dict[translation_dict['color'] == R + 'red' + N].index.tolist()
neutral_words = translation_dict[translation_dict['color'] == N + 'neutral' + N].index.tolist()
black_word = translation_dict[translation_dict['color'] == DARK_GREY + 'black' + N].index.tolist()[0]

# Any word that is not blue, red or neutral is labelled black.
color_dict = {}
for word in translation_dict.index.tolist():
    color_dict[word] = (
        'blue' if word in blue_words
        else 'red' if word in red_words
        else 'neutral' if word in neutral_words
        else 'black'
    )
# Show the board without colours so we can play as the guesser.
print_board(word_bank, show_color=False)

# Ask the computer for its hint and how many words it covers.
hint_number, computers_hint_word = hint_giver(
    sim_mat=similarity_matrix, game_board=translation_dict
)
print(computers_hint_word, ', ', hint_number)

# Reveal the red team's words.
print(f'The right answers were: \n{red_words}')
print('\n')

# NOTE(review): guess_a_word lower-cases its hint, so it needs a plain string here.
computer_guesses = guess_a_word(computers_hint_word, hint_number, word_bank)
print(f'The computer would have guessed the following (in order):\n{computer_guesses}')