Computerizing Codenames - Deepnote + SpaCy Competition

How to play Codenames

How SpaCy can Play Codenames Without Training or Context

# Dependencies for the Codenames demo (stdlib first, then third-party).
import multiprocessing as mp
import random as rd
from functools import partial
from pprint import pprint

import numpy as np
import pandas as pd
import prettytable as pt
import spacy
from numpy import dot
from numpy.linalg import norm

# spaCy's `en_core_web_lg` English pipeline; its word vectors are used below.
nlp = spacy.load('en_core_web_lg')

word_vector = nlp('word').vector
print(word_vector.shape)  # this model makes length 300 vectors
print(word_vector[:10])  # print the first 10 components

# Cosine similarity by hand: dot product divided by the product of the norms.
king_vec = nlp('king').vector
king_norm = norm(king_vec)

# A related word ...
queen_vec = nlp('queen').vector
queen_norm = norm(queen_vec)
cos_sim = dot(king_vec, queen_vec) / (king_norm * queen_norm)
print(f'King and Queen have a similarity score of: {cos_sim}')

# ... versus an unrelated one.
raining_vec = nlp('raining').vector
raining_norm = norm(raining_vec)
cos_sim = dot(king_vec, raining_vec) / (king_norm * raining_norm)
print(f'King and raining have a similarity score of: {cos_sim}')

# Same comparison using spaCy's built-in similarity instead of the manual formula.
king_doc = nlp('king')
print(f"SpaCy's similarity value for Queen is {king_doc.similarity(nlp('queen'))}")
print(f"SpaCy's similarity value for raining is {king_doc.similarity(nlp('raining'))}")

# guessing based on hints
# note for simplicity I ignore casing
def guess_a_word(hint_word, hint_number, game_words):
    """Return the board words most similar to a hint, best match first.

    Args:
        hint_word: The clue given to the guesser (a string).
        hint_number: How many words to guess.
        game_words: Indexable sequence of the words on the board.

    Returns:
        A list with up to ``hint_number`` entries of ``game_words``, ordered
        from highest to lowest spaCy similarity with ``hint_word``.
    """
    # Parse the hint once; it is identical for every board word.
    hint_doc = nlp(hint_word.lower())
    scores = [nlp(str(word).lower()).similarity(hint_doc) for word in game_words]
    # argsort is ascending, so reverse it to get the most similar index first.
    ranked_indices = np.argsort(scores)[::-1]
    return [game_words[idx] for idx in ranked_indices[:hint_number]]

# Draw a reproducible 25-word sample from the word list and show it as a 5x5 grid.
all_words = pd.read_csv('words.txt', header=None).squeeze()
game_words = all_words.sample(25, random_state=42)
board_grid = game_words.values.reshape((5, 5))
pprint(board_grid)

# Ask the computer for its two best guesses for this hint.
hint_to_computer = 'ocean'
sample_guesses = guess_a_word(hint_to_computer, 2, game_words.tolist())
print(sample_guesses)

# Disabled cell: builds 'similarity_matrix.csv' (rows = hint words, columns =
# game words) by calling spaCy for every hint/word pair.
# NOTE(review): the original indentation of this cell was lost; the placement
# of `print(data.head())` after the loops is an assumption -- confirm.
#
# # only run me if you must!
# # preprocess the hint/game words for efficiency
# hint_words = pd.read_csv('hint_words.csv', index_col=0).hints.tolist()
# guess_words = pd.read_csv('words.txt', skiprows=0).iloc[:, 0].tolist()
# data = pd.DataFrame(np.zeros((len(hint_words), len(guess_words))))
# for i in range(len(hint_words)):
#     for j in range(len(guess_words)):
#         data.iloc[i, j] = nlp(guess_words[j]).similarity(nlp(hint_words[i]))
# print(data.head())
# data.index = hint_words
# data.columns = guess_words
# data.index = data.index.str.lower()
# data.columns = data.columns.str.lower()
# data.to_csv('similarity_matrix.csv')

# Precomputed similarities: one row per hint word, one column per board word.
similarity_matrix = pd.read_csv('similarity_matrix.csv', index_col=0).drop_duplicates()

# ANSI escape codes used to colour terminal output.
R = "\033[1;31m"  # RED
G = '\033[1;32m'  # GREEN
B = "\033[1;34m"  # Blue
N = "\033[0m"  # Reset
DARK_GREY = '\033[1;30m'

# Sample a reproducible 16-word board.
rd.seed(42)
word_bank = rd.sample(similarity_matrix.columns.tolist(), 16)

# One assassin ("black") card plus five cards each of blue, red and neutral.
color_bank = [DARK_GREY + 'black' + N]
for _ in range(5):
    color_bank.extend([B + 'blue' + N, R + 'red' + N, N + 'neutral' + N])

# Sorting groups equal labels together; pairing them with the randomly
# sampled word_bank is what actually assigns a colour to each word.
translation_dict = pd.DataFrame(sorted(color_bank))
translation_dict.columns, translation_dict.index = ['color'], word_bank
print(translation_dict)

# These variables will be useful later
# Map each coloured label back to its plain colour name.
label_names = {
    B + 'blue' + N: 'blue',
    R + 'red' + N: 'red',
    N + 'neutral' + N: 'neutral',
    DARK_GREY + 'black' + N: 'black',
}
color_dict = {
    word: label_names[label] for word, label in translation_dict.color.items()
}
blue_words = [word for word, name in color_dict.items() if name == 'blue']
red_words = [word for word, name in color_dict.items() if name == 'red']
neutral_words = [word for word, name in color_dict.items() if name == 'neutral']
black_word = [word for word, name in color_dict.items() if name == 'black'][0]
# Keep the legacy name pointing at the real assassin card.  It used to be
# word_bank[15], which is not the black card once the labels are sorted.
assasin = black_word

def hint_giver(sim_mat, game_board, color='red'):
    """Pick the hint that links the most board words of ``color``.

    For every hint (row of ``sim_mat``) the board words are ranked by
    similarity. The hint's score is the number of top-ranked words that
    belong to ``color`` before the first word of any other colour. Ties are
    broken by choosing the hint with the lowest penalty, where the penalty is
    ten times its similarity to the assassin word plus its highest similarity
    to an opposing-team word.

    Relies on the module-level ``red_words``, ``blue_words``, ``black_word``
    and ``color_dict`` describing the current board.

    Args:
        sim_mat: DataFrame of similarities indexed by hint word, with a
            column for every word on the board.
        game_board: DataFrame whose index holds the words on the board.
        color: Team to give the hint for, ``'red'`` (default) or ``'blue'``.

    Returns:
        A ``(best_score, hint)`` tuple. ``hint`` is always a single hint
        word, or ``None`` when ``sim_mat`` has no rows.
    """
    # no need to compute words not in our game
    game_similarity = sim_mat.loc[:, game_board.index.tolist()]
    bad_words = red_words if color == 'blue' else blue_words

    best_score = 0
    candidates = []  # (hint, similarity row) pairs tied at best_score
    for hint, similarities in game_similarity.iterrows():
        # we only care about hints that have consecutive words in our color
        score = 0
        for game_word in similarities.sort_values(ascending=False).index:
            if color_dict[game_word] != color:
                break
            score += 1

        if score > best_score:  # update if better
            best_score = score
            candidates = [(hint, similarities)]
        elif score == best_score:  # add if tied
            candidates.append((hint, similarities))

    if not candidates:
        return best_score, None
    if len(candidates) == 1:
        # Return the word itself (not a one-element list) so callers can
        # always treat the hint as a string.
        return best_score, candidates[0][0]

    # tie breaker!! penalize the assassin heavily, and enemy words a lot
    penalties = [
        row[black_word] * 10 + row[bad_words].max()
        for _, row in candidates
    ]
    return best_score, candidates[int(np.argmin(penalties))][0]


print(hint_giver(similarity_matrix, translation_dict))

# lets play codenames as the hint giver!
def print_board(game_words, show_color=True):
    """Print the 16 board words as a 4x4 table with A-D column headers.

    Args:
        game_words: Sequence of exactly 16 words, laid out row by row.
        show_color: When true, prefix each word with the ANSI colour of its
            team (looked up in the module-level ``color_dict``) and append
            the reset code; otherwise print the plain words.
    """
    ansi_by_color = {'red': R, 'blue': B, 'neutral': N, 'black': DARK_GREY}
    board = np.asarray(game_words).reshape(4, 4)

    tab = pt.PrettyTable()
    tab.field_names = [' ', 'A', 'B', 'C', 'D']
    for row_number, row in enumerate(board, start=1):
        if show_color:
            cells = [ansi_by_color[color_dict[word]] + word + N for word in row]
        else:
            cells = list(row)
        tab.add_row([row_number, *cells])
    print(tab)


print_board(word_bank)

# put your guess and number here
hint_to_computer = 'fruit'
how_many_to_hint = 1

board_guesses = guess_a_word(hint_to_computer, how_many_to_hint, word_bank)
print(board_guesses)

# Lets be the guesser
# reset a new board (one we havent seen yet)
rd.seed(56)
word_bank = rd.sample(similarity_matrix.columns.tolist(), 16)

# One assassin ("black") card plus five cards each of blue, red and neutral.
color_bank = [DARK_GREY + 'black' + N]
for _ in range(5):
    color_bank.extend([B + 'blue' + N, R + 'red' + N, N + 'neutral' + N])

# Sorting groups equal labels together; pairing them with the randomly
# sampled word_bank is what actually assigns a colour to each word.
translation_dict = pd.DataFrame(sorted(color_bank))
translation_dict.columns, translation_dict.index = ['color'], word_bank

# Map each coloured label back to its plain colour name.
label_names = {
    B + 'blue' + N: 'blue',
    R + 'red' + N: 'red',
    N + 'neutral' + N: 'neutral',
    DARK_GREY + 'black' + N: 'black',
}
color_dict = {
    word: label_names[label] for word, label in translation_dict.color.items()
}
blue_words = [word for word, name in color_dict.items() if name == 'blue']
red_words = [word for word, name in color_dict.items() if name == 'red']
neutral_words = [word for word, name in color_dict.items() if name == 'neutral']
black_word = [word for word, name in color_dict.items() if name == 'black'][0]
# Keep the legacy name pointing at the real assassin card.  It used to be
# word_bank[15], which is not the black card once the labels are sorted.
assasin = black_word

# Show the board without colours, then let the computer give the hint.
print_board(word_bank, False)
hint_number, computers_hint_word = hint_giver(similarity_matrix, translation_dict)
print(computers_hint_word, ', ', hint_number)

# Reveal the red team's words, then what the computer itself would have guessed.
print(f'The right answers were: \n{red_words}')
print('\n')
computer_picks = guess_a_word(computers_hint_word, hint_number, word_bank)
print(f'The computer would have guessed the following (in order):\n{computer_picks}')