In [3]:
import gensim
import inflect
import random
import numpy as np

In [4]:
# Load pretrained word2vec vectors (binary format) from a file in the current
# working directory. `limit=200000` caps how many vectors are read from the
# file -- presumably to bound load time and memory; confirm before changing.
model = gensim.models.KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000
)

In [5]:
# Candidate vocabulary: one entry per line of words.txt, with surrounding
# whitespace (including the trailing newline) stripped from each line.
with open("./words.txt") as f:
    words = [line.strip() for line in f]

In [355]:
def generate_board(word_list):
    """Deal a random 5x5 board and split its 25 words into four groups.

    Args:
        word_list: Indexable collection of candidate words (supports ``len``
            and integer indexing). Must contain at least 25 entries.

    Returns:
        A tuple ``(board, red, blue, neutral, assassin)``:
        ``board`` is a 5x5 NumPy array holding all 25 dealt words in shuffled
        order; ``red`` (9 words), ``blue`` (8), ``neutral`` (7) and
        ``assassin`` (1) are lists partitioning those same words.

    Raises:
        ValueError: if ``word_list`` has fewer than 25 entries. (The previous
            rejection-sampling loops would spin forever in that case.)
    """
    n_red, n_blue, n_neutral, n_assassin = 9, 8, 7, 1
    total = n_red + n_blue + n_neutral + n_assassin  # 25 == 5 * 5

    if len(word_list) < total:
        raise ValueError(
            "generate_board needs at least %d words, got %d" % (total, len(word_list))
        )

    # Sample *positions* without replacement: each entry of word_list can be
    # dealt at most once, and no retry loop is needed.
    positions = random.sample(range(len(word_list)), total)
    dealt = [word_list[p] for p in positions]

    red = dealt[:n_red]
    blue = dealt[n_red:n_red + n_blue]
    neutral = dealt[n_red + n_blue:n_red + n_blue + n_neutral]
    assassin = dealt[n_red + n_blue + n_neutral:]

    # Shuffle a copy so the group lists keep their own order while the board
    # layout does not reveal which group a word belongs to.
    board = list(dealt)
    random.shuffle(board)
    board = np.reshape(board, (5, 5))
    return board, red, blue, neutral, assassin

def guess(clue, words, n):
    """Return up to ``n`` entries of ``words`` that best match ``clue``.

    Each word is scored with ``model.similarity(clue, word)``. The words are
    ranked from highest to lowest score, the first ``n`` are taken, and of
    those only the ones scoring above 0.1 are returned (best first).
    """
    scores = {candidate: model.similarity(clue, candidate) for candidate in words}
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return [candidate for candidate, score in ranked[:n] if score > 0.1]

def clean_clue(word1, word2):
    """Return True when ``word2`` is an acceptable clue with respect to ``word1``.

    Both words are compared in lowercase. The result is False when either
    word contains the other, when ``word2`` contains an underscore, or when
    ``word2`` equals the plural of ``word1`` as produced by ``inflect``.
    """
    pluralizer = inflect.engine()
    target = word1.lower()
    candidate = word2.lower()

    # One word embedded in the other (in either direction).
    if target in candidate or candidate in target:
        return False
    # Underscore-joined vocabulary entries are rejected outright.
    if "_" in candidate:
        return False
    # Plural forms that the substring checks above did not already catch.
    return candidate != pluralizer.plural(target)

def give_clue(words, bad_words, verbose=True):
    """Pick a clue linking two of ``words`` while steering clear of ``bad_words``.

    Every unordered pair of ``words`` is scored with ``model.similarity`` and
    the pairs are tried from most to least similar. For each pair, up to 10
    candidate clues are requested from ``model.most_similar`` (with
    ``restrict_vocab=10000``) and filtered through ``clean_clue`` against both
    words of the pair. A surviving candidate is rejected when the entry of
    ``bad_words`` closest to it is at least as similar to the candidate as
    either word of the pair is. The first candidate that passes is returned.

    Args:
        words: Sequence of words the clue should point at.
        bad_words: Sequence of words the clue must not point at. May be
            empty, in which case the best cleaned candidate is returned
            without an enemy check.
        verbose: When True (the default, matching the historical behaviour),
            print progress messages about the pairs tried and the candidates
            rejected.

    Returns:
        ``(clue, (word_a, word_b))`` -- the chosen clue and the pair of
        ``words`` it was generated for.

    Raises:
        ValueError: if no pair yields an acceptable clue, including when
            fewer than two words are supplied.
    """
    pair_scores = {}
    for i in range(len(words)):
        for j in range(i + 1, len(words)):
            pair_scores[(words[i], words[j])] = model.similarity(words[i], words[j])

    has_enemies = len(bad_words) > 0

    # A stable descending sort visits pairs in the same order as repeatedly
    # taking the max of the dict and popping it.
    for pair in sorted(pair_scores, key=pair_scores.get, reverse=True):
        if verbose:
            print("Going for:", pair)
        targets = list(pair)
        candidates = model.most_similar(positive=targets, topn=10, restrict_vocab=10000)

        # Candidates are walked in the order returned by most_similar
        # (assumed to be best-first), so the strongest usable clue wins.
        for candidate, _score in candidates:
            if not all(clean_clue(target, candidate) for target in targets):
                continue

            if has_enemies:
                enemy_match = model.most_similar_to_given(candidate, bad_words)
                enemy_sim = model.similarity(enemy_match, candidate)
                # Reject when the closest enemy ties or beats either target.
                if any(enemy_sim >= model.similarity(target, candidate) for target in targets):
                    if verbose:
                        print("Enemy word " + enemy_match + " was too close. Removing " + candidate)
                    continue

            return candidate, tuple(pair)

        if verbose:
            print("Too many enemy correlations. Removing ", pair)

    raise ValueError("give_clue: no acceptable clue found for any pair of the given words")

In [351]:
# Deal one sample board from the loaded vocabulary and keep both the 5x5
# layout and the per-group word lists for inspection.
board, red, blue, neutral, assassin = generate_board(words)

In [341]:
# Show each group of the dealt board on its own line.
print(red, blue, neutral, assassin, sep="\n")

['head', 'limousine', 'capital', 'nurse', 'model', 'mammoth', 'bond', 'snowman', 'back']
['log', 'jet', 'rock', 'turkey', 'fork', 'oil', 'trip', 'stadium']
['bark', 'forest', 'hotel', 'wave', 'robin', 'flute', 'human']
['lap']


In [359]:
# Self-play evaluation: on each freshly dealt board the clue-giver targets a
# pair of red words, then the guesser picks its top two matches for the clue
# from the whole board. A trial counts as correct only when the guesser
# recovers exactly the intended pair, in either order.
N_TRIALS = 1000
random.seed(0)  # fixed seed so the reported accuracy is reproducible on re-run

correct = 0
for trial in range(N_TRIALS):
    board, red, blue, neutral, assassin = generate_board(words)
    clue, intended = give_clue(red, assassin + blue + neutral)
    attempt = tuple(guess(clue, red + blue + neutral + assassin, 2))
    if attempt in (intended, intended[::-1]):
        correct += 1
print(correct / N_TRIALS)

Going for: ('air', 'fly')
Going for: ('spike', 'switch')
Going for: ('human', 'dinosaur')
Enemy word zoo was too close. Removing animal
Enemy word zoo was too close. Removing animals
Going for: ('gold', 'chocolate')
Going for: ('fly', 'cross')
Enemy word rose was too close. Removing flew
Going for: ('copper', 'rose')
Enemy word boom was too close. Removing climbed
Enemy word boom was too close. Removing surged
Enemy word boom was too close. Removing soared
Enemy word field was too close. Removing fell
Going for: ('head', 'arm')
Going for: ('amazon', 'europe')
Going for: ('mass', 'wave')
Going for: ('needle', 'arm')
Going for: ('phoenix', 'amazon')
Enemy word piano was too close. Removing iTunes
Enemy word robot was too close. Removing evil
Enemy word millionaire was too close. Removing mysterious
Enemy word bank was too close. Removing eBay
Enemy word millionaire was too close. Removing suppose
Going for: ('hospital', 'center')
Enemy word doctor was too close. Removing clinic
Going for

0.30281237