In [363]:
import gensim
import inflect
import random
import numpy as np

In [None]:
# Load pretrained word2vec embeddings from the gzipped binary GoogleNews file.
# `limit` caps how many vectors are read (per the gensim docs), trading
# vocabulary coverage for load time and memory.
# The result is kept in the module-level `model` that the helper functions
# below read directly.
model = gensim.models.KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin.gz', binary=True, limit=200000
)

In [226]:
# Read the candidate board words, one per line, trimming surrounding
# whitespace (including the trailing newline) from each entry.
with open("./words.txt") as f:
    words = [line.strip() for line in f]

In [366]:
def generate_board(word_list):
    """Deal a random 5x5 Codenames board from ``word_list``.

    Twenty-five distinct words are drawn without replacement and split into
    9 red, 8 blue, 7 neutral and 1 assassin word.

    Args:
        word_list: Iterable of candidate words. Repeated entries are
            collapsed first, so the same word can never appear twice on the
            board.

    Returns:
        A tuple ``(board, red, blue, neutral, assassin)``. ``board`` is a 5x5
        numpy array holding every drawn word in shuffled order; the other
        four items are lists of the words assigned to each role.

    Raises:
        ValueError: If ``word_list`` holds fewer than 25 distinct words.
            (Rejection sampling would otherwise never terminate.)
    """
    n_red, n_blue, n_neutral, n_assassin = 9, 8, 7, 1
    n_total = n_red + n_blue + n_neutral + n_assassin

    # dict.fromkeys drops duplicates while keeping first-seen order.
    candidates = list(dict.fromkeys(word_list))
    if len(candidates) < n_total:
        raise ValueError(
            "generate_board needs at least %d distinct words, got %d"
            % (n_total, len(candidates))
        )

    # A single draw without replacement guarantees the roles are disjoint.
    drawn = random.sample(candidates, n_total)
    red = drawn[:n_red]
    blue = drawn[n_red:n_red + n_blue]
    neutral = drawn[n_red + n_blue:n_total - n_assassin]
    assassin = drawn[n_total - n_assassin:]

    # Shuffle a copy so board position reveals nothing about a word's role.
    board = red + blue + neutral + assassin
    random.shuffle(board)
    board = np.reshape(board, (5, 5))
    return board, red, blue, neutral, assassin

def guess(clue, words, n):
    """Pick up to ``n`` words that the model rates as closest to ``clue``.

    Each entry of ``words`` is scored with ``model.similarity(clue, word)``
    using the module-level ``model``. The ``n`` best-scoring words are kept,
    and of those only the ones scoring above 0.1 are returned.

    Args:
        clue: The clue word to compare against.
        words: Iterable of candidate words.
        n: Maximum number of guesses to return.

    Returns:
        A list of words ordered from most to least similar; it may be shorter
        than ``n`` (or empty) when scores fall at or below 0.1.
    """
    scores = {candidate: model.similarity(clue, candidate) for candidate in words}
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return [candidate for candidate, score in ranked[:n] if score > 0.1]

def clean_clue(word1, word2):
    """Return True when ``word2`` is an acceptable clue for ``word1``.

    The comparison is case-insensitive. A candidate is rejected when either
    word contains the other, when the candidate contains an underscore, or
    when the candidate equals the ``inflect`` plural of ``word1``.

    Args:
        word1: The word the clue is meant to point at.
        word2: The candidate clue.

    Returns:
        ``True`` if none of the rejection rules apply, otherwise ``False``.
    """
    pluralizer = inflect.engine()
    target, candidate = word1.lower(), word2.lower()

    # Either word appearing inside the other makes the clue too obvious.
    if target in candidate or candidate in target:
        return False
    # Underscores mark multi-word phrase tokens in the candidate.
    if "_" in candidate:
        return False
    return candidate != pluralizer.plural(target)

def give_clue(words, bad_words, topn=10):
    """Suggest a clue linking the two most similar words in ``words``.

    Every unordered pair from ``words`` is scored with ``model.similarity``
    and the best-scoring pair becomes the target. Candidate clues come from
    ``model.most_similar`` with the target pair as positives and ``bad_words``
    as negatives; any candidate that ``clean_clue`` rejects for either target
    word is discarded. The chosen pair and the raw candidate list are printed
    for inspection.

    Args:
        words: List of words to give a clue for; at least two are required.
        bad_words: Words passed as negatives to ``model.most_similar``.
        topn: How many candidates to request from the model (default 10).
            Raise it if every candidate gets filtered out.

    Returns:
        A string of the form ``"<clue> for 2"``.

    Raises:
        ValueError: If ``words`` has fewer than two entries, or if no
            candidate survives the ``clean_clue`` filter.
    """
    if len(words) < 2:
        raise ValueError("give_clue needs at least two words to link")

    similarities = {
        (first, second): model.similarity(first, second)
        for position, first in enumerate(words)
        for second in words[position + 1:]
    }

    max_correlated_words = max(similarities, key=similarities.get)
    print(max_correlated_words)
    c_words = list(max_correlated_words)
    clues = model.most_similar(positive=c_words, negative=bad_words, topn=topn)
    print(clues)
    clues_dict = dict(clues)
    # A clue must be acceptable for both target words, not just one of them.
    cleaned_clues = [
        candidate
        for candidate, _ in clues
        if all(clean_clue(target, candidate) for target in c_words)
    ]
    if not cleaned_clues:
        raise ValueError(
            "no usable clue among the top %d candidates for %r; try a larger topn"
            % (topn, max_correlated_words)
        )
    return max(cleaned_clues, key=clues_dict.get) + " for 2"

In [398]:
# Deal a fresh random board from the loaded word list. No random seed is set
# in the visible cells, so each run produces a different board.
board, red, blue, neutral, assassin = generate_board(words)    

In [399]:
# Show each role's words on its own line: red, blue, neutral, then assassin.
print(red, blue, neutral, assassin, sep="\n")

['back', 'worm', 'tower', 'robin', 'switch', 'fly', 'capital', 'dice', 'night']
['note', 'queen', 'washer', 'cell', 'millionaire', 'racket', 'yard', 'england']
['time', 'bond', 'eye', 'shot', 'ivory', 'olive', 'march']
['king']


In [400]:
# Ask for a red-team clue that steers away from blue's words and the assassin.
give_clue(words=red, bad_words=blue + assassin)

('worm', 'robin')
[('SPOT', 0.15648123621940613), ('OWL', 0.15048080682754517), ('Framework', 0.1483299881219864), ('Avian_Influenza', 0.14176473021507263), ('EAB', 0.14126001298427582), ('Biological_Diversity', 0.13827215135097504), ('USACE', 0.137831449508667), ('XCP', 0.1364476978778839), ('raptors', 0.13546928763389587), ('UFO_sightings', 0.13386830687522888)]


'SPOT for 2'

In [378]:
# Spot-check the similarity score the model assigns to a pair of related words.
model.similarity("dash", "sprint")

0.44282317