In [34]:
import copy
import io
import ipywidgets as widgets
from ipywidgets import Button, HBox, VBox
from IPython.display import display
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances, cosine_distances
import spacy
from tqdm.notebook import trange, tqdm

In [35]:
DATA_PATH = "../data/"

In [36]:
nlp = spacy.load("en_core_web_sm")

In [37]:
with open(DATA_PATH + "./words_dictionary.json", "r") as f:
    words_dictionary = json.load(f)

In [38]:
def load_vectors_pd(fname, total_to_load=10000):
    fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
    n, d = map(int, fin.readline().split())
    data = {}
    index = []
    total_loaded = 0
    for line in fin:
        tokens = line.rstrip().split(' ')
        data[tokens[0]] = list(map(float, tokens[1:]))
        total_loaded += 1
        if total_loaded >= total_to_load:
            break
    return pd.DataFrame(data)

javascript to pull the codenames from the website


```console.log(`red = ${Array.from(document.querySelectorAll('[aria-label="red"')).map(el => (Array.from(el.children[0].children).filter(el=> (el.tagName=="SECTION"))[0].children[0].innerHTML))}`);
console.log(`blue = ${Array.from(document.querySelectorAll('[aria-label="blue"')).map(el => (Array.from(el.children[0].children).filter(el=> (el.tagName=="SECTION"))[0].children[0].innerHTML))}`);
console.log(`gray = ${Array.from(document.querySelectorAll('[aria-label="gray"')).map(el => (Array.from(el.children[0].children).filter(el=> (el.tagName=="SECTION"))[0].children[0].innerHTML))}`);
console.log(`black = ${Array.from(document.querySelectorAll('[aria-label="black"')).map(el => (Array.from(el.children[0].children).filter(el=> (el.tagName=="SECTION"))[0].children[0].innerHTML))}`);```

In [39]:
# type in codenames here

red_codenames = ['pit', 'newton', 'elephant', 'sub', 'street', 'model', 'lemon', 'lock']
blue_codenames = ['judge', 'paddle', 'volcano', 'bed', 'break', 'driver', 'ram', 'hotel']
beige_codenames = ['snow', 'tattoo', 'guitar', 'paint', 'trick', 'walrus', 'hook']
black_codenames = ["squirrel"]

red_codenames = ['fog', 'tornado', 'poison', 'purse', 'night', 'brush', 'aztec', 'blade']
blue_codenames = ['crab', 'balloon', 'diamond', 'file', 'state', 'ruler', 'sound', 'dollar', 'drum']
beige_codenames = ['shower', 'tea', 'university', 'bomb', 'leather', 'cover', 'tattoo']
black_codenames = ['mouse']

red_codenames = ['duck', 'round', 'wool', 'bulb', 'earth', 'army', 'bear', 'ant']
blue_codenames = ['server', 'spell', 'desk', 'knight', 'ham', 'hose', 'mug', 'comb', 'horse']
beige_codenames = ['storm', 'green', 'staff', 'tie', 'dust', 'space', 'watch']
black_codenames = ['anthem']


red_codenames = ['SOLDIER', 'NOVEL', 'CELL', 'FACE', 'JELLYFISH', 'HAWAII', 'MATCH', 'RICE']
blue_codenames = ['SHAKESPEARE', 'RUST', 'COFFEE', 'GERMANY', 'BOIL', 'GOLD', 'NAPOLEON', 'HIDE', 'PYRAMID']
beige_codenames = ['HAND', 'WOOD', 'PLAY', 'VET', 'DUCK', 'BRICK', 'CABLE']
black_codenames = ['ROBIN']

In [40]:
red_codenames = ['ICE', 'SQUASH', 'GAS', 'BUCK', 'MARACAS', 'COWBOY', 'NOSE', 'PEN']
blue_codenames = ["SPIDER", 'CHANGE',  "PIRATE", 'BOOT', 'MOTHER', 'MOSCOW', 'FAIR', 'COAST', 'POLICE']
beige_codenames = ["FISH", "KID",  'ENGINE', 'PIPE', 'DOG', 'OPERA', 'STICKER']
black_codenames = ['BOND']

In [41]:
red_codenames = ['TEAM', 'COLLAR', 'RAT', 'BICYCLE', 'HAWK', 'BEAR', 'MUMMY', 'PINE', 'VIKING']
blue_codenames = ['BRIDGE', 'MOTHER', 'LEAF', 'AMERICA', 'CHAIN', 'BATTLESHIP', 'SKULL', 'DRESSING']
beige_codenames = ['NIGHT', 'WIND', 'WHALE', 'FOREST', 'SMUGGLER', 'SWAMP', 'WHEEL']
black_codenames = ['SANTA']

In [42]:
red_codenames = list(map(lambda x: x.lower(), red_codenames))
blue_codenames = list(map(lambda x: x.lower(), blue_codenames))
beige_codenames = list(map(lambda x: x.lower(), beige_codenames))
black_codenames = list(map(lambda x: x.lower(), black_codenames))

In [43]:
all_codenames = red_codenames + blue_codenames + beige_codenames + black_codenames

assert len(all_codenames) == 25

In [44]:
codenames_to_lemmas = {}
for codename in all_codenames:
    doc = nlp(codename)
    for token in doc:
        codenames_to_lemmas[codename] = token.lemma_
codenames_to_lemmas["viking"] = "viking"
codenames_to_lemmas["dressing"] = "dressing"


for codename_set in [blue_codenames, red_codenames, beige_codenames, black_codenames]:
    for i in range(len(codename_set)):
        codename_set[i] = codenames_to_lemmas[codename_set[i]]

In [45]:
vectors = load_vectors_pd(DATA_PATH + "crawl-300d-2M.vec", 200000)

In [46]:
vector_word_to_lemma_path = DATA_PATH + "/vector_word_to_lemma_v2.json"
with open(vector_word_to_lemma_path, "r") as f:
    vector_word_to_lemma = json.load(f)
# vector_word_to_lemma = dict()

for i in trange(vectors.shape[1]):
    word = vectors.columns[i]
    if word not in vector_word_to_lemma:
        doc = nlp(word)
        for token in doc:
            vector_word_to_lemma[word] = token.lemma_
            # there is punctuation that must be removed, signs and such, that occur at the end of a word
            # so we stop after reading the first word (I guess this works, at least none of these are showing
            # up in my spot checks now
            break

with open(vector_word_to_lemma_path, "w") as f:
    json.dump(vector_word_to_lemma, f)

  0%|          | 0/200000 [00:00<?, ?it/s]

In [47]:
# with open(vector_word_to_lemmadd_path, "w") as f:
#     json.dump(vector_word_to_lemma, f)

# vector_word_to_lemma["viking"] = "viking"

In [48]:
# distinct words in vocabulary versus total words
print(len(vector_word_to_lemma.keys()))
print(len(set(vector_word_to_lemma.values())))

200000
124348


In [49]:
lemmas = set([vector_word_to_lemma[word] for word in vectors.columns])
for codename in all_codenames:
    assert(codename in lemmas)

In [50]:
def compare_codename_to_word(codename, word):
    lower_word = word.lower()
    lower_lemma = vector_word_to_lemma[word].lower()
    return codename not in lower_lemma and lower_lemma not in codename and codename not in lower_word and lower_word not in codename
compare_codename_to_word("model", "Modes")

def transform_cos_distance(cos_distance, a, b):
    return np.exp(a - b * cos_distance) / (1 + np.exp(a - b * cos_distance))

In [51]:
all_cos_distances = pd.DataFrame(cosine_distances(vectors[all_codenames].transpose(), vectors.transpose()), index=all_codenames, columns=vectors.columns)
all_prob_from_cosine = transform_cos_distance(all_cos_distances, a=12, b=16)

In [52]:
words_to_keep = []
for word in all_prob_from_cosine.columns:
    keep_word = True
    for codename in all_prob_from_cosine.index:
        if compare_codename_to_word(codename, word) is False:
            keep_word = False
    if word not in words_dictionary:
        keep_word = False
    if keep_word:
        words_to_keep.append(word)

In [53]:
total_words_to_print = 0
for word in reversed(all_prob_from_cosine.columns):
    if word not in words_dictionary:
        print(word)
        total_words_to_print += 1
    if total_words_to_print >= 20:
        break
    # print("word" + word)

Aniket
Nahin
snowfields
Fwy
work-outs
e39
Haniyeh
Ravnica
well-directed
Refundable
Rastafarians
accoglienza
Maladies
STARK
Currumbin
Kawabata
Ced
Dorin
MANSION
Dohrn


In [54]:
def colors_from_codenames(s, cutoff_for_bold=0.5):
    colors = []
    for idx in s.index:
        if idx in red_codenames:
            if s[idx] >= cutoff_for_bold:
                colors.append('background-color: crimson')
            else:
                colors.append('background-color: lightcoral')
        if idx in blue_codenames:
            if s[idx] >= cutoff_for_bold:
                colors.append('background-color: royalblue')
            else:
                colors.append('background-color: cornflowerblue')
        if idx in beige_codenames:
            if s[idx] >= cutoff_for_bold:
                colors.append('background-color: tan')
            else:
                colors.append('background-color: bisque')
        if idx in black_codenames:
            if s[idx] >= cutoff_for_bold:
                colors.append('background-color: dimgray')
            else:
                colors.append('background-color: gainsboro')
    return colors

In [55]:
def utility_function(probs, red_codenames, blue_codenames, beige_codenames, black_codenames, team="red"):
    # We should actually simulate the process of drawing these codenames
    # it's very bad to draw the assassin, so let's try to stay away from him at all costs
    black_util = all_prob_from_cosine.loc[black_codenames, :].sum(axis=0) * -15
    # it's not really that bad to draw a beige tile, but it might stop you from finding the right tile
    opportunity_cost = -0.04
    beige_util = all_prob_from_cosine.loc[beige_codenames, :].sum(axis=0) * opportunity_cost
    red_util = all_prob_from_cosine.loc[red_codenames, :].sum(axis=0)
    blue_util = all_prob_from_cosine.loc[blue_codenames, :].sum(axis=0)
    if team == "red":
        blue_util = -1 * blue_util + opportunity_cost
    if team == "blue":
        red_util = -1 * red_util + opportunity_cost
    if team != "red" and team != "blue":
        return Exception
    return black_util + beige_util + red_util + blue_util

In [56]:
def display_candidate_clues(team_kw, **kwargs):
    assert team_kw in {"red", "blue"}
    # 9, 14 was pretty good
    def filt_codenames(codenames):
        return [codename for codename in codenames if kwargs[codename]]
    utility_by_word = utility_function(all_prob_from_cosine,
                                       red_codenames=filt_codenames(red_codenames),
                                       blue_codenames=filt_codenames(blue_codenames),
                                       beige_codenames=filt_codenames(beige_codenames),
                                       black_codenames=filt_codenames(black_codenames),
                                       team=team_kw)
    codenames_to_display = filt_codenames(all_codenames)
    candidate_words = utility_by_word[words_to_keep].sort_values().tail(48).index
    candidate_probs = all_prob_from_cosine[candidate_words]
    for i in np.arange(0, len(candidate_words), 16):
        display(candidate_probs.loc[codenames_to_display].iloc[:, i:(i + 16)].style.apply(colors_from_codenames, axis=0))

In [68]:
def display_candidate_clues(team_kw, **kwargs):
    assert team_kw in {"red", "blue"}
    # 9, 14 was pretty good
    def filt_codenames(codenames):
        return [codename for codename in codenames if kwargs[codename]]
    utility_by_word = utility_function(all_prob_from_cosine,
                                       red_codenames=filt_codenames(red_codenames),
                                       blue_codenames=filt_codenames(blue_codenames),
                                       beige_codenames=filt_codenames(beige_codenames),
                                       black_codenames=filt_codenames(black_codenames),
                                       team=team_kw)
    codenames_to_display = filt_codenames(all_codenames)
    candidate_words = list(utility_by_word[words_to_keep].sort_values().tail(48).index) + ["animal"]
    candidate_probs = all_prob_from_cosine[candidate_words]
    for i in np.arange(0, len(candidate_words), 16):
        display(candidate_probs.loc[codenames_to_display].iloc[:, i:(i + 16)].style.apply(colors_from_codenames, axis=0))

In [69]:
checkboxes_dict = dict((codename, widgets.Checkbox(True, description=codename))  for codename in all_codenames)
# _ = widgets.interact(display_candidate_clues, team="blue", **checkboxes_dict)

In [58]:
# checkboxes_dict

In [70]:
interactive = widgets.interactive(display_candidate_clues, team_kw=["blue", "red"], **checkboxes_dict)

In [71]:
interactive.children[0]

Dropdown(description='team_kw', options=('blue', 'red'), value='blue')

In [72]:
random_order_codenames = np.random.choice(all_codenames, 25)
for i in range(5):
    print(random_order_codenames[(i * 5):(i * 5 + 5)])

['viking' 'pine' 'team' 'leaf' 'whale']
['battleship' 'leaf' 'bridge' 'dressing' 'forest']
['team' 'viking' 'santa' 'night' 'wheel']
['night' 'forest' 'leaf' 'viking' 'swamp']
['collar' 'smuggler' 'smuggler' 'smuggler' 'chain']


In [74]:
boxes = []
for codenames in [red_codenames, blue_codenames, beige_codenames, black_codenames]:
    # print(codenames)
    box = [checkboxes_dict[codename] for codename in codenames]
    boxes.append(VBox(box))
HBox(boxes)

HBox(children=(VBox(children=(Checkbox(value=True, description='team'), Checkbox(value=True, description='coll…

In [75]:
interactive.children[-1]

Output()