In [47]:
import asyncio
import json
import os
import time

import nltk
from dotenv import load_dotenv
from nltk.corpus import wordnet as wn
from openai import AsyncOpenAI
from openai import OpenAI
from sentence_transformers import SentenceTransformer

emb_mod = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Read the OpenAI key from the project-level .env file (None when the variable is unset).
load_dotenv("../.env")
openAI_api_key = os.environ.get("OPENAI_API_KEY")

# Download the WordNet data if we haven't already.
# `wn` is a lazily loaded corpus: forcing the load raises LookupError when the
# data files are missing, which is the signal to fetch them.
try:
    wn.ensure_loaded()
except LookupError:
    nltk.download('wordnet')
    nltk.download('omw-1.4')  # Open Multilingual Wordnet, often needed for full WordNet functionality

In [7]:
# Synchronous OpenAI client for this notebook session; requires `OpenAI` to be
# imported from the `openai` package alongside `AsyncOpenAI`.
client = OpenAI(api_key=openAI_api_key)

In [8]:
# Which side the model plays for; both names are interpolated into the system prompt.
team = "blue"
enemy = "red"

# System message for the Codemaster role: rules, quality criteria, examples and
# the required output format (a final comma-separated line of clues).
system_prompt = f"""

You are a human that plays the game Codenames utilising common knowledge.
**Your Task: You will be provided a list of {team} target words, {enemy} enemy words, neutral civilian words, and an Assassin word. Your goal is to generate a list of high-quality and **immediately understandable** clues, each targeting as many of the {team} target words as possible while maximizing clarity and minimizing ambiguity. Each clue also has to absolutely avoid any connection to the assassin word. You might provide up to 5 clues.**

**Clue Generation Principles:**
1. Clue Rules (Crucial):
    * **Strong Semantic Relation:** Must be **immediately and clearly** semantically related to **as many of the target words as possible**, using *primary meanings* or *widely recognized themes* that a typical human guesser would quickly understand.
    * **Single Word:** Must be a single English word.
    * **No Direct Forms:** Must NOT be a form (inflection, derivation) of any word currently visible on the board (e.g., if 'DRIVE' is on board, 'DRIVER', 'DRIVING', 'DRIVES' are invalid).
    * **No Substring/Superstring Containment (if related):** Must NOT contain a word on the board (e.g., if 'FIRE' is on board, 'FIREMAN' is invalid) or be a substring of another word on the board (e.g., if 'SKYLINE' is on the board, 'SKY' is invalid).

2. **Overall Clue Quality (Reinforcing Clarity):**
    * **Strength & Directness of Association:** The clue must strongly connect to *as many of the target words as possible* via their *primary meanings* or *obvious shared themes* (e.g., “fruit” for APPLE, BANANA, ORANGE is clear).
    * **Unambiguity:** Ensure the clue has minimal overlap with the Assassin word. This is the most critical negative constraint.
    * **Minimal Connection to the {enemy} enemy words and civilian words**: After minimizing the overlap with the Assassin word, minimize your clues' associations with the enemy words and civilian words. A bad clue will make your teammate guess an enemy word and give the enemy an advantage.
    * **Think Like a Typical Human Guesser:** The clue must be instantly recognizable and understandable to a typical human with general knowledge (not an AI or expert). Avoid clues that rely on niche, context-specific, or obscure associations.
    * **Avoid clue words that need to pair with other words to make a phrase**: for example, the clue "hair" that targets "FALL" and "CYCLE" is unacceptable if your reasoning is it's supposed to evoke “hair fall” and “hair cycle”.
    * **High-Quality Clues**: high-quality clues represent complex linguistic relationships such as syno-, anto-, hyper-, hypo- and meronymy.

3. **Safety First (Assassin Avoidance):**
    * **ABSOLUTELY AVOID** any clue that has even a remote chance of relating to any concepts related to the ASSASSIN word. This is the highest priority negative constraint.

* **Examples of Good vs. Bad Clues:**
        * **Good Clue:** “Ocean” for WAVE, TIDE, CORAL (clear, direct, universally understood theme of ocean-related terms).
        * **Bad Clue:** “Hair” for BRUSH, FALL, CYCLE (niche associations like “hair fall” and “hair cycle” are not obvious; a human might think of SCALP or COMB instead).
        * **Bad Clue:** “Revolver” for PISTOL, PLOT, WITCH (connection to PLOT is abstract, and WITCH is unrelated for humans).
        * **Bad Clue:** “djedkare” is a bad clue. Even though it refers to the name of the ruler of Egypt in the 25th century B.C., and therefore connects the words “egypt” and “king”, it is so niche that it does not reflect the average person’s knowledge of the English language and is likely to yield random guesses if presented to a human player.


You may choose up to 5 clues. For each clue, think about why you would choose it, and revisit the guidelines/strategies given to see if the clue matches all the criteria. If you picked a clue and later realized that it's not good, then discard that clue. For each clue, provide reasoning that includes:
* why it relates to the provided list of target words.
* why it adheres to the guidelines/rules provided.
* Confirmation that the clue avoids the Assassin word.

**Output Format:**
On a new line at the end of your response, list the clues separated by commas, in order starting with the best clue. All clues must be on the same line at the end.
EXAMPLE if you choose to provide 3 clues: 
clue1, clue2, clue3
"""

In [9]:
# Conversation transcript sent to the chat API; it opens with the system instructions.
history = [dict(role="system", content=system_prompt)]

In [10]:
# Board setup for this example game. Team words are written in lower case and
# upper-cased here so that every word list uses the same casing.
red_words = list(map(str.upper, (
    "amazon", "hole", "shark", "ground", "shop",
    "cast", "nurse", "server", "vacuum", "rock",
)))

blue_words = list(map(str.upper, (
    "crown", "pit", "change", "glove", "charge",
    "torch", "whip", "fly", "africa", "giant",
)))

# Civilian words (already upper case).
neutral_words = ["CONDUCTOR", "HEART", "TOKYO", "BOW", "DANCE", "LIFE", "SOCK"]

# The single word that must never be associated with a clue.
assassin_word = "AGENT"


In [11]:
# Assemble the user message that describes the current board to the Codemaster.
# NOTE(review): the closing sentence points at a "Game Overview section", but the
# system prompt defined earlier in this notebook has no section with that name
# (its rules live under "Clue Generation Principles") - confirm the intended wording.
prompt = "".join([
    "The remaining words are: ",
    "Red: " + ", ".join(red_words) + ".\n",
    "Blue: " + ", ".join(blue_words) + ".\n",
    "Civilian: " + ", ".join(neutral_words) + ".\n",
    "Assassin: " + assassin_word + ". ",
    f"**You are the {team} Codemaster. Analyze the provided game state and generate your list of candidate clues now. Remember that your clues must adhere to the rules provided in the Game Overview section.**",
])
print(prompt)

# Rebuild the transcript from its first (system) message instead of appending,
# so re-running this cell does not stack duplicate user messages.
history = history[:1] + [{"role": "user", "content": prompt}]


The remaining words are: Red: AMAZON, HOLE, SHARK, GROUND, SHOP, CAST, NURSE, SERVER, VACUUM, ROCK.
Blue: CROWN, PIT, CHANGE, GLOVE, CHARGE, TORCH, WHIP, FLY, AFRICA, GIANT.
Civilian: CONDUCTOR, HEART, TOKYO, BOW, DANCE, LIFE, SOCK.
Assassin: AGENT. **You are the blue Codemaster. Analyze the provided game state and generate your list of candidate clues now. Remember that your clues must adhere to the rules provided in the Game Overview section.**


In [58]:
class LLMClueMaster:
    """Ask an OpenAI chat model for clue candidates related to single target words.

    For each target word the model is prompted to produce example sentences for a
    list of ConceptNet-style relations, returned as one JSON object with the key
    "example_relations". The helper methods parse that JSON and pull out the
    sentences and the clue words.
    """

    # WordNet part-of-speech tags mapped to the labels used in the prompt.
    # Both 'a' (adjective) and 's' (satellite adjective) count as adjectives.
    _POS_LABELS = {'n': 'noun', 'v': 'verb', 'a': 'adj', 's': 'adj'}

    def __init__(self, model: str = "gpt-4.1"):
        """Create the async OpenAI client from the OPENAI_API_KEY in ../.env.

        Args:
            model: chat model name passed to every completion request.
        """
        load_dotenv("../.env")
        openAI_api_key = os.environ.get("OPENAI_API_KEY")
        self.client = AsyncOpenAI(api_key=openAI_api_key)
        self.model = model

    def _get_word_classes(self, word : str) -> set:
        """Return the parts of speech ('noun', 'verb', 'adj') WordNet lists for `word`.

        Only synsets whose own name equals the word are considered. The result
        defaults to {'noun'} when nothing matches.
        """
        # Synset names are lower case with underscores, so normalise the query the
        # same way; otherwise upper-case board words never match their own synsets.
        lemma = word.lower().replace(' ', '_')
        pos_set = set()
        for synset in wn.synsets(lemma):
            if synset.name().split('.')[0] != lemma:
                continue  # Only consider exact matches
            label = self._POS_LABELS.get(synset.pos())
            if label is not None:
                pos_set.add(label)

        if not pos_set:
            pos_set.add('noun')  # default to noun if unidentified
        return pos_set

    def LLM_response_to_JSON(self, resp : str) -> dict:
        """Parse the raw model output; return {} when it is not a JSON object."""
        try:
            parsed = json.loads(resp)
        except (TypeError, ValueError):  # JSONDecodeError is a ValueError
            return {}
        return parsed if isinstance(parsed, dict) else {}

    @staticmethod
    def _relation_rows(resp) -> list:
        """Return the list-shaped rows under "example_relations", or [] if absent."""
        if not isinstance(resp, dict):
            return []
        relations = resp.get("example_relations")
        if not isinstance(relations, list):
            return []
        return [rel for rel in relations if isinstance(rel, (list, tuple))]

    def extract_clues_from_LLM_response(self, resp: dict) -> list[str]:
        """Collect the clue words, i.e. the last three entries of every relation row.

        Rows that are too short are skipped instead of discarding the whole response.
        """
        clues = []
        for rel in self._relation_rows(resp):
            if len(rel) >= 3:
                clues.extend(rel[-3:])
        return clues

    def extract_sentences_from_LLM_response(self, resp: dict) -> list[str]:
        """Collect the example sentences, i.e. entries 1-3 of every relation row.

        Rows that are too short are skipped instead of discarding the whole response.
        """
        sents = []
        for rel in self._relation_rows(resp):
            if len(rel) >= 4:
                sents.extend(rel[1:4])
        return sents

    def _build_prompt(self, target_word : str, pos_str : str, assassin_word : str) -> str:
        """Render the relation-generation prompt for one target word."""
        prompt = f"""
        **Objective:**
        You are a Linguistic Knowledge AI. Your task is to generate a list of simple, definitional example sentences for a given English word. Each example must connect strongly to commonsense English knowledge such that an average college graduate (no specific major) can understand it - no extremely niche references. 
        You will consider all of its specified parts of speech and generate sentences based on a comprehensive list of semantic relations.
        Be as quick as you can.
        
        **Instructions:**
        1.  Analyze the target word: **{target_word}**
        2.  Consider its meaning for all following roles: **{pos_str}**.
        3.  For each part of speech, generate three clear example sentence for each of its applicable relations and reverse relations. Each example must be distinct, use commonsense English knowledge, and has a strong connection to the target word **{target_word}**.
        4.  Strictly follow the sentence structure provided for each relation in the "Relation Definitions" section below.
        5.  If a relation is not applicable for the target word, simply skip it.
        6.  Your final response must be a single JSON object with one key, "example_relations". Do not include any other text or explanation. Include the nodes A and B in each relation.
        7.  Your examples must in no way relate to the word '{assassin_word}'.
        
        **Relation Definitions (from ConceptNet):**
        
        ### NOUN Relations (A -> B)
        * **IsA**: A is a subtype or a specific instance of B; every A is a B. This is the hyponym relation. Structure: "The noun {target_word} is a type of [concept]."
        * **RelatedTo**: A has a general, positive relationship with B. Structure: "The noun {target_word} is related to [concept]."
        * **UsedFor**: A is used for B; the purpose of A is B. Structure: "The noun {target_word} is used for [purpose or action]."
        * **CapableOf**: Something A can typically do is B. Structure: "The noun {target_word} is capable of [action]."
        * **AtLocation**: A can typically be found at B. Structure: "The noun {target_word} is found at/in/on [location]"
        * **HasProperty**: A has B as a property. Structure: "The noun {target_word} has the property of being [adjective]."
        * **HasA**: A is a whole which has B as a very well-known/typical part. Structure: "The noun {target_word} has a [part]."
        * **MadeOf**: A is made of the substance B. Structure: "The noun {target_word} is made of [substance]."
        * **ReceivesAction**: B is an action that can be done to A. Structure: "The noun {target_word} can be [action, e.g., 'driven' or 'eaten']."
        * **CreatedBy**: B is a process or agent that creates A. Structure: "The noun {target_word} is created by [process or person]."
        * **Causes**: A and B are events, and it is typical for A to cause B. Structure: "The noun {target_word} causes [event or state]."
        * **HasPrerequisite**: In order for A to happen, B needs to happen. Structure: "The noun {target_word} has a prerequisite of [concept]."
        * **AssociatedNames**: B is a well-known name of an entity (person, character, location, book, etc) that is very commonly associated with A. Structure "The name [name] ...".
        
        ### NOUN Reverse Relations (B -> A)
        * **IsA (Reverse)**: B is a supertype of A; every A is a B. Structure: "[An example] is a type of {target_word}."
        * **AtLocation (Reverse)**: B is a location where A can be found. Structure: "[An object] is found in {target_word}."
        * **PartOf**: B is a whole that A is a part of; A is a component of B. This is the meronym relation. Structure: "[An object] is a part of {target_word}."
        
        ### VERB Relations (A -> B)
        * **IsA**: A is a subtype of B; to do A is one way to do B. Structure: "To {target_word} is a way to [more general action]."
        * **MannerOf**: A is a specific manner of doing B. Structure: "To {target_word} is a manner of [more general action]."
        * **HasSubevent**: A is an event that has B as a subevent. Structure: "A subevent of the action {target_word} is [component action]."
        * **HasFirstSubevent**: A is an event whose first subevent is B. Structure: "The first subevent of the action {target_word} is [component action]."
        * **HasLastSubevent**: A is an event whose last subevent is B. Structure: "The last subevent of the action {target_word} is [component action]."
        * **MotivatedByGoal**: Someone does A because they want to achieve goal B. Structure: "The action {target_word} is motivated by the goal of [goal]."
        
        ### VERB Reverse Relations (B -> A)
        * **MannerOf (Reverse)**: B is a more general action that can be done in the specific manner A. Structure: "[A specific action] is a manner of {target_word}."
        * **CapableOf (Reverse)**: B is an action that can be performed by A. Structure: "[An object] is capable of the action {target_word}."
        * **UsedFor (Reverse)**: B is an action that is the purpose of A. Structure: "[An object] is used for the action {target_word}."
        * **ReceivesAction (Reverse)**: B is an action that can be done to A. Structure: "[An object] can receive the action {target_word}."
        * **CreatedBy (Reverse)**: B is an action that creates A. Structure: "[A result] is created by the action {target_word}."
        * **CausesDesire (Reverse)**: B is an action that one desires to do because of A. Structure: "[A feeling or concept] makes a person want to {target_word}."
        * **HasFirstSubevent (Reverse)**: B is a larger event whose first subevent is A. Structure: "{target_word} is the first subevent of [a larger action]."
        * **AssociatedNames**: B is a well-known name of an entity (person, character, location, book, etc) that is very commonly known to do A. Structure "The name [name] ...".
        
        ### ADJECTIVE Relations (A -> B)
        * **SimilarTo**: A is an attribute that is similar to attribute B. Structure: "To be {target_word} is similar to being [adjective]."
        * **RelatedTo**: A is an attribute that is related to concept B. Structure: "Being {target_word} is related to [concept]."
        * **HasProperty**: The state of being A has the property B. Structure: "A property of being {target_word} is [concept]."
        ---
        **TASK**
        
        **Target Word:** {target_word}
        **Parts of Speech to Consider:** {pos_str}
        """
        # Doubled braces are literal braces in the rendered prompt. Every row of the
        # example is itself valid JSON so the model is shown a well-formed target.
        prompt += f"""
        **Required JSON Output Format:**
        {{
          "example_relations": [
            ["{{name of the relation}}", "sentence 1", "sentence 2", "sentence 3", "{{The single most important word in node B in the 1st example}}", "{{The single most important word in node B in the 2nd example}}", "{{The single most important word in node B in the 3rd example}}"],
            ["{{name of the relation}}", "sentence 1", "sentence 2", "sentence 3", "{{The single most important word in node B in the 1st example}}", "{{The single most important word in node B in the 2nd example}}", "{{The single most important word in node B in the 3rd example}}"],
            ["{{name of the relation}}", "sentence 1", "sentence 2", "sentence 3", "{{The single most important word in node B in the 1st example}}", "{{The single most important word in node B in the 2nd example}}", "{{The single most important word in node B in the 3rd example}}"]
          ]
        }}
        """
        return prompt

    async def get_clues_for_word(self, target_word : str, assassin_word : str) -> str:
        """Request relation examples for one word and return the raw JSON text.

        The returned string is the message content of the first completion choice;
        pass it to `LLM_response_to_JSON` before extracting clues or sentences.
        """
        # Sort so the same word always renders the same prompt (set order varies).
        pos_list = sorted(self._get_word_classes(target_word))
        pos_str = "[" + ", ".join(pos_list) + "]"
        prompt = self._build_prompt(target_word, pos_str, assassin_word)

        history = [{"role": "user", "content": prompt}]
        response = await self.client.chat.completions.create(
            messages=history,
            model=self.model,
            response_format={ "type": "json_object" }
        )
        return response.choices[0].message.content

    async def get_clues_for_words(self, wordlist : list[str], assassin_word : str) -> list:
        """
        Create multiple parallel OpenAI API requests, one for each word in `wordlist`.
        Return a list with the raw JSON response text for every word, in the same
        order as `wordlist`.
        """
        tasks = [self.get_clues_for_word(word, assassin_word) for word in wordlist]
        return await asyncio.gather(*tasks)
            

In [59]:
start = time.time()
llm = LLMClueMaster()
responses = await llm.get_clues_for_words(["salmon", "steal"], "tower")
print(f"Time to generate clues: {time.time() - start}")

Time to generate clues: 13.500820398330688


In [60]:
# For every raw model response: show the distinct clue words (as a set and as a
# numbered list), followed by the example sentences they came from.
for resp in responses:
    parsed = llm.LLM_response_to_JSON(resp)
    clues = set(llm.extract_clues_from_LLM_response(parsed))
    print(clues)
    for rank, word in enumerate(clues, start=1):
        print(f"{rank}. {word}")
    print(llm.extract_sentences_from_LLM_response(parsed))

{'markets', 'reproduction', 'rivers', 'jumping', 'Atlantic', 'hatching', 'swimming', 'scales', 'gill', 'flesh', 'meals', 'parasite', 'excitement', 'spawning', 'habitat', 'fin', 'sockeye', 'Chinook', 'scale', 'edible', 'cooking', 'eggs', 'muscle', 'activity', 'nutritious', 'ocean', 'grilling', 'water', 'caught', 'seafood', 'animal', 'tail', 'fishing', 'sushi', 'eating', 'bone', 'Sockeye', 'cooked', 'pink', 'coho', 'fish', 'eaten', 'fillet'}
1. markets
2. reproduction
3. rivers
4. jumping
5. Atlantic
6. hatching
7. swimming
8. scales
9. gill
10. flesh
11. meals
12. parasite
13. excitement
14. spawning
15. habitat
16. fin
17. sockeye
18. Chinook
19. scale
20. edible
21. cooking
22. eggs
23. muscle
24. activity
25. nutritious
26. ocean
27. grilling
28. water
29. caught
30. seafood
31. animal
32. tail
33. fishing
34. sushi
35. eating
36. bone
37. Sockeye
38. cooked
39. pink
40. coho
41. fish
42. eaten
43. fillet
['The noun salmon is a type of fish.', 'The noun salmon is a type of animal.', 