In [87]:
import asyncio
import os
import time

import httpx
import lemminflect
import networkx as nx
import spacy
from dotenv import load_dotenv
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from openai import OpenAI


# Load variables from a local .env file into the process environment, then read
# the OpenAI key from there so it never has to be written into the notebook.
# (Set OPENAI_API_KEY in your shell or in .env before running.)
load_dotenv()
openAI_api_key = os.environ.get("OPENAI_API_KEY")

# Download necessary data for WordNetLemmatizer if we haven't already.
# Lemmatizing a throwaway word is only a probe: it raises LookupError when the
# WordNet corpus is not available locally.
try:
    WordNetLemmatizer().lemmatize("test")
except LookupError:
    # Only `nltk.stem.WordNetLemmatizer` is imported at the top of the notebook,
    # so the package itself has to be imported here before calling download();
    # without it this first-run path fails with NameError instead of recovering.
    import nltk

    nltk.download('wordnet')
    nltk.download('omw-1.4')  # Open Multilingual Wordnet, often needed for full WordNet functionality

In [88]:
# Universal POS tags (open-class words only) that a clue token is allowed to carry.
allowed_word_types = ["ADJ", "ADV", "INTJ", "NOUN", "PROPN", "VERB"]

# ConceptNet relation names queried for each part of speech.
# The *_REV_RELS lists are the ones queried with the board word as the edge's
# "end" node; the plain *_RELS lists are queried with it as the "start" node.
NOUN_RELS = [
    "RelatedTo", "CapableOf", "IsA", "UsedFor", "AtLocation", "HasPrerequisite",
    "HasProperty", "ReceivesAction", "CreatedBy", "Causes", "HasA", "MadeOf",
]
NOUN_REV_RELS = ["AtLocation", "IsA", "PartOf"]
VERB_RELS = [
    "MannerOf", "HasSubevent", "MotivatedByGoal", "IsA",
    "HasFirstSubevent", "HasLastSubevent",
]
VERB_REV_RELS = [
    "CapableOf", "MannerOf", "CausesDesire", "CreatedBy", "UsedFor", "ReceivesAction",
]
ADJ_RELS = ["SimilarTo", "Antonym", "RelatedTo", "HasProperty"]  # Added for completeness

# Lower-case function words stripped from multi-word labels by remove_stopwords().
stopwords = {
    'ourselves', 'hers', 'between', 'yourself', 'but', 'again', 'there', 'about', 'once', 'during',
    'out', 'very', 'having', 'with', 'they', 'own', 'an', 'be', 'some', 'for',
    'do', 'its', 'yours', 'such', 'into', 'of', 'most', 'itself', 'other', 'off',
    'is', 's', 'am', 'or', 'who', 'as', 'from', 'him', 'each', 'the',
    'themselves', 'until', 'below', 'are', 'we', 'these', 'your', 'his', 'through', 'don',
    'nor', 'me', 'were', 'her', 'more', 'himself', 'this', 'down', 'should', 'our',
    'their', 'while', 'above', 'both', 'up', 'to', 'ours', 'had', 'she', 'all',
    'no', 'when', 'at', 'any', 'before', 'them', 'same', 'and', 'been', 'have',
    'in', 'will', 'on', 'does', 'yourselves', 'then', 'that', 'because', 'what', 'over',
    'why', 'so', 'can', 'did', 'not', 'now', 'under', 'he', 'you', 'herself',
    'has', 'just', 'where', 'too', 'only', 'myself', 'which', 'those', 'i', 'after',
    'few', 'whom', 't', 'being', 'if', 'theirs', 'my', 'against', 'a', 'by',
    'doing', 'it', 'how', 'further', 'was', 'here', 'than', 'get', 'put',
}


In [89]:
# Single shared WordNetLemmatizer instance; get_word_lemma() reads this global.
lemmatizer = WordNetLemmatizer()
def get_word_lemma(word : str, pos_hint : str = None) -> str:
    """
    Return the lemma of one word, looked up without sentence context via the
    module-level NLTK WordNetLemmatizer. The word is lower-cased first.

    pos_hint may be 'n' (noun), 'v' (verb), 'a' (adjective) or 'r' (adverb).
    When the hint is missing or empty the lemmatizer's own default is used,
    which is 'n'.
    """
    lowered = word.lower()
    if not pos_hint:
        # No usable hint: let the lemmatizer fall back to its default POS.
        return lemmatizer.lemmatize(lowered)
    return lemmatizer.lemmatize(lowered, pos=pos_hint)

def get_all_possible_lemmas(word: str) -> list[str]:
    """
    Lemmatize `word` once per part-of-speech hint and return the results in
    hint order: 'v', 'n', 'a', 'r', 's'. Duplicates are not removed, so the
    list always has five entries.
    """
    pos_hints = ("v", "n", "a", "r", "s")
    return [get_word_lemma(word, pos_hint) for pos_hint in pos_hints]

def get_all_inflections(word : str) -> list[str]:
    """
    Return every inflected form lemminflect reports for `word` as one flat list.

    lemminflect.getAllInflections() yields a mapping whose values are groups of
    forms; the groups are concatenated in the mapping's iteration order and
    duplicates are kept.
    """
    forms_by_tag = lemminflect.getAllInflections(word)
    return [form for forms in forms_by_tag.values() for form in forms]

def get_word_classes(word : str) -> set:
    """
    Return the coarse word classes ('noun', 'verb', 'adj') WordNet lists for `word`.

    Only synsets whose own name (the part before the first '.') equals `word`
    exactly are considered, so senses that merely list `word` as a synonym are
    ignored. Returns an empty set when nothing matches.
    """
    # WordNet tags head adjectives as 'a' and satellite adjectives as 's'.
    # Both are adjectives; mapping only 's' silently drops words whose sole
    # adjective sense is a head adjective.
    pos_names = {'n': 'noun', 'v': 'verb', 'a': 'adj', 's': 'adj'}

    pos_set = set()
    for synset in wn.synsets(word):
        if synset.name().split('.')[0] != word:
            continue  # Only consider exact matches
        class_name = pos_names.get(synset.pos())
        if class_name is not None:
            pos_set.add(class_name)
    return pos_set

def remove_stopwords(words : str) -> str:
    """
    Drop every whitespace-separated token of `words` whose lower-cased form is
    in the module-level `stopwords` set, and re-join the survivors with single
    spaces. Surviving tokens keep their original casing.
    """
    kept = []
    for token in words.split():
        if token.lower() in stopwords:
            continue
        kept.append(token)
    return " ".join(kept)
    

class Codemaster:
    """
    Collects candidate clue words for a Codenames-style board by querying the
    ConceptNet HTTP API for words related to our team's words.

    Attributes set in __init__:
        our_words / enemy_words / civilian_words / assassin_word / my_team:
            stored exactly as passed in.
        nlp: the spaCy "en_core_web_trf" pipeline, used to tag clue candidates.
        conceptnet_client: httpx.AsyncClient used for ConceptNet requests.
        openai_client: OpenAI client built from the module-level API key.
        lemmas: lower-cased set holding every board word together with its
            inflections and lemmas; a clue candidate may not be in this set.
    """

    def __init__(self, our_words : list[str], enemy_words : list[str], civilian_words : list[str], assassin_word : str, my_team : str):
        self.our_words = our_words
        self.enemy_words = enemy_words
        self.civilian_words = civilian_words
        self.assassin_word = assassin_word
        self.my_team = my_team

        self.nlp = spacy.load("en_core_web_trf")

        self.conceptnet_client = httpx.AsyncClient(http2=True)
        self.openai_client = OpenAI(api_key=openAI_api_key)

        self.lemmas = set()
        all_words = our_words + enemy_words + civilian_words + [assassin_word]
        for word in all_words:
            # add(), not update(): update() on a str would insert its individual
            # characters. Everything is stored lower-cased so that _filter() can
            # compare case-insensitively.
            self.lemmas.add(word.lower())
            self.lemmas.update(form.lower() for form in get_all_inflections(word))
            self.lemmas.update(lemma.lower() for lemma in get_all_possible_lemmas(word))

    async def fetch_conceptnet(self, url : str) -> dict:
        """GET `url` with the ConceptNet client (following redirects) and return the decoded JSON body."""
        r = await self.conceptnet_client.get(url, follow_redirects=True)
        return r.json()

    def _filter(self, words: str) -> list[str]:
        """
        Return the tokens of `words` that are usable as clues, as strings.

        A token is kept when its POS tag is one of `allowed_word_types` and
        neither its text nor its lemma (compared lower-cased) appears in
        self.lemmas, i.e. it is not a board word or a lemma/inflection of one.
        """
        doc = self.nlp(words)
        return [
            token.text
            for token in doc
            if token.pos_ in allowed_word_types
            and token.lemma_.lower() not in self.lemmas
            and token.text.lower() not in self.lemmas
        ]

    def process_edge(self, edge : dict, target_word):
        """
        For a given target word and one of its ConceptNet edges, process the node on the other side of the edge:
            + First, the node has to be in English.
            + Second, the node may be multi-word, so each individual word is processed.
            Keep the open class words (adjective, nouns, verbs, etc) as specified in the universal POS tags: https://universaldependencies.org/u/pos/
            + Then, each word must follow the rules of the game (not a board word, lemma or inflection).
        The `self._filter` method performs the last 2 filters.

        Returns the list of surviving words, or [] when the other node is not English.
        Raises KeyError if the edge lacks the "start"/"end" nodes or their
        "label"/"language" fields.
        """
        start_node = edge["start"]
        end_node = edge["end"]

        start_node_label = remove_stopwords(start_node["label"])
        end_node_label = remove_stopwords(end_node["label"])

        # Decide which side of the edge holds the target word. Labels are
        # compared case-insensitively so that a capitalised label such as
        # "Beijing" is still recognised as the board word "beijing".
        if start_node_label.lower() == target_word.lower():
            other_node, other_label = end_node, end_node_label
        else:
            other_node, other_label = start_node, start_node_label

        if other_node["language"] != "en":
            return []
        return self._filter(other_label)

    async def _fetch_relations(self, target_word: str, rel_list: list, is_rev: bool = False) -> set:
        """
        Query ConceptNet once per relation in `rel_list` (concurrently) and collect clues.

        With is_rev=False the target word is the edge's start node; with
        is_rev=True it is the end node. Failed requests and responses that
        cannot be parsed are skipped (best effort).

        Returns a set of (clue_word, relation_name) tuples.
        """
        if not rel_list:
            return set()

        node_param = "end" if is_rev else "start"
        tasks = [
            self.conceptnet_client.get(
                f"http://api.conceptnet.io/query?{node_param}=/c/en/{target_word}&rel=/r/{rel}&limit=10",
                follow_redirects=True,
            )
            for rel in rel_list
        ]
        responses = await asyncio.gather(*tasks, return_exceptions=True)

        clues = set()
        # gather() preserves order, so responses line up with rel_list.
        for relation_name, res in zip(rel_list, responses):
            # BaseException also covers CancelledError, which gather() can
            # hand back as a result and which is not an Exception subclass.
            if isinstance(res, BaseException):
                continue

            try:
                api_res = res.json()
                for edge in api_res.get("edges", []):
                    for clue in self.process_edge(edge, target_word):
                        clues.add((clue, relation_name))
            except (ValueError, KeyError):
                # Malformed JSON or an edge missing expected keys: skip this relation.
                continue
        return clues

    async def _fetch_clues_for_word(self, word: str, specific_rels: list = None) -> dict:
        """
        Fetch all potential clues for a single word, grouped by part of speech.

        The word's classes come from get_word_classes(); when none are found it
        is treated as a noun. If `specific_rels` is given, those relations are
        used in both directions instead of the per-POS defaults.

        Returns {'noun': set, 'verb': set, 'adj': set} of (clue, relation) tuples.
        """
        clues_by_pos = {'noun': set(), 'verb': set(), 'adj': set()}
        word_classes = get_word_classes(word)
        if not word_classes:
            word_classes.add('noun')  # Default to noun

        for pos in word_classes:
            rels, rev_rels = [], []
            if specific_rels:
                rels = specific_rels
                rev_rels = specific_rels
            elif pos == 'noun':
                rels, rev_rels = NOUN_RELS, NOUN_REV_RELS
            elif pos == 'verb':
                rels, rev_rels = VERB_RELS, VERB_REV_RELS
            elif pos == 'adj':
                rels, rev_rels = ADJ_RELS, []

            forward_clues, reverse_clues = await asyncio.gather(
                self._fetch_relations(word, rels, is_rev=False),
                self._fetch_relations(word, rev_rels, is_rev=True)
            )
            clues_by_pos[pos].update(forward_clues)
            clues_by_pos[pos].update(reverse_clues)
        return clues_by_pos

    async def get_all_potential_clues(self):
        """
        Concurrently fetch clues for each of our target words.

        Returns a dict mapping each word in self.our_words to the per-POS clue
        dict produced by _fetch_clues_for_word(). The ConceptNet client is
        closed when the call finishes, whether it succeeds or raises.
        """
        # Reuse the client built in __init__ while it is still open; only build
        # a fresh one after a previous call has closed it. Replacing an open
        # client unconditionally would leak its connection pool.
        if self.conceptnet_client.is_closed:
            self.conceptnet_client = httpx.AsyncClient(http2=True)

        try:
            res = await asyncio.gather(
                *(self._fetch_clues_for_word(word) for word in self.our_words)
            )
        finally:
            await self.conceptnet_client.aclose()

        print(len(res))
        return dict(zip(self.our_words, res))


In [85]:
# Demo run: time how long it takes to build a Codemaster and fetch clues.
# The timer starts before construction, so the measurement includes loading
# the spaCy model in Codemaster.__init__ as well as the network requests.
start = time.time()
blue_words = ["dwarf", "foot", "moon", "star", "ghost", "beijing", "fighter", "roulette", "alps"]
# Full red-team word list, kept for reference; the run below uses a single word.
#red_words = ["club", "superhero", "mount", "bomb", "knife", "belt", "robot", "rock", "bar", "lab"]
red_words = ["drive"]
civilian_words = ["dead"]
assassin_word = "agent"
# Argument order is (our_words, enemy_words, civilian_words, assassin_word, my_team):
# red is our team here, blue is the enemy.
master = Codemaster(red_words, blue_words, civilian_words, assassin_word, "red")
# Top-level await: relies on the notebook's already-running event loop.
candidates = await master.get_all_potential_clues()
print(f"Time taken: {time.time() - start}")

1
Time taken: 4.604136228561401


In [181]:
# Scratch check: run "driving" through the spaCy pipeline and print every
# token whose lemma is NOT one of the board lemmas listed in `l`.
l = [
    'drive', 'dwarf', 'foot', 'moon', 'star', 'ghost',
    'beijing', 'fighter', 'roulette', 'alp', 'dead', 'agent',
]
nlp = spacy.load("en_core_web_trf")
doc = nlp("driving")
for token in doc:
    if token.lemma_ in l:
        continue
    print(token.text, token.lemma_)

driving driving


In [164]:
# Ask the model which word classes 'table' can belong to and keep only the
# final line of its reply, which the prompt asks to be the abbreviation list.

# No other cell defines `openai_client` at notebook level, so build it here;
# otherwise this cell raises NameError on a fresh kernel.
openai_client = OpenAI(api_key=openAI_api_key)

raw_answer = openai_client.responses.create(
    model="gpt-4.1",
    input = """The following are possible word classes: [ADV (adverb), ADJ (adjective), INTJ (interjection), NOUN (noun), PROPN (proper noun), VERB (verb)].
    
    Determine which of these classes the word 'table' belongs to based on its possible usages in English (and English only).
    
    At the end of your response, list the corresponding abbreviations (e.g., NOUN, VERB), separated by commas, on a single line.
    """

).output_text

# Strip before splitting: if the reply ends with a newline, a plain
# split("\n")[-1] would return an empty string instead of the answer line.
answer_lines = raw_answer.strip().splitlines()
response = answer_lines[-1] if answer_lines else ""

In [165]:
# Display the line extracted from the model's reply in the previous cell.
response

'NOUN, VERB'

In [6]:
# Same word-class query as above, using a locally constructed client.
client = OpenAI(api_key=openAI_api_key)

# TODO: this system prompt is not sent with the request below; wire it in
# once its wording is finalised.
system_prompt = """
You are an expert English linguist.
"""


raw_answer = client.responses.create(
    model="gpt-4.1",
    input = """
    The following are possible word classes: [ADV (adverb), ADJ (adjective), INTJ (interjection), NOUN (noun), PROPN (proper noun), VERB (verb)].
    
    Determine which of these classes the word 'table' belongs to based on its possible usages in English (and English only).
    
    At the end of your response, list the corresponding abbreviations (e.g., NOUN, VERB), separated by commas, on a single line.
    """

).output_text

# Strip before splitting so a trailing newline in the reply does not turn the
# "last line" into an empty string.
answer_lines = raw_answer.strip().splitlines()
response = answer_lines[-1] if answer_lines else ""

print(response)

In [86]:
from nltk.corpus import wordnet as wn

def get_pos(word):
    """
    Return the set of coarse word classes ('noun', 'verb', 'adj') WordNet lists
    for `word`, printing every synset WordNet returns along the way.

    Only synsets whose own name (the part before the first '.') equals `word`
    exactly contribute to the result.
    """
    # WordNet uses 'a' for head adjectives and 's' for satellite adjectives;
    # both count as adjectives. Mapping only 's' misses words whose adjective
    # senses are all head adjectives.
    pos_names = {'n': 'noun', 'v': 'verb', 'a': 'adj', 's': 'adj'}

    pos_set = set()
    for synset in wn.synsets(word):
        print(f"Synset: {synset}")
        if synset.name().split('.')[0] != word:
            continue  # Only consider exact matches
        class_name = pos_names.get(synset.pos())
        if class_name is not None:
            pos_set.add(class_name)

    return pos_set

# Example usage
word = "huge"
pos_tags = get_pos(word)
# Sets have no stable iteration order, so sort before joining to keep the
# printed line the same from run to run when a word has several classes.
print(f"{word} can be used as: {', '.join(sorted(pos_tags))}")

Synset: Synset('huge.s.01')
huge can be used as: adj


In [45]:
# Quick check: "a" and "in" are in the stopword set, so only "city France" should remain.
remove_stopwords("a city in France")

'city France'

9
Time taken: 8.19223427772522
