In [None]:
# === FORCE FRESH START ===
# Drop objects left over from a previous run so the cells below rebuild them.
# Each name is removed on its own: a single `del a, b, c` stops at the first
# undefined name and leaves the remaining ones in place, and a bare `except:`
# would additionally hide unrelated errors.
for _stale_name in ("root", "animals", "people", "q_is_animal", "noun_data"):
    globals().pop(_stale_name, None)  # no-op when the name was never defined
del _stale_name
import gc
gc.collect()

20

In [None]:
import json

# ===============================
# Read the two input JSON documents
# ===============================
with open("final_noun_tree.json", "r", encoding="utf-8") as nouns_file:
    noun_data = json.load(nouns_file)

with open("categorized_nouns.json", "r", encoding="utf-8") as categories_file:
    categorized_data = json.load(categories_file)

print(f"Loaded {len(noun_data)} nouns from final_noun_tree.json")
print(f"Loaded {len(categorized_data)} categories from categorized_nouns.json")

# ===============================
# Index noun records by normalised (stripped, lower-cased) name
# ===============================
noun_map = {}
for record in noun_data:
    if "name" not in record:
        continue  # records without a name cannot be looked up
    noun_map[record["name"].strip().lower()] = record

# ===============================
# Attach the full noun record to every name listed under a category
# ===============================
final_tree = {}

for category_name, noun_list in categorized_data.items():
    merged_nouns = []
    for noun_name in noun_list:
        try:
            merged_nouns.append(noun_map[noun_name.strip().lower()])
        except KeyError:
            # Name appears in the category file but has no record in the noun file
            print(f"⚠️ Warning: '{noun_name}' not found in final_noun_tree.json")
    final_tree[category_name] = {"name": category_name, "noun_list": merged_nouns}

# ===============================
# Write the merged structure and report totals
# ===============================
with open("final_full_tree_updated.json", "w", encoding="utf-8") as out_file:
    json.dump(final_tree, out_file, indent=2)

print(f"\n✅ Final tree saved to 'final_full_tree_updated.json'")
print(f"Total categories: {len(final_tree)}")
total_nouns = sum(map(len, (cat['noun_list'] for cat in final_tree.values())))
print(f"Total nouns combined: {total_nouns}")



Loaded 1816 nouns from final_noun_tree.json
Loaded 10 categories from categorized_nouns.json

✅ Final tree saved to 'final_full_tree_updated.json'
Total categories: 10
Total nouns combined: 2442


In [None]:
import json

# -----------------------------
# Node classes
# -----------------------------
class CategoryNode:
    """Named category in the tree.

    A category either points at a follow-up question (`question_node`) or,
    when it is a leaf, holds the nouns that belong to it (`noun_list`).
    """

    def __init__(self, name):
        self.name = name
        # 'is' and 'has' property lists are stored here
        self.properties = dict()
        # QuestionNode asked below this category, if any
        self.question_node = None
        # filled only when the category is a leaf
        self.noun_list = list()

    def __repr__(self):
        return "CategoryNode({})".format(self.name)


class QuestionNode:
    """Yes/no question in the tree; each answer leads to a CategoryNode."""

    def __init__(self, question):
        self.question = question
        # Both branches start unset and are expected to be wired to CategoryNode objects.
        self.yes = None
        self.no = None

    def __repr__(self):
        return "QuestionNode({})".format(self.question)


# -----------------------------
# Leaf categories (these end up holding the nouns)
# -----------------------------
(
    non_physical_things,
    people,
    animals,
    plants,
    random_living_things,
    places,
    foods,
    man_made_objects,
    rocks,
    random_uncategorized_objects,
) = [
    CategoryNode(label)
    for label in (
        "non_physical_things",
        "people",
        "animals",
        "plants",
        "random_living_things",
        "places",
        "foods",
        "man_made_objects",
        "rocks",
        "random_uncategorized_objects",
    )
]

# -----------------------------
# Internal categories (each one leads to a further question)
# -----------------------------
(
    physical_objects,
    living_things,
    non_living_things,
    non_people,
    non_animals,
    non_places,
    things,
    non_foods,
    natural_things,
) = [
    CategoryNode(label)
    for label in (
        "physical_objects",
        "living_things",
        "non_living_things",
        "non_people",
        "non_animals",
        "non_places",
        "things",
        "non_foods",
        "natural_things",
    )
]

# -----------------------------
# Question nodes
# -----------------------------
(
    q_is_tangible,
    q_is_alive,
    q_is_person,
    q_is_animal,
    q_is_plant,
    q_is_place,
    q_is_thing,
    q_is_food,
    q_is_man_made,
    q_is_rock,
) = [
    QuestionNode(text)
    for text in (
        "Is it physically tangible?",
        "Is it alive?",
        "Is it a person?",
        "Is it an animal?",
        "Is it a plant?",
        "Is it a place?",
        "Is it a thing?",
        "Is it a food?",
        "Is it man made?",
        "Is it a rock or mineral?",
    )
]

# -----------------------------
# Wire the tree: question -> (yes, no) categories, category -> next question
# -----------------------------
root = q_is_tangible
q_is_tangible.yes, q_is_tangible.no = physical_objects, non_physical_things

physical_objects.question_node = q_is_alive
q_is_alive.yes, q_is_alive.no = living_things, non_living_things

living_things.question_node = q_is_person
q_is_person.yes, q_is_person.no = people, non_people

non_people.question_node = q_is_animal
q_is_animal.yes, q_is_animal.no = animals, non_animals

non_animals.question_node = q_is_plant
q_is_plant.yes, q_is_plant.no = plants, random_living_things

non_living_things.question_node = q_is_place
q_is_place.yes, q_is_place.no = places, non_places

non_places.question_node = q_is_thing
# The "no" branch deliberately reuses the non_physical_things leaf
q_is_thing.yes, q_is_thing.no = things, non_physical_things

things.question_node = q_is_food
q_is_food.yes, q_is_food.no = foods, non_foods

non_foods.question_node = q_is_man_made
q_is_man_made.yes, q_is_man_made.no = man_made_objects, natural_things

natural_things.question_node = q_is_rock
q_is_rock.yes, q_is_rock.no = rocks, random_uncategorized_objects

# -----------------------------
# Load the noun records and distribute them over the leaf categories
# -----------------------------
# Read as UTF-8 explicitly so decoding does not depend on the platform's
# default locale encoding.
with open("final_noun_tree.json", "r", encoding="utf-8") as f:
    noun_data = json.load(f)

# Map category names to leaf nodes
leaf_mapping = {
    "non_physical_things": non_physical_things,
    "people": people,
    "animals": animals,
    "plants": plants,
    "random_living_things": random_living_things,
    "places": places,
    "foods": foods,
    "man_made_objects": man_made_objects,
    "rocks": rocks,
    "random_uncategorized_objects": random_uncategorized_objects,
}

# Membership test per leaf category, keyed like leaf_mapping. Each predicate
# reads boolean-ish flags from a noun record; a missing flag counts as False.
# A noun may satisfy several predicates and then lands in several leaves.
leaf_predicates = {
    "non_physical_things": lambda n: not n.get("is_physical", False),
    "people": lambda n: n.get("is_person", False),
    "animals": lambda n: n.get("is_animal", False),
    "plants": lambda n: n.get("is_plant", False),
    # alive, but neither person, animal nor plant
    "random_living_things": lambda n: (
        n.get("is_alive", False)
        and not n.get("is_person", False)
        and not n.get("is_animal", False)
        and not n.get("is_plant", False)
    ),
    "places": lambda n: n.get("is_place", False),
    "foods": lambda n: n.get("is_food", False),
    "man_made_objects": lambda n: n.get("is_man_made", False),
    "rocks": lambda n: n.get("is_rock", False),
    # none of the alive / food / man-made / rock / place flags is set
    "random_uncategorized_objects": lambda n: not any(
        n.get(flag, False)
        for flag in ("is_alive", "is_food", "is_man_made", "is_rock", "is_place")
    ),
}

# Populate noun lists in leaves according to their category, keeping file order
for cat_name, leaf_node in leaf_mapping.items():
    belongs = leaf_predicates[cat_name]
    leaf_node.noun_list.extend(noun for noun in noun_data if belongs(noun))

# -----------------------------
# Per-leaf property vocabularies: "is" entries and "has" entries
# -----------------------------
non_physical_things.properties = {
    "is": ["a concept"],
    "has": ["complexity"],
}
people.properties = {
    "is": ["actor", "athlete", "STEM worker", "profession"],
    "has": [
        "entertainment",
        "influence",
        "scientific impact",
        "athletic ability",
        "medical knowledge",
        "sports scoring ability",
    ],
}
animals.properties = {
    "is": ["mammal", "bird", "reptile", "fish", "amphibian"],
    "has": ["size", "speed", "aggressiveness", "domesticatability"],
}
plants.properties = {
    "is": ["flower", "tree", "general plant or bush"],
    "has": ["size", "beauty", "leaves", "fruit"],
}
random_living_things.properties = {
    "is": ["air breathing", "water breathing"],
    "has": ["size", "speed"],
}
places.properties = {
    "is": ["In the US", "In the western US", "In the Eastern US", "One Specific Place"],
    "has": ["heat", "extreme weather", "size", "cold", "population", "vegetation"],
}
foods.properties = {
    "is": ["natural", "cooked", "raw"],
    "has": [
        "heat",
        "preparation time",
        "ingredients",
        "savoriness",
        "sweetness",
        "saltiness",
    ],
}
man_made_objects.properties = {
    "is": ["tool", "vehicle", "toy", "service", "exercise", "clothing"],
    "has": [
        "softness",
        "engineering",
        "electrical",
        "size",
        "monetary value",
        "simplicity",
    ],
}
rocks.properties = {
    "is": ["rock", "mineral", "metal"],
    "has": [
        "hardness",
        "brittleness",
        "shine",
        "beauty",
        "darkness",
        "monetary value",
    ],
}
random_uncategorized_objects.properties = {
    "is": ["tool", "vehicle", "toy", "pokemon", "concept"],
    "has": ["size", "complexity", "entertainment"],
}

# -----------------------------
# Pickle the tree (starting at root) so it can be inspected later
# -----------------------------
import pickle
with open("final_full_tree_updated.pkl", "wb") as pickle_file:
    pickle.dump(root, pickle_file)

print("✅ Final tree constructed in memory and saved to 'final_full_tree_updated.pkl'")

def visualize_tree(node, depth=0, show_nouns=False):
    """
    Print an indented outline of the tree below `node`.

    Args:
        node: CategoryNode or QuestionNode to start from; any other value prints nothing
        depth: Nesting level; each level adds two spaces of indentation
        show_nouns: If True, also print the names of the first ten nouns of each leaf
    """
    pad = "  " * depth

    def describe(child):
        # A branch target is labelled by its category name, otherwise by its question text
        return child.name if isinstance(child, CategoryNode) else child.question

    if isinstance(node, QuestionNode):
        print(f"{pad}Q: {node.question}")
        if node.yes:
            print(f"{pad}  Yes → {describe(node.yes)}")
            visualize_tree(node.yes, depth + 1, show_nouns)
        if node.no:
            print(f"{pad}  No  → {describe(node.no)}")
            visualize_tree(node.no, depth + 1, show_nouns)
        return

    if not isinstance(node, CategoryNode):
        return

    if node.question_node:
        # Internal category: continue with the question hanging below it
        print(f"{pad}Category: {node.name} → has QuestionNode child")
        visualize_tree(node.question_node, depth + 1, show_nouns)
        return

    print(f"{pad}Category: {node.name} [Leaf] → {len(node.noun_list)} nouns")
    if show_nouns and node.noun_list:
        # Only the 'name' field of the first ten noun dicts is shown
        preview = ", ".join(entry["name"] for entry in node.noun_list[:10])
        print(f"{pad}  Nouns: {preview} ...")

# Example usage: print the whole tree with a noun preview on every leaf
visualize_tree(root, show_nouns=True)

# Leaves end the question chain, so clear any follow-up question on them
leaf_nodes = [
    non_physical_things,
    people,
    animals,
    plants,
    random_living_things,
    places,
    foods,
    man_made_objects,
    rocks,
    random_uncategorized_objects,
]
for leaf in leaf_nodes:
    setattr(leaf, "question_node", None)


✅ Final tree constructed in memory and saved to 'final_full_tree_updated.pkl'
Q: Is it physically tangible?
  Yes → physical_objects
  Category: physical_objects → has QuestionNode child
    Q: Is it alive?
      Yes → living_things
      Category: living_things → has QuestionNode child
        Q: Is it a person?
          Yes → people
          Category: people [Leaf] → 107 nouns
            Nouns: Bill Walton, Allen Iverson, Brian, Child, Cleopatra, Anthony Davis, Dennis Rodman, Friend, Grandparent, Hal Greer ...
          No  → non_people
          Category: non_people → has QuestionNode child
            Q: Is it an animal?
              Yes → animals
              Category: animals [Leaf] → 260 nouns
                Nouns: Airedale Terrier, Akita, Australian Shepherd, Basset Hound, Bernese Mountain Dog, Border Collie, Boston Terrier, Bulldog, Bullmastiff, Aardvark ...
              No  → non_animals
              Category: non_animals → has QuestionNode child
                Q: Is

In [None]:
# Example Games
import random
import math
import json
from IPython.display import Markdown, display

print("Loading noun data and tree...")
# Only the noun list is read from disk here; the games below use the `root`
# tree object that the tree-building cell left in the kernel.
# UTF-8 is requested explicitly so decoding does not depend on the platform's
# default locale encoding.
with open("final_noun_tree.json", "r", encoding="utf-8") as f:
    ALL_NOUNS = json.load(f)
print(f"Loaded {len(ALL_NOUNS)} nouns and tree root.")

#Helper
def entropy_split(prop, noun_list):
    """Binary entropy (in bits) of splitting `noun_list` on the 'is' property `prop`.

    A noun counts as a "yes" when its "is" dict holds a truthy value for
    `prop`; a missing "is" dict or missing key counts as "no". Returns 0 when
    every noun falls on the same side (including an empty list).
    """
    total = len(noun_list)
    matching = 0
    for noun in noun_list:
        if noun.get("is", {}).get(prop, False):
            matching += 1
    if matching in (0, total):
        return 0
    share = matching / total
    return -(share * math.log2(share) + (1 - share) * math.log2(1 - share))

# Leaf category name -> noun flag that must be set on BOTH the guess and the
# target for a wrong guess to still be rated "Kinda/sorta close".
_CLOSE_MATCH_FLAGS = {
    "animals": "is_animal",
    "man_made_objects": "is_man_made",
    "plants": "is_plant",
    "places": "is_place",
    "foods": "is_food",
    "rocks": "is_rock",
}


def _ask_yes_no(question, question_number, transcript):
    """Prompt with a numbered y/n question, log it to `transcript`, return True for y/yes."""
    label = f"Q{question_number:2d}: {question}"
    ans = input(f"{label} (y/n) ").strip().lower()
    transcript.append(f"{label} ({ans})")
    return ans in ("y", "yes")


def _has_spread(prop, nouns):
    """Return max minus min of the 'has' value for `prop` over `nouns` (missing counts as 0)."""
    values = [n.get("has", {}).get(prop, 0) for n in nouns]
    return max(values, default=0) - min(values, default=0)


def run_manual_game(target_noun: dict, game_num: int):
    """Play one interactive guessing round and return a summary of it.

    The person at the keyboard answers y/n prompts (via `input`) on behalf of
    `target_noun`. The round has three phases:

    1. Walk the question tree from the module-level `root` down to a leaf.
    2. Narrow the leaf's nouns with the leaf's "is" properties (highest
       `entropy_split` first), then with its "has" properties (largest value
       spread first, split at the median noun).
    3. Guess the first remaining noun, or a random noun from `ALL_NOUNS` when
       nothing is left.

    Args:
        target_noun: Noun record; its "name" is shown in the banner and used
            to rate the guess.
        game_num: Number shown in the banner.

    Returns:
        dict with keys "target", "questions", "guess", "category" and
        "transcript" (newline-joined log of the questions and answers).
    """
    current = root
    questions_asked = 0
    MAX_QUESTIONS = 21
    transcript = []
    # Stay well-defined even if the walk below never reaches a leaf (question
    # budget exhausted, unset branch, unknown node type): the round then goes
    # straight to a random guess instead of raising on a missing attribute.
    noun_list = []
    leaf = None

    print(f"\n{'='*60}")
    print(f" GAME {game_num} | TARGET: {target_noun['name'].upper()} (kept secret)")
    print(f"{'='*60}")

    # Phase 1: descend the tree
    while questions_asked < MAX_QUESTIONS - 1:
        if hasattr(current, "question"):
            said_yes = _ask_yes_no(current.question, questions_asked + 1, transcript)
            current = current.yes if said_yes else current.no
            questions_asked += 1
        elif hasattr(current, "name"):
            if getattr(current, "question_node", None):
                current = current.question_node
            else:
                leaf = current
                noun_list = leaf.noun_list.copy()
                print(f" → Reached leaf: {leaf.name} ({len(noun_list)} nouns)")
                transcript.append(f" → Reached leaf: {leaf.name} ({len(noun_list)} nouns)")
                break
        else:
            break

    leaf_properties = leaf.properties if leaf is not None else {}
    leaf_name = leaf.name if leaf is not None else None

    # Phase 2a: boolean "is" properties, most informative first
    is_props = list(leaf_properties.get("is", []))
    while is_props and questions_asked < MAX_QUESTIONS - 1 and len(noun_list) > 1:
        best = max(is_props, key=lambda p: entropy_split(p, noun_list))
        if entropy_split(best, noun_list) == 0:
            # Even the best remaining property puts every candidate on the
            # same side, so no further "is" question can narrow the list.
            break
        is_props.remove(best)
        said_yes = _ask_yes_no(f"Does it have the property '{best}'?", questions_asked + 1, transcript)
        if said_yes:
            noun_list = [n for n in noun_list if n.get("is", {}).get(best, False)]
        else:
            noun_list = [n for n in noun_list if not n.get("is", {}).get(best, False)]
        print(f"     → {len(noun_list)} nouns left")
        transcript.append(f"     → {len(noun_list)} nouns left")
        questions_asked += 1

    # Phase 2b: numeric "has" properties, widest value range first
    has_props = list(leaf_properties.get("has", []))
    while has_props and questions_asked < MAX_QUESTIONS - 1 and len(noun_list) > 1:
        best = max(has_props, key=lambda p: _has_spread(p, noun_list))
        if _has_spread(best, noun_list) == 0:
            # All candidates share the same value for every remaining
            # property; a comparison question cannot separate them.
            break
        has_props.remove(best)
        sorted_n = sorted(noun_list, key=lambda x: x.get("has", {}).get(best, 0))
        mid_n = sorted_n[len(sorted_n)//2]
        mid_val = mid_n.get("has", {}).get(best, 0)
        values = [n.get("has", {}).get(best, 0) for n in noun_list]
        max_val = max(values) if values else 0
        scale = f"{mid_val}/{max_val}" if max_val > 0 else str(mid_val)
        said_yes = _ask_yes_no(
            f"Is your word more '{best}' than '{mid_n['name']}' ({scale})?",
            questions_asked + 1,
            transcript,
        )
        if said_yes:
            noun_list = [n for n in noun_list if n.get("has", {}).get(best, 0) > mid_val]
        else:
            noun_list = [n for n in noun_list if n.get("has", {}).get(best, 0) <= mid_val]
        print(f"     → {len(noun_list)} nouns left")
        transcript.append(f"     → {len(noun_list)} nouns left")
        questions_asked += 1

    # Phase 3: final guess (the answer is logged but does not affect the rating)
    guess = noun_list[0]["name"] if noun_list else random.choice(ALL_NOUNS)["name"]
    _ask_yes_no(f"Are you thinking of {guess}?", questions_asked + 1, transcript)
    questions_asked += 1

    # Rate the guess against the target
    target = target_noun["name"]
    guess_noun = next((n for n in ALL_NOUNS if n["name"] == guess), None)
    close_flag = _CLOSE_MATCH_FLAGS.get(leaf_name)

    if guess.lower() == target.lower():
        cat = "Exactly Correct"
    elif any(w in guess.lower() for w in target.lower().split()):
        # some word of the target occurs as a substring of the guess
        cat = "Nearly Correct"
    elif (guess_noun and
          close_flag and
          guess_noun.get(close_flag, False) and
          target_noun.get(close_flag, False)):
        cat = "Kinda/sorta close"
    else:
        cat = "Wrong"
    print(f"\nRESULT: {cat} | Questions: {questions_asked} | Guess: {guess}")
    print(f"{'='*60}\n")

    return {
        "target": target,
        "questions": questions_asked,
        "guess": guess,
        "category": cat,
        "transcript": "\n".join(transcript)
    }

# run 10 games example
import time
# Seed from the clock but print the value so a particular run can be repeated
seed = int(time.time())
random.seed(seed)
print(f"Random seed: {seed} (change by re-running)")

results = []
print("STARTING 10 EXAMPLE GAMES ...\n")
for i in range(10):
    target = random.choice(ALL_NOUNS)
    result = run_manual_game(target, i+1)
    results.append(result)

# Summary table, rendered as Markdown in the notebook
table = [
    "| Game | Target | Questions | Guess | Category |",
    "|------|--------|-----------|-------|----------|"
]
for i, r in enumerate(results):
    table.append(f"| {i+1} | **{r['target']}** | {r['questions']} | {r['guess']} | {r['category']} |")

display(Markdown("\n".join(table)))

# save output to file
# The transcripts contain the "→" character, which cannot be encoded by some
# platform default encodings (e.g. cp1252); write the log as UTF-8 explicitly.
with open("example_games_log.txt", "w", encoding="utf-8") as f:
    f.write("# 10 Example Games - Full Transcripts\n\n")
    for i, r in enumerate(results):
        f.write(f"## Game {i+1} | Target: {r['target']}\n")
        f.write(f"**Questions**: {r['questions']} | **Guess**: {r['guess']} | **Category**: {r['category']}\n\n")
        f.write("```\n" + r['transcript'] + "\n```\n\n")
print("\nFull log with ALL questions saved to 'example_games_log.txt'")

Loading noun data and tree...
Loaded 1816 nouns and tree root.
Random seed: 1762752692 (change by re-running)
STARTING 10 EXAMPLE GAMES ...


 GAME 1 | TARGET: PROTEIN SHAKE (kept secret)


KeyboardInterrupt: Interrupted by user