In [1]:
%%bash
# Create the staging directory that will hold the agent's main.py.
mkdir -p /kaggle/working/submission
# Install the quantization / inference stack imported by submission/main.py.
pip install bitsandbytes accelerate transformers torch

Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl (137.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.3


In [2]:
%%writefile submission/main.py
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from collections import Counter
import re
import torch
from kaggle_secrets import UserSecretsClient
import os
import sys
import shutil
import bitsandbytes as bnb
import accelerate
from torch import bfloat16
import random
# 8-bit (LLM.int8) quantization settings handed to `from_pretrained` below.
# NOTE: BitsAndBytesConfig only defines `bnb_4bit_*` tuning options (quant type,
# double quantization, compute dtype) and those apply to 4-bit loading. The
# `bnb_8bit_*` spellings used here before are not recognised options and were
# ignored, so they are dropped to avoid implying they influence the model.
bnb_config = transformers.BitsAndBytesConfig(load_in_8bit=True)
# Location checked to decide whether the code runs as a packaged agent.
KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
# When that directory exists the weights are read from its "1" sub-folder;
# otherwise they are read from the notebook's attached input dataset.
model_id = (
    os.path.join(KAGGLE_AGENT_PATH, "1")
    if os.path.exists(KAGGLE_AGENT_PATH)
    else "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
)

    
    


    
    
# Load the tokenizer and the quantized causal LM from the resolved checkpoint path.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)
config = model.config

print("MODEL/TOKENIZER INITIALIZED")
# Id of the end-of-turn marker; generate_answer cuts generated text at this token.
id_eot = tokenizer.convert_tokens_to_ids("<|eot_id|>")


# System prompt used for the question-asking role (see Agent.questioner, 'ask' turns).
questioner_sys_prompt_thing = """You are the questioner agent with a sharp strategy for playing the game 20 questions. The goal is to ask questions to find a word thought of by the user, which falls into one of the following categories:
         1. Food or drink items such as: Food, drinks, things relating to food and drinks such as oven, spoon, tablecloth, dinner table, and so on.
         2. A living thing such as: A specific bug, mammal, fish, tree, plant, and many more.
         3. An object that is electric such as: electric razor, water heater, lamp, washer, type of vehicle, other industrial items
         4. Many other unique and specific items!
        Employ smart questions to strategically reduce the search space:
        - Ask questions related to use of the thing, purpose of the item, size of the item, color of the item, whether the thing is living, and so on.
        - Ask questions about broad categories to narrow the search.
        Remember, your aim is to use each question to progressively eliminate unlikely options, which the guesser agent will use to guess the keyword. 
        """
# System prompt used for the keyword-guessing role (see Agent.questioner, 'guess' turns).
guesser_sys_prompt = """You are an professional assistant with a sharp strategy for playing the game 20 questions and a large vocabulary. Your goal is to guess the word thought of by the user, which falls into one of the following categories:
         1. Food or drink items such as: Food, drinks, things relating to food, beverages, or cooking such as oven, spoon, tablecloth, dinner table, and so on.
         2. A living thing such as: A specific bug, mammal, fish, tree, plant, and many more
         3. An object that is electric such as: electric razor, water heater, lamp, washer, type of vehicle
         4. An object used for a specific purpose, such as a magnet, silicone, lighter, trimming scissors, and other odd items.
        To play the game and win, you should use the history of the game, the questions and answers, to guess the keyword.
        Remember, your aim is to give a unique guess after understanding the history of the game, leading to a precise guess of the keyword.
        Some of the keyword are really specific and peculiar objects so be creative and really use the history to help you."""



# System prompt written for the answering role.
# NOTE(review): nothing in the visible code references this constant; Agent.answerer
# builds its own prompt inline. Confirm whether it is still needed.
answerer_sys_prompt = """You are an expert AI gamer with a sharp strategy for playing the game 20 questions, you will be the answerer. Your goal is to answer questions thought of by the user, about a keyword which falls into one of the following categories:
        1. A specific thing (which will be food or drink items, things that can be bought at the store, items used around the house, or other specific things)
"""

# Worked example transcript (question / answer / guess per round) that
# Agent.questioner embeds in its prompts as a few-shot demonstration.
few_shot_examples_thing = """GAME 1: Keyword is Ironing Board in category Thing:

Question 1: Does the keyword belong to the broad category of living things?
Answer 1: No
Guess 1: Magnet
Question 2: Does the keyword belong to the broad category of Food or Drink items?
Answer 2: No
Guess 2: Electric Razor
Question 3: Is the keyword used for a specific purpose?
Answer 3: Yes
Guess 3: Garbage Can
Question 4: Is the keyword related to Entertainment or Sports?
Answer 4: No
Guess 4: Scissors
Question 5: Is the keyword something related to cleaning at all?
Answer 5: Yes
Guess 5: Garden Hose
Question 6: Is the keyword used to clean your body?
Answer 6: No
Guess 6: Bleach
Question 7: Is the keyword used to clean clothes?
Answer 7: Yes
Guess 7: Laundry Detergent
Question 8: Is the keyword used to clean stains out of clothes?
Answer 8: No
Guess 8: Ironing Board

Correct!!
"""



def generate_answer(template, mode, max_new_tokens=60):
    """Sample a completion for ``template`` and return only the newly generated text.

    Args:
        template: Fully formatted prompt string handed to the tokenizer.
        mode: ``'ask'``, ``'guess'`` or ``'answer'``. Each known mode picks its
            own sampling temperature and token budget. Any other value falls
            back to temperature 0.1 and the ``max_new_tokens`` argument.
        max_new_tokens: Token budget used only when ``mode`` is not one of the
            three known modes (those override it with 50 / 20 / 50).

    Returns:
        The decoded continuation, cut at the first ``<|eot_id|>`` token if the
        model produced one.
    """
    guess_temps = [0.1, 0.1, 1.2, 0.7, 0.9]
    answer_temps = [0.01, 0.1, 0.2, 0.4, 0.5]

    temperature = 0.1
    if mode == 'ask':
        temperature = 0.9
        max_new_tokens = 50
    elif mode == 'guess':
        temperature = random.choice(guess_temps)
        print("GUESSER SELECTED TEMP")
        print(temperature)
        max_new_tokens = 20
    elif mode == 'answer':
        temperature = random.choice(answer_temps)
        print("ANSWER SELECTED TEMP")
        print(temperature)
        max_new_tokens = 50

    # Prompts that already spell out <|begin_of_text|> must not get a second
    # BOS token prepended by the tokenizer.
    add_bos = not template.startswith("<|begin_of_text|>")
    # Follow the model's own device instead of hard-coding "cuda": the model
    # is loaded with device_map="auto".
    inp_ids = tokenizer(
        template, return_tensors="pt", add_special_tokens=add_bos
    ).to(model.device)

    generated = model.generate(
        **inp_ids,
        do_sample=True,
        max_new_tokens=max_new_tokens,
        num_beams=3,
        temperature=temperature
    )[0]  # single prompt in, single sequence out

    # Drop the echoed prompt tokens and anything from the end-of-turn marker on.
    prompt_len = inp_ids.input_ids.shape[1]
    new_ids = generated[prompt_len:].tolist()
    if id_eot in new_ids:
        new_ids = new_ids[:new_ids.index(id_eot)]
    out = tokenizer.decode(new_ids)

    print("RESPONSE HAS BEEN GENERATED")
    print(mode)
    print(out)
    return out




class Agent:
    """LLM-backed 20 Questions player covering the ask, guess and answer roles.

    Attributes:
        sys_prompt: System prompt most recently used; replaced by the
            role-specific prompt on every ask/guess turn.
        few_shot_examples: Example transcript embedded in ask/guess prompts.
            When constructed with ``None`` the module-level
            ``few_shot_examples_thing`` is used.
    """

    # Longest question (in characters) forwarded to the answerer prompt.
    MAX_QUESTION_CHARS = 500

    def __init__(self, few_shot_examples, sys_prompt: str = None):
        self.sys_prompt = sys_prompt
        # Keep the caller's examples (they used to be discarded); None means
        # "use the default transcript", resolved lazily in questioner().
        self.few_shot_examples = few_shot_examples

    def get_agent(self, mode, obs):
        """Produce this turn's output for ``mode`` ('ask', 'guess' or 'answer').

        Returns:
            The question or guess text, or ``"yes"``/``"no"`` for answers.

        Raises:
            ValueError: if ``mode`` is not one of the three supported values.
        """
        if mode in ('ask', 'guess'):
            return self.questioner(obs)
        if mode == 'answer':
            verdict = self.answerer(obs).lower()
            # Normalise defensively; anything unrecognisable counts as "yes".
            if "yes" in verdict:
                return "yes"
            if "no" in verdict:
                return "no"
            return "yes"
        raise ValueError(f"unknown agent mode: {mode!r}")

    def answerer(self, obs):
        """Answer the latest question about ``obs.keyword`` by majority vote.

        The model is sampled three times; each sample counts as ``'yes'`` if it
        contains "yes" (case-insensitive) and ``'no'`` otherwise.
        """
        # Cap the question length, and use the capped text everywhere in the
        # prompt (the untruncated question used to be embedded as well).
        previous_question = obs.questions[-1][:self.MAX_QUESTION_CHARS]

        prompt = f"""\
            The keyword for this game is "{obs.keyword}" in the category [{obs.category}]
            You are currently answering a question about the word above.

            The next question is "{previous_question}".
            
            Your task is to answer the above yes/no question and place your answer in the following format surrounded by double asterisks:
            
            Answer: **yes**/**no**

            - Your response should be accurate given the keyword above
            - Always respond with ONLY **yes** or **no**
            
            Now please tell me is the answer **yes** or **no** to the following question about {obs.keyword}: {previous_question}
        """
        chat_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{prompt}<|eot_id|>"""
        chat_template += "<|start_header_id|>assistant<|end_header_id|>\n\n"

        votes = []
        for _ in range(3):
            sample = generate_answer(chat_template, mode='answer')
            votes.append('yes' if 'yes' in sample.lower() else 'no')
        return most_common_answer(votes)

    def questioner(self, obs):
        """Generate the next question or guess, depending on ``obs.turnType``.

        Raises:
            ValueError: if ``obs.turnType`` is neither ``'ask'`` nor ``'guess'``.
        """
        # Resolve the default transcript up front so guess turns never embed
        # the literal string "None" in the prompt.
        if self.few_shot_examples is None:
            self.few_shot_examples = few_shot_examples_thing

        if obs.turnType == 'ask':
            self.sys_prompt = questioner_sys_prompt_thing
            return generate_answer(self._build_ask_prompt(obs), mode='ask')

        if obs.turnType == "guess":
            self.sys_prompt = guesser_sys_prompt
            chat_template = self._build_guess_prompt(obs)
            # Sample five guesses and keep the most frequent one.
            candidates = [
                parse_after_colon(generate_answer(chat_template, mode='guess'))
                for _ in range(5)
            ]
            return most_common_answer(candidates)

        raise ValueError(f"unsupported turn type for questioner: {obs.turnType!r}")

    def _build_ask_prompt(self, obs):
        """Format the system prompt plus the Q/A history as a Llama 3 chat."""
        ask_prompt = f'Here is an example of how this game might work. \n{self.few_shot_examples} \n You will now ask questions relating to the keyword, your questions must be able to be responded to by yes or no. To help you, always try to bisect the search space with your questions. Please only say the question as verbosely and shortly as you can. ONLY STATE YOUR QUESTION WITHOUT EXPLANATION. Now ask a question.'
        # Wrap the instructions in a system turn and open an assistant turn,
        # matching the layout the guess/answer prompts already use.
        chat_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{self.sys_prompt}\n{ask_prompt}<|eot_id|>"""
        chat_template += "<|start_header_id|>assistant<|end_header_id|>\n\n"
        # Earlier questions are assistant turns, their answers user turns.
        for q, a in zip(obs.questions, obs.answers):
            chat_template += f"{q}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
            chat_template += f"{a}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        return chat_template

    def _build_guess_prompt(self, obs):
        """Format the full game history and guessing instructions as a Llama 3 chat."""
        conv = self.sys_prompt + "You will now guess the keyword. Here are the previous questions and answers:\n <game_history>"
        # Iterate over question/answer pairs only: on a guess turn there is one
        # fewer guess than answers, and zipping all three lists together would
        # silently drop the most recent question and answer.
        for i, (q, a) in enumerate(zip(obs.questions, obs.answers), start=1):
            conv += f"Question {i}: {q}\nAnswer {i}: {a}\n"
            if i <= len(obs.guesses):
                conv += f"Guess {i}: {obs.guesses[i - 1]}\n"
        conv += '</game_history>'
        guess_prompt = f"""Here is a general understanding of the what game we are playing, and a description of the game history. \n{conv}
            based on the history of the game, guess the keyword. just say your guess and nothing else, short and verbose. Remember to use the previous questions/answers. 
            Here is an example of how the game might work: \n{self.few_shot_examples}\n
            
                - Now guess the keyword, just say your guess without explanation.
                - Make sure your guess takes into account the history of the game, and gives a creative guess. 
                - Here are the previous guesses, DO NOT repeat guesses: {obs.guesses}
                - IMPORTANT!!! DO NOT REPEAT GUESSES, BE SMART
                
                Place your guess in the following format:
                Guess: example_guess
                
                
            Now what is your guess? Be creative!
            """
        chat_template = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{guess_prompt}<|eot_id|>"""
        chat_template += "<|start_header_id|>assistant<|end_header_id|>\n\n"
        return chat_template
                
        

# Module-level Agent instance that agent_gameplay delegates LLM-backed turns to.
agent = Agent(few_shot_examples=None)

def parse_response(input_string):
    """Return the text inside the first ``**...**`` pair, or ``"yes"`` when there is none."""
    first_bold = re.search(r'\*\*(.*?)\*\*', input_string, re.DOTALL)
    return "yes" if first_bold is None else first_bold.group(1)
def parse_after_colon(s):
    """Return the stripped remainder of the line after the first colon in ``s``.

    Whitespace (including newlines) directly after the colon is skipped first.
    Falls back to the fixed guess ``"Cypress Knees"`` when ``s`` has no colon.
    """
    found = re.search(r':\s*(.*)', s)
    if found is None:
        return "Cypress Knees"
    return found[1].strip()
def most_common_answer(answers):
    """Return the value that occurs most often in ``answers``.

    Raises IndexError when ``answers`` is empty.
    """
    winner, _occurrences = Counter(answers).most_common(1)[0]
    return winner
def func1(keyword, question):
    """Answer the fixed-format "precede ... in alphabetical order" question.

    Returns True/False from a case-insensitive string comparison, or None when
    the keyword is not purely letters/whitespace or the question does not match
    the expected wording.
    """
    if re.match(r"^[a-zA-Z\s]+$", keyword) is None:
        return None
    parsed = re.match(
        r'^Does the keyword \(in lowercase\) precede "([a-zA-Z\s]+)" in alphabetical order\?$',
        question,
    )
    if parsed is None:
        return None
    return keyword.lower() < parsed.group(1).lower()


def func2(keyword, question):
    """Return ``'Yes'`` for the exact question "Is it Agent Alpha?", otherwise None."""
    return 'Yes' if question == 'Is it Agent Alpha?' else None
    
def func3(keyword, question):
    """Answer the fixed-format "does the keyword start with letter(s) ..." question.

    Args:
        keyword: The secret word; must consist of letters and whitespace only.
        question: The question text to match against the two supported wordings
            (a quoted list of letters, or a single quoted letter).

    Returns:
        True/False for whether the keyword's first non-space character is one
        of the quoted letters (case-insensitive), or None when the keyword is
        blank or not purely letters/whitespace, or the question matches
        neither wording.
    """
    keyword_pattern = r"^[a-zA-Z\s]+$"
    many_letters = r"^Does the keyword start with one of the letters \'([a-zA-Z]\'(?:, \'[a-zA-Z]\')*)(?: or \'[a-zA-Z]\')?\?$"
    one_letter = r"^Does the keyword start with the letter \'([a-zA-Z])\'\?$"

    if not re.match(keyword_pattern, keyword):
        return None
    stripped = keyword.strip()
    if not stripped:
        # Whitespace-only keywords pass the pattern but have no first letter.
        return None

    if re.match(many_letters, question):
        # Accept both cases here: the question pattern allows lowercase
        # letters, and extracting only [A-Z] made such questions always False.
        letters = re.findall(r"'([a-zA-Z])'", question)
    else:
        single = re.match(one_letter, question)
        if single is None:
            return None
        letters = [single.group(1)]

    return stripped[0].lower() in {letter.lower() for letter in letters}


def func4(keyword, question):
    """Answer the fixed-format "Is the keyword one of the following? a, b, c?" question.

    Returns True/False for a case-insensitive membership check against the
    comma-separated options, or None when the keyword is not purely
    letters/whitespace or the question does not match the expected wording.
    """
    if re.match(r"^[a-zA-Z\s]+$", keyword) is None:
        return None
    listed = re.match(
        r"^Is the keyword one of the following\? ([a-zA-Z\s,]+)\?$", question
    )
    if listed is None:
        return None
    candidates = {part.strip().lower() for part in listed.group(1).split(",")}
    return keyword.strip().lower() in candidates

def func5(keyword, question):
    """Answer the fixed-format "does the name of the keyword include the letter 'x'" question.

    Returns True/False from a case-insensitive containment check, or None when
    the keyword is not purely letters/whitespace or the question does not match
    the expected wording.
    """
    if re.match(r"^[a-zA-Z\s]+$", keyword) is None:
        return None
    asked = re.match(
        r"^Considering every letter in the name of the keyword, does the name of the keyword include the letter \'([A-Za-z])\'\?$",
        question,
    )
    if asked is None:
        return None
    return asked.group(1).lower() in keyword.lower()


def func(keyword, question):
    """Try each rule-based solver in order and return the first non-None result.

    Returns None when no solver recognises the question.
    """
    solvers = (func1, func2, func3, func4, func5)
    # The generator is lazy, so later solvers are not called once one answers.
    outcomes = (solver(keyword, question) for solver in solvers)
    return next((outcome for outcome in outcomes if outcome is not None), None)

def extract_text_after_colon(sentence):
    """Return the rest of the line after the first colon (leading whitespace skipped).

    The input is returned unchanged when it contains no colon.
    """
    found = re.search(r':\s*(.*)', sentence)
    return sentence if found is None else found[1]
    

def agent_gameplay(obs, cfg):
    """Entry point called once per turn; returns the agent's text for that turn.

    The first two questions are scripted. Later questions, all guesses, and any
    answer the rule-based solvers cannot decide are delegated to the
    module-level ``agent``. A missing or one-character response is replaced by
    ``'yes'``.
    """
    turn = obs.turnType
    reply = None

    if turn == "ask":
        asked_so_far = len(obs.questions)
        if asked_so_far < 1:
            reply = "Is the keyword a living thing?"
        elif asked_so_far == 1:
            # The second question branches on the answer to the first one.
            if obs.answers[0].lower() == 'yes':
                reply = "Is the keyword a mammal?"
            else:
                reply = "Is the keyword related to Entertainment or Sports?"
            print(reply)
        else:
            reply = agent.get_agent(mode='ask', obs=obs)
    elif turn == "guess":
        raw_guess = agent.get_agent(mode="guess", obs=obs)
        reply = extract_text_after_colon(raw_guess)
    elif turn == "answer":
        # Prefer a deterministic rule-based answer; use the LLM only when no rule applies.
        verdict = func(obs.keyword, obs.questions[-1])
        if verdict is None:
            reply = agent.get_agent(mode='answer', obs=obs)
        else:
            reply = "yes" if verdict else "no"

    if reply is None or len(reply) <= 1:
        return 'yes'
    return reply

Writing submission/main.py


In [3]:
# Install pigz and pv, both used by the tar command in the next cell; stdout is discarded.
!apt install pigz pv > /dev/null





In [4]:
# Pack the contents of the model directory and of the submission directory into
# submission.tar.gz, compressing with pigz and piping the stream through pv.
!tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/input/llama-3/transformers/8b-chat-hf . -C /kaggle/working/submission .




In [5]:
#def agent_dummy(obs, cfg):
#    if obs.turnType == "ask":
#        response = "Is the keyword a Country in Europe?"
#    elif obs.turnType == "guess":
#        response = "duck"
#    elif obs.turnType == "answer":
#        response = "yes"
#    return response

In [6]:
#debug_config = {'episodeSteps': 28,     # initial step plus 3 steps per round (ask/answer/guess)
#                'actTimeout': 60,       # agent time per round in seconds; default is 60
#               'runTimeout': 1200,      # max time for the episode in seconds; default is 1200                
#'agentTimeout': 3600}  # obsolete field; default is 360

In [7]:
#import kaggle_environments

In [8]:
#keyword = "Sieve"
#alts = []
#kaggle_environments.envs.llm_20_questions.llm_20_questions.category = 'thing'
#kaggle_environments.envs.llm_20_questions.llm_20_questions.keyword_obj = {'keyword':keyword,'alts':alts}
#kaggle_environments.envs.llm_20_questions.llm_20_questions.keyword = keyword
#kaggle_environments.envs.llm_20_questions.llm_20_questions.alts = alts

In [9]:
#from kaggle_environments import make
#path = '/kaggle/working/submission/main.py'
#env = make("llm_20_questions",configuration=debug_config, debug=True)
#print('env created')
#game_output = env.run(agents=[agent_gameplay, agent_gameplay, agent_dummy, agent_dummy])
#env.render(mode="ipython", width=600, height=500)