# Twenty Questions with Phi-3

## Install packages to the submission folder

In [None]:
pip install -U -t /kaggle/working/submission/lib tqdm pydantic transformers -qq

## Llama.cpp setup (unused as of now)

In [None]:
# !CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install -t /kaggle/working/submission/lib llama-cpp-python \
#   --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121

In [None]:
# mkdir -p /kaggle/working/submission/lib/phi3/

In [None]:
# !curl -L "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf?download=true" > "/kaggle/working/submission/lib/phi3/model.gguf"

In [None]:
# import os, sys
# KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
# if os.path.exists(KAGGLE_AGENT_PATH):
#     sys.path.insert(0, os.path.join(KAGGLE_AGENT_PATH, 'lib'))
#     WEIGHTS_PATH = os.path.join(KAGGLE_AGENT_PATH, "lib/phi3/model.gguf")
# else:
#     sys.path.insert(0, "/kaggle/working/submission/lib")
#     WEIGHTS_PATH = "/kaggle/working/submission/lib/phi3/model.gguf"

In [None]:
%ls

## Write the submission file

In [None]:
%%writefile submission/main.py
from pydantic.dataclasses import dataclass
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from typing import Literal, List
import os, sys, json

# Pick the bundled `lib` folder: the one inside the Kaggle simulation sandbox
# when that path exists, otherwise the one in the notebook's working directory.
KAGGLE_AGENT_PATH = "/kaggle_simulations/agent/"
_lib_dir = (
    os.path.join(KAGGLE_AGENT_PATH, 'lib')
    if os.path.exists(KAGGLE_AGENT_PATH)
    else "/kaggle/working/submission/lib"
)

# Make `lib` the working directory so relative paths below resolve inside it.
os.chdir(_lib_dir)

print(f"Current Directory is {os.getcwd()}. \nFiles in here: {', '.join(os.listdir())}")

# Load the Phi-3 checkpoint and its tokenizer, caching downloaded files under
# ./huggingface (relative to the current working directory), then wrap both in
# a text-generation pipeline.
_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"
_CACHE_DIR = "./huggingface"

model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
    cache_dir=_CACHE_DIR,
)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, cache_dir=_CACHE_DIR)
hf_llm = pipeline("text-generation", model=model, tokenizer=tokenizer)

def ask(prompt: str, max_new_tokens=100) -> str:
    """Generate a completion for ``prompt`` with the module-level pipeline.

    Args:
        prompt: Fully formatted chat prompt, including the role tags.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated continuation only (``return_full_text=False``), i.e.
        without the prompt echoed back.
    """
    # Decoding is greedy (do_sample=False). No temperature is passed because
    # it only applies when sampling; supplying one alongside do_sample=False
    # had no effect on the output.
    outputs = hf_llm(
        text_inputs=prompt,
        return_full_text=False,
        do_sample=False,
        max_new_tokens=max_new_tokens,
    )
    return outputs[0]['generated_text']

# Smoke test at import time: fail immediately if the model returns an empty reply.
assert ask("<|user|>\nHello!<|end|>\n<|assistant|>")

@dataclass
class KaggleObservation:
  remainingOverageTime: int | float
  step: int

  questions: list[str]
  answers: list[str]
  guesses: list[str]

  role: Literal["guesser", "answerer"]
  turnType: Literal["ask", "guess", "answer"]

  keyword: str
  category: str

@dataclass
class KaggleConfig:
  episodeSteps: int
  actTimeout: int | float
  runTimeout: int | float
  agentTimeout: int | float
  __raw_path__: str

# llm = Llama(
#   model_path=WEIGHTS_PATH,  # path to GGUF file
#   n_ctx=2048,  # The max sequence length to use - note that longer sequence lengths require much more resources
#   n_threads=4, # The number of CPU threads to use, tailor to your system and the resulting performance
#   n_gpu_layers=35, # The number of layers to offload to GPU, if you have GPU acceleration available. Set to 0 if no GPU acceleration is available on your system.
#   use_mlock=True, # Whether to use mlock to lock the memory in RAM, preventing it from being swapped to disk. This is useful for large models that don't fit in RAM.
#   use_mmap=False, #
# )

def get_context_prompt(observation: KaggleObservation) -> str:
    """Render the question/answer history as alternating chat turns.

    Questions are emitted as ``<|user|>`` turns and answers as
    ``<|assistant|>`` turns, interleaved by index. The two lists may differ
    in length; the longer one's extra entries are still emitted.

    Args:
        observation: Current observation; only ``questions`` and ``answers``
            are read.

    Returns:
        The concatenated history, or an empty string when both lists are
        empty.
    """
    questions = observation.questions
    answers = observation.answers

    # Iterate up to the LONGER list's length. Calling max() on the lists
    # themselves would compare their contents lexicographically instead of
    # their sizes, which silently dropped trailing turns.
    turn_count = max(len(questions), len(answers))

    turns = []
    for index in range(turn_count):
        if index < len(questions):
            turns.append(f"<|user|>\n{questions[index]}<|end|>\n")
        if index < len(answers):
            turns.append(f"<|assistant|>\n{answers[index]}<|end|>\n")

    return "".join(turns)

def get_guesser_prompt(observation: KaggleObservation) -> str:
    """Build the chat prompt for a guesser turn.

    The prompt consists of a fixed introduction, the question/answer history
    from ``get_context_prompt``, an instruction that depends on
    ``observation.turnType``, and a trailing ``<|assistant|>`` tag for the
    model to continue from.

    Args:
        observation: Current observation; ``role``, ``turnType`` and
            ``guesses`` are read here.

    Returns:
        The complete prompt string.

    Raises:
        ValueError: If ``turnType`` is neither ``"ask"`` nor ``"guess"``.
    """
    prompt = f"<|user|>\nLet's play 20 Questions. You are playing the role of the {observation.role.title()}.<|end|>\n"
    prompt += get_context_prompt(observation)

    if observation.turnType == "ask":
        prompt += "<|user|>\nTake a break, and ask a short yes-or-no question that would be useful to determine what the city I'm thinking about. Previous questions have been listed above. KEEP YOUR QUESTION ONE SENTENCE ONLY! Do not add any explaination to why you chose the question.<|end|>\n"
    elif observation.turnType == "guess":
        prompt += "<|user|>\nNow, based on the information above, guess what city I'm thinking about"
        if observation.guesses:
            # Only mention earlier guesses when there are some; otherwise the
            # sentence would end in a dangling "which aren't these: .".
            prompt += f", which aren't these: {', '.join(observation.guesses)}. "
        else:
            prompt += ". "
        # The preceding sentence ends with ". " so the two sentences no
        # longer run together.
        prompt += "Now, Make an informed guess, and only provide one word!<|end|>\n"
    else:
        raise ValueError(f"Invalid turnType: {observation.turnType}\n\n{observation}")

    prompt += "<|assistant|>\n"

    return prompt

def get_answerer_prompt(observation: KaggleObservation) -> str:
    """Build the chat prompt for an answerer turn.

    The prompt tells the model which keyword it is answering about, restricts
    it to yes/no, then presents the most recent question and ends with an
    ``<|assistant|>`` tag for the model to continue from.

    Args:
        observation: Current observation; ``keyword`` and the last entry of
            ``questions`` are read.

    Returns:
        The complete prompt string.

    Raises:
        IndexError: If ``observation.questions`` is empty.
    """
    # NOTE(review): a multi-word keyword is re-joined with ", "
    # (e.g. "new york" -> "new, york") -- confirm this is intended.
    keyword_text = ', '.join(observation.keyword.split(' '))
    latest_question = observation.questions[-1]

    return (
        f"<|user|>\nYou are a highly experienced tour guide specialized in the city {keyword_text}.\n"
        "You must answer a question about this city accurately, but only using the word **yes** or **no**.<|end|>\n"
        # The role tag is followed by a newline, like every other turn
        # built in this file.
        f"<|user|>\n{latest_question}<|end|>\n"
        "<|assistant|>\n"
    )


def _parse_yes_no(answer: str) -> str:
    """Map a free-form model reply to the literal "yes" or "no"."""
    # Lower-case and split on anything that is not a letter, so "No.",
    # "**no**" and "NO" all yield the word "no", while words that merely
    # contain those letters ("know", "not", "north") are not mistaken for it.
    words = "".join(ch if ch.isalpha() else " " for ch in answer.lower()).split()
    for word in words:
        if word in ("yes", "no"):
            return word
    # Neither word present: keep the existing fallback of answering "yes".
    return "yes"


def play(obs, conf):
    """Agent entry point: produce this turn's question, guess, or answer.

    Args:
        obs: Observation dictionary; must match ``KaggleObservation``.
        conf: Configuration dictionary; must match ``KaggleConfig``.

    Returns:
        For the guesser, the first non-empty line of the model's reply (an
        empty string if the reply is blank). For the answerer, ``"yes"`` or
        ``"no"``.

    Raises:
        ValueError: If ``role`` is neither ``"guesser"`` nor ``"answerer"``.
    """
    print("Observation: " + json.dumps(obs, indent=2, ensure_ascii=False))
    print("Confing: " + json.dumps(conf, indent=2, ensure_ascii=False))
    observation = KaggleObservation(**obs)
    # Constructed only to check the configuration's shape; not used below.
    config = KaggleConfig(**conf)

    if observation.role == "guesser":
        prompt = get_guesser_prompt(observation)
        reply = ask(prompt, max_new_tokens=40)
        # Take the first line that has content, so a reply that starts with
        # a newline does not collapse to an empty result.
        non_empty_lines = [line.strip() for line in reply.splitlines() if line.strip()]
        result = non_empty_lines[0] if non_empty_lines else ""
    elif observation.role == "answerer":
        prompt = get_answerer_prompt(observation)
        answer = ask(prompt, max_new_tokens=20)
        result = _parse_yes_no(answer)
    else:
        raise ValueError(f"Invalid role: {observation.role}\n\n{observation}")

    print(f"Result: {result}")
    return result

## Quick sanity check

In [None]:
from submission.main import *

In [None]:
# Smoke test: a first guesser turn with empty history must return a non-empty string.
guesser_observation = {
    'remainingOverageTime': 300,
    'step': 0,
    'questions': [],
    'guesses': [],
    'answers': [],
    'role': 'guesser',
    'turnType': 'ask',
    'keyword': '',  # eg. bangkok
    'category': '',  # eg. city
}
simulation_config = {
    'episodeSteps': 61,
    'actTimeout': 60,
    'runTimeout': 9600,
    'agentTimeout': 3600,
    '__raw_path__': '/kaggle_simulations/agent/main.py',
}
assert play(guesser_observation, simulation_config)

In [None]:
# Smoke test: an answerer turn with one pending question must return a non-empty string.
answerer_observation = {
    'remainingOverageTime': 300,
    'step': 0,
    'questions': ["Is the city you're thinking of located in North America?"],
    'guesses': [],
    'answers': [],
    'role': 'answerer',
    'turnType': 'answer',
    'keyword': '',  # eg. bangkok
    'category': '',  # eg. city
}
simulation_config = {
    'episodeSteps': 61,
    'actTimeout': 60,
    'runTimeout': 9600,
    'agentTimeout': 3600,
    '__raw_path__': '/kaggle_simulations/agent/main.py',
}
assert play(answerer_observation, simulation_config)

## Archiving the directory into a tar.gz to submit

In [None]:
!apt install pigz pv > /dev/null

In [None]:
%cd /kaggle/working/

In [None]:
!tar --use-compress-program='pigz --fast --recursive | pv' -cf submission.tar.gz -C /kaggle/working/submission .

In [None]:
print("Success.")