In [20]:
import json 
import pandas as pd
import numpy as np
import openai
import tiktoken
import os
import pprint


# Model identifiers for the OpenAI endpoints used by this notebook.
# COMPLETIONS_MODEL is not referenced by any of the visible cells.
COMPLETIONS_MODEL = "text-davinci-003"
EMBEDDING_MODEL = "text-embedding-ada-002"
CHAT_MODEL = "gpt-3.5-turbo"

# The key comes from the ``api_key`` environment variable; it is None when unset.
openai.api_key = os.getenv('api_key')

In [21]:
# Read both knowledge tables and key each one by its (title, heading) pair.
df_managers = pd.read_csv('managers.csv').set_index(['title','heading'])
df_state = pd.read_csv('state.csv').set_index(['title','heading'])




In [22]:
#embeddings part


def get_embedding(text: str, model: str=EMBEDDING_MODEL) -> list[float]:
    """Request an embedding for ``text`` from the OpenAI Embedding endpoint.

    Args:
        text: Value passed as ``input`` to ``openai.Embedding.create``.
        model: Embedding model name; defaults to ``EMBEDDING_MODEL``.

    Returns:
        The ``embedding`` field of the first entry in the response's ``data`` list.
    """
    response = openai.Embedding.create(model=model, input=text)
    first_item = response["data"][0]
    return first_item["embedding"]

def compute_doc_embeddings(df: pd.DataFrame) -> dict[tuple[str, str], list[float]]:
    """Embed the ``content`` of every row of ``df``.

    ``get_embedding`` is called once per row.

    Args:
        df: Frame with a ``content`` column.

    Returns:
        A dict mapping each row's index label to the embedding of its content.
    """
    embeddings = {}
    for row_label, row in df.iterrows():
        embeddings[row_label] = get_embedding(row.content)
    return embeddings
    
def load_embeddings(fname: str) -> dict[tuple[str, str], list[float]]:
    """Load precomputed embeddings from a CSV file.

    The file needs a header row with "title" and "heading" columns plus one
    column per vector component, named "0", "1", ..., "N".

    Args:
        fname: Path of the CSV file to read (decoded as UTF-8).

    Returns:
        A dict mapping ``(title, heading)`` to the row's vector, with the
        components ordered by their numeric column name.

    Raises:
        ValueError: If the file has no component columns, or a column name
            other than "title"/"heading" is not an integer.
        KeyError: If "title", "heading" or one of the columns "0".."N" is missing.
    """
    df = pd.read_csv(fname, header=0, encoding='utf-8')
    # "title" and "heading" stay ordinary columns on purpose: once they are
    # moved into the index, the rows yielded by iterrows() no longer contain
    # them and the key lookup below fails.
    component_names = [c for c in df.columns if c != "title" and c != "heading"]
    if not component_names:
        raise ValueError(f"{fname!r} contains no embedding component columns")
    max_dim = max(int(c) for c in component_names)
    ordered_columns = [str(i) for i in range(max_dim + 1)]
    return {
        (r["title"], r["heading"]): [r[c] for c in ordered_columns]
        for _, r in df.iterrows()
    }

def get_usage(text: str, model: str = EMBEDDING_MODEL)->int:
    """Report how many tokens an embedding request for ``text`` consumes.

    This issues a real request to the Embedding endpoint.

    Args:
        text: Value passed as ``input`` to ``openai.Embedding.create``.
        model: Embedding model name; defaults to ``EMBEDDING_MODEL``.

    Returns:
        The ``total_tokens`` value from the response's ``usage`` section.
    """
    result = openai.Embedding.create(
        model=model,
        # The keyword was previously misspelled as "imput", so ``text`` was
        # never passed as the request input.
        input=text,
    )
    return result["usage"]["total_tokens"]

In [23]:
# Embed every row of df_managers; compute_doc_embeddings calls get_embedding once per row.
managers_embeddings = compute_doc_embeddings(df_managers)

In [24]:
# Embed every row of df_state; compute_doc_embeddings calls get_embedding once per row.
state_embedding = compute_doc_embeddings(df_state)

In [25]:
# Preview the last entry of each embedding table: its key, the first five
# components of the vector, and the vector length.
for embedding_table in (managers_embeddings, state_embedding):
    example_entry = list(embedding_table.items())[-1]
    print(f"{example_entry[0]} : {example_entry[1][:5]}... ({len(example_entry[1])} entries)")

('Square Sam', 'unlock level = 45') : [-0.00020701090397778898, -0.008801721036434174, 0.00948029663413763, -0.02485036663711071, -0.016799692064523697]... (1536 entries)
('mine areas', 8) : [-0.01969183050096035, -0.01719668135046959, -0.004136134870350361, -0.026822851970791817, -0.02761676348745823]... (1536 entries)


In [26]:
#vector similarity and user prompt embedding 


def vector_similarity(x: list[float],y: list[float])->float:
    """Return the dot product of ``x`` and ``y``.

    Both arguments are converted to numpy arrays before the product is taken.
    The rest of the notebook uses this value as the similarity score between
    two embeddings.
    """
    left = np.array(x)
    right = np.array(y)
    return np.dot(left, right)

def order_document_selections_by_querry_similarity(
    query: str,
    contexts: dict[tuple[str, str], list[float]],
) -> list[tuple[float, tuple[str, str]]]:
    """Rank stored documents by similarity to ``query``.

    The query is embedded with ``get_embedding`` (one API request) and scored
    against every stored vector with ``vector_similarity``.

    Args:
        query: Text to compare against the stored documents.
        contexts: Mapping from document index to its embedding vector.

    Returns:
        ``(similarity, document_index)`` pairs, highest similarity first.
        Documents with equal similarity keep the iteration order of ``contexts``.
    """
    query_embedding = get_embedding(query)
    scored = [
        (vector_similarity(query_embedding, doc_embedding), doc_index)
        for doc_index, doc_embedding in contexts.items()
    ]
    # Sort on the score alone. Sorting whole tuples compares the document
    # indexes whenever two scores tie, which raises TypeError if index
    # components are of mixed types (for example str and int headings).
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return scored


In [27]:
def get_manager(prompt) -> list:
    """Return the best-matching manager for ``prompt``.

    Args:
        prompt: Text to match against ``managers_embeddings``.

    Returns:
        ``[title, score]`` where ``title`` is the first component of the
        top-ranked document index and ``score`` is its similarity times 100.

    Raises:
        IndexError: If there are no manager embeddings to rank.
    """
    # Rank once. Each ranking embeds the prompt through the API, and the
    # previous version ranked twice to read two fields of the same entry.
    ranked = order_document_selections_by_querry_similarity(prompt, managers_embeddings)
    if not ranked:
        raise IndexError("no manager embeddings available to rank")
    similarity, doc_index = ranked[0]
    return [doc_index[0], similarity * 100]

def get_second_manager(prompt) -> list:
    """Return the second-best matching manager for ``prompt``.

    Args:
        prompt: Text to match against ``managers_embeddings``.

    Returns:
        ``[title, score]`` where ``title`` is the first component of the
        second-ranked document index and ``score`` is its similarity times 100.

    Raises:
        IndexError: If fewer than two manager embeddings are available.
    """
    # Rank once. Each ranking embeds the prompt through the API, and the
    # previous version ranked twice to read two fields of the same entry.
    # The unreachable statements that followed the return were dropped.
    ranked = order_document_selections_by_querry_similarity(prompt, managers_embeddings)
    if len(ranked) < 2:
        raise IndexError("fewer than two manager embeddings available to rank")
    similarity, doc_index = ranked[1]
    return [doc_index[0], similarity * 100]

In [38]:
# Show the best and the second-best manager match for the same sample question.
for lookup in (get_manager, get_second_manager):
    print(lookup("Who is Timmy's Crush ?"))

['LillyPI', 82.35284145030951]
['Timmy', 81.91692214070527]


In [29]:
# Load the text of the file named "prompt" from the working directory.
with open("prompt", mode='r', encoding='utf-8') as prompt_file:
    prompt = prompt_file.read()

In [30]:

def update_messages(messages,role,content):
    """Append one chat message to ``messages`` and return the same list.

    Args:
        messages: List of message dicts; it is modified in place.
        role: Value stored under the "role" key.
        content: Value stored under the "content" key.

    Returns:
        The ``messages`` object that was passed in, now one entry longer.
    """
    entry = {"role": role, "content": content}
    messages.append(entry)
    return messages

def get_response_messages(messages):
    """Send ``messages`` to the chat model and return the reply text.

    Args:
        messages: Conversation passed as ``messages`` to
            ``openai.ChatCompletion.create`` with model ``CHAT_MODEL``.

    Returns:
        The ``content`` of the ``message`` in the first returned choice.
    """
    completion = openai.ChatCompletion.create(model=CHAT_MODEL, messages=messages)
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

# Conversation history; it starts with the loaded ``prompt`` as the system message.
messages = [{"role": "system", "content": prompt}]



In [42]:
def get_text_from_df(text,df):
    """Look up the ``content`` column for the row label ``text``.

    ``text`` is formatted into a string before the lookup, so non-string
    labels are matched by their string form.

    Args:
        text: Row label to look up.
        df: Frame with a ``content`` column.

    Returns:
        Whatever ``df.loc[label, "content"]`` yields for that label.
    """
    row_label = f'{text}'
    return df.loc[row_label, "content"]

In [None]:
# Interactive chat loop: read a question, look up the best-matching manager,
# add that manager's stored text to the conversation as context, then ask the
# chat model and print its reply. A blank line, end of input, or a kernel
# interrupt ends the session without a traceback.
while True:
    try:
        querry = input()
    except (EOFError, KeyboardInterrupt):
        break
    if not querry.strip():
        # Nothing to embed or answer; treat a blank line as "stop".
        break
    print(querry)
    guessed_manager = get_manager(querry)[0]
    get_info = get_text_from_df(guessed_manager,df_managers)
    if isinstance(get_info, pd.Series):
        # Formatting a Series directly sends its repr to the model, including
        # the index layout and a trailing Name/dtype line. Send plain
        # "label: text" lines instead.
        context = "\n".join(f"{label}: {text}" for label, text in get_info.items())
    else:
        context = f"{get_info}"
    messages = update_messages(messages=messages,role = "user",content = querry)
    messages = update_messages(messages=messages, role="system" , content=context)
    model_response = get_response_messages(messages)
    print(model_response)
    messages = update_messages(messages=messages,role = "assistant", content=model_response)
