In [None]:
import importlib

if not importlib.util.find_spec("ace"):
    !pip install -qqq git+https://github.com/xtreamsrl/ace-of-splades

In [None]:
# Root folder (relative to this notebook) for the database and evaluation files.
data_path = "../data"

In [None]:
import os
from getpass import getpass

# Ask for the key only when the environment does not already provide one, so the
# secret is never typed into (or saved with) the notebook itself.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
from ace_of_splades.data import get_movies_dataset

# Load the movies dataset that is later written to the vector database.
# NOTE(review): `local=True` presumably reads a local copy instead of downloading — confirm.
movies = get_movies_dataset(local=True)

In [None]:
movies

In [None]:
from sentence_transformers import SentenceTransformer

# Embedding model; `get_records` uses it to turn the text query into a vector.
encoder = SentenceTransformer("all-MiniLM-L6-v2")

In [None]:
import lancedb

# Open a LanceDB connection at a location under the data folder.
uri = f"{data_path}/movies_embeddings"
db = lancedb.connect(uri)

In [None]:
# Create the "movies" table from the loaded dataset; this is the table searched by `get_records`.
# NOTE(review): mode="overwrite" presumably replaces a table left over from a previous run — confirm.
# NOTE(review): the vector search assumes `movies` already carries an embedding column — confirm.
movies_table = db.create_table("movies", movies, mode="overwrite")

In [None]:
def get_records(
        query, *, encoder=encoder, db_table=movies_table, max_results=10
):
    """Return the movie records closest to ``query`` in embedding space.

    Args:
        query: Free-text search string.
        encoder: Object exposing ``encode(text)``; used to embed the query.
        db_table: Table exposing ``search(vector)``; the lookup runs against it.
        max_results: Maximum number of records to return.

    Returns:
        The matching records as a list, restricted to the selected movie
        columns plus ``_distance``.
    """
    query_vector = encoder.encode(query).tolist()
    columns = [
        "release_year",
        "title",
        "origin",
        "director",
        "cast",
        "genre",
        "plot",
        "_distance",
    ]
    # Honour `max_results`: the limit used to be hard-coded to 10, so the
    # argument had no effect on how many records came back.
    return (
        db_table.search(query_vector)
        .limit(max_results)
        .select(columns)
        .to_list()
    )


# Sample question reused further down to exercise the whole pipeline.
question = "What should I see tonight? I love Sci-Fi movies but I have seen most of the classics, such as Star Wars."

# Retrieve candidate movies for the sample question and display them.
docs = get_records(question, max_results=5)
results = list(docs)
results

In [None]:
# Default system message for `ask`: gives the assistant its record-store "geek" persona.
GEEK_SYSTEM = """
  You are a DVD record store assistant and your goal is to recommed the user with a good movie to watch.

  You are a movie expert and a real geek: you love sci-fi movies and tend to get excited when you talk about them.
  Nevertheless, no matter what, you always want to make your customers happy.
"""

In [None]:
# User-message template: `ask` fills in {context} (the rendered movies) and {question}.
prompt_template = """
  Here are some suggested movies (ranked by relevance) to help you with your choice.
  {context}

  Use these suggestions to answer this question:
  {question}
"""

# Per-movie template: rendered once for each retrieved record when building the context.
context_template = """
Title: {title}
Release date: {release_year}
Director: {director}
Cast: {cast}
Genre: {genre}
Overview: {plot}
"""


def format_records_into_context(records, *, template):
    """Render movie records into one context string.

    Args:
        records: Iterable of dict-like records holding the fields that
            ``template`` refers to (e.g. ``title``, ``plot``).
        template: ``str.format``-style template applied to every record.

    Returns:
        The rendered records concatenated in input order; an empty string
        when ``records`` is empty.

    Raises:
        KeyError: If ``template`` references a field a record does not have.
    """
    # Use the arguments rather than the notebook globals `context_template`
    # and `results`, which made every call render the same stale records.
    return "".join(template.format(**record) for record in records)

In [None]:
import openai

# Shared OpenAI client; `ask` sends its chat-completion requests through it.
# NOTE(review): presumably picks up the key from the OPENAI_API_KEY variable set earlier — confirm.
client = openai.OpenAI()


def ask(
        question,
        *,
        max_results=10,
        system=GEEK_SYSTEM,
        prompt_template=prompt_template,
        context_template=context_template,
        db_table=movies_table,
        verbose=False
):
    """Answer ``question`` with the retrieve-then-generate pipeline.

    Args:
        question: The user's question; also used as the retrieval query.
        max_results: Number of records requested from ``get_records``.
        system: System message sent to the chat model.
        prompt_template: Template with ``{question}`` and ``{context}`` slots
            that becomes the user message.
        context_template: Template handed to ``format_records_into_context``.
        db_table: Table the records are retrieved from.
        verbose: When true, print the reply text and the context.

    Returns:
        The object returned by ``client.chat.completions.create``; the reply
        text is at ``.choices[0].message.content``.
    """
    # Forward the caller's table: the global `movies_table` used to be passed
    # here, so the `db_table` argument was silently ignored.
    records = get_records(
        query=question, max_results=max_results, db_table=db_table
    )
    context = format_records_into_context(records, template=context_template)

    prompt = prompt_template.format(question=question, context=context)

    chat_completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ],
    )

    if verbose:
        print(chat_completion.choices[0].message.content)
        print(context)

    return chat_completion


# Try the pipeline on the sample question; verbose=True prints the reply and the context.
answer = ask(question=question, verbose=True)

# Let's Run Our First Evaluation Questions

In [None]:
# TODO: Better define the datasets for the domain expert

Now that we have generated some questions to bootstrap our evaluation process, we must make our system answer them.

In [None]:
import polars as pl

In [None]:
# Load the evaluation questions; the `question` column is what gets sent to `ask`.
eval_dataset = pl.read_csv("../data/eval_questions.csv")
eval_dataset

In [None]:
# Send every evaluation question through `ask` and keep only the reply text
# in a `rag_answer` column.
eval_dataset = eval_dataset.with_columns(
    rag_answer=pl.col("question").map_elements(
        lambda q: ask(q).choices[0].message.content,
        return_dtype=pl.String,
    )
)

In [None]:
# Raise polars' string-length and table-width display limits to PL_STR_LEN
# characters so the long answers are readable in the table output.
PL_STR_LEN = 1500

pl.Config.set_fmt_str_lengths(PL_STR_LEN)
pl.Config.set_tbl_width_chars(PL_STR_LEN)

eval_dataset

Cool! Our movie expert replies to all the questions in our dataset; our work here is done! But... let's see what our **movie expert** would say about it.

He's not accustomed to `polars`, so let's prepare a more convenient Excel for him!

In [None]:
# Export the answered questions to an Excel workbook for the domain expert.
# TODO: improve the layout of this Excel export later.
eval_dataset.write_excel(workbook="../data/eval_questions_answered.xlsx",
                         column_totals=True)

Let's see how the domain expert judged the answers.

In [None]:
# Load the workbook with the domain expert's review; it has a `Judgement` column.
domain_expert_critiques = pl.read_excel(
    source="../data/eval_questions_with_critiques.xlsx",
    sheet_name="Sheet1",
)
# NOTE(review): the Categorical cast below is only displayed, not assigned back, so
# `domain_expert_critiques` keeps the original `Judgement` dtype — confirm this is intended.
domain_expert_critiques.with_columns(pl.col('Judgement').cast(pl.Categorical))

In [None]:
# Count the rows per `Judgement` value (sorted by count) and unnest the
# resulting struct into `Judgement` / `Count` columns.
domain_expert_critiques.select(
    pl.col('Judgement').value_counts(sort=True, name="Count")
).unnest("Judgement")

There are a lot of failures (80%), so let's eyeball the critiques from the domain expert and cluster them into groups. Possible groups could be:
- missing context: frequently, domain experts said that the model replied saying that it doesn't have the movie in the list
- The system always gives the same suggestion ("Welcome to the Space Show")
- It doesn't manage corner cases, such as unrelated or toxic questions

## The "Easiest First" Rule
Don't panic! You don't need to rebuild all your RAG. Let's keep things simple before discussing complex considerations about restructuring your RAG architecture.

The easiest things to look at are prompts and system messages:


In [None]:
print(GEEK_SYSTEM)

In [None]:
print(prompt_template)

Let's quickly iterate through our system message to check if we could have a better response.

In [None]:
# Revised system message: drops the "geek" persona and adds explicit rules about
# scope, out-of-scope questions, and movies missing from the context.
SYSTEM_MESSAGE = """ You are a movie expert and your goal is to recommed the user with a good movie to watch.

RULES: 
- You should reply to questions about: movies plots or synopsys, movies metadata (release date, cast, or director), provide plots summary;
- For every questions outside the scope please reply politely that you're not able to provide a response and desbribe briefly your scope;
- Don't mention that you have a list of films as a context. This should be transparent to the user
- If you don't have the movie in your context reply that you don't know how to reply"""

In [None]:
# Re-run every evaluation question with the revised system message; the new
# replies are stored under the same `rag_answer` column name.
eval_dataset = eval_dataset.with_columns(
    rag_answer=pl.col("question").map_elements(
        lambda q: ask(q, system=SYSTEM_MESSAGE).choices[0].message.content,
        return_dtype=pl.String,
    )
)

In [None]:
eval_dataset