In [1]:
import os

from llama_index.core import QueryBundle
from llama_index.llms.openai import OpenAI as OpenAILlama
from llama_index.postprocessor.rankgpt_rerank import RankGPTRerank

In [2]:
import nest_asyncio

# Patch asyncio so an event loop can be re-entered while another is running.
# NOTE(review): presumably required because the notebook kernel already runs
# a loop and the LLM calls below use asyncio internally — confirm.
nest_asyncio.apply()

# Reranking with OpenAI GPT


In [3]:
# Read the API key from the environment; this is None when the variable is unset.
openai_key = os.getenv("OPENAI_API_KEY")

# LLM handed to the reranker; temperature 0.0 is requested explicitly.
gpt_llm = OpenAILlama(model="gpt-3.5-turbo-16k", temperature=0.0, api_key=openai_key)

# RankGPT reranker backed by the OpenAI model above.
reranker = RankGPTRerank(llm=gpt_llm, top_n=5, verbose=True)

In [4]:
# Query string the candidate passages are reranked against.
query = "Textual text"
# Wrapped form of the query; this is what gets passed to postprocess_nodes.
query_bundle = QueryBundle(query)

In [5]:
from llama_index.core.schema import Document, NodeWithScore
from llama_index.core.node_parser import SimpleNodeParser


def nodes_from_documents(documents):
    """Convert raw search-result dicts into scored nodes for a reranker.

    Args:
        documents: Iterable of mappings, each providing the keys ``"title"``,
            ``"url"``, ``"text"`` and ``"score"``.

    Returns:
        A list of ``NodeWithScore``. Each node's score is read back from the
        ``"score"`` entry of its own metadata.

    Raises:
        KeyError: If a document is missing one of the required keys.
    """
    transformed_documents = [
        # Build each Document through its constructor instead of creating an
        # empty one and assigning attributes afterwards.
        Document(
            metadata={
                "title": doc["title"],
                "link": doc["url"],
                "score": doc["score"],
            },
            text=doc["text"],
        )
        for doc in documents
    ]
    # The score travels inside the metadata; this relies on the parser carrying
    # each document's metadata over to the nodes it produces.
    nodes = SimpleNodeParser().get_nodes_from_documents(transformed_documents)
    return [NodeWithScore(node=node, score=node.metadata["score"]) for node in nodes]

In [6]:
# Sample retrieval results: one (title, url, text, score) row per document,
# expanded into the dict shape that nodes_from_documents reads.
documents = [
    {"title": title, "url": url, "text": text, "score": score}
    for title, url, text, score in (
        ("Title 1", "https://example.com/1", "Text 1...", 0.9),
        ("Title 2", "https://example.com/2", "Text 2... text", 0.8),
        ("Title 3", "https://example.com/3", "Text 3... Textual text", 0.7),
    )
]

In [7]:
# Turn the sample documents into scored nodes.
retrieved_nodes = nodes_from_documents(documents)

# Rerank the nodes against the query using the currently bound `reranker`.
reranked_nodes = reranker.postprocess_nodes(retrieved_nodes, query_bundle)

After Reranking, new rank list for nodes: [2, 0, 1]

In [8]:
import pandas as pd
from IPython.display import display, HTML


def pretty_print(df):
    """Display ``df`` as an HTML table in the notebook.

    Every literal backslash-n sequence in the generated markup is replaced
    with a ``<br>`` tag before rendering. Returns whatever ``display`` returns.
    """
    table_markup = df.to_html()
    with_line_breaks = table_markup.replace("\\n", "<br>")
    return display(HTML(with_line_breaks))


def visualize_retrieved_nodes(nodes) -> None:
    """Show each node's score and text as a two-column table.

    Args:
        nodes: Iterable of objects exposing ``.score`` and ``.node.get_text()``.
    """
    rows = [
        {"Score": scored.score, "Text": scored.node.get_text()}
        for scored in nodes
    ]
    pretty_print(pd.DataFrame(rows))


# Show the reranked order. In the recorded output the Score column still holds
# the input scores (0.7, 0.9, 0.8); only the row order changed.
visualize_retrieved_nodes(reranked_nodes)

Unnamed: 0,Score,Text
0,0.7,Text 3... Textual text
1,0.9,Text 1...
2,0.8,Text 2... text


# Reranking with Mistral (via Ollama)


In [9]:
from llama_index.llms.ollama import Ollama

# Prerequisites: download and install Ollama from https://ollama.com/
# and start its server from a terminal with: ollama serve

llm = Ollama(
    model="mistral", request_timeout=30.0
)  # "mixtral" is also possible, but demands 48GB RAM

# Rebinds `reranker` (previously the OpenAI-backed instance) to the Ollama LLM,
# so every later cell that calls `reranker` uses this model instead.
reranker = RankGPTRerank(
    llm=llm,
    top_n=5,
    verbose=True,
)

In [10]:
# Rebuild the scored nodes from the same sample documents.
retrieved_nodes = nodes_from_documents(documents)

# Rerank with the currently bound `reranker`, reusing the same query bundle.
reranked_nodes = reranker.postprocess_nodes(retrieved_nodes, query_bundle)

After Reranking, new rank list for nodes: [2, 0, 1]

In [11]:
# Show the reranked order for this run; the recorded output matches the
# earlier run's order (0.7, 0.9, 0.8).
visualize_retrieved_nodes(reranked_nodes)

Unnamed: 0,Score,Text
0,0.7,Text 3... Textual text
1,0.9,Text 1...
2,0.8,Text 2... text
