# Simple RAG (+ Re-ranker) from a PGVector Store

In [1]:
import time
# Start the timer first, before the remaining imports, so that the elapsed
# time printed at the end of the notebook is measured from this point on
start = time.time()

import pandas as pd
import os
import ast
import yaml
from dotmap import DotMap
from sqlalchemy import make_url
import ipywidgets as widgets
# NOTE(review): this slider is instantiated and immediately discarded; it is
# not the last expression of the cell, so nothing is rendered. Presumably a
# check that ipywidgets is usable (e.g. for progress bars) - confirm or remove.
widgets.IntSlider()

# LlamaIndex building blocks: global settings, LLM client, index,
# PGVector-backed vector store, NVIDIA embedder and NVIDIA re-ranker
from llama_index.core import Settings
from llama_index.llms.openai_like import OpenAILike
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from llama_index.postprocessor.nvidia_rerank import NVIDIARerank


# Add the shared utilities directory to the module search path (only once,
# so re-running the cell does not append duplicates). The path is relative,
# hence it depends on the notebook's working directory.
import sys
utils_path = "../../08-Utils"
if utils_path not in sys.path:
    sys.path.append(utils_path)

# Must come after the sys.path update above.
# NOTE(review): `helpers` is presumably located in `utils_path` - verify.
from helpers import generate_responses_dict, save_results

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
# Read the Starter Pack global configuration (YAML) and wrap the parsed
# content in a DotMap so that settings can be reached with dot notation
with open('../../07-Starter_Pack_config/improved_rag_config.yaml', 'r') as config_file:
    config = DotMap(yaml.safe_load(config_file))

In [3]:
# Configuration sections for the models used in this notebook
llm_cfg = config.ml_models.llm_generator
emb_cfg = config.ml_models.embedder
retriever_cfg = config.ml_models.retriever

# LlamaIndex LLM provider: every generation parameter comes from the config
llm_params = dict(
    model=llm_cfg.model,
    api_key=llm_cfg.api_key,
    api_base=llm_cfg.api_base,
    temperature=llm_cfg.temperature,
    max_tokens=llm_cfg.max_tokens,
    repetition_penalty=llm_cfg.repetition_penalty,
)
Settings.llm = OpenAILike(**llm_params)

# LlamaIndex embedding model
embedder_params = dict(
    base_url=emb_cfg.api_base,
    model=emb_cfg.model,
    embed_batch_size=emb_cfg.batch_size,
    truncate="END",
)
Settings.embed_model = NVIDIAEmbedding(**embedder_params)

# Embed a tiny probe text once and use the length of the returned vector
# as the embedding dimension (needed later to open the vector store)
EMBEDDING_SIZE = len(Settings.embed_model.get_text_embedding("hi"))

In [4]:
%%time

# PGVector DB params as defined in the reference Docker compose file
## available from the "../PGvector" directory
db_cfg = config.postgresql
# NOTE(review): user/password are interpolated without URL-escaping, so
# credentials containing reserved characters (e.g. "@" or "/") would likely
# be mis-parsed by make_url below - confirm the configured values are safe.
connection_string = (f"postgresql://{db_cfg.user}:"
                     f"{db_cfg.password}@{db_cfg.db_host}:{db_cfg.port}/{db_cfg.default_db}")
url = make_url(connection_string)

# Open the connection to the Vector Store
vector_store = PGVectorStore.from_params(
    database=url.database,
    host=url.host,
    password=url.password,
    port=url.port,
    user=url.username,
    table_name=db_cfg.tables.std_rag,
    embed_dim=EMBEDDING_SIZE, # embedding model dimension
    cache_ok=True,
    hybrid_search=True,
)

# Initialize the index object on top of the existing vector store
index = VectorStoreIndex.from_vector_store(vector_store)

# Initialize the re-ranker of retrieved chunks; it is passed to the query
# engine below as a node post-processor and configured with `top_n`
rerank_cfg = config.ml_models.re_ranker
re_ranker = NVIDIARerank(
    model=rerank_cfg.model,
    base_url=rerank_cfg.api_base,
    api_key="NONE",
    top_n=rerank_cfg.rerank_top_n,
    truncate="END",
)

# Set the index as query engine.
# The extra vector store kwargs are stored in the config as the string form
# of a Python literal, hence the safe parsing with ast.literal_eval.
h_search = ast.literal_eval(db_cfg.pgvector.pgvector_kwargs)
query_engine = index.as_query_engine(
    similarity_top_k=db_cfg.pgvector.sim_top_k,
    node_postprocessors=[re_ranker],
    vector_store_kwargs=h_search,
    response_mode=config.llama_index.std_rag.response_mode
)

CPU times: user 371 ms, sys: 42.6 ms, total: 413 ms
Wall time: 454 ms


In [5]:
%%time

# Load a test set to run inference on multiple Q/A pairs
test_set_cfg = config.data_files.test_sets

# Resolve the entry of the active test set by walking the configuration keys.
# This replaces an eval() of a string assembled from configuration values,
# which would execute arbitrary code if the config were tampered with.
active_set_name = test_set_cfg.active_set
if not isinstance(active_set_name, str) or not active_set_name:
    raise ValueError("'data_files.test_sets.active_set' must be a non-empty string")
active_entry = test_set_cfg
for key in active_set_name.split("."):
    active_entry = active_entry[key]
active_set = active_entry.current

# Directory of the active test set (both parts come from the configuration)
test_set_dir = test_set_cfg.base_path + active_set

# os.listdir() returns entries in arbitrary order: sort them so that the same
# file is selected on every run, and skip hidden entries and sub-directories
# (such as Jupyter's ".ipynb_checkpoints") that are not test sets.
test_set_files = sorted(
    name for name in os.listdir(test_set_dir)
    if not name.startswith(".")
    and os.path.isfile(os.path.join(test_set_dir, name))
)
if not test_set_files:
    raise FileNotFoundError(f"No test set file found in '{test_set_dir}'")
test_set = os.path.join(test_set_dir, test_set_files[0])

# Only the question and its reference answer are needed; `sample_size`
# caps the number of rows read from the file
test_set_df = pd.read_csv(
    filepath_or_buffer=test_set,
    usecols=['query', 'reference_answer'],
    nrows=test_set_cfg.sample_size
)

# Run inference on the Q/A pairs and keep the responses
# for future comparison vs. other RAG approaches.
responses = generate_responses_dict(
    query_engine=query_engine,
    test_set_df=test_set_df
)

  0%|          | 0/40 [00:00<?, ?it/s]

CPU times: user 928 ms, sys: 210 ms, total: 1.14 s
Wall time: 3min


In [8]:
# Tabulate the collected responses and serialize them; the pipeline label and
# the model names are handed to the helper in the same positional order
responses_df = pd.DataFrame.from_dict(responses)
model_names = (llm_cfg.model, emb_cfg.model, rerank_cfg.model)
save_results(responses_df, "Standard RAG", *model_names)

# Stop the timer started in the first cell and report the elapsed minutes
stop = time.time()
print(f"Notebook execution time: {(stop-start)/60:.1f} minutes")

Standard RAG pipeline responses successfully saved to file: responses/current/Standard RAG__llama3-8b-instruct__nv-embedqa-e5-v5__nv-rerankqa-mistral-4b-v3.csv.
Notebook execution time: 6.7 minutes


In [12]:
# Preview: show only the first ten rows of the saved responses
df = responses_df.head(n=10)
display(df)

Unnamed: 0,query,answer,contexts,ground_truth
0,What was the USSR's primary launch vehicle and...,The USSR's primary launch vehicle for lunar an...,[19 1960 The 4-stage 8K78 launch vehicle (and ...,The USSR's primary launch vehicle for lunar an...
1,What is the function of the main longitudinal ...,The main longitudinal strength member in a sea...,[263 Document 5-20 (a–c) At the time his text ...,"The main longitudinal strength member, known a..."
2,"What limited but reliable technology, develope...",The Russian Soyuz capsule. It was mentioned i...,[Chapter 10: Toward a Permanent Human Presenc...,The Russian Soyuz capsule.
3,Compare the management styles and priorities o...,Daniel Goldin and Richard Truly had different ...,[There he rose through the ranks and was Vice...,Daniel Goldin favored horizontal management st...
4,Analyze the role of end credits in documenting...,The role of end credits in documenting film pr...,[vi Acknowledgments Any author is in debt to m...,End credits document film production contribut...
5,Compare the objectives and outcomes of Zond 7 ...,Zond 7 was the first fully successful Soviet c...,[91 1969 had probably crashed onto the side o...,"Zond 7, 5, and 6 all had circumlunar objective..."
6,"Which American military aircraft, including th...",The F-111 experience was an extremely cautiona...,"[Overall, the F-111 experience was an extremel...","The Grumman F-14 Tomcat, McDonnell Douglas F-1..."
7,How do astronomers utilize a Super-Jupiter's c...,Astronomers used the Hubble Space Telescope to...,[Using a Super-Jupiter’s Cloudy Skies to Measu...,Astronomers use a Super-Jupiter's cloudy skies...
8,Compare the technological goals of the X-34 pr...,The X-34 program was designed to focus on low-...,"[Index 377 DC-X Program, 39 accomplishments, 4...",The X-34 program aimed to demonstrate low-cost...
9,Compare Pratt & Whitney Aircraft's contributio...,Pratt & Whitney Aircraft and General Electric ...,[The Power for Flight 112 Pratt & Whitney focu...,Pratt & Whitney Aircraft contributed to aircra...
