In [1]:
import os
from operator import itemgetter
from typing import Dict, List, Optional, Sequence

# import weaviate
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langchain_anthropic import ChatAnthropic
from langchain_community.chat_models import ChatCohere
from langchain_community.vectorstores import Weaviate
from langchain_core.documents import Document
from langchain_core.language_models import LanguageModelLike
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import (
    ConfigurableField,
    Runnable,
    RunnableBranch,
    RunnableLambda,
    RunnablePassthrough,
    RunnableSequence,
    chain,
)

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_fireworks import ChatFireworks
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langsmith import Client

import pandas as pd

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv("./data_evaluation.csv")
df.sample(1)

Unnamed: 0,document_index,question,answer
17,0,What kind of gun does the bandana bullet kin use?,The bandana bullet kin wields a machine pistol.


In [None]:
import getpass
import os
os.environ["OPENAI_API_KEY"] = "API_KEY"

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [4]:
from langchain_openai import ChatOpenAI

# LLM
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

In [7]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

generate_queries = (
    prompt_rag_fusion | llm | StrOutputParser() | (lambda x: x.split("\n"))
)

In [6]:
# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS


embed_model = OpenAIEmbeddings()
vectorstore = FAISS.load_local(
    "./RAG_chatbot/",
    OpenAIEmbeddings(),
    allow_dangerous_deserialization=True,
)


retriever = vectorstore.as_retriever(search_kwargs={"k": 3, "threshold": 0.5})
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001CF4FA59690>, search_kwargs={'k': 3, 'threshold': 0.5})

In [26]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Reciprocal_rank_fusion that takes multiple lists of ranked documents
    and an optional parameter k used in the RRF formula"""

    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results



In [17]:
# Setup memory for contextual conversation
retrieval_chain_rag_fusion = (
    generate_queries | retriever.map() | reciprocal_rank_fusion
)

from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)



In [36]:
def get_docs_rag_fusion(question: str):

    docs = retrieval_chain_rag_fusion.invoke({"question": question})
    docs_copy = docs.copy()
    # print(docs_copy)
    docs_copy.sort(key=lambda x: len(x[0].page_content), reverse=True)
    combined_docs = []
    string_check = ""
    for doc in docs_copy:
        if doc[0].page_content not in string_check:
            string_check += doc[0].page_content
            combined_docs.append(doc)
    combined_docs

    return combined_docs

In [37]:
final_rag_chain = (
    {"context": RunnableLambda(itemgetter("question")) | get_docs_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

In [38]:
response = final_rag_chain.invoke({"question": "What kind of model is the bling-phi-3 model"})
print(response)

The bling-phi-3 model is a type of large language model (LLM) that is designed to run efficiently on local machines, such as a MacBook. It is part of a family of models that are optimized for performance while being lightweight enough to fit within the memory constraints of consumer hardware. The model is likely to be quantized or distilled to ensure it can operate effectively in environments with limited computational resources.


In [None]:
docs = get_docs_rag_fusion("What kind of model is the bling-phi-3 model")
docs

In [57]:
docs[0].page_content

"Which GPU(s) to Get for Deep Learning: My Experience and Advice for Using GPUs in Deep Learning\n2023-01-30 by Tim Dettmers 1,664 Comments\n\nDeep learning is a field with intense computational requirements, and your choice of GPU will fundamentally determine your deep learning experience. But what features are important if you want to buy a new GPU? GPU RAM, cores, tensor cores, caches? How to make a cost-efficient choice? This blog post will delve into these questions, tackle common misconceptions, give you an intuitive understanding of how to think about GPUs, and will lend you advice, which will help you to make a choice that is right for you.\n\nThis blog post is designed to give you different levels of understanding of GPUs and the new Ampere series GPUs from NVIDIA. You have the choice: (1) If you are not interested in the details of how GPUs work, what makes a GPU fast compared to a CPU, and what is unique about the new NVIDIA RTX 40 Ampere series, you can skip right to the pe

In [39]:
df.iloc[0, 1]

'What kind of model is the bling-phi-3 model'

In [45]:
contexts = []
for i in range(0, len(df)):
    print(i)
    a = []
    question = df.iloc[i, 1]
    try:
        docs = get_docs_rag_fusion(question)
        string_check = "" 
        for doc in docs:
            # print(doc[0].page_content)
            if doc[0].page_content not in string_check:
                string_check += doc[0].page_content
                a.append(doc[0].page_content)
        contexts.append(a)
    except:
        contexts.append("Error")
        continue

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [46]:
contexts

[['A Survey on Retrieval-Augmented Text Generation for Large Language\nModels\nYizheng Huang\nYork University\nhyz@yorku.ca\nJimmy X. Huang\nYork University\njhuang@yorku.ca\nAbstract\nRetrieval-Augmented Generation (RAG)\nmerges retrieval methods with deep learning\nadvancements to address the static limitations\nof large language models (LLMs) by enabling\nthe dynamic integration of up-to-date external\ninformation. This methodology, focusing\nprimarily on the text domain, provides a\ncost-effective solution to the generation of\nplausible but incorrect responses by LLMs,\nthereby enhancing the accuracy and reliability\nof their outputs through the use of real-world\ndata. As RAG grows in complexity and\nincorporates multiple concepts that can\ninfluence its performance, this paper organizes\nthe RAG paradigm into four categories:\npre-retrieval, retrieval, post-retrieval, and\ngeneration, offering a detailed perspective\nfrom the retrieval viewpoint. It outlines\nRAG’s evolution and

In [47]:
import json
df['contexts'] = contexts
# df['contexts'] = df['contexts'].apply(lambda x: json.loads(x))
df = df.iloc[:,1:]
df=df.rename(columns={
    "answer": "ground_truth"
})
df

Unnamed: 0,question,ground_truth,contexts
0,What kind of model is the bling-phi-3 model,The bling-phi-3 model is the newest and most a...,[A Survey on Retrieval-Augmented Text Generati...
1,What are the advantages and disadvantages of t...,The advantage of BM25 is that it is efficient....,[A Survey on Retrieval-Augmented Text Generati...
2,Who was Duke Stelmane?,Duke Stelmane was a major figure of the Knight...,[Version History\n(Redirected from Version his...
3,What items do Rudolf's family take from the Je...,"Rudolf's wife, Hedwig, often receives luxury a...",[A Survey on Retrieval-Augmented Text Generati...
4,What are the rules for developing general purp...,General purpose AI models that were trained us...,[A Survey on Retrieval-Augmented Text Generati...
5,What can moss be used for?,Harvesting moss gives 1 foraging exp per moss ...,[Version History\n(Redirected from Version his...
6,In what contexts is BERT mentioned?,It is mentioned that for BERT large during tra...,[A Survey on Retrieval-Augmented Text Generati...
7,What enemies are encountered in the second enc...,26 kobolds and 1 kobold inventor are encounter...,[Alan Wake 2\nWhy the hell did you kill Casey?...
8,What colour is Nan-E?,No answer,[Version History\n(Redirected from Version his...
9,How do sets in Python compare to sets in Gleam?,No answer,[Version History\n(Redirected from Version his...


In [48]:
answers = []
for i in range(0, len(df)):
    question = df.iloc[i, 1]
    try:
        response = final_rag_chain.invoke({"question": question})
        answers.append(response)
        print(response)
    except:
        answers.append("Error")
        continue

The bling-phi-3 model is described as the newest and most accurate model within the BLING/DRAGON framework. BLING models are characterized as small, CPU-based, RAG-optimized, instruct-following models with parameters ranging from 1 billion to 3 billion. In contrast, DRAGON models are larger, production-grade models optimized for RAG, typically featuring 6 to 7 billion parameters. The phrase "Delivering RAG on ..." suggests that these DRAGON models are built on leading foundation base models, emphasizing their capability and performance in retrieval-augmented generation tasks.
The statement is correct. The BM25 algorithm is known for its efficiency in ranking documents based on term frequency and presence, which makes it a popular choice in information retrieval systems. However, its focus on these factors can be a disadvantage, as it often overlooks the semantic information of queries. This means that while BM25 can effectively rank documents based on how often terms appear, it may not

In [49]:
df['answers'] = answers
df = df.rename(columns={"answers": "answer"})
df

Unnamed: 0,question,ground_truth,contexts,answer
0,What kind of model is the bling-phi-3 model,The bling-phi-3 model is the newest and most a...,[A Survey on Retrieval-Augmented Text Generati...,The bling-phi-3 model is described as the newe...
1,What are the advantages and disadvantages of t...,The advantage of BM25 is that it is efficient....,[A Survey on Retrieval-Augmented Text Generati...,The statement is correct. The BM25 algorithm i...
2,Who was Duke Stelmane?,Duke Stelmane was a major figure of the Knight...,[Version History\n(Redirected from Version his...,Duke Stelmane was a significant character in t...
3,What items do Rudolf's family take from the Je...,"Rudolf's wife, Hedwig, often receives luxury a...",[A Survey on Retrieval-Augmented Text Generati...,"In ""The Zone of Interest,"" Hedwig Hoss, the wi..."
4,What are the rules for developing general purp...,General purpose AI models that were trained us...,[A Survey on Retrieval-Augmented Text Generati...,The AI Act establishes a regulatory framework ...
5,What can moss be used for?,Harvesting moss gives 1 foraging exp per moss ...,[Version History\n(Redirected from Version his...,"Yes, harvesting moss grants 1 foraging experie..."
6,In what contexts is BERT mentioned?,It is mentioned that for BERT large during tra...,[A Survey on Retrieval-Augmented Text Generati...,"The author highlights that during training, th..."
7,What enemies are encountered in the second enc...,26 kobolds and 1 kobold inventor are encounter...,[Alan Wake 2\nWhy the hell did you kill Casey?...,"In the second encounter, the party faces 26 ko..."
8,What colour is Nan-E?,No answer,[Version History\n(Redirected from Version his...,It seems that you have provided a large amount...
9,How do sets in Python compare to sets in Gleam?,No answer,[Version History\n(Redirected from Version his...,It seems that you have provided a large amount...


In [50]:
for i in range(0, len(df)):
    question = df.iloc[i, 1]
    if df.iloc[i, 3] == "Error":
        try:
            response = final_rag_chain.invoke({"question": question})
            df.iloc[i, 3] = response
            print(response)
        except:
            df.iloc[i, 3] = "Error"
            continue

In [51]:
df.to_csv('./RAG_Fusion_responses.csv', index=False)

In [52]:
from datasets import Dataset

In [53]:
dataset = Dataset.from_pandas(df)

from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_relevancy,
    context_recall,
    context_precision
)

from ragas import evaluate

result = evaluate(
    dataset,
    llm=llm,
    raise_exceptions=False,
    callbacks=None,
    is_async=False,
    embeddings=OpenAIEmbeddings(),
    metrics=[
        faithfulness,
        answer_relevancy,
        context_relevancy,
        context_precision,
        context_recall,
    ],
)

result

Evaluating: 100%|██████████| 100/100 [04:47<00:00,  2.88s/it]


{'faithfulness': 0.9316, 'answer_relevancy': 0.7286, 'context_relevancy': 0.0005, 'context_precision': 0.3294, 'context_recall': 0.7000}

In [54]:
result.to_pandas().to_csv('./RAG_Fusion_evaluations.csv', encoding='utf-8', index=False)

In [None]:
df=df.rename(columns={"Question":"question","Answer_RAG_FUSION":"answer",'Context_RAG_FUSION':'contexts',"Answer":'ground_truth'})

df['contexts'] = df['contexts'].apply(lambda x: json.loads(x))
df