In [14]:
import os
from operator import itemgetter
from typing import Dict, List, Optional, Sequence

# import weaviate
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from langchain_anthropic import ChatAnthropic
from langchain_community.chat_models import ChatCohere
from langchain_community.vectorstores import Weaviate
from langchain_core.documents import Document
from langchain_core.language_models import LanguageModelLike
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    PromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import (
    ConfigurableField,
    Runnable,
    RunnableBranch,
    RunnableLambda,
    RunnablePassthrough,
    RunnableSequence,
    chain,
)

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

from langchain_fireworks import ChatFireworks
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langsmith import Client

import pandas as pd

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

In [69]:
df = pd.read_csv("./data_evaluation.csv")
df.sample(1)

Unnamed: 0,document_index,question,answer
2,7,Who was Duke Stelmane?,Duke Stelmane was a major figure of the Knight...


In [16]:
df1 = pd.read_csv("./documents.csv")
df1.sample(1)

Unnamed: 0,index,source_url,text
11,11,https://timdettmers.com/2023/01/30/which-gpu-f...,Which GPU(s) to Get for Deep Learning: My Expe...


In [None]:
import getpass
import os
os.environ["OPENAI_API_KEY"] = "API_KEY"

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [19]:
from langchain_openai import ChatOpenAI

# LLM
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

In [34]:
from langchain.prompts import ChatPromptTemplate

# Prompt
template = """Sử dụng các thông tin sau đây để trả lời câu hỏi của người dùng.
Nếu bạn không biết câu trả lời, chỉ cần nói rằng bạn không biết, đừng cố bịa ra câu trả lời.
Hãy trả lời thật chi tiết và chính xác nhất có thể.
Tất cả câu trả lời của bạn đều phải trả lời bằng tiếng Anh

Context: {context}
Question: {question}

"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Sử dụng các thông tin sau đây để trả lời câu hỏi của người dùng.\nNếu bạn không biết câu trả lời, chỉ cần nói rằng bạn không biết, đừng cố bịa ra câu trả lời.\nHãy trả lời thật chi tiết và chính xác nhất có thể.\nTất cả câu trả lời của bạn đều phải trả lời bằng tiếng Anh\n\nContext: {context}\nQuestion: {question}\n\n'))])

In [35]:
# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS


embed_model = OpenAIEmbeddings()
vectorstore = FAISS.load_local(
    "./RAG_chatbot/",
    OpenAIEmbeddings(),
    allow_dangerous_deserialization=True,
)


retriever = vectorstore.as_retriever(search_kwargs={"k": 3, "threshold": 0.5})
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002624B2E9D90>, search_kwargs={'k': 3, 'threshold': 0.5})

In [41]:
def get_docs_rag(question):
    return retriever.get_relevant_documents(question)

In [42]:
rag_chain = (
    {"context": RunnableLambda(itemgetter("question")) | get_docs_rag, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

In [43]:
response = rag_chain.invoke({"question": "What kind of model is the bling-phi-3 model"})
print(response)

The "bling-phi-3" model is not specifically mentioned in the provided context. However, based on the naming convention, it likely refers to a variant of the Phi model series, which are typically designed for natural language processing tasks. The Phi models are known for their performance in various language understanding and generation tasks. If you are looking for specific details about the "bling-phi-3" model, such as its architecture, capabilities, or intended use cases, I do not have that information available.


In [56]:
docs = get_docs_rag("What kind of model is the bling-phi-3 model")
docs

[Document(page_content="Which GPU(s) to Get for Deep Learning: My Experience and Advice for Using GPUs in Deep Learning\n2023-01-30 by Tim Dettmers 1,664 Comments\n\nDeep learning is a field with intense computational requirements, and your choice of GPU will fundamentally determine your deep learning experience. But what features are important if you want to buy a new GPU? GPU RAM, cores, tensor cores, caches? How to make a cost-efficient choice? This blog post will delve into these questions, tackle common misconceptions, give you an intuitive understanding of how to think about GPUs, and will lend you advice, which will help you to make a choice that is right for you.\n\nThis blog post is designed to give you different levels of understanding of GPUs and the new Ampere series GPUs from NVIDIA. You have the choice: (1) If you are not interested in the details of how GPUs work, what makes a GPU fast compared to a CPU, and what is unique about the new NVIDIA RTX 40 Ampere series, you c

In [57]:
docs[0].page_content

"Which GPU(s) to Get for Deep Learning: My Experience and Advice for Using GPUs in Deep Learning\n2023-01-30 by Tim Dettmers 1,664 Comments\n\nDeep learning is a field with intense computational requirements, and your choice of GPU will fundamentally determine your deep learning experience. But what features are important if you want to buy a new GPU? GPU RAM, cores, tensor cores, caches? How to make a cost-efficient choice? This blog post will delve into these questions, tackle common misconceptions, give you an intuitive understanding of how to think about GPUs, and will lend you advice, which will help you to make a choice that is right for you.\n\nThis blog post is designed to give you different levels of understanding of GPUs and the new Ampere series GPUs from NVIDIA. You have the choice: (1) If you are not interested in the details of how GPUs work, what makes a GPU fast compared to a CPU, and what is unique about the new NVIDIA RTX 40 Ampere series, you can skip right to the pe

In [51]:
df.iloc[0, 1]

'What kind of model is the bling-phi-3 model'

In [None]:
contexts = []
for i in range(0, len(df)):
    print(i)
    a = []
    question = df.iloc[i, 1]
    try:
        docs = get_docs_rag(question)
        string_check = "" 
        for doc in docs:
            if doc.page_content not in string_check:
                string_check += doc.page_content
                a.append(doc.page_content)
        contexts.append(a)
    except:
        contexts.append("Error")
        continue

In [None]:
contexts

In [71]:
import json
df['contexts'] = contexts
# df['contexts'] = df['contexts'].apply(lambda x: json.loads(x))
df = df.iloc[:,1:]
df=df.rename(columns={
    "answer": "ground_truth"
})
df

Unnamed: 0,question,ground_truth,contexts
0,What kind of model is the bling-phi-3 model,The bling-phi-3 model is the newest and most a...,[Which GPU(s) to Get for Deep Learning: My Exp...
1,What are the advantages and disadvantages of t...,The advantage of BM25 is that it is efficient....,[Which GPU(s) to Get for Deep Learning: My Exp...
2,Who was Duke Stelmane?,Duke Stelmane was a major figure of the Knight...,[The Emperor is a mind flayer who appears in B...
3,What items do Rudolf's family take from the Je...,"Rudolf's wife, Hedwig, often receives luxury a...",[‘The Zone Of Interest’ Ending Explained & Fil...
4,What are the rules for developing general purp...,General purpose AI models that were trained us...,[Why do we need to regulate the use of Artific...
5,What can moss be used for?,Harvesting moss gives 1 foraging exp per moss ...,[Version History\n(Redirected from Version his...
6,In what contexts is BERT mentioned?,It is mentioned that for BERT large during tra...,[A Survey on Retrieval-Augmented Text Generati...
7,What enemies are encountered in the second enc...,26 kobolds and 1 kobold inventor are encounter...,[Bullet Kin\nBullet Kin are one of the most co...
8,What colour is Nan-E?,No answer,[Space Babies\n\nOriginal Airdate: 11 May 2024...
9,How do sets in Python compare to sets in Gleam?,No answer,[Gleam for Python users\nHello productive prag...


In [72]:
answers = []
for i in range(0, len(df)):
    question = df.iloc[i, 1]
    try:
        response = rag_chain.invoke({"question": question})
        answers.append(response)
        print(response)
    except:
        answers.append("Error")
        continue

The bling-phi-3 model is indeed the latest and most accurate model within the BLING/DRAGON framework. BLING models are designed to be small, CPU-based, and optimized for Retrieval-Augmented Generation (RAG), typically falling within the parameter range of 1 billion to 3 billion. These models are particularly focused on following instructions effectively, making them suitable for various applications that require responsive and context-aware interactions.

On the other hand, DRAGON models represent a more robust category, being production-grade and optimized for RAG as well. These models generally have a larger parameter count, specifically in the range of 6 billion to 7 billion parameters. The DRAGON models are designed to deliver high-quality RAG capabilities, leveraging their larger size and enhanced architecture to provide more accurate and contextually relevant outputs.

In summary, the bling-phi-3 model exemplifies the advancements in the BLING series, focusing on efficiency and i

In [80]:
df['answers'] = answers
df = df.rename(columns={"answers": "answer"})
df

Unnamed: 0,question,ground_truth,contexts,answer
0,What kind of model is the bling-phi-3 model,The bling-phi-3 model is the newest and most a...,[Which GPU(s) to Get for Deep Learning: My Exp...,The bling-phi-3 model is indeed the latest and...
1,What are the advantages and disadvantages of t...,The advantage of BM25 is that it is efficient....,[Which GPU(s) to Get for Deep Learning: My Exp...,The BM25 algorithm is a widely used informatio...
2,Who was Duke Stelmane?,Duke Stelmane was a major figure of the Knight...,[The Emperor is a mind flayer who appears in B...,Duke Stelmane was indeed a significant charact...
3,What items do Rudolf's family take from the Je...,"Rudolf's wife, Hedwig, often receives luxury a...",[‘The Zone Of Interest’ Ending Explained & Fil...,"In ""The Zone of Interest,"" Hedwig Hoss, the wi..."
4,What are the rules for developing general purp...,General purpose AI models that were trained us...,[Why do we need to regulate the use of Artific...,The AI Act establishes a regulatory framework ...
5,What can moss be used for?,Harvesting moss gives 1 foraging exp per moss ...,[Version History\n(Redirected from Version his...,In the context of the game mechanics described...
6,In what contexts is BERT mentioned?,It is mentioned that for BERT large during tra...,[A Survey on Retrieval-Augmented Text Generati...,The author highlights the performance advantag...
7,What enemies are encountered in the second enc...,26 kobolds and 1 kobold inventor are encounter...,[Bullet Kin\nBullet Kin are one of the most co...,In the second encounter during your journey th...
8,What colour is Nan-E?,No answer,[Space Babies\n\nOriginal Airdate: 11 May 2024...,"I'm sorry, but I don't have an answer to your ..."
9,How do sets in Python compare to sets in Gleam?,No answer,[Gleam for Python users\nHello productive prag...,"I'm sorry, but I don't have an answer to your ..."


In [74]:
for i in range(0, len(df)):
    question = df.iloc[i, 1]
    if df.iloc[i, 3] == "Error":
        try:
            response = rag_chain.invoke({"question": question})
            df.iloc[i, 3] = response
            print(response)
        except:
            df.iloc[i, 3] = "Error"
            continue

In [81]:
df.to_csv('./RAG_responses.csv', index=False)

In [77]:
from datasets import Dataset

In [82]:
dataset = Dataset.from_pandas(df)

from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_relevancy,
    context_recall,
    context_precision
)

from ragas import evaluate

result = evaluate(
    dataset,
    llm=llm,
    raise_exceptions=False,
    callbacks=None,
    is_async=False,
    embeddings=OpenAIEmbeddings(),
    metrics=[
        faithfulness,
        answer_relevancy,
        context_relevancy,
        context_precision,
        context_recall,
    ],
)

result

Evaluating:  97%|█████████▋| 97/100 [01:30<00:03,  1.28s/it]Failed to parse output. Returning None.
Evaluating: 100%|██████████| 100/100 [04:34<00:00,  2.74s/it]


{'faithfulness': 0.6517, 'answer_relevancy': 0.6359, 'context_relevancy': 0.0006, 'context_precision': 0.6333, 'context_recall': 0.5500}

In [83]:
result.to_pandas().to_csv('./RAG_evaluations.csv', encoding='utf-8', index=False)

In [None]:
df=df.rename(columns={"Question":"question","Answer_RAG_FUSION":"answer",'Context_RAG_FUSION':'contexts',"Answer":'ground_truth'})

df['contexts'] = df['contexts'].apply(lambda x: json.loads(x))
df