# Initial Setup

In [1]:
import os
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI
from huggingface_hub import login

In [2]:
# Vector DB
from langchain_chroma import Chroma
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace, HuggingFacePipeline

In [3]:
# Chat
import torch
from langchain_openai import ChatOpenAI
from transformers import BitsAndBytesConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [4]:
# Every persisted vector store lives in a "VectorDBs" folder under the current working directory.
ABSOLUTE_PATH = os.path.abspath(os.getcwd())
VDB_PATH = os.path.join(ABSOLUTE_PATH, "VectorDBs")


def set_path(db_name: str, vdb_path: str=VDB_PATH) -> str:
    """Return the path of the vector store ``db_name`` inside ``vdb_path``.

    Args:
        db_name: Folder name of the vector store.
        vdb_path: Parent folder that holds the vector stores (defaults to ``VDB_PATH``).

    Returns:
        ``vdb_path`` and ``db_name`` joined with ``os.path.join``.
    """
    location = os.path.join(vdb_path, db_name)
    return location


# Chroma vector store paths
CHROMADB_OPENAI_PATH, CHROMADB_HF_PATH = map(
    set_path,
    ("insurellm_chroma_vector_db", "insurellm_chroma_vector_db_hf"),
)

# FAISS vector store paths
FAISSDB_OPENAI_PATH, FAISSDB_HF_PATH = map(
    set_path,
    ("insurellm_faiss_vector_db", "insurellm_faiss_vector_db_hf"),
)

# Embeddings

In [5]:
# Read secrets from the local .env file; override=True lets .env values replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Fail fast with a clear message instead of falling back to placeholder credentials,
# which would only surface later as an opaque authentication error.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
missing_secrets = [
    name
    for name, value in (("OPENAI_API_KEY", OPENAI_API_KEY), ("HUGGINGFACE_TOKEN", HF_TOKEN))
    if not value
]
if missing_secrets:
    raise EnvironmentError(
        f"Missing environment variable(s): {', '.join(missing_secrets)}. "
        "Define them in your .env file or shell before running this notebook."
    )

# OpenAI Embeddings
# No key is passed explicitly: the client is expected to pick up OPENAI_API_KEY from the environment.
openai_embeddings = OpenAIEmbeddings()

# Hugging Face Embeddings
login(HF_TOKEN)
hf_embeddings = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

# Reaching this line means both constructors returned without raising.
print("Embeddings are loaded.")

Embeddings are loaded.


# Load Vector Database

## Chroma

In [6]:
def load_chroma_vdb(db_path: str, embeddings) -> "Chroma | None":
    """Open the persisted Chroma vector store located at ``db_path``.

    Args:
        db_path: Directory the Chroma store was persisted to.
        embeddings: Embedding object handed to Chroma as ``embedding_function``.

    Returns:
        The loaded ``Chroma`` store, or ``None`` (after printing a notice) when
        ``db_path`` does not exist. Callers must handle the ``None`` case before
        using the result.
    """
    if not os.path.exists(db_path):
        print(f"{db_path} not found in directory.\n")
        return None

    print(f"Loading {db_path} vector stores ...")
    vectorstores = Chroma(persist_directory=db_path, embedding_function=embeddings)
    # NOTE(review): `_collection` is a private attribute of the Chroma wrapper; it is used
    # here only to report how many entries the store holds and may change between releases.
    num_docs = vectorstores._collection.count()
    print(f"{db_path} vector stores are loaded: {num_docs} documents found.\n")
    return vectorstores

# Open both persisted Chroma stores, pairing each path with its embedding object.
chroma_openai_vectorstores = load_chroma_vdb(
    db_path=CHROMADB_OPENAI_PATH,
    embeddings=openai_embeddings,
)
chroma_hf_vectorstores = load_chroma_vdb(
    db_path=CHROMADB_HF_PATH,
    embeddings=hf_embeddings,
)

Loading D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_chroma_vector_db vector stores ...
D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_chroma_vector_db vector stores are loaded: 123 documents found.

Loading D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_chroma_vector_db_hf vector stores ...
D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_chroma_vector_db_hf vector stores are loaded: 123 documents found.



## FAISS

In [7]:
def load_faiss_db(db_path: str, embeddings) -> "FAISS | None":
    """Open the FAISS vector store saved at ``db_path``.

    Args:
        db_path: Folder the FAISS store was saved to.
        embeddings: Embedding object passed to ``FAISS.load_local``.

    Returns:
        The loaded ``FAISS`` store, or ``None`` (after printing a notice) when
        ``db_path`` does not exist. Callers must handle the ``None`` case before
        using the result.
    """
    if not os.path.exists(db_path):
        print(f"{db_path} not found in directory.\n")
        return None

    print(f"Loading {db_path} vector stores ...")
    # SECURITY: allow_dangerous_deserialization=True permits unpickling the saved store.
    # Only point this at index folders you created yourself; a tampered file can run
    # arbitrary code when it is loaded.
    vectorstores = FAISS.load_local(db_path, embeddings=embeddings, allow_dangerous_deserialization=True)
    num_docs = vectorstores.index.ntotal  # number of vectors held by the index
    dim = vectorstores.index.d            # dimensionality of each stored vector
    print(f"{db_path} vector stores are loaded.")
    print(f"Found {num_docs} documents with {dim} dimensions.\n")
    return vectorstores

# Open both saved FAISS stores, pairing each path with its embedding object.
faiss_openai_vectorstores = load_faiss_db(
    db_path=FAISSDB_OPENAI_PATH,
    embeddings=openai_embeddings,
)
faiss_hf_vectorstores = load_faiss_db(
    db_path=FAISSDB_HF_PATH,
    embeddings=hf_embeddings,
)

Loading D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_faiss_vector_db vector stores ...
D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_faiss_vector_db vector stores are loaded.
Found 123 documents with 1536 dimensions.

Loading D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_faiss_vector_db_hf vector stores ...
D:\Learn\LLM\llm_engineering\week5\VectorDBs\insurellm_faiss_vector_db_hf vector stores are loaded.
Found 123 documents with 384 dimensions.



# RAG

In [8]:
# Ollama model: connection settings and model tag for a server on localhost port 11434.
# NOTE(review): "ollama" looks like a dummy key for the local server rather than a real secret — confirm.
OLLAMA_API_KEY = "ollama"
OLLAMA_BASE_URL = "http://localhost:11434/v1"
llama_model = "llama3.2:latest"

# GPT model: identifier of the OpenAI chat model used in this notebook.
gpt_model = "gpt-4o-mini"

In [9]:
%%time

# Hugging Face Model
hf_model = "meta-llama/Llama-3.2-1B-Instruct"

# 1. Quantization settings: 4-bit NF4 weights with double quantization, computing in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# 2. Load the quantized causal LM and let `device_map="auto"` decide where it is placed.
llama_hf_model = AutoModelForCausalLM.from_pretrained(
    hf_model, quantization_config=quantization_config, device_map="auto"
)

# 3. Load the matching tokenizer and reuse the end-of-sequence token for padding.
tokenizer = AutoTokenizer.from_pretrained(hf_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

if all((llama_hf_model, tokenizer)):
    print("Hugging Face model and its tokenizer are loaded.")

Hugging Face model and its tokenizer are loaded.
CPU times: total: 4.39 s
Wall time: 6.5 s


In [76]:
%%time

def setup_conversation_chain(model: str, vectorstores, temperature: float):
    """Build a conversational RAG chain for the requested model and vector store.

    Args:
        model: Which LLM to use. Must equal one of the notebook-level identifiers
            ``gpt_model``, ``llama_model`` (reached through ``ChatOpenAI`` with
            ``OLLAMA_BASE_URL``/``OLLAMA_API_KEY``) or ``hf_model`` (the already loaded
            ``llama_hf_model``/``tokenizer`` pair).
        vectorstores: Vector store exposing ``as_retriever()``.
        temperature: Sampling temperature forwarded to the LLM. For the Hugging Face
            pipeline a value of 0 or less switches to greedy decoding.

    Returns:
        A ``ConversationalRetrievalChain`` created with ``from_llm`` from the chosen LLM,
        the store's retriever and a fresh ``ConversationBufferMemory`` stored under
        ``chat_history``.

    Raises:
        ValueError: If ``vectorstores`` is ``None``, if ``model`` is not one of the known
            identifiers, or if the Hugging Face model is requested before it and its
            tokenizer were loaded.
    """
    # The loader helpers return None for a missing store; reject that up front instead of
    # failing with an AttributeError after the model has already been built.
    if vectorstores is None:
        raise ValueError("No vector store supplied: load one before building the conversation chain.")

    # 1. Set-up the model
    if model == gpt_model:
        llm = ChatOpenAI(temperature=temperature, model=gpt_model)
        print(f"{gpt_model} model loaded.")

    elif model == llama_model:
        llm = ChatOpenAI(temperature=temperature, model=llama_model, api_key=OLLAMA_API_KEY, base_url=OLLAMA_BASE_URL)
        print(f"{llama_model} model loaded.")

    elif model == hf_model:
        # Only the exact Hugging Face identifier selects this branch; previously any unknown
        # name fell through to it whenever the local model happened to be loaded.
        if llama_hf_model is None or tokenizer is None:
            raise ValueError(f"{hf_model} was requested but the model or its tokenizer is not loaded.")

        print(f"Loading {hf_model} model ...")

        # Sampling needs a strictly positive temperature; fall back to greedy decoding otherwise.
        if temperature > 0:
            sampling_kwargs = {"do_sample": True, "temperature": temperature}
        else:
            sampling_kwargs = {"do_sample": False}

        text_pipeline = pipeline(
            "text-generation",
            model=llama_hf_model,
            tokenizer=tokenizer,
            max_new_tokens=512,
            pad_token_id=tokenizer.eos_token_id,
            return_full_text=False,  # return only the generated continuation, not the prompt
            **sampling_kwargs,
        )
        hf_llm = HuggingFacePipeline(pipeline=text_pipeline)
        print("Hugging face pipeline created.")

        llm = ChatHuggingFace(llm=hf_llm, model_id=hf_model)
        print(f"{hf_model} loaded.")
    else:
        raise ValueError(f"Unknown or invalid model: {model}")

    # 2. Set-up the retriever: the retriever is an abstraction over the VectorStore that will be used during RAG
    retriever = vectorstores.as_retriever()
    print(f"{vectorstores} has been set-up as retriever.")

    # 3. Set-up the conversation memory for the chat
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    print("chat_history memroy has been set-up.")

    # Putting it together: combine the selected LLM, the retriever and the memory into one chain
    conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

    return conversation_chain

# Building custom conversation chain
conversation_chain = setup_conversation_chain(hf_model, faiss_openai_vectorstores, temperature=0.7)
if conversation_chain:
    print("\nConversation chain is ready to be tested and used.")

Device set to use cuda:0


Loading meta-llama/Llama-3.2-1B-Instruct model ...
Hugging face pipeline created.
meta-llama/Llama-3.2-1B-Instruct loaded.
<langchain_community.vectorstores.faiss.FAISS object at 0x000002313D731C50> has been set-up as retriever.
chat_history memroy has been set-up.

Conversation chain is ready to be tested and used.
CPU times: total: 422 ms
Wall time: 1.51 s


## Testing the Conversation Chain

### GPT

In [39]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: gpt-4o-mini
Retriever	: Chroma using HuggingFaceEmbeddings

Insurellm is an insurance tech startup founded by Avery Lancaster in 2015, aimed at disrupting the traditional insurance industry through innovative products. Its first offering, Markellm, is a marketplace that connects consumers with insurance providers. Since its inception, Insurellm has rapidly expanded, reaching 200 employees by 2024 and establishing 12 offices across the US.

CPU times: total: 46.9 ms
Wall time: 2.95 s


In [41]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: gpt-4o-mini
Retriever	: Chroma using OpenAIEmbeddings

Insurellm is an innovative insurance tech firm founded in 2015 by Avery Lancaster, specializing in disrupting the insurance industry with cutting-edge software products. The company offers four main products: Carllm for auto insurance, Homellm for home insurance, Rellm for reinsurance, and Markellm, a marketplace connecting consumers with insurance providers. With a workforce of 200 employees and over 300 clients worldwide, Insurellm is dedicated to transforming the landscape of insurance through innovation and reliability.

CPU times: total: 172 ms
Wall time: 4.77 s


In [43]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: gpt-4o-mini
Retriever	: FAISS using HuggingFaceEmbeddings

Insurellm is an insurance technology startup founded in 2015 by Avery Lancaster, aimed at disrupting the insurance industry with innovative products. The company initially launched its first product, Markellm, which serves as a marketplace connecting consumers with insurance providers. By 2024, Insurellm had expanded significantly, reaching 200 employees and operating 12 offices across the United States.

CPU times: total: 31.2 ms
Wall time: 3.27 s


In [45]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: gpt-4o-mini
Retriever	: FAISS using OpenAIEmbeddings

Insurellm is an innovative insurance tech firm founded in 2015 by Avery Lancaster, with a mission to disrupt the insurance industry through technology. The company offers four software products: Carllm for auto insurance, Homellm for home insurance, Rellm for the reinsurance sector, and Markellm, a marketplace connecting consumers with insurance providers. With a workforce of 200 employees and over 300 clients worldwide, Insurellm operates 12 offices across the US, focusing on delivering reliable and cutting-edge solutions for insurance companies.

CPU times: total: 31.2 ms
Wall time: 4.51 s


### Ollama (Local)

In [47]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: llama3.2:latest
Retriever	: Chroma using HuggingFaceEmbeddings

I don't have much information on Insurellm beyond what's provided in our context. It appears to be an insurance technology company that offers various products and services, including a marketplace for consumers to connect with insurance providers. The company has expanded rapidly since its founding in 2015 and now has multiple offices across the US.

CPU times: total: 62.5 ms
Wall time: 9.86 s


In [49]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: llama3.2:latest
Retriever	: Chroma using OpenAIEmbeddings

Insurellm is an innovative insurance tech firm that offers four software products: Carllm, Homellm, Rellm, and Markellm. Founded by Avery Lancaster in 2015, the company has rapidly expanded to become one of the leading players in the industry, serving over 300 clients worldwide with a team of 200 employees across 12 US offices. Insurellm is committed to disrupting the insurance industry through innovative products and services that prioritize reliability and customer care.

CPU times: total: 0 ns
Wall time: 5.63 s


In [51]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: llama3.2:latest
Retriever	: FAISS using HuggingFaceEmbeddings

I don't know much about Insurellm from this context, but it appears to be an insurance technology company that offers various products and services aimed at disrupting the insurance industry. It was founded by Avery Lancaster in 2015 and has since expanded its presence with multiple offices across the US and a range of employees.

CPU times: total: 62.5 ms
Wall time: 3.95 s


In [53]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_name}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: llama3.2:latest
Retriever	: FAISS using OpenAIEmbeddings

Insurellm is an innovative insurance tech firm that offers four software products for various sectors of the industry, including auto, home, reinsurance, and marketplace solutions. Founded by Avery Lancaster in 2015, the company has rapidly expanded to become one of the leading players in the industry, serving over 300 clients worldwide with a team of 200 employees across 12 US offices.

CPU times: total: 0 ns
Wall time: 5.1 s


### Hugging Face Model & Tokenizer (Local)

In [62]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
# The Hugging Face chat wrapper is identified through `model_id`.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_id}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: meta-llama/Llama-3.2-1B-Instruct
Retriever	: Chroma using HuggingFaceEmbeddings

Insurellm is an insurance technology (IT) startup that was founded in 2015 by Avery Lancaster. It offers a range of products, including a marketplace for insurance providers to connect with consumers, as well as a comprehensive training program to help customers utilize the services. Insurellm aims to revolutionize the insurance industry by providing innovative and user-friendly solutions to improve customer experience and increase efficiency.

CPU times: total: 2.81 s
Wall time: 3.19 s


In [64]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
# The Hugging Face chat wrapper is identified through `model_id`.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_id}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: meta-llama/Llama-3.2-1B-Instruct
Retriever	: Chroma using OpenAIEmbeddings

Insurellm is an insurance tech firm that offers four insurance software products: Carllm for auto insurance, Homellm for home insurance, Rellm for the reinsurance sector, and Markellm for connecting consumers with insurance providers. Founded in 2015 by Avery Lancaster, Insurellm aims to disrupt the insurance industry by providing innovative products and solutions that prioritize both innovation and reliability for all insurance providers and customers.

CPU times: total: 2.62 s
Wall time: 3.5 s


In [66]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
# The Hugging Face chat wrapper is identified through `model_id`.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_id}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: meta-llama/Llama-3.2-1B-Instruct
Retriever	: FAISS using HuggingFaceEmbeddings

Insurellm is an insurance tech startup that provides innovative solutions for the insurance industry. They were founded in 2015 by Avery Lancaster and have since expanded to become a leading provider of insurance marketplaces, connecting consumers with insurance providers. Their platform offers a range of products and services, including policy management, claims processing, and customer support, with a focus on improving the efficiency and customer experience of the insurance industry.

CPU times: total: 2.67 s
Wall time: 2.68 s


In [68]:
%%time
# Show which LLM and retriever the current `conversation_chain` is using.
# The Hugging Face chat wrapper is identified through `model_id`.
active_llm = conversation_chain.question_generator.llm
print(f"Model name\t: {active_llm.model_id}")
retriever_tags = conversation_chain.retriever.tags
print(f"Retriever\t: {retriever_tags[0]} using {retriever_tags[1]}\n")

# Send the shared test question through the chain and print its answer.
message = "Describe Insurellm in a few sentences"
response = conversation_chain.invoke({"question": message})
answer = response["answer"]
print(f"{answer}\n")

Model name	: meta-llama/Llama-3.2-1B-Instruct
Retriever	: FAISS using OpenAIEmbeddings

Insurellm is an insurance technology (tech) firm that offers four insurance software products: Carllm, a car insurance portal; Homellm, a home insurance portal; Rellm, an enterprise platform for the reinsurance sector; and Markellm, a marketplace for connecting consumers with insurance providers. Founded in 2015, Insurellm has grown rapidly, reaching 200 employees and 300+ clients worldwide.

CPU times: total: 2.55 s
Wall time: 3.08 s


# UI

In [90]:
%%time
# Using the best model and retriever
conversation_chain = setup_conversation_chain(llama_model, faiss_hf_vectorstores, temperature=0.1)

def chat(message, history):
    """Answer one chat message using the notebook-level ``conversation_chain``.

    Args:
        message: The user's latest message, sent to the chain as the ``question``.
        history: Accepted as the second argument of the chat callback but not read here.

    Returns:
        The ``answer`` entry of the chain's response.
    """
    return conversation_chain.invoke({"question": message})["answer"]

llama3.2:latest model loaded.
<langchain_community.vectorstores.faiss.FAISS object at 0x000002313D5F7D50> has been set-up as retriever.
chat_history memroy has been set-up.
CPU times: total: 0 ns
Wall time: 1e+03 μs


In [91]:
# Wrap `chat` in a Gradio chat interface, then start the local server.
chat_ui = gr.ChatInterface(fn=chat, type="messages")
view = chat_ui.launch()

* Running on local URL:  http://127.0.0.1:7868
* To create a public link, set `share=True` in `launch()`.
