In [1]:
# Set up the client for the local LLM, which is reached through the Ollama
# endpoint configured below.

from langchain_openai import ChatOpenAI

MODEL = "mistral"
OLLAMA_BASE_URL = "http://localhost:11434/v1"

ollama = ChatOpenAI(
    model=MODEL,
    temperature=0,
    base_url=OLLAMA_BASE_URL,
    api_key="ollama",
)

# This model handles all the text-related query processing. It is not used for
# image or audio generation.

In [2]:
import requests

# Health check: confirm that the local Ollama server answers before going on.
# The timeout stops this cell from hanging forever if the host accepts the
# connection but never responds.
requests.get("http://localhost:11434", timeout=5).content

b'Ollama is running'

In [None]:
# Image and audio generation go through the OpenAI API, because no single open
# LLM was found that covers text-to-text, text-to-image and text-to-audio.

import os

from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables from the .env file, overriding any already set.
load_dotenv(override=True)

# Read the key and report whether it is present (only its first 8 characters
# are printed).
openai_api_key = os.getenv("OPENAI_API_KEY")
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)

openai = OpenAI()


OpenAI API Key exists and begins sk-proj-


In [4]:
# Now I set up the DALL-E model for image generation.

import base64
from io import BytesIO
from PIL import Image

# Function for generating images for our chatbot
def artist(city):
    """Generate a pop-art image of an Insurellm office in the given place.

    Args:
        city: Name of the location inserted into the image prompt.

    Returns:
        A PIL ``Image`` opened from the decoded base64 payload of the first
        image in the response.
    """
    # The prompt previously misspelled "insurance" and ran the company name and
    # the style instruction together in one sentence; both are fixed here.
    prompt = (
        f"An image representing an insurance company office in {city}, "
        "showing customers and employees in the office. "
        "The company name is insurellm. Use a vibrant pop-art style"
    )
    image_response = openai.images.generate(
        model="dall-e-3",
        prompt=prompt,
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    # The image is requested as base64 text, so decode it back to raw bytes.
    image_base64 = image_response.data[0].b64_json
    image_data = base64.b64decode(image_base64)
    return Image.open(BytesIO(image_data))

In [5]:
# Let's now set up the text-to-audio functionality. I use gpt-4o-mini-tts (text-to-speech).

# Function to add speech feature to our chat_bot
def talker(message):
    """Convert a text message to speech and return the response content.

    Args:
        message: The text to be spoken.

    Returns:
        The ``content`` attribute of the speech response.
    """
    speech = openai.audio.speech.create(model="gpt-4o-mini-tts", voice="onyx", input=message)
    return speech.content

In [6]:
# Now I add the RAG functionality. The knowledge base consists of the insurance
# company's files, stored in the folder knowledge-base.

from langchain_chroma import Chroma
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_huggingface import HuggingFaceEmbeddings



In [7]:
# Measure the total size, in characters, of all documents in the knowledge base.
import glob

knowledge_base_path = "knowledge-base/**/*.md"
files = glob.glob(knowledge_base_path, recursive=True)
print(f"Found {len(files)} files in the knowledge base")

# Collect each file's text followed by a blank-line separator, then join once.
file_texts = []
for file_path in files:
    with open(file_path, "r", encoding="utf-8") as f:
        file_texts.append(f.read() + "\n\n")
entire_knowledge_base = "".join(file_texts)

print(f"Total characters in knowledge base: {len(entire_knowledge_base):,}")

Found 76 files in the knowledge base
Total characters in knowledge base: 304,434


In [None]:
# Load everything in the knowledge base using LangChain's loaders.
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Keep only sub-directories: a stray file directly under knowledge-base/ would
# otherwise be handed to DirectoryLoader, which expects a directory path.
folders = [path for path in glob.glob("knowledge-base/*") if os.path.isdir(path)]

documents = []
for folder in folders:
    # The folder name is recorded on every document as its "doc_type".
    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs={'encoding': 'utf-8'})
    folder_docs = loader.load()
    for doc in folder_docs:
        doc.metadata["doc_type"] = doc_type
        documents.append(doc)

print(f"Loaded {len(documents)} documents")

Loaded 76 documents


In [None]:
# Split the loaded documents into chunks with RecursiveCharacterTextSplitter.
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

# Report how many chunks were produced and show the first one as a sanity check.
print(f"Divided into {len(chunks)} chunks")
print(f"First chunk:\n\n{chunks[0]}")

Divided into 413 chunks
First chunk:

page_content='# About Insurellm

Insurellm was founded by Avery Lancaster in 2015 as an insurance tech startup designed to disrupt an industry in need of innovative products. Its first product was Markellm, the marketplace connecting consumers with insurance providers.

The company experienced rapid growth in its first five years, expanding its product portfolio to include Carllm (auto insurance portal), Homellm (home insurance portal), and Rellm (enterprise reinsurance platform). By 2020, Insurellm had reached a peak of 200 employees with 12 offices across the US.' metadata={'source': 'knowledge-base\\company\\about.md', 'doc_type': 'company'}


In [None]:
# Choose the embedding model and the on-disk location of the vector database.
db_name = "vector_db"
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Alternative: embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# If a database directory already exists, drop its collection before rebuilding.
if os.path.exists(db_name):
    Chroma(
        persist_directory=db_name,
        embedding_function=embeddings,
    ).delete_collection()

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 413 documents


In [11]:
# Inspect the stored vectors: how many there are and how long each one is.

collection = vectorstore._collection
count = collection.count()

# Fetch a single record with its embedding to measure the vector length.
first_record = collection.get(limit=1, include=["embeddings"])
sample_embedding = first_record["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")

There are 413 vectors with 384 dimensions in the vector store


In [12]:
# Expose the vector store through a retriever; `chat` queries it with
# `retriever.invoke(question)` to fetch context documents.
retriever = vectorstore.as_retriever()

In [13]:
# System prompt template for the assistant. The `{context}` placeholder is
# filled in `chat` via `SYSTEM_PROMPT.format(context=...)` with the text of the
# retrieved documents.
SYSTEM_PROMPT = """
You are a knowledgeable, friendly assistant representing the company Insurellm.
You are chatting with a user about Insurellm.
If relevant, use the given context to answer any question.
If you don't know the answer, say so.
Context:
{context}
"""

In [None]:

import gradio as gr

def put_message_in_chatbot(message, history):
    """Add the user's message to the chat history and clear the input box.

    Args:
        message: The text the user submitted.
        history: The existing list of chat message dicts; it is not modified.

    Returns:
        A tuple of an empty string (the new textbox value) and a new history
        list with the user's message appended.
    """
    user_entry = {"role": "user", "content": message}
    updated_history = history + [user_entry]
    return "", updated_history


def chat(history):
    """Answer the latest user message with text, speech and an optional image.

    Args:
        history: Chat messages as a list of ``{"role", "content"}`` dicts. The
            last entry is taken as the user's question, and the assistant's
            reply is appended to this list in place.

    Returns:
        A ``(history, voice, image)`` tuple: the history including the new
        assistant reply, the value returned by ``talker`` for that reply, and
        the value returned by ``artist`` (``None`` when no country was
        detected in the question).
    """
    question = history[-1]["content"]

    # Ask the local model whether the question mentions a country.
    extraction_prompt = f"Analyze the following text and extract any country names. If there are multiple, return only the first one. If no country is found, return the word 'none'.\n\nText: \"{question}\""
    extraction_response = ollama.invoke([HumanMessage(content=extraction_prompt)])
    # The answer may come back wrapped in quotes or ending with a period
    # (e.g. "None." or 'none'). Strip that decoration so a "no country" answer
    # does not trigger image generation by mistake.
    country_name = extraction_response.content.strip().strip("\"'`.").strip()

    # Only generate an image when a non-empty country name was returned.
    image = None
    if country_name and country_name.lower() != "none":
        image = artist(country_name)

    # Retrieve documents for the question and join their text into the context.
    docs = retriever.invoke(question)
    context = "\n\n".join(doc.page_content for doc in docs)

    system_prompt = SYSTEM_PROMPT.format(context=context)

    response = ollama.invoke([SystemMessage(content=system_prompt), HumanMessage(content=question)])
    reply = response.content

    history.append({"role": "assistant", "content": reply})

    # Produce the spoken version of the reply.
    voice = talker(reply)

    return history, voice, image


In [None]:
# Build the chat interface: chat window and image in one row, then the audio
# player, then the text input.
with gr.Blocks() as ui:
    with gr.Row():
        chatbot = gr.Chatbot(height=500, type="messages")
        image_output = gr.Image(height=500, interactive=False)
    with gr.Row():
        audio_output = gr.Audio(autoplay=True)
    with gr.Row():
        message = gr.Textbox(label="Chat with our AI Assistant:")

    # On submit, first move the user's text into the chat history, then run
    # `chat` to fill in the reply, the audio and the image.
    user_turn = message.submit(
        put_message_in_chatbot,
        inputs=[message, chatbot],
        outputs=[message, chatbot],
    )
    user_turn.then(
        chat,
        inputs=chatbot,
        outputs=[chatbot, audio_output, image_output],
    )

ui.launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


