In [1]:
import os
import glob
from dotenv import load_dotenv
import gradio as gr

In [5]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.vectorstores import Chroma
import numpy as np
from sklearn.manifold import TSNE
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [6]:
# Chat model name; passed to ChatOpenAI as `model_name` when the LLM is created.
MODEL = "gpt-4o-mini"
# Directory used as Chroma's `persist_directory` for the vector store.
db_name = "vector_db"

In [7]:
# Pull variables from a local .env file; values found there take precedence
# over ones already present in the process environment.
load_dotenv(override=True)

# Keep an existing OPENAI_API_KEY untouched; only when it is absent, fall back
# to the placeholder string (which must be replaced with a real key to work).
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')


In [25]:
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
# Root folder of the Markdown knowledge base. It can be overridden with the
# KNOWLEDGE_BASE_DIR environment variable so the notebook is not tied to one
# machine; the default is the original location.
folders = os.getenv("KNOWLEDGE_BASE_DIR", "/home/exouser/Desktop/Jeeva/knowledge_base")

# Read every file as UTF-8 regardless of the platform's default encoding.
text_loader_kwargs = {'encoding': 'utf-8'}

# Recursively load all *.md files under `folders`, one Document per file.
loader = DirectoryLoader(
    folders,
    glob="**/*.md",
    loader_cls=TextLoader,
    loader_kwargs=text_loader_kwargs
)
documents = loader.load()

# Fail here with a clear message instead of letting an empty document list
# surface later as an obscure error while building the vector store.
if not documents:
    raise FileNotFoundError(f"No Markdown files found under {folders!r}")

# Tag each document with a 'doc_type' derived from its file name
# (base name of the source path, without the extension).
for doc in documents:
    doc.metadata["doc_type"] = os.path.splitext(os.path.basename(doc.metadata["source"]))[0]

In [26]:
# Split the loaded documents into chunks targeting 1000 characters, with a
# 200-character overlap between neighbouring chunks.
# NOTE(review): the cell output shows a few chunks exceeding the 1000 target,
# so downstream code should not assume a hard upper bound on chunk length.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

Created a chunk of size 1152, which is longer than the specified 1000
Created a chunk of size 1025, which is longer than the specified 1000


In [27]:
# Number of chunks produced by the splitter (shown as the cell's output).
len(chunks)

19

In [28]:
# Distinct 'doc_type' labels carried by the chunks' metadata.
doc_types = {chunk.metadata['doc_type'] for chunk in chunks}
# Sort before joining: iteration order of a set of strings can differ between
# kernel sessions, which would make the printed line change from run to run.
print(f"Document types found: {', '.join(sorted(doc_types))}")


Document types found: about, journey, serve


In [29]:
# Embedding function shared by the cleanup step and the new vector store.
embeddings = OpenAIEmbeddings()

# If a persisted store already exists on disk, open it and drop its
# collection so the store below is rebuilt from the current chunks only.
if os.path.exists(db_name):
    _stale_store = Chroma(persist_directory=db_name, embedding_function=embeddings)
    _stale_store.delete_collection()

# Embed every chunk and persist the resulting vectors under `db_name`.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)
print(f"Vectorstore created with {vectorstore._collection.count()} documents")

Vectorstore created with 19 documents


In [30]:
# Underlying Chroma collection backing the vector store.
collection = vectorstore._collection

# Fetch a single stored record, asking for its embedding, and measure the
# vector's length to report the embedding dimensionality.
_first_record = collection.get(limit=1, include=["embeddings"])
sample_embedding = _first_record["embeddings"][0]
dimensions = len(sample_embedding)
print(f"The vectors have {dimensions:,} dimensions")

The vectors have 1,536 dimensions


In [None]:

# Chat model used to generate answers.
llm = ChatOpenAI(model_name=MODEL, temperature=0.7)

# Conversation memory: history is stored under 'chat_history' and returned
# as message objects.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)

# Retriever view over the vector store built above.
retriever = vectorstore.as_retriever()

# Tie model, retriever and memory together into one conversational RAG chain.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)


In [32]:
# Smoke test: send one question through the chain and print the answer text.
# The chain is invoked with a dict keyed by "question" and the reply is read
# from the "answer" key of the result.
query = "whos is the founder of Jeeva?"
result = conversation_chain.invoke({"question":query})
print(result["answer"])

The founder of Jeeva is Dr. Harsha Rajasimha.


In [None]:

# Rebuild the memory and the chain, reusing the existing `llm` and `retriever`.
# NOTE(review): presumably done so the chat UI starts with an empty history
# rather than one containing the test question asked earlier - confirm intent.
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [42]:
def chat(message, history):
    """Answer one chat message using the module-level conversation chain.

    Args:
        message: The user's latest message; forwarded to the chain as the
            "question" input.
        history: Chat history supplied by the caller. It is accepted for
            signature compatibility but not read here.

    Returns:
        The value stored under the "answer" key of the chain's result.
    """
    return conversation_chain.invoke({"question": message})["answer"]

In [43]:
import gradio as gr

# Build the chat UI: a logo row, a centred heading, and a chat interface that
# routes every user message through `chat`.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="gray")) as demo:
    with gr.Row():
        # NOTE(review): absolute, machine-specific path - the logo file must
        # exist at this location on whichever machine runs the notebook.
        gr.Image(value="/home/exouser/Desktop/Jeeva/logo.png", height=80, show_label=False, container=False)

    gr.Markdown(
        """
        <div style="text-align: center; padding-bottom: 10px;">
            <h2 style="margin: 0; font-size: 2rem;">Jeeva AI Assistant</h2>
            <p style="font-size: 1rem; color: #666;">Ask anything about our mission, journey, and how we serve clinical trials.</p>
        </div>
        """
    )

    chatbot = gr.ChatInterface(
        fn=chat,
        type="messages",
        chatbot=gr.Chatbot(
            label="Jeeva Assistant",
            # State the message format explicitly so it matches the
            # ChatInterface above; leaving it out made this call fall back to
            # its default format and emit a warning when the cell ran.
            type="messages",
            show_copy_button=True
        ),
        textbox=gr.Textbox(
            placeholder="Type your question here...",
            scale=7,
            show_label=False
        )
    )

# Start the local server and open the app in the default browser.
demo.launch(inbrowser=True)

  chatbot=gr.Chatbot(


* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.


