In [1]:
import os
import dotenv
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
# Load variables from a .env file (python-dotenv) so the API-key lookups in
# the following cells can find them in the process environment.
dotenv.load_dotenv()

True

In [3]:
# The key is expected to be in the environment already (loaded from .env).
# Copying os.getenv() back into os.environ is a no-op when the key exists and
# raises an opaque "TypeError: str expected, not NoneType" when it does not,
# so check explicitly and fail with a readable message instead.
if not os.getenv("PINECONE_API_KEY"):
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to your .env file or environment."
    )

In [4]:
# The key is expected to be in the environment already (loaded from .env).
# Copying os.getenv() back into os.environ is a no-op when the key exists and
# raises an opaque "TypeError: str expected, not NoneType" when it does not,
# so check explicitly and fail with a readable message instead.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file or environment."
    )

In [8]:
def load_data(path):
    """Load every PDF found in a directory.

    Args:
        path: Directory to scan; files matching ``*.pdf`` are read with
            ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for the matched files.
    """
    pdf_loader = DirectoryLoader(path, loader_cls=PyPDFLoader, glob=["*.pdf"])
    return pdf_loader.load()




In [9]:
%pwd

'd:\\projects\\Practice Projects\\AI projects\\Generative AI\\Medical Chatbot\\research'

In [10]:
# The notebook starts in research/, while later cells use paths such as
# "data/" relative to the parent folder. An unconditional os.chdir('../')
# climbs one more level every time this cell is re-run, so only step up while
# the data/ directory is not yet visible from the current directory.
if not os.path.isdir("data"):
    os.chdir('../')

In [11]:
%pwd

'd:\\projects\\Practice Projects\\AI projects\\Generative AI\\Medical Chatbot'

In [13]:
# Read the source PDFs; the path is relative to the current working directory.
docs = load_data(path='data/')

In [14]:
def split_docs(docs, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks.

    Args:
        docs: Documents to split (e.g. the result of ``load_data``).
        chunk_size: Target maximum chunk length handed to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that used to be hard-coded.
        chunk_overlap: Overlap between neighbouring chunks handed to the
            splitter. Defaults to 20, the value that used to be hard-coded.

    Returns:
        The chunked documents returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [15]:
# Chunk the loaded pages so each piece can be embedded on its own.
splitted_docs = split_docs(docs=docs)

In [16]:
from langchain_huggingface import HuggingFaceEmbeddings

In [17]:
def download_embedding_model():
    """Build the sentence-embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )


embeddings = download_embedding_model()


  from .autonotebook import tqdm as notebook_tqdm


In [18]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec


# No key is passed explicitly, so the client is expected to pick up
# PINECONE_API_KEY from the environment.
pc = Pinecone()

index_name = "medibot"

# create_index() fails once the index exists, which would break every re-run
# of this cell (and "Restart & Run All"); only create it when it is missing.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Must equal the embedding vector size
        # (all-MiniLM-L6-v2 produces 384-dim vectors; update if the model changes).
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )


In [20]:
from langchain_pinecone import PineconeVectorStore

# Embed the chunks and push them into the Pinecone index in one call.
# NOTE(review): re-running this cell presumably uploads the same chunks
# again -- confirm before re-executing against a populated index.
docsearch = PineconeVectorStore.from_documents(
    documents=splitted_docs,
    embedding=embeddings,
    index_name=index_name,
)

In [22]:
# Attach to the index by name rather than uploading documents again.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [23]:
# Expose the vector store as a retriever, passing k=3 as the search setting.
retriever = docsearch.as_retriever(search_kwargs=dict(k=3))

In [24]:
# Smoke-test the retriever on an in-domain query; the cell output lists the
# matched document chunks with their page metadata.
retriever.invoke("What is TOPICAL DRUGS")

[Document(id='7cf8f41b-f8b8-4e13-93ad-6941a5b6570e', metadata={'page': 283.0, 'page_label': '284', 'source': 'data\\Medical_book.pdf'}, page_content='Nancy Ross-Flanigan\nAntifungal drugs, topical\nDefinition\nTopical antifungal drugs are medicines applied to\nthe skin to treat skin infections caused by a fungus.\nPurpose\nDermatologic fungal infections are usually\ndescribed by their location on the body: tinea pedis\n(infection of the foot), tinea unguium (infection of the\nnails), tinia capitis (infection of the scalp.) Three types of\nfungus are involved in most skin infections:Trichophy-\nton, Epidermophyton, and Microsporum. Mild infections'),
 Document(id='c2a3173f-80e0-419e-a729-c248bc23b836', metadata={'page': 38.0, 'page_label': '39', 'source': 'data\\Medical_book.pdf'}, page_content='Topical medications are available as cream, gel,\nlotion, or pad preparations of varying strengths. They\ninclude antibiotics (agents that kill bacteria), such as ery-\nthromycin, clindamycin (C

In [25]:
from langchain_groq import ChatGroq

# Groq-hosted chat model used to generate the answers.
GROQ_MODEL = "gemma2-9b-it"
llm = ChatGroq(model=GROQ_MODEL)

In [26]:
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [27]:
# System instructions for the assistant. {context} is the single template
# variable and receives the retrieved chunks. Keep this as plain sentences:
# every character here (stray quotes, commas, typos) is sent to the model
# verbatim.
system_prompt = """
You are a helpful healthcare AI assistant.
Answer the user's question using the context below.
If the question is not related to the context, just say "I don't know".

{context}
"""


# Two-message chat template: the system instructions (with the retrieved
# context) followed by the user's question in {input}.
chat_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(chat_messages)


In [28]:
# Answering step: feeds the retrieved documents plus the question to the LLM.
question_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full pipeline: retrieve documents for the input, then run the answering step.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_chain,
)

In [29]:
# End-to-end check with an in-domain question; the result dict echoes the
# 'input' and includes the retrieved 'context' documents.
rag_chain.invoke({"input" : "What is Topical Drugs"})

{'input': 'What is Topical Drugs',
 'context': [Document(id='7cf8f41b-f8b8-4e13-93ad-6941a5b6570e', metadata={'page': 283.0, 'page_label': '284', 'source': 'data\\Medical_book.pdf'}, page_content='Nancy Ross-Flanigan\nAntifungal drugs, topical\nDefinition\nTopical antifungal drugs are medicines applied to\nthe skin to treat skin infections caused by a fungus.\nPurpose\nDermatologic fungal infections are usually\ndescribed by their location on the body: tinea pedis\n(infection of the foot), tinea unguium (infection of the\nnails), tinia capitis (infection of the scalp.) Three types of\nfungus are involved in most skin infections:Trichophy-\nton, Epidermophyton, and Microsporum. Mild infections'),
  Document(id='c2a3173f-80e0-419e-a729-c248bc23b836', metadata={'page': 38.0, 'page_label': '39', 'source': 'data\\Medical_book.pdf'}, page_content='Topical medications are available as cream, gel,\nlotion, or pad preparations of varying strengths. They\ninclude antibiotics (agents that kill ba

In [30]:
# Out-of-domain question: exercises the "I don't know" instruction in the
# system prompt. Note that the retriever still supplies context chunks even
# though they are unrelated to the question.
rag_chain.invoke({"input": "What is Artifical Inteligence"})

{'input': 'What is Artifical Inteligence',
 'context': [Document(id='df74ebf4-75f6-44a0-a029-29db0d950dbd', metadata={'page': 442.0, 'page_label': '443', 'source': 'data\\Medical_book.pdf'}, page_content='his practice of integrating the best aspects of Eastern and\nWestern medicine.\nIn 1993, he published Creating Affluence: Wealth\nConsciousness in the Field of All Possibilities , and the\nenormously successful best seller, Ageless Body, Timeless\nMind. In the latter he presents his most radical thesis: that\naging is not the inevitable deterioration of organs and\nmind that we have been traditionally taught to think of it\nas. It is a process that can be influenced, slowed down,'),
  Document(id='7920ac43-d32b-46bb-8a9b-6bca579736dc', metadata={'page': 429.0, 'page_label': '430', 'source': 'data\\Medical_book.pdf'}, page_content='be understood are among our most basic needs. He was a\npioneer in perceiving that language problems convert into\nsocial problems for people. “Language is 