In [1]:
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Show the kernel's working directory before it is changed in the next cell.
%pwd

'c:\\MediBot\\MediBot\\research'

In [3]:
import os

# Move from the notebook folder up to the project root so that relative paths
# such as "data" resolve. The guard keeps this cell idempotent: an
# unconditional os.chdir('../') climbs one more level on every re-run.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
# Confirm the working directory after the chdir above.
%pwd

'c:\\MediBot\\MediBot'

In [5]:
def get_file(data):
    """Load the PDF files matching ``*.pdf`` in the ``data`` directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader.load()``, with each matched file
        read through ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader).load()


In [6]:
# Load the PDF documents from the "data" directory (relative to the current working directory).
extracted_data = get_file("data")

In [7]:
from typing import List
from langchain.schema import Document

def filter_data(docs: List[Document]) -> List[Document]:
    """Return copies of ``docs`` whose metadata holds only the ``source`` key.

    Args:
        docs: Documents whose ``page_content`` and ``metadata`` are read.

    Returns:
        A new list, in input order, of ``Document`` objects carrying the
        original ``page_content`` and ``{"source": ...}`` as metadata. The
        value is ``None`` when a document's metadata has no ``source`` entry.
    """
    return [
        Document(
            page_content=item.page_content,
            metadata={"source": item.metadata.get("source")},
        )
        for item in docs
    ]



In [8]:
# Drop every metadata field except "source" from the loaded documents.
filtered_data = filter_data(extracted_data)

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

def chunk_docs(
    filtered_data: List[Document],
    chunk_size: int = 1000,
    chunk_overlap: int = 100,
) -> List[Document]:
    """Split documents into overlapping chunks.

    Args:
        filtered_data: Documents to split.
        chunk_size: Passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 1000, the value this notebook used
            before the argument existed.
        chunk_overlap: Passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 100, the previous fixed value.

    Returns:
        The chunks produced by ``splitter.split_documents(filtered_data)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(filtered_data)

In [10]:
# Split the filtered documents into chunks for embedding.
chunked_docs = chunk_docs(filtered_data)

In [11]:
from langchain_community.embeddings import HuggingFaceEmbeddings

In [12]:
def download_embedding():
    """Create the embedding model used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    # NOTE(review): the saved output of the cell that calls this function
    # echoes the HuggingFaceEmbeddings(...) line, which looks like a
    # deprecation warning for this class — confirm and plan a migration.
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [13]:
# Instantiate the embedding model once; it is reused when building the vector store.
embedding_model = download_embedding()

  embedding = HuggingFaceEmbeddings(model_name=model_name)


In [14]:
from dotenv import load_dotenv
import os
# Load environment variables via python-dotenv so the os.getenv(...) lookups
# in later cells can find the API keys. The saved output shows it returned True.
load_dotenv()


True

In [15]:
# Pinecone credential read from the environment; None when the variable is unset.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')

In [19]:
from pinecone import Pinecone
from pinecone import ServerlessSpec

# Name of the Pinecone index used by every later cell.
index_name = 'medical-chatbot'

# Client authenticated with the key read from the environment.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create the index only when it does not exist yet, so re-running the cell
# does not attempt to create it a second time.
if not pc.has_index(index_name):
    pc.create_index(
        name = index_name,
        # NOTE(review): presumably the vector size of the embedding model —
        # confirm that 384 matches sentence-transformers/all-MiniLM-L6-v2.
        dimension=384,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws',region='us-east-1')
    )



In [20]:
# Handle to the index. NOTE(review): `index` is not referenced by any later
# cell visible here (the vector store is built from index_name) — confirm it is needed.
index = pc.Index(name=index_name)


In [21]:
from langchain_pinecone import PineconeVectorStore

# Build the vector store from the chunks, using embedding_model and the index named by index_name.
# NOTE(review): this presumably embeds and uploads every chunk again on each
# run — confirm whether re-running the cell duplicates vectors in the index.
docsearch = PineconeVectorStore.from_documents(documents=chunked_docs,index_name=index_name,embedding=embedding_model)

In [22]:
# Similarity-search retriever configured with k=5.
retriever = docsearch.as_retriever(search_type='similarity',search_kwargs={"k":5})

In [23]:
# Smoke-test the retriever with a sample question.
retrieved_docs = retriever.invoke("what is acne?")

In [24]:
# Display the retrieved documents for manual inspection.
retrieved_docs

[Document(id='10b12e34-e25b-47fb-b796-e7f9aee379fd', metadata={'source': 'data\\Medical_book.pdf'}, page_content='Isotretinoin (Accutane) is prescribed only for very\nsevere, disfiguring acne.\nAcne is a skin condition that occurs when pores or\nhair follicles become blocked. This allows a waxy\nmaterial, sebum, to collect inside the pores or follicles.\nNormally, sebum flows out onto the skin and hair to\nform a protective coating, but when it cannot get out,\nsmall swellings develop on the skin surface. Bacteria\nand dead skin cells can also collect that can cause\ninflammation. Swellings that are small and not\ninflamed are whiteheads or blackheads. When they\nbecome inflamed, they turn into pimples. Pimples that\nfill with pus are called pustules.\nAcne cannot be cured, but acne drugs can help clear\nthe skin. Benzoyl peroxide and tretinoin work by mildly\nirritating the skin. This encourages skin cells to slough\noff, which helps open blocked pores. Benzoyl peroxide\nalso kills ba

In [24]:
# OpenAI credential read from the environment; None when the variable is unset.
openai_key = os.environ.get('OPENAI_API_KEY')

In [33]:
# Confirm the key was loaded WITHOUT echoing it: displaying the variable
# itself writes the secret into the saved notebook output. The key that was
# previously shown here must be treated as compromised and rotated.
openai_key is not None

In [26]:
# Check that the OpenAI key is present in the environment. The variable is
# named OPENAI_API_KEY (the name read when openai_key is assigned); looking up
# 'OPENAI_KEY' always printed None. Only presence is reported, so the secret
# never lands in the saved output.
print('OPENAI_API_KEY' in os.environ)


In [27]:
from langchain_openai import ChatOpenAI
# from langchain_anthropic import ChatAnthropic
# OpenAI chat model; no API key is passed explicitly here.
chatModel = ChatOpenAI(model='gpt-4o')

In [28]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [29]:
from langchain_core.prompts import ChatPromptTemplate

# System message for the RAG prompt: instructions, then the {context}
# template placeholder, then the answer-style directive, each separated by a
# blank line.
system_prompt = "\n\n".join(
    [
        "You are a helpful medical assistant. Use ONLY the following context to answer. "
        'If the answer is not in the context, say "I don\'t know based on the provided data."',
        "Context:\n{context}",
        "Answer in a clear, medically accurate way.",
    ]
)

# Two-message chat template: the system instructions first, then the human
# message built from the {input} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)


In [30]:
# Combine the OpenAI chat model and the prompt into a document-answering chain ...
question_answer_chain = create_stuff_documents_chain(chatModel, prompt)
# ... and pair it with the retriever to form the retrieval chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [32]:
# NOTE: in the saved run this call failed with a 429 `insufficient_quota`
# RateLimitError from OpenAI, so `response` was not assigned by this cell.
response = rag_chain.invoke({"input":"what is acne?"})

RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [38]:
# Read the Anthropic key from the environment (e.g. the .env file loaded by
# load_dotenv()) instead of hardcoding it in the notebook. The literal key
# that used to be in this cell is exposed and must be revoked.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')

In [39]:
# Confirm the key is set WITHOUT echoing it: displaying the variable writes
# the secret into the saved notebook output.
ANTHROPIC_API_KEY is not None

In [49]:
# Read the Gemini key from the environment (add GEMINI_API_KEY to the .env
# file loaded by load_dotenv()) instead of hardcoding it in the notebook. The
# literal key that used to be in this cell is exposed and must be revoked.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

In [40]:
from langchain_anthropic import ChatAnthropic

In [41]:
# Anthropic chat model. NOTE: `model` is reassigned to a Gemini model in a
# later cell, and that later value is the one the final chain is built from.
model = ChatAnthropic(model = 'claude-sonnet-4-5-20250929',api_key=ANTHROPIC_API_KEY)

In [46]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [50]:
# Gemini chat model; this assignment replaces whatever `model` was bound to before.
model = ChatGoogleGenerativeAI(model='gemini-2.5-pro',api_key=GEMINI_API_KEY)

In [51]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [52]:
# System message for the final RAG prompt: four instruction sentences joined
# by single spaces, then a blank line and the {context} template placeholder.
# The wording is kept exactly as it was so model behaviour is unchanged.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an Medical assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
            ]
        ),
        "{context}",
    ]
)


# Two-message chat template: the system instructions first, then the human
# message built from the {input} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [53]:
# Combine the current `model` and the prompt into a document-answering chain ...
question_answer_chain = create_stuff_documents_chain(model, prompt)
# ... and pair it with the retriever to form the retrieval chain queried below.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [54]:
# Run a sample question through the retrieval chain and print only the
# "answer" field of the response.
response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
print(response["answer"])

Acromegaly is a disorder caused by the abnormal release of growth hormone from the pituitary gland after bone growth has stopped, leading to increased growth in bone and soft tissue. When this same hormonal abnormality occurs in children whose bones are still growing, it is called gigantism, which causes unusual height. Both men and women can be affected by these conditions, which are often diagnosed in middle age due to their gradual onset.
