# MediMate :- Your personalized healthcare assistant powered by AI.

In [15]:

from langchain.vectorstores import Pinecone
import pinecone
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
import os

In [14]:
### Downloading all the required packages by following workflow.

from langchain.document_loaders import PyPDFLoader,DirectoryLoader # To load dataset.
from langchain.text_splitter import RecursiveCharacterTextSplitter # split data into chunks.
from langchain.embeddings import HuggingFaceEmbeddings          # vectorization.
import pinecone
from langchain.vectorstores import Pinecone                     # store vectores.

from langchain.prompts import PromptTemplate                       #To give specific instruction to llm.
from langchain.chains import RetrievalQA                          # to retrive relavent vecotors

from langchain.llms import CTransformers                         #to use model.

In [3]:
# Load data

#Extract data from the PDF
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    
    documents = loader.load()

    return documents

In [4]:
extracted_data = load_pdf("data/")
print(len(extracted_data))

637


In [5]:
# creating chunks from loaded data.

def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [6]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 7020


In [6]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

embeddings = download_hugging_face_embeddings()

In [7]:
# test
query_result = embeddings.embed_query("Hi, How are you?")
print("Length", len(query_result))

Length 384


In [16]:
# Upload Data to Vectore Database "Pinecone"

from langchain_pinecone import PineconeVectorStore
import pinecone

In [9]:

PINECONE_API_KEY = "7f2c336d-cfe5-4932-be89-ac751167b757"
PINECONE_API_ENV = "us-east-1"
index_name="medimate"

os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

In [11]:
def upload_data_to_pinecone(text_chunks, pinecone_api_key, index_name):

  #Creating Embeddings for Each of The Text Chunks & storing
  docsearch=PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

  print("Data uploaded successfully to Pinecone!")

# Upload data using your API key
upload_data_to_pinecone(text_chunks, PINECONE_API_KEY, index_name)

Data uploaded successfully to Pinecone!


In [10]:
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

In [11]:
query = "what is the allergy?"

docs=docsearch.similarity_search(query, k=3)

print("Result", docs)

Result [Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'), Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={'page': 135.0, 'source': 'data/Medical_book.pdf'}), Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-')]


In [12]:
#Fatching top 10 matches
    docsearch.similarity_search(query=query, k=10)

[Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'),
 Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'),
 Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={'page': 135.0, 'source': 'data/Medical_book.pdf'}),
 Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={'page': 135.0, 'source': 'data/Medical_book.pdf'}),
 Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-', metadata={'page': 135.0, 'source': 'data/Medical_book.pdf'}),
 Document(page_content='When thisoccurs, an allergy 

In [15]:
# Now creating a propt templete.

prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [16]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}

In [18]:
# use the model to get accurate answer most relavent.
llm = CTransformers(model="/Users/vishalpatel/Desktop/openaI/MediMate/model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})
llm

CTransformers(client=<ctransformers.llm.LLM object at 0x3107e9610>, model='/Users/vishalpatel/Desktop/openaI/MediMate/model/llama-2-7b-chat.ggmlv3.q4_0.bin', model_type='llama', config={'max_new_tokens': 512, 'temperature': 0.8})

In [19]:
retriever = docsearch.as_retriever(search_kwargs={'k': 2})

In [20]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [22]:
!pip install langchainhub

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting langchainhub
  Downloading langchainhub-0.1.15-py3-none-any.whl.metadata (621 bytes)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.31.0.20240406-py3-none-any.whl.metadata (1.8 kB)
Downloading langchainhub-0.1.15-py3-none-any.whl (4.6 kB)
Downloading types_requests-2.31.0.20240406-py3-none-any.whl (15 kB)
Installing collected packages: types-requests, langchainhub
Successfully installed langchainhub-0.1.15 types-requests-2.31.0.20240406


In [23]:
from langchain import hub

retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

In [24]:
combine_docs_chain = create_stuff_documents_chain(
    llm, retrieval_qa_chat_prompt
)

In [25]:
from langchain.chains import create_retrieval_chain

In [26]:
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

In [30]:
qe = retrieval_chain.invoke({"input":"what is the allergy?"})

In [31]:
print(qe)

{'input': 'what is the allergy?', 'context': [Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-'), Document(page_content='the itchy, scratchy nose, eyes, and throat common inallergic rhinitis .\nThe particular allergens to which a person is sensi-')], 'answer': '\nAssistant: Ah, I see! Based on the context you provided, it seems that the allergy being referred to is allergic rhinitis. This is a common allergic reaction that occurs when the nasal passages become inflamed due to exposure to an allergen, such as pollen, dust, or pet dander. The symptoms you mentioned, including the itchy nose, eyes, and throat, are all common signs of allergic rhinitis.'}


In [32]:
qe.keys()

dict_keys(['input', 'context', 'answer'])

In [34]:
print(qe['answer'])


Assistant: Ah, I see! Based on the context you provided, it seems that the allergy being referred to is allergic rhinitis. This is a common allergic reaction that occurs when the nasal passages become inflamed due to exposure to an allergen, such as pollen, dust, or pet dander. The symptoms you mentioned, including the itchy nose, eyes, and throat, are all common signs of allergic rhinitis.
