In [1]:
!pip install langchain
!pip install langchain_community
!pip install pypdf

Collecting langchain
  Downloading langchain-0.2.1-py3-none-any.whl (973 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m973.5/973.5 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_core-0.2.1-py3-none-any.whl (308 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m308.5/308.5 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.0-py3-none-any.whl (23 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.63-py3-none-any.whl (122 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.8/122.8 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting packaging<24.0,>=23.2 (from langchain-

In [2]:
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain_community.llms import CTransformers
import os
from langchain_community.vectorstores import FAISS

In [3]:
def extract_data(data):
    """Load the PDF files matched by ``*.pdf`` inside the directory ``data``.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        The documents produced by ``DirectoryLoader.load()``; each matched
        file is read with ``PyPDFLoader``.
    """
    return DirectoryLoader(data, loader_cls=PyPDFLoader, glob="*.pdf").load()


# Source PDFs live in the Data folder of the project directory.
extracted = extract_data(
    "/content/drive/MyDrive/Colab Notebooks/Medical_Chatbot/Data"
)


In [4]:
# Split the loaded documents into overlapping text chunks
def doc_spliter(extracted):
    """Split ``extracted`` documents into chunks for embedding.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=500`` and ``chunk_overlap=30``.

    Args:
        extracted: Documents to split (as returned by ``extract_data``).

    Returns:
        The result of ``split_documents`` on ``extracted``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_overlap=30, chunk_size=500)
    return splitter.split_documents(extracted)


documents_split = doc_spliter(extracted)

In [6]:
pip install sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/171.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m163.8/171.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.5/171.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (

In [12]:
# Embedding model
def embeddings_model():
    """Create the sentence-embedding wrapper used to vectorise the chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2`` with the device set to CPU.
    """
    return HuggingFaceEmbeddings(
        model_kwargs={"device": "cpu"},
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )


embeddings = embeddings_model()

In [13]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


In [14]:
# Embed every chunk into a FAISS index, then persist the index to the folder below
vector_store = FAISS.from_documents(documents=documents_split, embedding=embeddings)
vector_store.save_local(
    folder_path="/content/drive/MyDrive/Colab Notebooks/Medical_Chatbot/vectorestores"
)


In [15]:
# Prompt template: the retrieved context and the user question are substituted
# into the {context} and {question} placeholders.
prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context:{context}
Question:{question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

prompts = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
# Passed through to the chain constructor as `chain_type_kwargs`.
chain_type_kwargs = dict(prompt=prompts)

In [None]:
# LLM: Llama-2 chat weights loaded from a local GGML file through CTransformers
llm = CTransformers(
    model_type="llama",
    model="/content/drive/MyDrive/Colab Notebooks/Medical_Chatbot/Data/llama-2-7b-chat.ggmlv3.q8_0.bin",
    config=dict(max_new_tokens=512, temperature=0.8),
)


In [None]:
#QA retrieval
vector_data = "/content/drive/MyDrive/Colab Notebooks/Medical_Chatbot/vectorestores"

# `vector_data` is only a path string, so it has no `as_retriever`; load the FAISS
# index that was persisted at that path and build the retriever from the store.
# NOTE: FAISS.load_local unpickles the stored docstore. That is acceptable here only
# because this notebook wrote the index itself; never load an index from an
# untrusted source this way.
vector_db = FAISS.load_local(
    vector_data,
    embeddings,  # must be the same embedding model the index was built with
    allow_dangerous_deserialization=True,
)

QA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # Retrieve the 3 most similar chunks for each query.
    retriever=vector_db.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

In [None]:
#Testing
# Interactive question/answer loop. Type "exit" or "quit" (or interrupt the cell)
# to stop; the original loop had no way to terminate.
while True:
    try:
        user_input = input("Input Prompt:").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop cleanly.
        break

    if user_input.lower() in {"exit", "quit"}:
        break
    if not user_input:
        # Do not spend an LLM call on an empty question.
        continue

    # `invoke` replaces the deprecated direct call `QA({...})`.
    result = QA.invoke({"query": user_input})
    print("Response: ", result["result"])
