Build BioMistral Medical RAG Chatbot using BioMistral Open Source LLM
In the notebook we will build a Medical Chatbot with BioMistral LLM and Heart Health pdf file.

Installation

In [6]:
!pip install langchain sentence-transformers chromadb llama-cpp-python langchain_community pypdf

Collecting langchain
  Downloading langchain-0.1.13-py3-none-any.whl (810 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m810.5/810.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers
  Downloading sentence_transformers-2.6.1-py3-none-any.whl (163 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.3/163.3 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting chromadb
  Downloading chromadb-0.4.24-py3-none-any.whl (525 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m525.5/525.5 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting llama-cpp-python
  Downloading llama_cpp_python-0.2.57.tar.gz (36.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.9/36.9 MB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ...

Import Libraries

In [7]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS, Chroma
from langchain_community.llms import LlamaCpp
from langchain.chains import RetrievalQA, LLMChain

In [8]:
import pathlib
import textwrap
from IPython.display import display
from IPython.display import Markdown

def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [9]:
# Used to securely store your API key
from google.colab import userdata

Setup HuggingFace Access Token
Log in to HuggingFace.co
Click on your profile icon at the top-right corner, then choose “Settings.”
In the left sidebar, navigate to “Access Token”
Generate a new access token, assigning it the “write” role.

In [10]:
# Or use `os.getenv('HUGGINGFACEHUB_API_TOKEN')` to fetch an environment variable.
import os
from getpass import getpass

HUGGINGFACEHUB_API_TOKEN = userdata.get("HUGGINGFACEHUB_API_TOKEN")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "HUGGINGFACEHUB_API_TOKEN"

Import Document

In [13]:
loader = PyPDFDirectoryLoader("/content/Data")
docs = loader.load()

In [14]:
docs

[Document(page_content='YOUR GUIDE TO\nA Healthy Heart\nU.S. DEPARTMENT OF HEALTH AND HUMAN SERVICES\nNational Institutes of Health\nNational Heart, Lung, and Blood Institute\n', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 0}),
 Document(page_content='YOUR GUIDE TO\nA Healthy Heart\nU.S. D EPARTMENT OF HEALTH AND HUMAN SERVICES\nNational Institutes of Health\nNational Heart, Lung, and Blood InstituteNIH Publication No. 06-5269December 2005', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 1}),
 Document(page_content='U.S. DEPARTMENT OF HEALTH AND HUMAN SERVICES\nNational Institutes of HealthNational Heart, Lung, and Blood InstituteWritten by: Marian Sandmaier', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 2}),
 Document(page_content='Heart Disease: Why Should You Care? . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1\nWhat You Need To Know About Heart Disease . . . . . . . . . . . . . . . . . . . . . . 3\nWhat Is Heart Disease?.

Text Splitting - Chunking

In [15]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = text_splitter.split_documents(docs)

In [16]:
len(chunks)

747

In [17]:
chunks[0]

Document(page_content='YOUR GUIDE TO\nA Healthy Heart\nU.S. DEPARTMENT OF HEALTH AND HUMAN SERVICES\nNational Institutes of Health\nNational Heart, Lung, and Blood Institute', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 0})

In [18]:
chunks[1]

Document(page_content='YOUR GUIDE TO\nA Healthy Heart\nU.S. D EPARTMENT OF HEALTH AND HUMAN SERVICES\nNational Institutes of Health\nNational Heart, Lung, and Blood InstituteNIH Publication No. 06-5269December 2005', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 1})

In [19]:
chunks[2]

Document(page_content='U.S. DEPARTMENT OF HEALTH AND HUMAN SERVICES\nNational Institutes of HealthNational Heart, Lung, and Blood InstituteWritten by: Marian Sandmaier', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 2})

In [20]:
chunks[3]

Document(page_content='Heart Disease: Why Should You Care? . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1\nWhat You Need To Know About Heart Disease . . . . . . . . . . . . . . . . . . . . . . 3', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 3})

In [21]:
chunks[4]

Document(page_content='What Is Heart Disease?. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3Who Is at Risk? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4How Risk Works . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .', metadata={'source': '/content/Data/healthyheart.pdf', 'page': 3})

Embeddings

In [22]:
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vector Store - FAISS or ChromaDB

In [23]:
vectorstore = Chroma.from_documents(chunks, embeddings)

In [24]:
vectorstore

<langchain_community.vectorstores.chroma.Chroma at 0x7a311da85240>

In [25]:
query = "who is at risk of heart disease"
search = vectorstore.similarity_search(query)

In [26]:
to_markdown(search[0].page_content)

> an important risk factor for heart disease and other medicaldisorders.

Retriever

In [27]:
retriever = vectorstore.as_retriever(
    search_kwargs={'k': 5}
)

In [28]:
retriever.get_relevant_documents(query)

[Document(page_content='an important risk factor for heart disease and other medicaldisorders.', metadata={'page': 13, 'source': '/content/Data/healthyheart.pdf'}),
 Document(page_content='Some people may need to take additional steps to prevent heart', metadata={'page': 49, 'source': '/content/Data/healthyheart.pdf'}),
 Document(page_content='important risk factorsfor heart disease and how each of them affects your health.', metadata={'page': 15, 'source': '/content/Data/healthyheart.pdf'}),
 Document(page_content='high risk for a heart attack—at the same level of risk as someone who has heart disease.Also, even though overweight and physical inactivity are not on this list of risk factors,they are conditions that raise your risk for heart disease and need to be corrected.', metadata={'page': 24, 'source': '/content/Data/healthyheart.pdf'}),
 Document(page_content='While each risk factor increases your risk of heart disease, having', metadata={'page': 8, 'source': '/content/Data/healt

Large Language Model - Open Source

In [2]:
#connect to google drive
from google.colab import drive

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [29]:
llm = LlamaCpp(
    model_path= "/content/drive/MyDrive/BioMistral-7B.Q4_K_M.gguf",
    temperature=0.3,
    max_tokens=2048,
    top_p=1)

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /content/drive/MyDrive/BioMistral-7B.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = hub
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head

RAG Chain

In [30]:
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate

In [31]:
template = """
<|context|>
You are an AI assistant that follows instruction extremely well.
Please be truthful and give direct answers
</s>
<|user|>
{query}
</s>
 <|assistant|>
"""

In [32]:
prompt = ChatPromptTemplate.from_template(template)

In [33]:
rag_chain = (
    {"context": retriever,  "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [34]:
response = rag_chain.invoke("what disease affect the heart?")


llama_print_timings:        load time =    4951.58 ms
llama_print_timings:      sample time =      42.40 ms /    63 runs   (    0.67 ms per token,  1485.99 tokens per second)
llama_print_timings: prompt eval time =   34130.22 ms /    56 tokens (  609.47 ms per token,     1.64 tokens per second)
llama_print_timings:        eval time =   48521.11 ms /    62 runs   (  782.60 ms per token,     1.28 tokens per second)
llama_print_timings:       total time =   83009.85 ms /   118 tokens


In [35]:
to_markdown(response)

> The heart is affected by several diseases, including cardiomyopathy, coronary artery disease, arrhythmias, endocarditis, myocarditis, congenital heart defects, and heart failure. Is there a specific type of heart disease you would like to know more about?

In [None]:
import sys

while True:
  user_input = input(f"Input Prompt: ")
  if user_input == 'exit':
    print('Exiting')
    sys.exit()
  if user_input == '':
    continue
  result = rag_chain.invoke(user_input)
  print("Answer: ",result)