In [25]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import CTransformers
from langchain import PromptTemplate
import pinecone

In [26]:
import os
os.environ['PINECONE_API_KEY'] = "pcsk_2v563S_SvhqbswwyQMD5WJtwo5mdyUyabLG31np4DHH6NDLG9RLujTqWMevxVGk4y5G3kS"

In [27]:
def load_pdf(data):
  loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
  documents = loader.load()
  return documents

In [28]:
extracted_data = load_pdf("data/")

In [29]:
# extracted_data

In [30]:
def text_split(extracted_data):
  splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
  chunks = splitter.split_documents(extracted_data)
  return chunks

In [31]:
text_chunks = text_split(extracted_data)
print(len(text_chunks))

5860


In [32]:
def download_huggingface_embeddings():
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  return embeddings

In [33]:
embeddings = download_huggingface_embeddings()

In [34]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [35]:
query_result = embeddings.embed_query("Hello World")
print("Length ", len(query_result))

Length  384


In [38]:
query_result

[-0.03447723761200905,
 0.031023213639855385,
 0.006734990980476141,
 0.02610895223915577,
 -0.03936200216412544,
 -0.16030248999595642,
 0.06692393124103546,
 -0.006441502831876278,
 -0.04745049029588699,
 0.014758865348994732,
 0.07087529450654984,
 0.05552753433585167,
 0.019193345680832863,
 -0.026251327246427536,
 -0.010109513066709042,
 -0.026940496638417244,
 0.022307435050606728,
 -0.022226642817258835,
 -0.1496925801038742,
 -0.01749304123222828,
 0.007676258217543364,
 0.05435232073068619,
 0.0032544711139053106,
 0.031725890934467316,
 -0.0846213549375534,
 -0.029405983164906502,
 0.05159558728337288,
 0.04812406003475189,
 -0.0033148040529340506,
 -0.05827920883893967,
 0.04196924716234207,
 0.022210638970136642,
 0.1281888484954834,
 -0.022338991984725,
 -0.011656233109533787,
 0.06292837113142014,
 -0.03287629410624504,
 -0.09122603386640549,
 -0.031175388023257256,
 0.052699580788612366,
 0.0470348484814167,
 -0.08420310914516449,
 -0.030056176707148552,
 -0.020744847133

In [41]:
pinecone.Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
index_name = "medical-chatbot"
docsearch = Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [42]:
docsearch = Pinecone.from_existing_index(index_name, embeddings)
query = "What are Allergies, and how do I know if I have them?"
docs = docsearch.similarity_search(query, k=3)
print("Result :", docs)

Result : [Document(metadata={}, page_content='reaction. Allergic rhinitis is characterized by an itchy,\nrunny nose, often with a scratchy or irritated throat due\nto post-nasal drip. Inflammation of the thin membrane\ncovering the eye (allergic conjunctivitis) causes redness,\nirritation, and increased tearing in the eyes. Asthma caus-\nes wheezing, coughing, and shortness of breath. Symp-\ntoms of food allergies depend on the tissues most sensi-\ntive to the allergen and whether the allergen spread sys-'), Document(metadata={}, page_content='This type of allergic response may develop over several\ndays following contact with the allergen, and symptoms\nmay persist for a week or more.\nCauses and symptoms\nAllergens enter the body through four main routes:\nthe airways, the skin, the gastrointestinal tract, and the\ncirculatory system.\n• Airborne allergens cause the sneezing, runny nose,\nand itchy, bloodshot eyes of hay fever (allergic rhini-\ntis). Airborne allergens can also affec

In [43]:
prompt_template = """
Use the following pieces of context to answer the question at the end. You cannot generate any new information and add any new information to the context.
Context: {context}
Question: {question}
Return the exact answer you recieved but just correct the format and nothing else so that it can be read in the chatbot.
Answer:
"""

In [44]:
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs = {"prompt": PROMPT}

In [45]:
llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin", model_type="llama", config={'max_new_tokens': 512, 'temperature': 0.8})

In [46]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", 
      retriever=docsearch.as_retriever(search_kwargs={"k": 2}),
      return_source_documents=True,
      chain_type_kwargs=chain_type_kwargs)

In [None]:
while True:
    user_input = input(f"Input prompt: ")
    result = qa({"query": user_input})
    print("Response :",result["result"])

Number of tokens (513) exceeded maximum context length (512).
Number of tokens (514) exceeded maximum context length (512).


Response : Allergies are characterized by an itchy, runny nose, often with a scratchy or irritated throat due to post-nasal drip. Inflammation of the thin membrane covering the eye (allergic conjunctivitis) causes redness, irritation, and increased tearing in the eyes. Asthma causes wheezing, coughing, and shortness of breath. Symptoms of food allergies depend on the tissues most sensitive to the allergen and whether the allergen spreads systemically or locally. Common inhaled allergens include pollen, dust, and insect parts from tiny house mites. Common food allergens include nuts, fish, and milk. Allergic reactions involve a special set of cells in the immune system known as mast cells. Mast
