In [22]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone,ServerlessSpec
from langchain.document_loaders import  DirectoryLoader,PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
import os

In [2]:
!huggingface-cli login --token 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 --add-to-git-credential

Token is valid (permission: read).
Your token has been saved in your configured git credential helpers (manager).
Your token has been saved to C:\Users\Zeeshan Ali\.cache\huggingface\token
Login successful


In [24]:
PINECONE_API_KEY="e2a5927e-05b0-4fe1-b661-ad6fdfa78832"
PINECONE_INDEX_NAME="medical-chatbot"
os.environ["PINECONE_API_KEY"]=PINECONE_API_KEY

In [25]:
#Extract Data
def load_pdf(data_dir):
    loader = DirectoryLoader(data_dir, glob="**/*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()
    return docs

extracted_data = load_pdf("../data")

In [27]:
extracted_data[5].page_content
print(len(extracted_data))

1265


In [28]:
# Text Splitter
def split_texts(docs):
    text_splitter = RecursiveCharacterTextSplitter(
        # Set a really small chunk size, just to show.
        chunk_size=500,
        chunk_overlap=20,
        length_function=len,
        is_separator_regex=False,
    )
    texts = text_splitter.split_documents(docs)
    return texts

In [29]:
text_chunks=split_texts(extracted_data)

In [30]:
print(len(text_chunks))

10545


In [37]:
# Download Embedding Model
def download_embed_model():
    embeddings=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [38]:
embeddings=download_embed_model()

In [33]:
pc = Pinecone(api_key=PINECONE_API_KEY)



In [34]:
indexExists=False
for index_detail in pc.list_indexes():
    if(index_detail.name==PINECONE_INDEX_NAME):
        indexExists=True

if(not indexExists):
    pc.create_index(
    name=PINECONE_INDEX_NAME,
    dimension=384,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    )
    )
else:
    index = pc.Index(PINECONE_INDEX_NAME)

In [35]:
vector_store=PineconeVectorStore(index=index,embedding=embeddings)

In [36]:
vector_store.from_texts([text.page_content for text in text_chunks],embedding=embeddings,index_name="medical-chatbot")

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1e3c4b10c10>

In [39]:
print(vector_store.similarity_search("What are allergies?",k=3)[0].page_content)

ORGANIZATIONS
American Academy of Ophthalmology. 655 Beach Street, PO
Box 7424, San Francisco, CA 94120-7424. <http://www.
eyenet.org>.KEY TERMS
Allergen —A substance capable of inducing an
allergic response.
Allergic reaction —An immune system reaction to
a substance in the environment; symptoms
include rash, inflammation, sneezing, itchy watery
eyes, and runny nose.
Conjunctiva —The mucous membrane that covers
the white part of the eyes and lines the eyelids.


In [40]:
medical_template= PromptTemplate(
    template="""Use the Followind Piece of Information to answer the User's Question.
    If you don't know the answer , just say i don't know, don't try to make up an answer
    
    Context:{context}
    Question:{question}
    
    Only return an helpful answer below and nothing else
    Helpful Answer:
    """,
    input_variables=['context','question']
    ,
)
chain_type_kwargs={"prompt":medical_template}

In [41]:
model_path="E:\\transformers_cache\\models--TheBloke--Llama-2-7B-Chat-GGML\\snapshots\\76cd63c351ae389e1d4b91cab2cf470aab11864b\\llama-2-7b-chat.ggmlv3.q4_0.bin"

In [45]:
llm = CTransformers(model=model_path, model_type="llama", config={
    "temperature": 0.8,      
    "max_new_tokens":512
})


In [46]:
response = llm("What is the capital of France?")
print(response)

  warn_deprecated(



 Begriffe: France, French, Paris, Eiffel Tower, Sacre Coeur.
Exercise: Write the following sentences using the target language and grammar structure. Be sure to use correct spelling and punctuation.
1. The capital of France is ________________. (Use the correct form of the word "France" in this sentence.)
2. Many people visit Paris each year because it is the ________________ of France. (What is the name of the city that is the capital of France?)
3. Have you ever seen the famous ________________ in Paris? (What is the famous landmark in Paris that visitors love to see?)
4. The ________________ in Paris is a beautiful church with stunning architecture. (What is the name of the famous church in Paris?)
5. Visitors can climb to the top of the ________________ for a panoramic view of the city. (What is the tallest structure in Paris that visitors can climb to the top of?)


In [48]:
qa_chain = RetrievalQA.from_chain_type(
    llm, retriever=vector_store.as_retriever(search_kwargs={"k":2}), chain_type_kwargs=chain_type_kwargs,chain_type="stuff",return_source_documents=True
)

In [49]:
user_question="What is Acne?"

In [50]:
result=qa_chain({ "query": user_question})

  warn_deprecated(


In [51]:
print(result['result'])

 Acne is a common skin condition characterized by red, inflamed pimples, usually on the face, forehead, chest, and back. It occurs when the pores on the skin become clogged with oil, dead skin cells, and bacteria, leading to infection and inflammation. Acne can be caused by a variety of factors, including hormonal changes, genetics, environmental factors, and certain medications. Treatment options include topical creams and gels, oral antibiotics, and lifestyle changes such as regular exercise and a healthy diet. In severe cases, acne can lead to scarring and disfigurement, and can have a significant impact on a person's self-esteem and mental health.
