In [1]:
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
pipeline
)

import torch

import os

from langchain.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Weaviate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate, format_document
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string

from operator import itemgetter

import weaviate

import gradio as gr

In [2]:


# Load the Mistral instruct model and its tokenizer.
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Use the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Device placement is decided when the weights are loaded, so `device_map`
# is passed here. Handing it to `pipeline()` together with an already
# instantiated model has no effect on where the model lives.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Build the text-generation pipeline around the loaded model.
text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # `temperature` only takes effect in sampling mode; without
    # `do_sample=True` generation is greedy and the value is ignored.
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    # The prompt is echoed back in `generated_text`; callers strip it.
    return_full_text=True,
    max_new_tokens=1024,
)




`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [12]:
# Alias the raw transformers pipeline as `llm`. Calling it with a prompt string
# returns a list of dicts carrying a 'generated_text' key (see the sample call
# output further down in this notebook).
llm = text_generation_pipeline

In [3]:

# Embedding model handed to the vector store for documents and queries.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
hf_embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)


In [4]:
# `weaviate` is already imported in the notebook's import cell, so it is not
# re-imported here.
# The endpoint can be overridden with the WEAVIATE_URL environment variable;
# when unset, the original cluster URL is used.
WEAVIATE_URL = os.environ.get(
    "WEAVIATE_URL", 'https://superteams-810p8edk.weaviate.network'
)

client = weaviate.Client(url=WEAVIATE_URL)

            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


In [5]:


# Create the vector store with no initial documents; chunks are added to it
# by later cells.
vectorstore = Weaviate.from_documents(
    [],
    embedding=hf_embeddings,
    client=client,
    by_text=False,
)


In [6]:

# Splitter used to cut loaded PDF pages into overlapping character chunks.
CHUNK_SIZE = 512     # maximum chunk length, measured with `len`
CHUNK_OVERLAP = 20   # characters shared between neighbouring chunks

textsplitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    is_separator_regex=False,
)


In [7]:
# WARNING: destructive. Presumably this drops every class (and its stored
# objects) on the connected Weaviate instance — confirm before running against
# a shared cluster. NOTE(review): this runs after `vectorstore` was created.
client.schema.delete_all()

In [8]:
def add_pdfs_to_vectorstore(files):
    """Chunk every PDF in ``files`` and add the chunks to ``vectorstore``.

    Args:
        files: Iterable of PDF locations. Each entry is either a path string
            or an object exposing the path as ``.name`` (as file-upload
            widgets may provide). ``None`` or an empty iterable is accepted.

    Returns:
        A human-readable status string listing which files were indexed and
        which were skipped because they are not PDFs.
    """
    if not files:
        return "No files provided."

    indexed_names = []
    skipped_names = []
    for entry in files:
        # Accept plain paths as well as upload objects carrying `.name`.
        file_path = getattr(entry, "name", entry)
        file_name = os.path.basename(file_path)
        if not file_name.lower().endswith('.pdf'):
            skipped_names.append(file_name)
            continue

        chunks = PyPDFLoader(file_path).load_and_split(text_splitter=textsplitter)
        for chunk in chunks:
            # Flatten line breaks so each chunk is one continuous line of text.
            chunk.page_content = chunk.page_content.replace('\n', ' ')
        if chunks:
            vectorstore.add_documents(chunks)
        indexed_names.append(file_name)

    status_parts = []
    if indexed_names:
        status_parts.append(
            f"Indexed {len(indexed_names)} PDF file(s): {', '.join(indexed_names)}."
        )
    if skipped_names:
        status_parts.append(
            f"Skipped non-PDF file(s): {', '.join(skipped_names)}."
        )
    return " ".join(status_parts)


# NOTE(review): absolute local path — only valid on the original author's machine.
files = ['/home/vardh/RAG_Mistral_Weaviate_Gradio/Leave Policy - Novus Technology.pdf']

add_pdfs_to_vectorstore(files)




In [9]:
# Retrieval sanity check: fetch the 3 chunks most similar to a sample question.
# `y` is read by the next cell, so the name is kept as-is.
y = vectorstore.similarity_search("What are the policy of maternal leave?", k= 3)



In [10]:
# Concatenate the text of the retrieved chunks into one space-separated string.
joined_content = ' '.join([hit.page_content for hit in y])

In [11]:
# Display the concatenated retrieved context for manual inspection.
joined_content

'leave can be taken up to 4 weeks before the expected date of delivery. An additional 4 weeks of unpaid leave can be requested if needed. ### 4. Paternity Leave Male employees are entitled to 5 days of paid paternity leave, to be taken within 1 month of the birth of their child. ### 5. Compassionate Leave Employees are entitled to 3 days of paid compassionate leave in the event of the death of an immediate family member (spouse, child, parent, or sibling). ### 6. Unpaid Leave In exceptional circumstances, # Leave Policy ## Introduction At Novus Technologies, we recognize the importance of work-life balance and the need for employees to take time off for various reasons. This leave policy outlines the different types of leave available to employees and the procedures for requesting and managing leave. ## Types of Leave ### 1. Annual Leave All full-time employees are entitled to 20 days of paid annual leave per calendar year. Annual leave accrues on a pro-rata basis from the date of join

In [13]:
def clear_vectordb(chatbot, msg):
    """Wipe the Weaviate schema and hand back blank values for both inputs.

    Args:
        chatbot: Current chatbot value; ignored.
        msg: Current message-box value; ignored.

    Returns:
        A ``("", "")`` tuple: the new chatbot value and the new message value.
    """
    client.schema.delete_all()
    return "", ""

In [51]:
def answer_query(message, chat_history):
    """Answer ``message`` from retrieved context and append the turn to the history.

    The 3 chunks most similar to ``message`` are fetched from ``vectorstore``,
    joined into a context string, embedded in a prompt and sent to ``llm``.

    Args:
        message: The user's question.
        chat_history: List of ``(question, answer)`` pairs; ``None`` is treated
            as an empty history. The list is extended in place.

    Returns:
        A ``("", chat_history)`` tuple: an empty string to clear the input box
        and the updated history.
    """
    if chat_history is None:
        chat_history = []

    context_docs = vectorstore.similarity_search(message, k=3)
    context = ' '.join(doc.page_content for doc in context_docs)

    template = f"""Answer the question based only on the following context:
        {context}

        Question: {message}
    """

    result = llm(template)

    # The pipeline returns a list with one dict per generated sequence, so the
    # text lives at index 0; indexing the list with a string raises TypeError.
    generated_text = result[0]["generated_text"]

    # The prompt is echoed in the output; drop it and surrounding whitespace.
    answer = generated_text.replace(template, '').strip()

    chat_history.append((message, answer))

    return "", chat_history

In [14]:
# Smoke test of the generation pipeline; the output shows the return shape
# (a list of dicts with a 'generated_text' key that includes the prompt).
llm("What is the color of the sky?")



[{'generated_text': "What is the color of the sky?\n\nThe color of the sky varies depending on atmospheric conditions. It can appear to be various shades of blue during clear weather, red or orange during sunrise and sunset, green during a thunderstorm, or gray during an overcast day. The dominant color of the sky is blue due to the scattering of sunlight by the atmosphere.\n\n## What are some interesting facts about the sky?\n\n1. The sky is not actually a thing but rather a collective term for the gaseous atmosphere surrounding Earth and other planets.\n2. The sky appears blue because molecules in the Earth's atmosphere scatter short-wavelength light (blue and violet) more than longer wavelengths (red, yellow, and green).\n3. The sky is not always blue; it can take on many different colors depending on the time of day, weather conditions, and location.\n4. The stars we see at night are actually suns just like our own Sun, but they are so far away that their light takes millions of ye

In [53]:
# Gradio front end: PDF upload, chat window, and a button that resets the UI
# and the vector database.
# `add_pdfs_to_vectorstore`, `answer_query` and `clear_vectordb` must already be
# defined in the kernel when this cell runs.
with gr.Blocks() as demo:

    with gr.Row():
        upload_files = gr.File(label="Upload pdf files only", file_count='multiple')
        success_msg = gr.Text(value="")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Enter your query here")
    clear = gr.ClearButton([msg, chatbot], value="Clear VectorDB")

    upload_files.upload(add_pdfs_to_vectorstore, upload_files, success_msg)
    msg.submit(answer_query, [msg, chatbot], [msg, chatbot])
    # A ClearButton by itself only resets the listed components. The button is
    # labelled "Clear VectorDB", so also run the handler that wipes the store.
    clear.click(clear_vectordb, [chatbot, msg], [chatbot, msg])

# NOTE(review): binds to all interfaces and opens a public share link, so
# anyone with the link can query the model and clear the database.
demo.launch(server_name='0.0.0.0', share=True)


        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  @app.on_event("startup")
        on_event is deprecated, use lifespan event handlers instead.

        Read more about it in the
        [FastAPI docs for Lifespan Events](https://fastapi.tiangolo.com/advanced/events/).
        
  return self.router.on_event(event_type)


Running on local URL:  http://0.0.0.0:7860
Running on public URL: https://defe2053333ace9cec.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Replace `TemplateResponse(name, {"request": request})` by `TemplateResponse(request, name)`.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Traceback (most recent call last):
  File "/opt/conda/envs/pytorch/lib/python3.10/site-packages/gradio/queueing.py", line 501, in call_prediction
    output = await route_utils.call_process_api(
  File "/opt/conda/envs/pytorch/lib/python3.10/site-packages/gradio/route_utils.py", line 258, in call_process_api
    output = await app.get_blocks().process_api(
  File "/opt/conda/envs/pytorch/lib/python3.10/site-packages/gradio/blocks.py", line 1710, in process_api
    result = await self.call_function(
  File "/opt/conda/envs/pytorch/lib/python3.10/site-packages/gradio/blocks.py", line 1250, in call_function
    prediction = await 