In [None]:
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
pipeline
)

import transformers
import torch
import streamlit as st

from langchain.llms import HuggingFacePipeline

# Load the Llama-2 chat model configuration and tokenizer.
model_name = 'NousResearch/Llama-2-7b-chat-hf'
model_config = transformers.AutoConfig.from_pretrained(
    model_name,
)
# NOTE(review): trust_remote_code=True allows code shipped with the hub repo to run
# locally — confirm this is really required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Use the EOS token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
# Resolve the dtype name (e.g. "float16") to the actual torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16.
# Only query the device capability when CUDA is actually present: calling
# torch.cuda.get_device_capability() on a machine without a GPU raises, which
# would abort the cell on what is a purely informational check.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)


# Load the causal-LM weights using the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

# Building a LLM text-generation pipeline
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    # temperature is only honoured in sampling mode, so enable sampling
    # explicitly instead of relying on the checkpoint's generation defaults.
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=300,
)

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import os


PDF_DIRECTORY = '/home/vardhanam/enterprise_chatbot/uploaded_pdfs'
# Create the upload directory up front so there is a directory to scan on a fresh run.
os.makedirs(PDF_DIRECTORY, exist_ok=True)

# Splitter that cuts documents into chunks of at most 1000 characters
# (measured with len) with a 20-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)
loader = DirectoryLoader(PDF_DIRECTORY, loader_cls=PyPDFLoader)
raw_docs = loader.load()
# Chunk the initial documents with the same splitter that save_pdf() applies to
# uploads, so everything in the vector store has a consistent, bounded size.
docs = text_splitter.split_documents(raw_docs)
# NOTE(review): when the directory holds no PDFs, `docs` is empty — confirm that
# Qdrant.from_documents accepts an empty list in the installed version.

#Loading the embeddings model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# In-memory Qdrant collection holding the embedded chunks.
qdrant_vectorstore = Qdrant.from_documents(
    docs,
    embeddings,
    location=":memory:",
    collection_name="pdf_database",
)

# Retrieve the 10 best-matching chunks for each query.
qdrant_retriever = qdrant_vectorstore.as_retriever(search_kwargs={'k': 10})

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)


def _format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# Retrieval chain: fetch context for the question, fill the prompt, run the LLM
# and return the result as a plain string. The retrieved documents are flattened
# to text first so the prompt receives their content rather than the repr of a
# list of Document objects.
chain = (
    {
        "context": qdrant_retriever | RunnableLambda(_format_docs),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
import gradio as gr
import os
from shutil import copyfile

def save_pdf(files):
    """Copy uploaded PDFs into the upload directory and add them to the vector store.

    Non-PDF files (judged by a case-insensitive ``.pdf`` extension) are skipped
    with a printed notice.

    Args:
        files: Iterable of file paths to process. ``None`` or an empty
            collection (nothing uploaded) is treated as an empty batch.

    Returns:
        A status string reporting how many PDF files were saved and where.
    """
    save_path = "/home/vardhanam/enterprise_chatbot/uploaded_pdfs"  # Directory where uploads are stored

    saved_files_count = 0
    # `files or []` keeps a submit-with-no-files from failing on iteration.
    for file_path in files or []:
        file_name = os.path.basename(file_path)  # Drop the temporary directory part
        if not file_name.lower().endswith('.pdf'):
            print(f"Skipping non-PDF file: {file_name}")
            continue

        # Ensure the destination exists before the first copy into it.
        os.makedirs(save_path, exist_ok=True)
        new_file_path = os.path.join(save_path, file_name)
        copyfile(file_path, new_file_path)  # Copy from the temporary location
        saved_files_count += 1

        # Split the saved PDF with the shared splitter and index the chunks.
        docs_temp = PyPDFLoader(new_file_path).load_and_split(text_splitter=text_splitter)
        qdrant_vectorstore.add_documents(docs_temp)

    return f"Saved {saved_files_count} PDF file(s) to {save_path}/"



In [None]:
def process_query(query):
    """Answer a user query by running it through the retrieval chain.

    Args:
        query: The question text entered by the user.

    Returns:
        The result of ``chain.invoke(query)``, or a short message asking for
        input when the query is missing, empty or only whitespace.
    """
    # Skip retrieval and generation entirely when there is nothing to ask.
    if not query or not query.strip():
        return "Please enter a query."

    return chain.invoke(query)

In [None]:
import gradio as gr
# Gradio front-end: one tab for uploading PDFs, one for asking questions.
iface_save_pdf = gr.Interface(
    fn=save_pdf,
    inputs=gr.Files(label="Upload Files", type='filepath'),
    outputs="text",
    title="PDF Uploader",
    description="Upload multiple files. Only PDF files will be saved to disk.",
)

iface_process_query = gr.Interface(
    fn=process_query,
    inputs=gr.Textbox(label="Enter your query"),
    outputs="text",
    title="Query Processor",
    description="Enter queries to get responses.",
)

# Present both interfaces side by side as named tabs.
iface_combined = gr.TabbedInterface(
    [iface_save_pdf, iface_process_query],
    ["PDF Upload", "Query Processor"],
)

# Start the combined app.
# NOTE(review): share=True requests a shareable link from Gradio — confirm that
# exposing the upload and query endpoints this way is intended.
if __name__ == "__main__":
    iface_combined.launch(share=True)