# Initialize the plain large language model (no RAG)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def main():
    """Load DeepSeek-R1-Distill-Qwen-1.5B and print its answer to a sample query.

    The model is used on its own here, without any retrieved context.
    """
    model_name = "DeepSeek-R1-Distill-Qwen-1.5B"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map="auto"
    )

    def deepseek_generate(prompt):
        """Return the first generated sequence for *prompt*, special tokens kept."""
        # device_map="auto" decides where the weights are placed, so send the
        # inputs to the model's device rather than assuming a CUDA device.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        output = model.generate(**inputs, max_new_tokens=1000)
        return tokenizer.decode(output[0], skip_special_tokens=False)

    query = "what is RAG in AI ?"
    print(deepseek_generate(query))
    torch.cuda.empty_cache()


if __name__ == "__main__":
    main()


# RAG Setup 

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import os
import PyPDF2
import docx

def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of every page in page order, each page followed
        by a newline.
    """
    with open(pdf_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Extract inside the ``with`` block: the reader was built from this
        # open file handle.
        # ``or ""`` keeps a page with no extractable text from raising a
        # TypeError should the extractor hand back None for it.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf_reader.pages
        )

def extract_text_from_word(docx_path):
    """Return the text of a Word file, one paragraph per line."""
    paragraphs = docx.Document(docx_path).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

def load_documents_from_folder(folder_path):
    """Read all PDF and Word files from a folder and extract text.

    Files are selected by extension (``.pdf`` / ``.docx``), compared
    case-insensitively so that names such as ``REPORT.PDF`` are not skipped.
    Entries are visited in sorted name order because ``os.listdir`` gives no
    ordering guarantee; sorting makes the result reproducible.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A list of ``{"filename": ..., "content": ...}`` dicts, one per
        matching file.
    """
    documents = []

    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        lowered = filename.lower()

        if lowered.endswith(".pdf"):
            text = extract_text_from_pdf(file_path)
        elif lowered.endswith(".docx"):
            text = extract_text_from_word(file_path)
        else:
            continue  # Skip non-PDF and non-Word files

        documents.append({"filename": filename, "content": text})

    return documents

def main():
    """Build a FAISS index from the files in ``RAG Information`` and save it.

    Each document is split into overlapping chunks, the chunks are embedded
    with the multilingual MiniLM sentence-transformer, and the resulting
    vector store is written to the local ``faiss_index`` directory.

    Raises:
        ValueError: If no text chunks could be produced from the folder.
    """
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

    # Define text splitter (for chunking long documents)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=100)

    # Load documents from a folder
    folder_path = "RAG Information"  # Replace with your folder path
    documents = load_documents_from_folder(folder_path)

    # Split every document into chunks, tagging each chunk with the file it
    # came from so retrieved passages can be traced back to their source.
    all_docs = []
    for doc in documents:
        all_docs.extend(
            text_splitter.create_documents(
                [doc["content"]],
                metadatas=[{"source": doc["filename"]}],
            )
        )

    # Fail with a clear message instead of handing FAISS an empty chunk list.
    if not all_docs:
        raise ValueError(
            f"No text could be extracted from any PDF/Word file in {folder_path!r}"
        )

    # Create FAISS vector database
    vectorstore = FAISS.from_documents(all_docs, embedding_model)

    # Save FAISS index
    vectorstore.save_local("faiss_index")


if __name__ == "__main__":
    main()

# Demo: using the large language model with RAG

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch 
from torch import autocast
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Module-level setup: the functions below read `tokenizer`, `model` and
# `vectorstore` as globals, so these statements must run before any of them
# is called.
model_name ="DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Weights are loaded in float16 and placed via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_name,torch_dtype = torch.float16,device_map = "auto")
# Embedding model used to embed queries against the saved index.
# NOTE(review): presumably this must be the same model the index was built
# with - confirm against the indexing step.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
# NOTE(review): allow_dangerous_deserialization=True opts in to loading a
# serialized (reportedly pickle-based) index from disk. Only load a
# "faiss_index" directory that this project created itself - confirm.
vectorstore = FAISS.load_local("faiss_index", embedding_model,allow_dangerous_deserialization=True)

def deepseek_generate(prompt):
    """Generate text for *prompt* with the module-level DeepSeek model.

    Args:
        prompt: Text to feed to the model.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed.
    """
    from contextlib import nullcontext

    # The model is loaded with device_map="auto", so its placement is already
    # decided. Follow model.device instead of calling model.to(...) on every
    # request.
    device = model.device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Request float16 mixed precision only on CUDA; on any other device run
    # generation without an autocast context.
    if device.type == "cuda":
        precision_ctx = autocast(device_type="cuda", dtype=torch.float16)
    else:
        precision_ctx = nullcontext()

    with precision_ctx:
        output = model.generate(**inputs, max_new_tokens=1000)

    return tokenizer.decode(output[0], skip_special_tokens=True)

def retrieve_context(query, k=3):
    """Retrieve the most relevant document chunks for a given query.

    Args:
        query: Text to search the vector store with.
        k: Number of chunks to retrieve. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        The ``page_content`` of the retrieved chunks joined by newlines.
    """
    retrieved_docs = vectorstore.similarity_search(query, k=k)
    return "\n".join(doc.page_content for doc in retrieved_docs)

def deepseek_rag_pipeline(query):
    """Answer *query* with the model, grounding the prompt in retrieved chunks.

    The chunks returned by ``retrieve_context`` are embedded in a fixed
    prompt template together with the question, and the template is passed
    to ``deepseek_generate``, whose result is returned.
    """
    context = retrieve_context(query)

    # Prompt template: instruction, retrieved context, then the question.
    prompt = f"""
    You are an AI assistant. Use the retrieved knowledge below to answer accurately:

    Retrieved Context:
    {context}

    Question: {query}
    """

    return deepseek_generate(prompt)



def main():
    """Run the RAG pipeline on a sample question and print the answer."""
    print(deepseek_rag_pipeline("do you know what is UMCH?"))


if __name__ == "__main__":
    main()