In [30]:
import PyPDF2
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document

# Function to extract text from a PDF.
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            page_text = page.extract_text() or ""
            text += page_text
    return text

# Function to clean the text.
def clean_text(text):
    text = " ".join(text.split())
    text = re.sub(r"[^a-zA-Z0-9\s.,!?]", "", text)
    return text

# Function to split the text into Document objects using RecursiveCharacterTextSplitter.
def split_text_into_documents(text, chunk_size=2000, chunk_overlap=300):
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len
    )
    # Wrap the entire text in a Document object
    docs = [Document(page_content=text)]
    documents = text_splitter.split_documents(docs)
    return documents

# Main function to process the document.
def process_document(pdf_path):
    text = extract_text_from_pdf(pdf_path)
    print(f"Extracted text length: {len(text)} characters")
    
    cleaned_text = clean_text(text)
    print(f"Cleaned text length: {len(cleaned_text)} characters")
    
    documents = split_text_into_documents(cleaned_text)
    print(f"Number of document chunks: {len(documents)}")
    return documents

if __name__ == "__main__":
    pdf_path = r"C:\Users\kingl\OneDrive\Desktop\mlops_project\chatbot\chatbot\Rag\attention.pdf"
    documents = process_document(pdf_path)
    
    # Print the first document chunk as an example.
    print("\nFirst document chunk:")
    print(documents[0].page_content)


Extracted text length: 39472 characters
Cleaned text length: 38554 characters
Number of document chunks: 23

First document chunk:
Provided proper attribution is provided, Google hereby grants permission to reproduce the tables and figures in this paper solely for use in journalistic or scholarly works. Attention Is All You Need Ashish Vaswani Google Brain avaswanigoogle.comNoam Shazeer Google Brain noamgoogle.comNiki Parmar Google Research nikipgoogle.comJakob Uszkoreit Google Research uszgoogle.com Llion Jones Google Research lliongoogle.comAidan N. Gomez  University of Toronto aidancs.toronto.eduukasz Kaiser Google Brain lukaszkaisergoogle.com Illia Polosukhin  illia.polosukhingmail.com Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the T

In [31]:
documents

[Document(metadata={}, page_content='Provided proper attribution is provided, Google hereby grants permission to reproduce the tables and figures in this paper solely for use in journalistic or scholarly works. Attention Is All You Need Ashish Vaswani Google Brain avaswanigoogle.comNoam Shazeer Google Brain noamgoogle.comNiki Parmar Google Research nikipgoogle.comJakob Uszkoreit Google Research uszgoogle.com Llion Jones Google Research lliongoogle.comAidan N. Gomez  University of Toronto aidancs.toronto.eduukasz Kaiser Google Brain lukaszkaisergoogle.com Illia Polosukhin  illia.polosukhingmail.com Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions 

In [32]:
# Step 1: Import required libraries
from langchain_community.embeddings import OllamaEmbeddings

# Step 2: Initialize OpenAI Embeddings
def initialize_embeddings():
    """
    Initializes the OpenAI Embeddings model.

    Returns:
        OpenAIEmbeddings: Embeddings model.
    """
    embeddings = OllamaEmbeddings(model="llama3.2:1b")
    print("OpenAI Embeddings initialized successfully!")
    return embeddings

# Step 3: Generate embeddings for text chunks
def generate_embeddings(chunks, embeddings):
    """
    Generates embeddings for a list of text chunks.

    Args:
        chunks (list): List of text chunks.
        embeddings (OpenAIEmbeddings): Embeddings model.

    Returns:
        list: List of embeddings (vectors) for each chunk.
    """
    # Generate embeddings for each chunk
    chunk_embeddings = embeddings.embed_documents(chunks)
    print(f"Generated embeddings for {len(chunk_embeddings)} chunks.")
    return chunk_embeddings



In [45]:

embeddings = initialize_embeddings()

# Generate embeddings for the chunks
chunk_embeddings1 = generate_embeddings(documents, embeddings)

# Print the first embedding as an example
print("\nFirst chunk embedding (first 10 dimensions):")
print(chunk_embeddings1[0][:10])

OpenAI Embeddings initialized successfully!
Generated embeddings for 23 chunks.

First chunk embedding (first 10 dimensions):
[-1.6370317935943604, 1.2566972970962524, 1.8292789459228516, -0.7638096213340759, 3.2038021087646484, -0.5250728130340576, 3.4231607913970947, 0.7220054268836975, 0.6167386770248413, 0.07893369346857071]


In [35]:
import pickle

# Save embeddings to a pickle file
with open('chunk_embeddings.pkl', 'wb') as f:
    pickle.dump(chunk_embeddings, f)

print("Embeddings saved!")


Embeddings saved!


In [6]:
import pickle

# Load the embeddings from the pickle file
with open('chunk_embeddings.pkl', 'rb') as f:
    chunk_embeddings = pickle.load(f)

print("Embeddings loaded!")
print(chunk_embeddings[:5])  # Print first 5 embeddings as an example


Embeddings loaded!
[[0.6292080283164978, 2.6260859966278076, 0.5753267407417297, 0.39746174216270447, 1.8612641096115112, -2.4867329597473145, 1.3653322458267212, 0.5735061168670654, -1.4694424867630005, -0.35355550050735474, -0.02638312242925167, -0.6411489248275757, -1.270591378211975, 0.7549274563789368, -0.6578266024589539, -1.51787531375885, 2.321682929992676, 1.8448150157928467, 4.108569145202637, 0.1608719378709793, 0.13438934087753296, -1.196655035018921, -3.01212215423584, -2.166290521621704, 0.966609537601471, -3.178586006164551, 0.45204585790634155, 0.865517258644104, -0.2396547794342041, 1.6668113470077515, 1.139245867729187, 1.732393503189087, -1.0873222351074219, -0.0470145046710968, 0.9221020340919495, 1.3405113220214844, 0.4101397693157196, -2.829878807067871, -1.7046282291412354, 2.2111763954162598, -0.9175156950950623, -1.0137187242507935, -1.9614372253417969, 2.829570770263672, 0.11678681522607803, 1.1764277219772339, 0.9390425682067871, 1.001818299293518, 0.00820098

In [46]:
from langchain.docstore.document import Document
from langchain_community.vectorstores import Chroma
from langchain.vectorstores import FAISS


# Create the Chroma vector store using the Document objects and the dummy embeddings.
# vector_store = Chroma.from_documents(documents, chunk_embeddings)
vector_store = FAISS.from_documents(documents, embeddings)




below is for llms usage 


In [19]:
from langchain_core.prompts import ChatPromptTemplate

prompt=ChatPromptTemplate.from_template("""
Answer the following question based on the provided context.
Think step by step before providing a detailed answer.
I will tip you $1000 if user finds your answer helpful.                      
<context> 
{context}  
</context>                                    
                                        
                                        """)

In [9]:
from langchain_community.llms import Ollama


llms = Ollama(model="llama3.2:1b")
llms


  llms = Ollama(model="llama3.2:1b")


Ollama(model='llama3.2:1b')

In [56]:

from langchain_community.llms import Ollama


llms = Ollama(model="llama3.2:1b")
from langchain.chains.combine_documents import create_stuff_documents_chain
doc_chain=create_stuff_documents_chain(llms,prompt)
doc_chain
retriver=vector_store.as_retriever()
retriver
from langchain.chains.combine_documents import create_stuff_documents_chain
doc_chain=create_stuff_documents_chain(llms,prompt)

In [57]:
retriver=vector_store.as_retriever()
retriver

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000016DD28A3A30>, search_kwargs={})

In [48]:
from langchain.chains import create_retrieval_chain

retriverchain=create_retrieval_chain(retriver,doc_chain)

In [55]:
res=retriverchain.invoke({"input":"The dominant sequence transduction models are based"})
res['answer']


'The question is not explicitly stated in the provided text, but based on the content, it appears that you are asking about the comparison between self-attention layers and recurrent neural networks (RNNs) in sequence modeling and transduction tasks.\n\nIn general, self-attention layers are designed to be more parallelizable and computationally efficient than RNNs when dealing with long sequences. Self-attention allows for the connection of all positions in a sequence with a constant number of sequentially executed operations, whereas RNNs require sequential computation along the symbol positions of the input and output sequences.\n\nThe authors compare various aspects of self-attention layers to recurrent neural networks, including:\n\n1. Computational complexity: Self-attention is faster than RNNs when the sequence length is smaller.\n2. Path length: A single convolutional layer with a kernel width of 6 does not connect all pairs of input and output positions in RNNs; in contrast, se