In [None]:

import os
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path
from langchain_core.documents import Document

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
### Read all the pdf's inside the directory

def process_all_pdfs(pdf_directory):
    """Process all PDF files in a directory"""
    all_documents = []
    pdf_dir = Path("/Users/utkarshupadhyay/Computer Science/Falkomeai/RAG/data/pdf_1")
    
    # Find all PDF files recursively
    pdf_files = list(pdf_dir.glob("**/*.pdf"))
    
    print(f"Found {len(pdf_files)} PDF files to process")
    
    for pdf_file in pdf_files:
        print(f"\nProcessing: {pdf_file.name}")
        try:
            loader = PyPDFLoader(str(pdf_file))
            documents = loader.load()
            
            # Add source information to metadata
            for doc in documents:
                doc.metadata['source_file'] = pdf_file.name
                doc.metadata['file_type'] = 'pdf'
            
            all_documents.extend(documents)
            print(f"  ‚úì Loaded {len(documents)} pages")
            
        except Exception as e:
            print(f"  ‚úó Error: {e}")
    
    print(f"\nTotal documents loaded: {len(all_documents)}")
    return all_documents

# Process all PDFs in the data directory
all_pdf_documents = process_all_pdfs("../data")

Found 2 PDF files to process

Processing: NIPS-2017-attention-is-all-you-need-Paper.pdf
  ‚úì Loaded 11 pages

Processing: Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf
  ‚úì Loaded 6 pages

Total documents loaded: 17


In [6]:
all_pdf_documents

[Document(metadata={'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On 

In [4]:
type(all_pdf_documents[0])

langchain_core.documents.base.Document

In [13]:
import re

SECTION_PATTERNS = {
    "abstract": r"\babstract\b",
    "introduction": r"\bintroduction\b",
    "methodology": r"\b(methodology|methods|materials and methods)\b",
    "results": r"\bresults\b",
    "discussion": r"\bdiscussion\b",
    "conclusion": r"\b(conclusion|conclusions)\b",
    "references": r"\breferences\b"
}

def detect_sections_with_context(text: str, last_section: str) -> str:
    text_lower = text.lower()
    for section, pattern in SECTION_PATTERNS.items():
        if re.search(pattern, text_lower):
            return section
    return last_section



In [12]:
def split_documents_section_aware(documents):
    """
    Chunk research papers for RAG with section-aware metadata
    """

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,        # ‚úÖ ideal for research papers
        chunk_overlap=80,      # ‚úÖ preserves context
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )

    all_chunks = []

    for doc in documents:
        # Detect section BEFORE splitting
        section = detect_section(doc.page_content)

        # Split into chunks
        chunks = text_splitter.split_text(doc.page_content)

        for idx, chunk in enumerate(chunks):
            all_chunks.append({
                "page_content": chunk,
                "metadata": {
                    **doc.metadata,
                    "section": section,
                    "chunk_id": idx
                }
            })

    print(f"‚úÖ Created {len(all_chunks)} RAG-optimized chunks")

    # Preview one chunk
    if all_chunks:
        print("\nüîç Example chunk:")
        print(all_chunks[0]["page_content"][:300], "...")
        print("Metadata:", all_chunks[0]["metadata"])

    return all_chunks



In [17]:
def split_documents_section_aware(documents):
    """
    Correct, production-grade section-aware chunking for research papers
    """

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=80,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )

    all_chunks = []
    global_chunk_id = 0

    for doc in documents:
        last_section = doc.metadata.get("section", "unknown")

        # Split page into chunks
        chunks = text_splitter.split_text(doc.page_content)

        for chunk in chunks:
            # Detect or inherit section
            section = detect_sections_with_context(chunk, last_section)
            last_section = section

            all_chunks.append(
                Document(
                    page_content=chunk,
                    metadata={
                        **doc.metadata,
                        "section": section,
                        "chunk_id": global_chunk_id
                    }
                )
            )
            global_chunk_id += 1

    print(f"‚úÖ Created {len(all_chunks)} RAG-optimized chunks")

    if all_chunks:
        print("\nüîç Example chunk:")
        print(all_chunks[0].page_content[:300], "...")
        print("Metadata:", all_chunks[0].metadata)

    return all_chunks

In [22]:
from langchain_core.documents import Document
chunks = split_documents_section_aware(all_pdf_documents)
chunks

‚úÖ Created 150 RAG-optimized chunks

üîç Example chunk:
Attention Is All You Need
Ashish Vaswani‚àó
Google Brain
avaswani@google.com
Noam Shazeer‚àó
Google Brain
noam@google.com
Niki Parmar‚àó
Google Research
nikip@google.com
Jakob Uszkoreit‚àó
Google Research
usz@google.com
Llion Jones‚àó
Google Research
llion@google.com
Aidan N. Gomez‚àó‚Ä†
University of Toronto
aid ...
Metadata: {'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with re

[Document(metadata={'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requiring significantly less timeto train. Our single model with 165 million parameters, achieves 27.5 BLEU onEnglish-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On 

In [24]:
chunks[140]


Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'TeX', 'creationdate': '2025-11-11T17:41:06+00:00', 'moddate': '2025-11-11T17:41:06+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'trapped': '/False', 'source': '/Users/utkarshupadhyay/Computer Science/Falkomeai/RAG/data/pdf_1/Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'total_pages': 6, 'page': 4, 'page_label': '5', 'source_file': 'Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'file_type': 'pdf', 'section': 'methodology', 'chunk_id': 140}, page_content='[14] C. A. Gao, F. M. Howard, N. S. Markov, E. C. Dyer, S. Ramesh,\nY . Luo, and A. T. Pearson, ‚ÄúComparing scientific abstracts generated by\nchatgpt to real abstracts with detectors and blinded human reviewers,‚Äù\nNPJ digital medicine , vol. 6, no. 1, p. 75, 2023.\n[15] B. Alhijawi, R. Jarrar, A. AbuAlRub, and A. Bader, ‚ÄúDeep learning d

In [25]:
len(chunks)


150

In [26]:
from collections import Counter
Counter([c.metadata["section"] for c in chunks])


Counter({'unknown': 55,
         'results': 35,
         'methodology': 28,
         'introduction': 21,
         'references': 7,
         'abstract': 4})

In [27]:
import random
c = random.choice(chunks)
print(c.page_content[:300])
print(c.metadata)


et al. [19] developed a GPT detection method using a convo-
lution neural network (CNN) and a self-attention mechanism
called, SeqXGPT. Chen et al. [23] introduced statistical-
based deep learning detection of machine-generated text called
STADEE. Their method leverages statistical text features, su
{'producer': 'pdfTeX-1.40.26', 'creator': 'TeX', 'creationdate': '2025-11-11T17:41:06+00:00', 'moddate': '2025-11-11T17:41:06+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'trapped': '/False', 'source': '/Users/utkarshupadhyay/Computer Science/Falkomeai/RAG/data/pdf_1/Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'total_pages': 6, 'page': 1, 'page_label': '2', 'source_file': 'Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'file_type': 'pdf', 'section': 'introduction', 'chunk_id': 103}


In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity

In [28]:
from typing import List
import numpy as np
from sentence_transformers import SentenceTransformer

class EmbeddingManager:
    """Handles document embedding generation using SentenceTransformer"""

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
        device: str = "cpu"
    ):
        self.model_name = model_name
        self.device = device
        self.model = None
        self._load_model()

    def _load_model(self):
        try:
            print(f"üîπ Loading embedding model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name, device=self.device)
            dim = self.model.get_sentence_embedding_dimension()
            print(f"‚úÖ Model loaded | Embedding dimension: {dim}")
        except Exception as e:
            print(f"‚ùå Error loading model {self.model_name}: {e}")
            raise

    def generate_embeddings(
        self,
        texts: List[str],
        batch_size: int = 32
    ) -> np.ndarray:
        """
        Generate embeddings for a list of texts (RAG optimized)
        """
        if not self.model:
            raise ValueError("Model not loaded")

        print(f"üîπ Generating embeddings for {len(texts)} chunks...")

        embeddings = self.model.encode(
            texts,
            batch_size=batch_size,
            show_progress_bar=True,
            normalize_embeddings=True  # ‚≠ê VERY IMPORTANT
        )

        print(f"‚úÖ Embeddings generated | Shape: {embeddings.shape}")
        return embeddings


In [29]:
embedding_manager=EmbeddingManager()
embedding_manager

üîπ Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
‚úÖ Model loaded | Embedding dimension: 384


<__main__.EmbeddingManager at 0x120cbc4d0>

In [30]:
import os
import uuid
import pickle
import numpy as np
import faiss
from typing import List, Any
from langchain_community.vectorstores import FAISS


In [31]:


class VectorStore:
    """Manages document embeddings using FAISS"""

    def __init__(
        self,
        index_path: str = "../data/faiss_store",
        index_name: str = "pdf_index"
    ):
        self.index_path = index_path
        self.index_name = index_name

        os.makedirs(self.index_path, exist_ok=True)

        self.index_file = os.path.join(self.index_path, f"{self.index_name}.index")
        self.meta_file = os.path.join(self.index_path, f"{self.index_name}_meta.pkl")

        self.index = None
        self.metadatas = []
        self.documents = []

        self._initialize_store()

    def _initialize_store(self):
        if os.path.exists(self.index_file) and os.path.exists(self.meta_file):
            print("üîπ Loading existing FAISS index...")
            self.index = faiss.read_index(self.index_file)
            with open(self.meta_file, "rb") as f:
                data = pickle.load(f)
                self.documents = data["documents"]
                self.metadatas = data["metadatas"]
            print(f"‚úÖ Loaded FAISS index with {self.index.ntotal} vectors")
        else:
            print("üîπ Creating new FAISS index...")
            self.index = None
            self.documents = []
            self.metadatas = []

    def add_documents(
        self,
        documents: List[Any],
        embeddings: np.ndarray
    ):
        if len(documents) != len(embeddings):
            raise ValueError("Documents and embeddings count mismatch")

        # Initialize FAISS index on first insert
        if self.index is None:
            dim = embeddings.shape[1]
            self.index = faiss.IndexFlatIP(dim)  # cosine similarity (normalized embeddings)
            print(f"‚úÖ FAISS IndexFlatIP initialized (dim={dim})")

        print(f"üîπ Adding {len(documents)} documents to FAISS index")

        self.index.add(embeddings.astype(np.float32))

        for doc in documents:
            self.documents.append(doc.page_content)
            self.metadatas.append(doc.metadata)

        print(f"‚úÖ Total vectors in FAISS index: {self.index.ntotal}")

    def similarity_search(
        self,
        query_embedding: np.ndarray,
        k: int = 5
    ):
        if self.index is None or self.index.ntotal == 0:
            raise ValueError("FAISS index is empty")

        query_embedding = query_embedding.astype(np.float32)
        scores, indices = self.index.search(query_embedding, k)

        results = []
        for idx in indices[0]:
            results.append({
                "page_content": self.documents[idx],
                "metadata": self.metadatas[idx]
            })

        return results

    def save(self):
        if self.index is None:
            return

        faiss.write_index(self.index, self.index_file)
        with open(self.meta_file, "wb") as f:
            pickle.dump(
                {
                    "documents": self.documents,
                    "metadatas": self.metadatas
                },
                f
            )
        print("üíæ FAISS index saved successfully")


In [32]:
vectorstore = VectorStore()

texts = [doc.page_content for doc in chunks]
embeddings = embedding_manager.generate_embeddings(texts)

vectorstore.add_documents(chunks, embeddings)
vectorstore.save()
vectorstore

üîπ Creating new FAISS index...
üîπ Generating embeddings for 150 chunks...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5/5 [00:02<00:00,  2.42it/s]

‚úÖ Embeddings generated | Shape: (150, 384)
‚úÖ FAISS IndexFlatIP initialized (dim=384)
üîπ Adding 150 documents to FAISS index
‚úÖ Total vectors in FAISS index: 150
üíæ FAISS index saved successfully





<__main__.VectorStore at 0x12203d2d0>

In [33]:
query = "What methodology does the paper use?"

query_embedding = embedding_manager.generate_embeddings([query])

results = vectorstore.similarity_search(query_embedding, k=3)

for r in results:
    print(r["page_content"][:300])
    print(r["metadata"])


üîπ Generating embeddings for 1 chunks...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 102.59it/s]

‚úÖ Embeddings generated | Shape: (1, 384)
media misinformation. Also, GPT models have been used to
generate homework exercises, TOEFL writing tasks, graduate
record examinations writing tasks [8], creative short stories [9],
restaurant reviews [10], the United States medical licensing
exam [11], and scientific content [12], [13]. Figure 1 s
{'producer': 'pdfTeX-1.40.26', 'creator': 'TeX', 'creationdate': '2025-11-11T17:41:06+00:00', 'moddate': '2025-11-11T17:41:06+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'trapped': '/False', 'source': '/Users/utkarshupadhyay/Computer Science/Falkomeai/RAG/data/pdf_1/Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'total_pages': 6, 'page': 0, 'page_label': '1', 'source_file': 'Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'file_type': 'pdf', 'section': 'introduction', 'chunk_id': 85}
followed when collecting the AIGT




In [None]:
import numpy as np
from typing import List, Dict, Any

class RAGRetriever:
    """Handles query-based retrieval from FAISS vector store"""

    def __init__(self, vector_store, embedding_manager):
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager

    def retrieve(
        self,
        query: str,
        top_k: int = 5,
        score_threshold: float = 0.0
    ) -> List[Dict[str, Any]]:
        """
        Retrieve relevant documents using FAISS
        """

        print(f"\nüîç Query: {query}")
        print(f"Top-K: {top_k} | Score threshold: {score_threshold}")

        # 1Ô∏è‚É£ Generate query embedding (normalized)
        query_embedding = self.embedding_manager.generate_embeddings([query])
        query_embedding = query_embedding.astype(np.float32)

        # 2Ô∏è‚É£ FAISS similarity search
        scores, indices = self.vector_store.index.search(query_embedding, top_k)

        retrieved_docs = []

        for rank, (idx, score) in enumerate(zip(indices[0], scores[0]), start=1):
            if idx == -1:
                continue

            if score < score_threshold:
                continue

            retrieved_docs.append({
                "content": self.vector_store.documents[idx],
                "metadata": self.vector_store.metadatas[idx],
                "similarity_score": float(score),
                "rank": rank
            })

        print(f"‚úÖ Retrieved {len(retrieved_docs)} documents")

        return retrieved_docs
    


In [37]:
retriever = RAGRetriever(
    vector_store=vectorstore,
    embedding_manager=embedding_manager
)

In [40]:
results = retriever.retrieve(
    query="What Is self attention?",
    top_k=3
)

for r in results:
    print("\n---")
    print(r["content"][:300])
    print("Score:", r["similarity_score"])
    print("Metadata:", r["metadata"])



üîç Query: What Is self attention?
Top-K: 3 | Score threshold: 0.0
üîπ Generating embeddings for 1 chunks...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 164.23it/s]

‚úÖ Embeddings generated | Shape: (1, 384)
‚úÖ Retrieved 3 documents

---
described in section 3.2.
Self-attention, sometimes called intra-attention is an attention mechanism relating different positions
of a single sequence in order to compute a representation of the sequence. Self-attention has been
used successfully in a variety of tasks including reading comprehension
Score: 0.6903805732727051
Metadata: {'producer': 'PyPDF2', 'creator': 'PyPDF', 'creationdate': '', 'subject': 'Neural Information Processing Systems http://nips.cc/', 'publisher': 'Curran Associates, Inc.', 'language': 'en-US', 'created': '2017', 'eventtype': 'Poster', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mecha




In [41]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

class LocalHFLLM:
    """Local HuggingFace LLM for RAG answer generation"""

    def __init__(
        self,
        model_name="Qwen/Qwen2.5-1.5B-Instruct",
        device=None,
        max_context_tokens=1800
    ):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.max_context_tokens = max_context_tokens

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto"
        )

        self.model.eval()
        print(f"‚úÖ Loaded LLM: {model_name} on {self.device}")

    def _build_prompt(self, query: str, context: str) -> str:
        return f"""You are an AI assistant answering questions ONLY using the provided context.

Rules:
- Use ONLY the information in the context.
- If the answer is not present, say "I don't know based on the provided documents."
- Be concise and factual.

Context:
{context}

Question:
{query}

Answer:
"""

    def generate_response(self, query: str, context: str) -> str:
        prompt = self._build_prompt(query, context)

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=self.max_context_tokens
        ).to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=300,
                do_sample=False,
                temperature=0.1
            )

        # Extract only generated answer
        generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        answer = self.tokenizer.decode(
            generated_tokens,
            skip_special_tokens=True
        )

        return answer.strip()


In [42]:
class RAGPipeline:
    """End-to-end RAG pipeline"""

    def __init__(
        self,
        retriever: RAGRetriever,
        llm: LocalHFLLM,
        max_context_chars: int = 4000
    ):
        self.retriever = retriever
        self.llm = llm
        self.max_context_chars = max_context_chars

    def _build_context(self, retrieved_docs):
        """
        Build a single context string from retrieved chunks
        """
        context_parts = []
        total_length = 0

        for doc in retrieved_docs:
            chunk_text = (
                f"[Source: {doc['metadata'].get('source_file', 'unknown')} | "
                f"Section: {doc['metadata'].get('section', 'unknown')}]\n"
                f"{doc['content']}"
            )

            if total_length + len(chunk_text) > self.max_context_chars:
                break

            context_parts.append(chunk_text)
            total_length += len(chunk_text)

        return "\n\n".join(context_parts)

    def answer(self, query: str, top_k: int = 5):
        """
        Generate final answer for a query using RAG
        """

        # 1Ô∏è‚É£ Retrieve relevant chunks
        retrieved_docs = self.retriever.retrieve(
            query=query,
            top_k=top_k
        )

        if not retrieved_docs:
            return "I couldn't find relevant information in the documents."

        # 2Ô∏è‚É£ Build context
        context = self._build_context(retrieved_docs)

        # 3Ô∏è‚É£ Generate answer using LLM
        answer = self.llm.generate_response(
            query=query,
            context=context
        )

        return {
            "query": query,
            "answer": answer,
            "sources": [
                {
                    "source_file": d["metadata"].get("source_file"),
                    "section": d["metadata"].get("section"),
                    "score": d["similarity_score"]
                }
                for d in retrieved_docs
            ]
        }


In [43]:
# 1. Embeddings
embedding_manager = EmbeddingManager()

# 2. Vector store (already built & loaded)
vectorstore = VectorStore()

# 3. Retriever
rag_retriever = RAGRetriever(
    vector_store=vectorstore,
    embedding_manager=embedding_manager
)

# 4. LLM
llm = LocalHFLLM()

# 5. RAG Pipeline
rag_pipeline = RAGPipeline(
    retriever=rag_retriever,
    llm=llm
)


üîπ Loading embedding model: sentence-transformers/all-MiniLM-L6-v2
‚úÖ Model loaded | Embedding dimension: 384
üîπ Loading existing FAISS index...
‚úÖ Loaded FAISS index with 150 vectors


`torch_dtype` is deprecated! Use `dtype` instead!
Some parameters are on the meta device because they were offloaded to the disk.


‚úÖ Loaded LLM: Qwen/Qwen2.5-1.5B-Instruct on cpu


In [44]:
response = rag_pipeline.answer(
    query="What is transformer?",
    top_k=3
)

print("Answer:\n", response["answer"])
print("\nSources:")
for s in response["sources"]:
    print(s)



üîç Query: What is transformer?
Top-K: 3 | Score threshold: 0.0
üîπ Generating embeddings for 1 chunks...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  8.82it/s]
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


‚úÖ Embeddings generated | Shape: (1, 384)
‚úÖ Retrieved 3 documents
Answer:
 The Transformer is a neural network architecture designed to process sequential data such as text or speech. It was introduced in the paper "Attention Is All You Need" presented at NIPS 2017. The key features of the Transformer include:

1. **Self-Attention Mechanism**: Unlike traditional RNNs which use recurrent connections between elements, Transformers use self-attention mechanisms to focus on relevant parts of the input sequence independently. Each position in the sequence can attend to any other position, allowing it to capture long-range dependencies without needing to store past states.

2. **Encoder-Decoder Architecture**: The Transformer consists of multiple layers, including self-attention layers and feedforward networks. These layers allow the model to learn representations of sequences through successive transformations.

3. **Residual Connections and Layer Normalization**: Residual connections an

In [45]:
response = rag_pipeline.answer(
    query="What is Attention is all you need ?",
    top_k=3
)

print("Answer:\n", response["answer"])
print("\nSources:")
for s in response["sources"]:
    print(s)



üîç Query: What is Attention is all you need ?
Top-K: 3 | Score threshold: 0.0
üîπ Generating embeddings for 1 chunks...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.01it/s]

‚úÖ Embeddings generated | Shape: (1, 384)
‚úÖ Retrieved 3 documents





Answer:
 Attention is all you need refers to a paper presented at NIPS 2017 that introduced the concept of self-attention as a fundamental component in neural network architectures. The paper discusses how self-attention can be used to improve the performance of various machine learning tasks by allowing the model to focus on relevant parts of input data simultaneously across multiple positions within sequences. This approach enables more efficient processing of complex data structures like text, images, and other forms of sequential data. The authors also highlight the potential for self-attention to make models more interpretable by revealing which parts of the input contribute most significantly to the output. Additionally, they explore ways to extend this idea beyond traditional language modeling tasks into broader applications such as image captioning and speech recognition. The work aims to provide a new perspective on how deep learning models can better understand and process na

In [46]:
response = rag_pipeline.answer(
    query="What is Plagiarism  detection ?",
    top_k=3
)

print("Answer:\n", response["answer"])
print("\nSources:")
for s in response["sources"]:
    print(s)



üîç Query: What is Plagiarism  detection ?
Top-K: 3 | Score threshold: 0.0
üîπ Generating embeddings for 1 chunks...


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  6.21it/s]

‚úÖ Embeddings generated | Shape: (1, 384)
‚úÖ Retrieved 3 documents





Answer:
 Plagiarism detection involves identifying content that has been generated or paraphrased from external sources without proper citations or credit given to the original source. It's a crucial aspect of academic integrity and intellectual property rights, ensuring that authors attribute their work correctly and avoid unintentional copyright infringement. The process typically includes comparing the submitted manuscript against databases of existing literature to find similarities or identical sections. Advanced techniques such as natural language processing and machine learning algorithms can be employed to automate this process more effectively.

Sources:
{'source_file': 'Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'section': 'introduction', 'score': 0.5888799428939819}
{'source_file': 'Transformer_Based_Approach_for_Detecting_LLM_Generated_Scientific_Text4.pdf', 'section': 'methodology', 'score': 0.570601761341095}
{'source_file': 'Transformer