In [2]:
##RAG Pipelines- Data Ingestion to Vector DB Pipeline

In [3]:
import os
from langchain_community.document_loaders import PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
### Read all the pdf's inside the directory

def process_all_pdfs(pdf_directory):
    """Process all PDF files in a directory"""
    all_documents = []
    pdf_dir = Path(pdf_directory)
    
    # Find all PDF files recursively
    pdf_files = list(pdf_dir.glob("**/*.pdf"))
    
    print(f"Found {len(pdf_files)} PDF files to process")
    
    for pdf_file in pdf_files:
        print(f"\nProcessing: {pdf_file.name}")
        try:
            loader = PyPDFLoader(str(pdf_file))
            documents = loader.load()
            
            # Add source information to metadata
            for doc in documents:
                doc.metadata['source_file'] = pdf_file.name
                doc.metadata['file_type'] = 'pdf'
            
            all_documents.extend(documents)
            print(f"  ✓ Loaded {len(documents)} pages")
            
        except Exception as e:
            print(f"  ✗ Error: {e}")
    
    print(f"\nTotal documents loaded: {len(all_documents)}")
    return all_documents

# Process all PDFs in the data directory
all_pdf_documents = process_all_pdfs("../data")

Found 2 PDF files to process

Processing: _OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf
  ✓ Loaded 370 pages

Processing: Deep_learning_book.pdf
  ✓ Loaded 534 pages

Total documents loaded: 904


In [5]:
all_pdf_documents

[Document(metadata={'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)', 'creationdate': "D:20240822090713Z00'00'", 'author': 'Sebastian Raschka', 'icnappplatform': 'Windows', 'icnappversion': '3.05', 'title': 'Build a Large Language Model (From Scratch)', 'moddate': "D:20250903101523Z00'00'", 'icnappname': 'Foxit Advanced PDF Editor', 'source': '../data/pdf/_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'total_pages': 370, 'page': 0, 'page_label': 'Build a Large Language Model (From Scratch)', 'source_file': '_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'file_type': 'pdf'}, page_content='MANNING\nSebastian Raschka\nFROMSCRATCH\nBUILD A'),
 Document(metadata={'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)', 'creationdate': "D:2024

In [None]:

### Text splitting get into chunks

def split_documents(documents,chunk_size=10000,chunk_overlap=200):
    """Split documents into smaller chunks for better RAG performance"""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        separators=["\n\n", "\n", " ", ""]
    )
    split_docs = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(split_docs)} chunks")
    
    # Show example of a chunk
    if split_docs:
        print(f"\nExample chunk:")
        print(f"Content: {split_docs[0].page_content[:200]}...")
        print(f"Metadata: {split_docs[0].metadata}")
    
    return split_docs

In [7]:
chunks=split_documents(all_pdf_documents)
chunks

Split 904 documents into 868 chunks

Example chunk:
Content: MANNING
Sebastian Raschka
FROMSCRATCH
BUILD A...
Metadata: {'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)', 'creationdate': "D:20240822090713Z00'00'", 'author': 'Sebastian Raschka', 'icnappplatform': 'Windows', 'icnappversion': '3.05', 'title': 'Build a Large Language Model (From Scratch)', 'moddate': "D:20250903101523Z00'00'", 'icnappname': 'Foxit Advanced PDF Editor', 'source': '../data/pdf/_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'total_pages': 370, 'page': 0, 'page_label': 'Build a Large Language Model (From Scratch)', 'source_file': '_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'file_type': 'pdf'}


[Document(metadata={'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)', 'creationdate': "D:20240822090713Z00'00'", 'author': 'Sebastian Raschka', 'icnappplatform': 'Windows', 'icnappversion': '3.05', 'title': 'Build a Large Language Model (From Scratch)', 'moddate': "D:20250903101523Z00'00'", 'icnappname': 'Foxit Advanced PDF Editor', 'source': '../data/pdf/_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'total_pages': 370, 'page': 0, 'page_label': 'Build a Large Language Model (From Scratch)', 'source_file': '_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'file_type': 'pdf'}, page_content='MANNING\nSebastian Raschka\nFROMSCRATCH\nBUILD A'),
 Document(metadata={'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)', 'creationdate': "D:2024

In [8]:
import numpy as np
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import uuid
from typing import List, Dict, Any, Tuple
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
class EmbeddingManager:
    """Handles document embedding generation using SentenceTransformer"""
    
    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """
        Initialize the embedding manager
        
        Args:
            model_name: HuggingFace model name for sentence embeddings
        """
        self.model_name = model_name
        self.model = None
        self._load_model()

    def _load_model(self):
        """Load the SentenceTransformer model"""
        try:
            print(f"Loading embedding model: {self.model_name}")
            self.model = SentenceTransformer(self.model_name)
            print(f"Model loaded successfully. Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
        except Exception as e:
            print(f"Error loading model {self.model_name}: {e}")
            raise

    def generate_embeddings(self, texts: List[str]) -> np.ndarray:
        """
        Generate embeaddings for a list of texts
        
        Args:
            texts: List of text strings to embed
            
        Returns:
            numpy array of embeddings with shape (len(texts), embedding_dim)
        """
        if not self.model:
            raise ValueError("Model not loaded")
        
        print(f"Generating embeddings for {len(texts)} texts...")
        embeddings = self.model.encode(texts, show_progress_bar=True)
        print(f"Generated embeddings with shape: {embeddings.shape}")
        return embeddings

In [10]:
embedding_manager=EmbeddingManager()
embedding_manager

Loading embedding model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384


<__main__.EmbeddingManager at 0x11ee532d0>

In [11]:
class VectorStore:
    """Manages document embeddings in a ChromaDB vector store"""
    
    def __init__(self, collection_name: str = "pdf_documents", persist_directory: str = "../data/vector_store"):
        """
        Initialize the vector store
        
        Args:
            collection_name: Name of the ChromaDB collection
            persist_directory: Directory to persist the vector store
        """
        self.collection_name = collection_name
        self.persist_directory = persist_directory
        self.client = None
        self.collection = None
        self._initialize_store()
        
        
    def _initialize_store(self):
        """Initialize ChromaDB client and collection"""
        try:
            # Create persistent ChromaDB client
            os.makedirs(self.persist_directory, exist_ok=True)
            self.client = chromadb.PersistentClient(path=self.persist_directory)
            
            # Get or create collection
            self.collection = self.client.get_or_create_collection(
                name=self.collection_name,
                metadata={"description": "PDF document embeddings for RAG"}
            )
            print(f"Vector store initialized. Collection: {self.collection_name}")
            print(f"Existing documents in collection: {self.collection.count()}")
            
        except Exception as e:
            print(f"Error initializing vector store: {e}")
            raise
        
    def add_documents(self, documents: List[Any], embeddings: np.ndarray):
        """
        Add documents and their embeddings to the vector store
        
        Args:
            documents: List of LangChain documents
            embeddings: Corresponding embeddings for the documents
        """
        if len(documents) != len(embeddings):
            raise ValueError("Number of documents must match number of embeddings")
        
        print(f"Adding {len(documents)} documents to vector store...")
        
        # Prepare data for ChromaDB
        ids = []
        metadatas = []
        documents_text = []
        embeddings_list = []
        
        for i, (doc, embedding) in enumerate(zip(documents, embeddings)):
            # Generate unique ID
            doc_id = f"doc_{uuid.uuid4().hex[:8]}_{i}"
            ids.append(doc_id)
            
            # Prepare metadata
            metadata = dict(doc.metadata)
            metadata['doc_index'] = i
            metadata['content_length'] = len(doc.page_content)
            metadatas.append(metadata)
            
            # Document content
            documents_text.append(doc.page_content)
            
            # Embedding
            embeddings_list.append(embedding.tolist())
            
            # Add to collection
        try:
            self.collection.add(
                ids=ids,
                embeddings=embeddings_list,
                metadatas=metadatas,
                documents=documents_text
            )
            print(f"Successfully added {len(documents)} documents to vector store")
            print(f"Total documents in collection: {self.collection.count()}")
            
        except Exception as e:
            print(f"Error adding documents to vector store: {e}")
            raise

vectorstore=VectorStore()
vectorstore
    

Vector store initialized. Collection: pdf_documents
Existing documents in collection: 868


<__main__.VectorStore at 0x11d015a50>

In [12]:
chunks

[Document(metadata={'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)', 'creationdate': "D:20240822090713Z00'00'", 'author': 'Sebastian Raschka', 'icnappplatform': 'Windows', 'icnappversion': '3.05', 'title': 'Build a Large Language Model (From Scratch)', 'moddate': "D:20250903101523Z00'00'", 'icnappname': 'Foxit Advanced PDF Editor', 'source': '../data/pdf/_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'total_pages': 370, 'page': 0, 'page_label': 'Build a Large Language Model (From Scratch)', 'source_file': '_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf', 'file_type': 'pdf'}, page_content='MANNING\nSebastian Raschka\nFROMSCRATCH\nBUILD A'),
 Document(metadata={'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)', 'creationdate': "D:2024

In [13]:
### Convert the text to embeddings
texts=[doc.page_content for doc in chunks]

## Generate the Embeddings

embeddings=embedding_manager.generate_embeddings(texts)

##store int he vector dtaabase
vectorstore.add_documents(chunks,embeddings)

Generating embeddings for 868 texts...


Batches: 100%|██████████| 28/28 [00:10<00:00,  2.67it/s]


Generated embeddings with shape: (868, 384)
Adding 868 documents to vector store...
Successfully added 868 documents to vector store
Total documents in collection: 1736


### Retriever Pipeline From VectorStore


In [14]:
class RAGRetriever:
    """Handles query-based retrieval from the vector store"""
    
    def __init__(self, vector_store: VectorStore, embedding_manager: EmbeddingManager):
        """
        Initialize the retriever
        
        Args:
            vector_store: Vector store containing document embeddings
            embedding_manager: Manager for generating query embeddings
        """
        self.vector_store = vector_store
        self.embedding_manager = embedding_manager
        
        
    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
        """
        Retrieve relevant documents for a query
        
        Args:
            query: The search query
            top_k: Number of top results to return
            score_threshold: Minimum similarity score threshold
            
        Returns:
            List of dictionaries containing retrieved documents and metadata
        """
        print(f"Retrieving documents for query: '{query}'")
        print(f"Top K: {top_k}, Score threshold: {score_threshold}")
        
        # Generate query embedding
        query_embedding = self.embedding_manager.generate_embeddings([query])[0]
        
        # Search in vector store
        try:
            results = self.vector_store.collection.query(
                query_embeddings=[query_embedding.tolist()],
                n_results=top_k
            )
            
            # Process results
            retrieved_docs = []
            
            if results['documents'] and results['documents'][0]:
                documents = results['documents'][0]
                metadatas = results['metadatas'][0]
                distances = results['distances'][0]
                ids = results['ids'][0]
                
                for i, (doc_id, document, metadata, distance) in enumerate(zip(ids, documents, metadatas, distances)):
                    # Convert distance to similarity score (ChromaDB uses cosine distance)
                    similarity_score = 1 - distance
                    
                    if similarity_score >= score_threshold:
                        retrieved_docs.append({
                            'id': doc_id,
                            'content': document,
                            'metadata': metadata,
                            'similarity_score': similarity_score,
                            'distance': distance,
                            'rank': i + 1
                        })
                
                print(f"Retrieved {len(retrieved_docs)} documents (after filtering)")
            else:
                print("No documents found")
            
            return retrieved_docs
            
        except Exception as e:
            print(f"Error during retrieval: {e}")
            return []

rag_retriever=RAGRetriever(vectorstore,embedding_manager)

In [15]:
rag_retriever

<__main__.RAGRetriever at 0x11cf1c110>

In [16]:
rag_retriever.retrieve("What is deep learning")

Retrieving documents for query: 'What is deep learning'
Top K: 5, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 10.27it/s]

Generated embeddings with shape: (1, 384)
Retrieved 5 documents (after filtering)





[{'id': 'doc_3fc2e309_271',
  'content': '253A.1 What is PyTorch?\n Machine learning represents a subfield of AI, as illustrated in figure A.2, that focuses\non developing and improving learning algorithms. The key idea behind machine\nlearning is to enable computers to learn from data and make predictions or decisions\nwithout being explicitly programmed to perform the task. This involves developing\nalgorithms that can identify patterns, learn from historical data, and improve their\nperformance over time with more data and feedback.\nMachine learning has been integral in the evolution of AI, powering many of the\nadvancements we see today, including LLMs. Machine learning is also behind technol-\nogies like recommendation systems used by online retailers and streaming services,\nemail spam filtering, voice recognition in virtual assistants, and even self-driving cars.\nThe introduction and advancement of machine learning have significantly enhanced\nAI’s capabilities, enabling it to

In [17]:
rag_retriever.retrieve("what is Large Language Model")

Retrieving documents for query: 'what is Large Language Model'
Top K: 5, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 17.51it/s]

Generated embeddings with shape: (1, 384)
Retrieved 5 documents (after filtering)





[{'id': 'doc_77f98c65_2',
  'content': 'Build a Large Language Model (From Scratch)',
  'metadata': {'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext, AppendMode 1.1',
   'icnappplatform': 'Windows',
   'source_file': '_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf',
   'author': 'Sebastian Raschka',
   'page_label': 'i',
   'total_pages': 370,
   'creationdate': "D:20240822090713Z00'00'",
   'creator': 'FrameMaker 16.0.1(Foxit Advanced PDF Editor)',
   'icnappversion': '3.05',
   'page': 2,
   'moddate': "D:20250903101523Z00'00'",
   'source': '../data/pdf/_OceanofPDF.com_Build_a_Large_Language_Model_From_Scratch_-_Sebastian_Raschka.pdf',
   'file_type': 'pdf',
   'doc_index': 2,
   'icnappname': 'Foxit Advanced PDF Editor',
   'content_length': 43,
   'title': 'Build a Large Language Model (From Scratch)'},
  'similarity_score': 0.5554735362529755,
  'distance': 0.44452646374702454,
  'rank': 1},
 {'id': 'doc_af66cfd9_2',
  'content

RAG Pipeline- VectorDB To LLM Output Generation

In [18]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

class LocalHFLLM:
    def __init__(
        self,
        model_name="Qwen/Qwen2.5-1.5B-Instruct",
        device=None
    ):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto"
        )

        self.model.eval()
        print(f"Loaded model: {model_name} on {self.device}")

    def generate_response(self, query: str, context: str) -> str:
        prompt = f"""
You are a helpful assistant.

Context:
{context}

Question:
{query}

Answer:
"""

        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048
        ).to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=300,
                do_sample=False,
                temperature=0.1
            )

        return self.tokenizer.decode(
            outputs[0],
            skip_special_tokens=True
        )


Integration Vectordb Context pipeline With LLM output


In [19]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


In [20]:
!pip install transformers torch accelerate bitsandbytes


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [21]:
class LocalHFLLM:
    def __init__(
        self,
        model_name="Qwen/Qwen2.5-1.5B-Instruct"
    ):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="cpu"
        )

        self.model.eval()
        print(f"Loaded {model_name} on {self.device}")

    def invoke(self, prompt: str):
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=2048
        ).to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=300,
                do_sample=False,
                temperature=0.1
            )

        return self.tokenizer.decode(
            outputs[0],
            skip_special_tokens=True
        )
local_llm=LocalHFLLM()

`torch_dtype` is deprecated! Use `dtype` instead!


Loaded Qwen/Qwen2.5-1.5B-Instruct on cpu


In [23]:
def rag_simple(query, retriever, local_llm, top_k=3):
    # 1. Retrieve context
    results = retriever.retrieve(query, top_k=top_k)

    context = "\n\n".join(
        [doc["content"] for doc in results]
    ) if results else ""

    if not context:
        return "No relevant context found to answer the question."

    # 2. Prompt
    prompt = f"""Use the following context to answer the question concisely.
Context:
{context}

Question: {query}

Answer:"""

    # 3. Generate answer using local HF model
    response = local_llm.invoke(prompt)
    return response


In [26]:
answer = rag_simple(
    query="What is a Deep Learning?",
    retriever=rag_retriever,
    local_llm=local_llm,
    top_k=3
)

print(answer)


Retrieving documents for query: 'What is a Deep Learning?'
Top K: 3, Score threshold: 0.0
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.11it/s]
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Use the following context to answer the question concisely.
Context:
253A.1 What is PyTorch?
 Machine learning represents a subfield of AI, as illustrated in figure A.2, that focuses
on developing and improving learning algorithms. The key idea behind machine
learning is to enable computers to learn from data and make predictions or decisions
without being explicitly programmed to perform the task. This involves developing
algorithms that can identify patterns, learn from historical data, and improve their
performance over time with more data and feedback.
Machine learning has been integral in the evolution of AI, powering many of the
advancements we see today, including LLMs. Machine learning is also behind technol-
ogies like recommendation systems used by online retailers and streaming services,
email spam filtering, voice recognition in virtual assistants, and even self-driving cars.
The introduction 

In [29]:
def rag_advanced(
    query,
    retriever,
    llm,
    top_k=5,
    min_score=0.2,
    return_context=False
):
    """
    Advanced RAG pipeline:
    - Uses vector DB retrieval
    - Generates grounded answer via local HF LLM
    - Returns answer, sources, confidence, optional context
    """

    # 1. Retrieve documents
    results = retriever.retrieve(
        query,
        top_k=top_k,
        score_threshold=min_score
    )

    if not results:
        return {
            'answer': 'No relevant context found.',
            'sources': [],
            'confidence': 0.0,
            'context': '' if return_context else None
        }

    # 2. Prepare context (ranked)
    context = "\n\n".join(
        [doc['content'] for doc in results]
    )

    # 3. Prepare sources
    sources = [
        {
            'source': doc['metadata'].get(
                'source_file',
                doc['metadata'].get('source', 'unknown')
            ),
            'page': doc['metadata'].get('page', 'unknown'),
            'score': float(doc['similarity_score']),
            'preview': doc['content'][:300] + '...'
        }
        for doc in results
    ]

    # 4. Confidence score
    confidence = max(doc['similarity_score'] for doc in results)

    # 5. Prompt
    prompt = f"""Use the following context to answer the question concisely.
If the answer is not present in the context, reply ONLY with:
  "The provided context does not contain enough information to answer this question.."

Context:
{context}

Question: {query}

Answer:"""

    # 6. Generate answer (HF LLM)
    response_text = llm.invoke(prompt)

    # 7. Output
    output = {
        'answer': response_text,
        'sources': sources,
        'confidence': confidence
    }

    if return_context:
        output['context'] = context

    return output


In [30]:
result = rag_advanced(
    "what is Large Language Model",
    rag_retriever,
    local_llm,
    top_k=3,
    min_score=0.1,
    return_context=True
)

print("Answer:", result["answer"])
print("Sources:", result["sources"])
print("Confidence:", result["confidence"])
print("Context Preview:", result["context"][:300])


Retrieving documents for query: 'what is Large Language Model'
Top K: 3, Score threshold: 0.1
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.63it/s]


Generated embeddings with shape: (1, 384)
Retrieved 3 documents (after filtering)
Answer: Use the following context to answer the question concisely.
If the answer is not present in the context, reply ONLY with:
  "The provided context does not contain enough information to answer this question.."

Context:
Build a Large Language Model (From Scratch)

Build a Large Language Model (From Scratch)

Build a Large
Language Model
(From Scratch)
SEBASTIAN RASCHKA
MANNING
SHELTER ISLAND

Question: what is Large Language Model

Answer: A Large Language Model is a type of artificial intelligence that can generate human-like text based on patterns learned from vast amounts of data. It's built from scratch without using pre-existing models or frameworks. The book mentions building such a model from scratch as part of its content. Sebastian Raschka authored this book titled "Build a Large Language Model (From Scratch)." Shelter Island served as the location for the publication event. The term 'Larg