In [1]:
import os

In [4]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv

In [21]:
load_dotenv()

True

In [16]:
model = ChatGroq(model='qwen/qwen3-32b' )

In [17]:
model.invoke('the sky is').content

'<think>\nOkay, the user wrote "the sky is". They might be trying to start a sentence or a question. Let me think about possible continuations. Maybe they want to describe the sky in a certain way, like "the sky is blue" or "the sky is cloudy". Alternatively, they could be asking for information, such as "the sky is what color?" or "the sky is made of what?".\n\nSince the input is very short, I should consider if they need help completing a thought or if they have a specific question in mind. They might be referring to a common phrase or a riddle. For example, "The sky is the limit" or "The sky is falling". \n\nI should also check if there\'s any context from previous messages, but since this is the first message, there\'s nothing to reference. The best approach is to ask for clarification. I can offer examples to prompt them to elaborate on what they\'re thinking. That way, I can provide a more accurate and helpful response.\n\nSo, I\'ll respond by asking what they were going to say n

In [19]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [62]:
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [63]:
embedding = embedding_model.embed_query('the sky is')

In [29]:
len(embedding)

768

## 1. Data Ingestion

In [32]:
from langchain.document_loaders import PyPDFLoader

In [33]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [35]:
import os
pdf_path = os.path.join(os.getcwd(), 'data', 'sample.pdf')

In [37]:
pdf = PyPDFLoader(pdf_path)

In [39]:
document = pdf.load()

In [42]:
document[:5]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '/Users/uw-user/Library/CloudStorage/GoogleDrive-vvithurshan@gmail.com/My Drive/Artificial-Intelligence-2025/Machine-Learning/Deep-Learning/LLM/LLMOPs/Document-Portal/Notebook/data/sample.pdf', 'total_pages': 77, 'page': 0, 'page_label': '1'}, page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman 

In [44]:
# each page is a document
len(document)

77

In [45]:
# make chunks
# we have to experiment these values

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 150,
    length_function = len,
)

In [95]:
docs = text_splitter.split_documents(document)

In [47]:
len(docs)

765

In [58]:
docs[0].page_content

'Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\nHakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev'

In [61]:
docs[0].metadata

{'producer': 'pdfTeX-1.40.25',
 'creator': 'LaTeX with hyperref',
 'creationdate': '2023-07-20T00:30:36+00:00',
 'author': '',
 'keywords': '',
 'moddate': '2023-07-20T00:30:36+00:00',
 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5',
 'subject': '',
 'title': '',
 'trapped': '/False',
 'source': '/Users/uw-user/Library/CloudStorage/GoogleDrive-vvithurshan@gmail.com/My Drive/Artificial-Intelligence-2025/Machine-Learning/Deep-Learning/LLM/LLMOPs/Document-Portal/Notebook/data/sample.pdf',
 'total_pages': 77,
 'page': 0,
 'page_label': '1'}

In [None]:
# store the data in vector store

from langchain.vectorstores import FAISS

# in-memory storage
vectorstore = FAISS.from_documents(docs, embedding_model)

1. in-memoery (faiss, chroma)
2. on disk (faiss)
3. cloud (astradb, mongodbvectorsearch, milvus, weaviate)

## Retrival 

In [69]:
# similarity search

vectorstore.similarity_search('what is llma model')

[Document(id='8985eec3-b756-4d16-949f-e942e11a9a65', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '/Users/uw-user/Library/CloudStorage/GoogleDrive-vvithurshan@gmail.com/My Drive/Artificial-Intelligence-2025/Machine-Learning/Deep-Learning/LLM/LLMOPs/Document-Portal/Notebook/data/sample.pdf', 'total_pages': 77, 'page': 2, 'page_label': '3'}, page_content='1 Introduction\nLarge Language Models (LLMs) have shown great promise as highly capable AI assistants that excel in\ncomplex reasoning tasks requiring expert knowledge across a wide range of fields, including in specialized\ndomains such as programming and creative writing. They enable interaction with humans through intuitive\nch

In [None]:
# k number of docs

relevant_doc = vectorstore.similarity_search('llma fine-tuning', k = 10)

In [77]:
len(relevant_doc)

10

In [75]:
relevant_doc[0].page_content

'Ba. Large language models are human-level prompt engineers. InThe Eleventh International Conference on\nLearning Representations, 2022.\n44'

question: user query
contenxt: 

In [78]:
prompt_template ="""
    Answer the question based on the context provided below.
    if the context does to contain sufficient information, respond with: "I 
    do not have enough information about this"
    context: {context}

    question: {question}

    answer: 
"""



In [87]:
# prompt template
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    template = prompt_template,
    input_variables = ['context', 'question']
)

prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n    Answer the question based on the context provided below.\n    if the context does to contain sufficient information, respond with: "I \n    do not have enough information about this"\n    context: {context}\n\n    question: {question}\n\n    answer: \n')

In [88]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

In [None]:
retriever.invoke('llama fine-tuining')

[Document(id='b0e7379d-16cc-4f07-b19e-c59701b2b0ad', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '/Users/uw-user/Library/CloudStorage/GoogleDrive-vvithurshan@gmail.com/My Drive/Artificial-Intelligence-2025/Machine-Learning/Deep-Learning/LLM/LLMOPs/Document-Portal/Notebook/data/sample.pdf', 'total_pages': 77, 'page': 73, 'page_label': '74'}, page_content='Llama 2\n7B 0.28 0.25 0.29 0.50 0.36 0.37 0.21 0.34 0.32 0.50 0.28 0.19 0.26 0.32 0.44 0.51 0.30 0.2513B 0.24 0.25 0.35 0.50 0.41 0.36 0.24 0.39 0.35 0.48 0.31 0.18 0.27 0.34 0.46 0.66 0.35 0.2834B 0.27 0.24 0.33 0.56 0.41 0.36 0.26 0.32 0.36 0.53 0.33 0.07 0.26 0.30 0.45 0.56 0.26 0.3570B 0.31 0.29 0.35 0.51 0.41 0.45 0.27 0.34

In [83]:
from langchain_core.output_parsers import StrOutputParser

In [84]:
parser = StrOutputParser()

In [100]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

In [101]:
from langchain_core.runnables import RunnablePassthrough

In [102]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()

)

In [103]:
rag_chain.invoke('talk about the fine-tuning of llama')

'<think>\nOkay, let me try to figure out how to answer the question about the fine-tuning of Llama based on the provided context. First, I need to recall what the context says.\n\nLooking at the context, there\'s a section titled "Fine-tuning" which mentions that Llama 2-Chat is created through supervised fine-tuning after pretraining. Then it goes on to talk about iterative refinement using Reinforcement Learning with Human Feedback (RLHF), specifically through rejection sampling and Proximal Policy Optimization. There are also some numbers and model sizes listed, like 7B, 13B, 34B, 70B parameters, but I\'m not sure if those are relevant here. \n\nThe question is asking about the fine-tuning process of Llama in general. The context mentions that Llama 2 is first pre-trained on public data, then supervised fine-tuning is applied to create Llama 2-Chat. After that, they use RLHF with rejection sampling and Proximal Policy Optimization (probably Proximal Policy Optimization algorithm) fo

In [104]:
# explore about the keyword filter