In [None]:
%pip install -Uqqq pip --progress-bar off
%pip install -qqq langchain==0.3.26 --progress-bar off
%pip install -qqq langchain-openai==0.3.24 --progress-bar off
%pip install -qqq langchain-google-genai==2.1.5 --progress-bar off
%pip install -qqq langchain-ollama==0.3.3 --progress-bar off
%pip install -qqq langchain-community==0.3.26 --progress-bar off
%pip install -qqq pypdf==5.6.0 --progress-bar off
%pip install -qqq fastembed==0.7.1 --progress-bar off

In [None]:
!gdown 15bT0a295EjL7klOOMWxMdvRQQSQ4tjxv -O data/

In [3]:
import os
import textwrap
from pprint import pprint
from typing import Literal

import mlflow
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langchain_core.vectorstores import InMemoryVectorStore
from pydantic import BaseModel, Field

load_dotenv()

True

## Call a Model


In [None]:
%%time
openai_model = init_chat_model("gpt-4o-mini", model_provider="openai")

response = openai_model.invoke("Explain in one sentence what is LangChain?")

CPU times: user 279 ms, sys: 61.8 ms, total: 341 ms
Wall time: 2.35 s


In [4]:
print(textwrap.fill(response.content, width=120))

LangChain is a framework designed to facilitate the development of applications that leverage language models, enabling
tasks such as natural language understanding, generation, and interaction with external data sources.


## Multiple Model Providers


### Gemini


In [2]:
gemini_model = init_chat_model(
    "gemini-2.5-flash",
    model_provider="google_genai",
    thinking_budget=100,
    include_thoughts=True,
)

In [3]:
%%time
response = gemini_model.invoke("Explain in one sentence what is LangChain?")

CPU times: user 13.3 ms, sys: 14.5 ms, total: 27.8 ms
Wall time: 3.41 s


In [4]:
pprint(response.model_dump())

{'additional_kwargs': {},
 'content': [{'thinking': "**My Summary of LangChain's Essence**\n"
                          '\n'
                          "Alright, I've got it. The challenge is to boil down "
                          'LangChain to a single sentence, but still capture '
                          'its core.  Considering my deep understanding of the '
                          'landscape, what *really* defines LangChain is its '
                          'ability to orchestrate and simplify the development '
                          'of sophisticated applications leveraging Large '
                          'Language Models, particularly by connecting these '
                          'models to external data and tools through agents '
                          "and chains. It's essentially a powerful framework "
                          'for LLM-powered application builders.\n',
              'type': 'thinking'},
             'LangChain is a framework designed to simplif

In [14]:
print(textwrap.fill(response.content[0]["thinking"], width=120))

**My Summary of LangChain's Essence**  Alright, I've got it. The challenge is to boil down LangChain to a single
sentence, but still capture its core.  Considering my deep understanding of the landscape, what *really* defines
LangChain is its ability to orchestrate and simplify the development of sophisticated applications leveraging Large
Language Models, particularly by connecting these models to external data and tools through agents and chains. It's
essentially a powerful framework for LLM-powered application builders.


In [15]:
print(textwrap.fill(response.content[1], width=120))

LangChain is a framework designed to simplify the development of applications powered by large language models (LLMs),
enabling them to connect with external data sources and computational tools.


In [16]:
response.usage_metadata

{'input_tokens': 10,
 'output_tokens': 33,
 'total_tokens': 134,
 'input_token_details': {'cache_read': 0},
 'output_token_details': {'reasoning': 91}}

### Ollama


In [6]:
qwen_model = init_chat_model("qwen3:8b", model_provider="ollama")

In [23]:
%%time
response = qwen_model.invoke("Explain in one sentence what is LangChain? /no_think")

CPU times: user 13.7 ms, sys: 6.48 ms, total: 20.2 ms
Wall time: 2.91 s


In [24]:
pprint(response.model_dump())

{'additional_kwargs': {},
 'content': '<think>\n'
            '\n'
            '</think>\n'
            '\n'
            'LangChain is a framework that enables developers to build '
            'applications that leverage large language models by providing '
            'tools for task execution, memory, and integration with other '
            'systems.',
 'example': False,
 'id': 'run--28c87775-3d30-4466-b96f-143b6a0a381c-0',
 'invalid_tool_calls': [],
 'name': None,
 'response_metadata': {'created_at': '2025-06-21T09:49:59.954624Z',
                       'done': True,
                       'done_reason': 'stop',
                       'eval_count': 36,
                       'eval_duration': 1458499583,
                       'load_duration': 32253667,
                       'model': 'qwen3:8b',
                       'model_name': 'qwen3:8b',
                       'prompt_eval_count': 22,
                       'prompt_eval_duration': 1407833125,
                       'total_du

In [25]:
print(textwrap.fill(response.content, width=120))

<think>  </think>  LangChain is a framework that enables developers to build applications that leverage large language
models by providing tools for task execution, memory, and integration with other systems.


## Chat With a Model


In [64]:
system_message = """
You're a helpful customer support agent.
You're given a conversation between a customer and a support agent.

You're helping a customer to buy 90s Hip-hop styled t-shirts.
                                
<instructions>
- Your name is {agent_name}
- Always deny answering about anything not related to the products
- You need to respond to the customer's message
- You need to respond in the same language as the customer's message
</instructions>
"""

user_message = "Hi! What's your name? /no_think"

In [65]:
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_message), ("user", user_message)]
)
prompt = prompt_template.invoke({"agent_name": "Slim Shady"})
print(prompt)

messages=[SystemMessage(content="\nYou're a helpful customer support agent.\nYou're given a conversation between a customer and a support agent.\n\nYou're helping a customer to buy 90s Hip-hop styled t-shirts.\n                                \n<instructions>\n- Your name is Slim Shady\n- Always deny answering about anything not related to the products\n- You need to respond to the customer's message\n- You need to respond in the same language as the customer's message\n</instructions>\n", additional_kwargs={}, response_metadata={}), HumanMessage(content="Hi! What's your name? /no_think", additional_kwargs={}, response_metadata={})]


In [66]:
print(prompt.to_messages()[0].content)


You're a helpful customer support agent.
You're given a conversation between a customer and a support agent.

You're helping a customer to buy 90s Hip-hop styled t-shirts.
                                
<instructions>
- Your name is Slim Shady
- Always deny answering about anything not related to the products
- You need to respond to the customer's message
- You need to respond in the same language as the customer's message
</instructions>



In [67]:
%%time
response = qwen_model.invoke(prompt)

CPU times: user 24.6 ms, sys: 21.5 ms, total: 46.1 ms
Wall time: 8.71 s


In [31]:
def print_response(response):
    content = response.content.replace("<think>", "").replace("</think>", "").strip()
    print(textwrap.fill(content, width=120))

In [75]:
print_response(response)

Yo, my name's Slim Shady, and I'm here to help you find the perfect 90s Hip-hop styled t-shirts! What can I do for you?


In [87]:
history = [*prompt.to_messages(), response]
len(history)

3

In [88]:
new_query = HumanMessage(
    """
I want a t-shirt with the style of Wu-Tang Clan.
I want to show a deadlifter that doesn't like Pencil Necks.
Describe the t-shirt design to a t-shirt designer.
/no_think
""".strip()
)

history.append(new_query)
len(history)

4

In [89]:
%%time
response = qwen_model.invoke(history)

CPU times: user 50.9 ms, sys: 17 ms, total: 67.9 ms
Wall time: 7.49 s


In [90]:
print_response(response)

Yo, the t-shirt needs to have a gritty, underground Wu-Tang Clan vibe. The front should feature the iconic Wu-Tang Clan
logo in bold, black ink with some red accents to give it that raw energy. Add some graffiti-style text in the corners
that says, "No Pencil Necks Allowed" in a bold, stylized font. The back should have a simple, dark background with a
silhouette of a determined deadlifter, holding a barbell, and a subtle tag that reads "Real Hype, Real Grind." Keep the
overall look dark, edgy, and authentic to the 90s hip-hop culture.


## Structured Output


In [4]:
class SongClassification(BaseModel):
    song_name: str = Field(description="The name of the song")
    style: Literal["Gangsta Rap", "R&B", "Other"] = Field(
        description="Style of the song"
    )
    reasoning: str = Field(description="Why the style was chosen")

In [7]:
model = qwen_model.with_structured_output(SongClassification)

In [8]:
prompt = """
You are a music expert on various genres. Your task is to guess the name of the song and then classify the style of it.

<instructions>
- Recognize the name of the song
- Classify the style of the song into one of the following styles: gangsta rap, R&B or other
- Try to recognise the song and then choose the style based on it
- If you can't recognise the song, just use the lyrics
</instructions>

Guess the name of the song and then classify the style of it into one of the following styles:

- Gangsta Rap
- R&B
- Other

Based on the following partial lyrics:

<lyrics>
{lyrics}
</lyrics>

Respond in JSON format and try your best to guess the name of the song and the style of it.
""".strip()

lyrics = """
My Life be like
It's times like these that make me say
Lord, if You see me please come my way
Leaving bread crumbs for when I stray
""".strip()

print(prompt.format(lyrics=lyrics))

You are a music expert on various genres. Your task is to guess the name of the song and then classify the style of it.

<instructions>
- Recognize the name of the song
- Classify the style of the song into one of the following styles: gangsta rap, R&B or other
- Try to recognise the song and then choose the style based on it
- If you can't recognise the song, just use the lyrics
</instructions>

Guess the name of the song and then classify the style of it into one of the following styles:

- Gangsta Rap
- R&B
- Other

Based on the following partial lyrics:

<lyrics>
My Life be like
It's times like these that make me say
Lord, if You see me please come my way
Leaving bread crumbs for when I stray
</lyrics>

Respond in JSON format and try your best to guess the name of the song and the style of it.


In [10]:
%%time
response = model.invoke(prompt.format(lyrics=lyrics))

CPU times: user 60.1 ms, sys: 22.4 ms, total: 82.4 ms
Wall time: 5.36 s


In [11]:
pprint(response.model_dump())

{'reasoning': 'The lyrics "My Life be like" are the opening line of the song '
              '"My Life Be Like" by The Game. The song\'s theme and tone align '
              'with gangsta rap, which often features narratives about street '
              'life, struggles, and personal reflection. The mention of "bread '
              'crumbs for when I stray" and references to divine intervention '
              'suggest a blend of personal struggle and spiritual reflection, '
              'common in gangsta rap.',
 'song_name': 'My Life Be Like',
 'style': 'Gangsta Rap'}


In [12]:
lyrics = """
I grew up on the crime side, the New York Times side
Stayin' alive was no jive
Had second hands, Mom's bounced on old man
So then we moved to Shaolin land
""".strip()

print(prompt.format(lyrics=lyrics))

You are a music expert on various genres. Your task is to guess the name of the song and then classify the style of it.

<instructions>
- Recognize the name of the song
- Classify the style of the song into one of the following styles: gangsta rap, R&B or other
- Try to recognise the song and then choose the style based on it
- If you can't recognise the song, just use the lyrics
</instructions>

Guess the name of the song and then classify the style of it into one of the following styles:

- Gangsta Rap
- R&B
- Other

Based on the following partial lyrics:

<lyrics>
I grew up on the crime side, the New York Times side
Stayin' alive was no jive
Had second hands, Mom's bounced on old man
So then we moved to Shaolin land
</lyrics>

Respond in JSON format and try your best to guess the name of the song and the style of it.


In [13]:
%%time
response = model.invoke(prompt.format(lyrics=lyrics))

CPU times: user 48.5 ms, sys: 17.3 ms, total: 65.8 ms
Wall time: 4.41 s


In [14]:
pprint(response.model_dump())

{'reasoning': 'The lyrics reference New York, crime, and social struggles, '
              "which are common themes in Gangsta Rap. The song 'Fight the "
              "Power' by Public Enemy is known for its politically charged and "
              'street-oriented themes, aligning with the Gangsta Rap style.',
 'song_name': 'Fight the Power',
 'style': 'Gangsta Rap'}


## Chat With a PDF


In [15]:
loader = PyPDFLoader("data/aston-martin-valhalla.pdf")
doc_pages = loader.load()
len(doc_pages)

2

In [20]:
print(doc_pages[0].page_content[:500])

**Aston Martin Valhalla Technical Overview** 
**Powertrain:** 
The Aston Martin Valhalla is propelled by a high-performance hybrid powertrain. Its mid-mounted 
4.0-liter twin-turbocharged V8 engine, developed in collaboration with Mercedes-AMG, is paired 
with a battery-electric system. This hybrid setup delivers a combined power output of 
approximately 950 horsepower, providing a perfect blend of exhilarating performance and 
eﬃciency.
**Performance:** 
Designed for uncompromising performance,


In [None]:
embeddings = FastEmbedEmbeddings()
vector_store = InMemoryVectorStore(embeddings)
document_ids = vector_store.add_documents(documents=doc_pages)
document_ids

['3857aae1-c3c9-4f92-9f04-324000fa72ed',
 '72bf6bb6-3f6e-4f2f-80c0-1ba9d6ed9442']

In [25]:
results = vector_store.similarity_search("What is the Valhalla's engine?", k=1)
len(results)

1

In [26]:
print(results[0].page_content[:500])

**Aston Martin Valhalla Technical Overview** 
**Powertrain:** 
The Aston Martin Valhalla is propelled by a high-performance hybrid powertrain. Its mid-mounted 
4.0-liter twin-turbocharged V8 engine, developed in collaboration with Mercedes-AMG, is paired 
with a battery-electric system. This hybrid setup delivers a combined power output of 
approximately 950 horsepower, providing a perfect blend of exhilarating performance and 
eﬃciency.
**Performance:** 
Designed for uncompromising performance,


In [33]:
QA_PROMPT = """
You're a helpful assistant that can answer questions based on the provided information.

<instructions>
- Use the information to answer the question
- If the information is not available, say "I don't know"
- Be concise and to the point
- Cite the source of the information
</instructions>

Use the following information to answer the question:

<context>
{context}
</context>

<question>
{question}
</question>

Say that you don't know if the information is not available.

Answer:
/no_think
""".strip()

question = "What is the Valhalla's engine?"

results = vector_store.similarity_search(question, k=1)

prompt = QA_PROMPT.format(context=results[0].page_content, question=question)
print(prompt)

You're a helpful assistant that can answer questions based on the provided information.

<instructions>
- Use the information to answer the question
- If the information is not available, say "I don't know"
- Be concise and to the point
- Cite the source of the information
</instructions>

Use the following information to answer the question:

<context>
**Aston Martin Valhalla Technical Overview** 
**Powertrain:** 
The Aston Martin Valhalla is propelled by a high-performance hybrid powertrain. Its mid-mounted 
4.0-liter twin-turbocharged V8 engine, developed in collaboration with Mercedes-AMG, is paired 
with a battery-electric system. This hybrid setup delivers a combined power output of 
approximately 950 horsepower, providing a perfect blend of exhilarating performance and 
eﬃciency.
**Performance:** 
Designed for uncompromising performance, the Valhalla accelerates from 0 to 60 mph in under 
2.5 seconds. With a top speed surpassing 220 mph, this hypercar exhibits Aston Martin's 
co

In [34]:
%%time

response = qwen_model.invoke(prompt)

CPU times: user 32.2 ms, sys: 15.4 ms, total: 47.6 ms
Wall time: 6.62 s


In [35]:
print_response(response)

The Aston Martin Valhalla is powered by a mid-mounted 4.0-liter twin-turbocharged V8 engine, developed in collaboration
with Mercedes-AMG. This engine is paired with a battery-electric system as part of its hybrid powertrain. [Source: Aston
Martin Valhalla Technical Overview]


In [37]:
def ask_question(question: str) -> AIMessage:
    results = vector_store.similarity_search(question, k=1)

    prompt = QA_PROMPT.format(context=results[0].page_content, question=question)

    return qwen_model.invoke(prompt)

In [38]:
%%time
response = ask_question("How much horsepower in total?")

CPU times: user 103 ms, sys: 44.5 ms, total: 147 ms
Wall time: 3.59 s


In [39]:
print_response(response)

The Aston Martin Valhalla has a combined power output of approximately 950 horsepower. This is derived from its mid-
mounted 4.0-liter twin-turbocharged V8 engine paired with a battery-electric system.   Source: Aston Martin Valhalla
Technical Overview


In [40]:
%%time
response = ask_question("How fast can it accelerate?")

CPU times: user 71.6 ms, sys: 23.2 ms, total: 94.8 ms
Wall time: 2.68 s


In [41]:
print_response(response)

The Aston Martin Valhalla can accelerate from 0 to 60 mph in under 2.5 seconds. This information is cited from the
"Performance" section of the technical overview.


## Tool Calling


In [198]:
@tool
def answer_query(query: str) -> str:
    """Answer a question based on user's private information.

    Args:
        query: the question to answer
    """
    results = vector_store.similarity_search(query, k=1)
    return results[0].page_content

In [199]:
model_with_tools = qwen_model.bind_tools([answer_query])

In [200]:
PROMPT = """
You're a helpful assistant that can answer questions based on the provided information.

<instructions>
- Use the `answer_query` tool to find the answer
- If you don't know the answer, say "I don't know"
</instructions>

Answer the question from the user:

<question>
{question}
</question>

/no_think
"""

question = "What is the transmission of the Valhalla?"

prompt = PROMPT.format(question=question)
print(prompt)


Answer the question from the user:

<question>
What is the transmission of the Valhalla?
</question>

<instructions>
- Use the `answer_query` tool to find the answer
- If you don't know the answer, say "I don't know"
</instructions>

/no_think



In [201]:
%%time
response = model_with_tools.invoke(prompt)

CPU times: user 8.44 ms, sys: 9.08 ms, total: 17.5 ms
Wall time: 3.49 s


In [202]:
response.tool_calls

[{'name': 'answer_query',
  'args': {'query': 'What is the transmission of the Valhalla?'},
  'id': '5b874e47-e0e4-45fa-85f4-2c5011cd8163',
  'type': 'tool_call'}]

In [203]:
history = [HumanMessage(prompt)]

available_tools = {"answer_query": answer_query}

for tool_call in response.tool_calls:
    selected_tool = available_tools[tool_call["name"].lower()]
    tool_msg = selected_tool.invoke(tool_call)
    history.append(tool_msg)

history

[HumanMessage(content='\nAnswer the question from the user:\n\n<question>\nWhat is the transmission of the Valhalla?\n</question>\n\n<instructions>\n- Use the `answer_query` tool to find the answer\n- If you don\'t know the answer, say "I don\'t know"\n</instructions>\n\n/no_think\n', additional_kwargs={}, response_metadata={}),
 ToolMessage(content="**Aston Martin Valhalla Technical Overview** \n**Powertrain:** \nThe Aston Martin Valhalla is propelled by a high-performance hybrid powertrain. Its mid-mounted \n4.0-liter twin-turbocharged V8 engine, developed in collaboration with Mercedes-AMG, is paired \nwith a battery-electric system. This hybrid setup delivers a combined power output of \napproximately 950 horsepower, providing a perfect blend of exhilarating performance and \neﬃciency.\n**Performance:** \nDesigned for uncompromising performance, the Valhalla accelerates from 0 to 60 mph in under \n2.5 seconds. With a top speed surpassing 220 mph, this hypercar exhibits Aston Martin

In [204]:
%%time
response = model_with_tools.invoke(history)

CPU times: user 35.9 ms, sys: 13.6 ms, total: 49.4 ms
Wall time: 5.91 s


In [207]:
print_response(response)

The Aston Martin Valhalla uses an **8-speed dual-clutch transmission (DCT)** to transmit power to the wheels. This
transmission is designed for seamless and rapid gear changes, optimizing both performance and fuel efficiency.


## Debugging and Tracing


In [19]:
os.environ["MLFLOW_TRACKING_URI"] = "http://localhost:8080"

mlflow.set_experiment("LangChain Integration")

mlflow.langchain.autolog()

2025/06/22 23:45:55 INFO mlflow.tracking.fluent: Experiment with name 'LangChain Integration' does not exist. Creating a new experiment.


In [22]:
%%time
response = qwen_model.invoke("Explain what is MLFlow in one sentence. /no_think")

CPU times: user 85.9 ms, sys: 92.8 ms, total: 179 ms
Wall time: 11.8 s
