## Simple AI Agent

In [None]:
from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.tools import tool

load_dotenv()
MODEL = "gpt-4.1-mini"


@tool('get_weather', description='Return weather information for a given city', return_direct=False)
def get_weather(city: str):
    response = requests.get(f'https://wttr.in/{city}?format=j1')
    return response.json()


agent = create_agent(
    model=MODEL,  # Ensure the API key of the model provider is provided in the .env 
    tools=[get_weather],
    system_prompt='You are a helpful weather assistant, who always cracks jokes and is humorous while remaining helpful.'
)

response = agent.invoke({
    'messages': [
        {'role': 'user', 'content': 'What is the weather like in Vienna?'},
    ]
})

print(response)
print(response['messages'][-1]['content'])


## Standalone Model Inference

In [None]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model

load_dotenv()
MODEL = "gpt-4.1-mini"

model = init_chat_model(
    model=MODEL,  # Ensure the API key of the model provider is provided in the .env
    temperature=0.1
)

response = model.invoke('What is Python?')

print(response)
print(response.content)

In [None]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain.messages import HumanMessage, AIMessage, SystemMessage

load_dotenv()
MODEL = "gpt-4.1-mini"

model = init_chat_model(
    model=MODEL,  # Ensure the API key of the model provider is provided in the .env
    temperature=0.1
)

conversation = [
    SystemMessage('You are a helpful assistant for questions regarding programming'),
    HumanMessage('What is Python?'),
    AIMessage('Python is an interpreted programming language.'),
    HumanMessage('When was it released?')
]

response = model.invoke(conversation)

print(response)
print(response.content)

In [None]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model

load_dotenv()
MODEL = "mistral-medium-2508"

model = init_chat_model(
    model=MODEL,  # Ensure the API key of the model provider is provided in the .env
    temperature=0.1
)

for chunk in model.stream('Hello, what is Python?'):
    print(chunk.text, end='', flush=True)


## Advanced Agent Example

In [None]:
import requests
from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.tools import tool, ToolRuntime
from langchain.chat_models import init_chat_model
from langgraph.checkpoint.memory import InMemorySaver
from dataclasses import dataclass

load_dotenv()
MODEL = "gpt-4.1-mini"


@dataclass
class Context:
    user_id: str


@dataclass
class ResponseFormat:
    summary: str
    temperature_celsius: float
    temperature_fahrenheit: float
    humidity: float


@tool('get_weather', description='Return weather information for a given city', return_direct=False)
def get_weather(city: str):
    response = requests.get(f'https://wttr.in/{city}?format=j1')
    return response.json()


@tool('locate_user', description="Look up a user's city based on the context")
def locate_user(runtime: ToolRuntime[Context]):
    match runtime.context.user_id:
        case 'ABC123':
            return 'Vienna'
        case 'XYZ456':
            return 'London'
        case 'HJK789':
            return 'Paris'
        case _:
            return 'Unknown'


model = init_chat_model(
    MODEL,  # Ensure the API key of the model provider is provided in the .env
    temperature=0.3
)

checkpointer = InMemorySaver()

agent = create_agent(
    model=model,
    tools=[get_weather, locate_user],
    system_prompt='You are a helpful weather assistant, who always cracks jokes and is humorous while remaining helpful.',
    context_schema=Context,
    response_format=ResponseFormat,
    checkpointer=checkpointer
)

config = {'configurable': {'thread_id': 1}}

response = agent.invoke({
    'messages': [
        {'role': 'user', 'content': 'What is the weather like in Vienna?'}
    ]},
    config=config,
    context=Context(user_id='ABC123')
)

print(response['structured_response'])
print(response['structured_response'].summary)
print(response['structured_response'].temperature_celsius)

response = agent.invoke({
    'messages': [
        {'role': 'user', 'content': 'What is the weather like in Vienna?'}
    ]},
    config=config,  # Still the same thread
    context=Context(user_id='ABC123')
)

print(response['structured_response'].summary)


## Multimodal Input

In [None]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain.messages import HumanMessage
from base64 import b64encode

load_dotenv()
MODEL = "gpt-4.1-mini"

model = init_chat_model(MODEL)

message = {
    'role': 'user',
    'content': [
        {'type': 'text', 'text': 'Describe the contents of this image.'},
        # {'type': 'image', 'url': '<YOUR_IMAGE_URL>'},
        {
            'type': 'image',
            'base64': b64encode(open('image.png', 'rb').read()).decode(),
            'mime-type': 'image/png'
        },
    ],
}

# message = HumanMessage(
#     content=[
#         {'type': 'text', 'text': 'Describe the contents of this image.'},
#         # {'type': 'image', 'url': '<YOUR_IMAGE_URL>'},
#         {
#             'type': 'image',
#             'base64': b64encode(open('image.png', 'rb').read()).decode(),
#             'mime-type': 'image/png'
#         },
#     ],
# )

response = model.invoke([message])

print(response.content)

## RAG Example

In [2]:
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

MODEL = "llama2"
EMBEDDING_MODEL = "mxbai-embed-large"

embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)

texts = [
    'Apple makes very good computers.',
    'I believe Apple is innovative!',
    'I love apples.',
    'I am a fan of MacBooks.',
    'I enjoy oranges.',
    'I like Lenovo Thinkpads.',
    'I think pears taste very good.'
]

vector_store = FAISS.from_texts(texts=texts, embedding=embeddings)

print('Similarity search results based on provided text input:')
for text in vector_store.similarity_search('Apples are my fav food.', k=7):
    print(text.page_content)
print('-' * 40)
for text in vector_store.similarity_search('Linux is the best OS!', k=7):
    print(text.page_content)
print('-' * 40)

Similarity search results based on provided text input:
I love apples.
I enjoy oranges.
I am a fan of MacBooks.
I think pears taste very good.
I believe Apple is innovative!
Apple makes very good computers.
I like Lenovo Thinkpads.
----------------------------------------
Apple makes very good computers.
I am a fan of MacBooks.
I like Lenovo Thinkpads.
I believe Apple is innovative!
I love apples.
I enjoy oranges.
I think pears taste very good.
----------------------------------------


In [3]:
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain.embeddings import init_embeddings
from langchain_community.vectorstores import FAISS
from langchain_core.tools import create_retriever_tool

MODEL_PROVIDER = "ollama"
MODEL_NAME = "qwen3-vl:4b"
EMBEDDING_MODEL_NAME = "mxbai-embed-large"

embeddings = init_embeddings(
    model=EMBEDDING_MODEL_NAME,
    provider=MODEL_PROVIDER
)

texts = [
    'I love apples.',
    'I enjoy oranges.',
    'I think pears taste very good.',
    'I hate bananas.',
    'I dislike raspberries.',
    'I despise mangos',
    'i love Linux.',
    'I hate Windows.'
]

vector_store = FAISS.from_texts(texts=texts, embedding=embeddings)

print('Similarity search results based on provided text input:')
for text in vector_store.similarity_search('What fruits does the person like?', k=3):
    print(text.page_content)
print('-' * 40)
for text in vector_store.similarity_search('What fruits does the person hate?', k=3):
    print(text.page_content)
print('-' * 40)

retriever = vector_store.as_retriever(search_kwargs={'k': 3})

retriever_tool = create_retriever_tool(
    retriever,
    name='kb_search',
    description='Search the small product / fruit database for information.'
)

model = init_chat_model(
    model=MODEL_NAME,
    model_provider=MODEL_PROVIDER,
)

agent = create_agent(
    model=model,
    tools=[retriever_tool],
    system_prompt=(
        "You are a helpful assistant. For any questions first call the kb_search tool "
        "to retrieve context, then answer succinctly. Maybe you have to use it multiple times before answering."
    )
)

result = agent.invoke({
    "message": [{"role": "user",
                 "content": "What three fruits does the person like and what three fruits does the person like dislike?"}]
})

print(result)
print("-" * 40)
print(result['messages'][-1].content)

Similarity search results based on provided text input:
I enjoy oranges.
I love apples.
I think pears taste very good.
----------------------------------------
I despise mangos
I dislike raspberries.
I hate bananas.
----------------------------------------
{'messages': [AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen3-vl:4b', 'created_at': '2025-11-14T10:32:11.612165Z', 'done': True, 'done_reason': 'stop', 'total_duration': 68810651334, 'load_duration': 69427709, 'prompt_eval_count': 193, 'prompt_eval_duration': 1611791541, 'eval_count': 2563, 'eval_duration': 66715759615, 'model_name': 'qwen3-vl:4b', 'model_provider': 'ollama'}, id='lc_run--e50b5666-533b-46ee-bead-91259a02642c-0', tool_calls=[{'name': 'kb_search', 'args': {'query': 'small fruit'}, 'id': 'daaaad31-46d5-4089-a5bc-849cef0e0997', 'type': 'tool_call'}], usage_metadata={'input_tokens': 193, 'output_tokens': 2563, 'total_tokens': 2756}), ToolMessage(content='I love apples.\n\nI enjoy oranges.\n\

## Dynamic System Prompts

In [4]:
from dataclasses import dataclass
from langchain.agents import create_agent
from langchain.agents.middleware import ModelRequest, dynamic_prompt

PROVIDER_NAME = 'ollama'
MODEL_NAME = 'llama2'


@dataclass
class Context:
    user_role: str


@dynamic_prompt
def user_role_prompt(request: ModelRequest) -> str:
    user_role = request.runtime.context.user_role

    base_prompt = 'You are a helpful and very concise assistant.'

    match user_role:
        case 'expert':
            return f'{base_prompt} Provide detail technical responses.'
        case 'beginner':
            return f'{base_prompt} Keep your explanations simple and basic.'
        case 'child':
            return f'{base_prompt} Explain everything as if you were literally talking to a five-year old.'
        case _:
            return base_prompt


agent = create_agent(
    model=f'{PROVIDER_NAME}:{MODEL_NAME}',
    middleware=[user_role_prompt],
    context_schema=Context
)

response = agent.invoke({'messages': [{'role': 'user', 'content': 'Explain PCA.'}]},
                        context=Context(user_role='expert'))

print(response)
print("-" * 40)
print(response['messages'][-1].content)

{'messages': [HumanMessage(content='Explain PCA.', additional_kwargs={}, response_metadata={}, id='49b5f273-a339-4ef9-92c9-e36da53a08e8'), AIMessage(content="\nPrincipal Component Analysis (PCA) is a dimension reduction technique that transforms a set of correlated variables into a set of linearly uncorrelated ones, called principal components. The goal of PCA is to find a lower-dimensional representation of the data that preserves as much information as possible about the original data.\n\nHere's how PCA works:\n\n1. Standardize the Data: The input data is standardized by subtracting the mean and dividing by the standard deviation for each variable. This ensures that all variables are on the same scale, which is important for linear transformations.\n2. Covariance Matrix Calculation: The covariance matrix is calculated from the standardized data. The covariance matrix is a square matrix that contains the pairwise covariances between all possible combinations of variables.\n3. Eigenval

## Dynamic Model Choice

In [6]:
from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain.agents.middleware import ModelRequest, ModelResponse, wrap_model_call
from langchain.messages import HumanMessage, AIMessage, SystemMessage

load_dotenv()

basic_model = init_chat_model(
    model='llama2',
    model_provider='ollama'
)
advanced_model = init_chat_model(
    model='gpt-4.1-mini',
    model_provider='openai'
)


@wrap_model_call
def dynamic_model_selection(request: ModelRequest, handler) -> ModelResponse:
    message_count = len(request.state['messages'])

    if message_count > 3:
        model = advanced_model
    else:
        model = basic_model

    request.model = model

    return handler(request)


agent = create_agent(
    model=model,
    middleware=[dynamic_model_selection]
)

response = agent.invoke({
    'messages': [
        SystemMessage('You are a helpful and very concise assistant.'),
        HumanMessage('What is 1+1?')
    ]
})

print(response['messages'][-1].content)
print(response['messages'][-1].response_metadata['model_name'])

Of course! The answer to 1+1 is 2.
llama2


## Custom Agent Middleware

In [8]:
import time
from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.agents.middleware import AgentMiddleware, AgentState

load_dotenv()
MODEL_PROVIDER = 'ollama'
MODEL_NAME = 'llama2'


class HooksDemo(AgentMiddleware):

    def __init__(self):
        super().__init__()
        self.start_time = 0.0

    def before_agent(self, state: AgentState, runtime):
        self.start_time = time.time()
        print('before_agent triggered')

    def before_model(self, state: AgentState, runtime):
        print('before_model triggered')

    def after_model(self, state: AgentState, runtime):
        print('after_model triggered')

    def after_agent(self, state: AgentState, runtime):
        print('after_agent:', '{:.2f}'.format(time.time() - self.start_time), 'seconds')


agent = create_agent(
    model=f'{PROVIDER_NAME}:{MODEL_NAME}',
    middleware=[HooksDemo()]
)

response = agent.invoke({'messages': [{'role': 'user', 'content': 'Explain PCA.'}]},
                        context=Context(user_role='expert'))

print("-" * 40)
print(response)
print("-" * 40)
print(response['messages'][-1].content)

before_agent triggered
before_model triggered
after_model triggered
after_agent: 24.44 seconds
----------------------------------------
{'messages': [HumanMessage(content='Explain PCA.', additional_kwargs={}, response_metadata={}, id='0a1b3b62-0d70-4a36-85a8-acf63ed8306c'), AIMessage(content='Principal Component Analysis (PCA) is a dimension reduction technique that is used to simplify complex datasets by reducing the number of features or variables while retaining most of the information in the data. It is a statistical method that transforms a set of correlated variables into a set of uncorrelated variables, called principal components, which are linear combinations of the original variables.\n\nThe goal of PCA is to identify the underlying structure of the data and reduce the number of features while retaining as much information as possible. It is commonly used in data analysis, machine learning, and signal processing.\n\nPCA works by finding the directions of maximum variance in t