# Introduction to LangChain 🦜🔗
Last Update: 25.05.2025

![agent_langchain-intro.png](assets/agent_langchain-intro-overview.png)


In [1]:
# Import and configure LangChain models, tools, and environment
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.utils.function_calling import tool_example_to_messages
from pydantic import BaseModel, Field
from typing import List, Optional
from IPython.display import display, HTML
import pandas as pd
pd.set_option('display.max_colwidth', None)
import os
from dotenv import load_dotenv
load_dotenv()

True

### LLMs

In [2]:
# Load LLMs and print model names
model_anthropic = init_chat_model("claude-3-5-sonnet-latest", model_provider="anthropic")
print(model_anthropic.model)

model_google = init_chat_model(model="gemini-2.0-flash", model_provider="google_genai")
print(model_google.model)
model_google_name = model_google.model.split("/")[-1]

model_openai = init_chat_model("gpt-4o-mini", model_provider="openai")
print(model_openai.model_name)

claude-3-5-sonnet-latest
models/gemini-2.0-flash
gpt-4o-mini


In [None]:
# Column names
columns = ["Anthropic", "Google", "OpenAI"]

# Model names
model_names = [
    model_anthropic.model,
    model_google_name,
    model_openai.model_name
]

# Training cutoff dates
training_cutoff = [
    "August 2023",   # Claude 3.5 Sonnet
    "August 2024", # Gemini 2.0 Flash
    "October 2023"   # GPT-4o Mini
]

# Context window (tokens)
context_window = [
    "200,000 tokens",    # Claude 3.5 Sonnet
    "1,000,000 tokens",  # Gemini 2.0 Flash
    "128,000 tokens"     # GPT-4o Mini
]

# Logos file paths
svg_files = [
    './assets/claude.svg', 
    './assets/gemini.svg',
    './assets/openai.svg', 
]

# First row: SVG images as HTML <img> tags
svg_row = [
    f'<div style="display:inline-block; background:linear-gradient(45deg, #e6e2fe, #ffdcf2); border-radius:50%; width:48px; height:48px; '
    f'display:flex; align-items:center; justify-content:center;">'
    f'<img src="{svg}" style="width:32px; height:32px; display:block; margin:auto;" /></div>'
    for svg in svg_files
]

# Create DataFrame
df_models = pd.DataFrame(
    [svg_row, model_names, training_cutoff, context_window],
    columns=columns,
    index=["logo", "model", "training cutoff", "context window"]
)

# Display with HTML rendering for images
styled = df_models.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])

display(HTML(styled.to_html(escape=False)))


Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
training cutoff,August 2023,August 2024,October 2023
context window,"200,000 tokens","1,000,000 tokens","128,000 tokens"


## Data

In [None]:
question = "What is the maximum grant amount (in dollars) that an eligible entity can receive under the Wireless Electric Vehicle Charging Grant Program Act of 2025?"

program_1 = """
4 SEC. 6. COST SHARE.
5 (a) FEDERAL SHARE.—The Federal share of a grant
6 awarded under the Program may not exceed 80 percent
7 of total cost of the project.
8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
9 grant awarded to an eligible entity under the Program
10 may not exceed $25,000,000.
"""

program_1_and_2 = """Wireless Electric Vehicle Charging Grant Program: 
4 SEC. 6. COST SHARE.
5 (a) FEDERAL SHARE.—The Federal share of a grant
6 awarded under the Program may not exceed 80 percent
7 of total cost of the project.
8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
9 grant awarded to an eligible entity under the Program
10 may not exceed $25,000,000.

Die maximale Förderhöhe für die Aufrüstung oder Ersatzbeschaffung einer 
Ladeeinrichtung liegt bei 60 Prozent der förderfähigen Ausgaben pro Ladepunkt 
und wird durch den maximalen Förderbetrag von 20.000 Euro pro Ladepunkt gedeckelt.
Die Mittelausstattung des Förderprogramms beträgt von 2021 bis 2025 insgesamt rund
500 Mio. Euro. Das Fördervolumen dieses Förderaufrufs beträgt 90 Mio. Euro."""

no_program_1 = """Congress.gov is the official website for U.S. federal legislative information. 
        The site provides access to accurate, timely, and complete legislative information 
        for Members of Congress, legislative agencies, and the general public."""

no_program_2 = """Article I of the U.S. Constitution grants all legislative powers to a bicameral Congress: 
    a House of Representatives and a Senate that are the result of a “Great Compromise” 
    seeking to balance the effects of popular majorities with the interests of the states"""

amount_description="What is the maximum grant amount (in dollars) that an eligible entity can receive under this Program?"
share_description="What is the maximum Federal share percentage allowed for a grant under this Program?"
language_description="Which language the text is written in (e.g., en, de, fr, etc.)?"

## Chat Model

Sends hardcoded System and Human messages directly to the models

In [5]:
# Query all models using the chat model and compare their answers
messages = [
    SystemMessage("Provide the answer limited to 280 characters"),
    HumanMessage(question),
]

model_chat = pd.DataFrame([
    {
        "OpenAI": model_openai.invoke(messages).content,
        "Anthropic": model_anthropic.invoke(messages).content,
        "Google": model_google.invoke(messages).content
    },
    {
        "OpenAI": f"{len(model_openai.invoke(messages).content)} characters",
        "Anthropic": f"{len(model_anthropic.invoke(messages).content)} characters",
        "Google": f"{len(model_google.invoke(messages).content)} characters"
    },
], index=["chat model", "answer length"])

df = pd.concat([df_models.head(2), model_chat])

styled = df.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])
display(HTML(styled.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
chat model,"$500,000 is the maximum grant amount that an eligible entity can receive under the Wireless Electric Vehicle Charging Grant Program Act of 2025.","$1,000,000","Under the Wireless Electric Vehicle Charging Grant Program Act of 2025, the maximum grant amount an eligible entity can receive is $5 million."
answer length,153 characters,10 characters,142 characters


## Prompt Template

Builds a prompt dynamically using a template with custom variables

In [6]:
# Generate and compare model responses using a dynamic prompt template
system_template = "Provide the answer within 280 characters in {language}"

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{text}")]
)

prompt = prompt_template.invoke({"language": "German", "text": question})

model_prompt = pd.DataFrame([
    {
        "OpenAI": model_openai.invoke(prompt).content,
        "Anthropic": model_anthropic.invoke(prompt).content,
        "Google": model_google.invoke(prompt).content
    },
    {
        "OpenAI": f"{len(model_openai.invoke(prompt).content)} characters",
        "Anthropic": f"{len(model_anthropic.invoke(prompt).content)} characters",
        "Google": f"{len(model_google.invoke(prompt).content)} characters"
    },
], index=["prompt template", "answer length"])

df = pd.concat([df_models.head(2), model_chat, model_prompt])

styled = df.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])
display(HTML(styled.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
chat model,"$500,000 is the maximum grant amount that an eligible entity can receive under the Wireless Electric Vehicle Charging Grant Program Act of 2025.","$1,000,000","Under the Wireless Electric Vehicle Charging Grant Program Act of 2025, the maximum grant amount an eligible entity can receive is $5 million."
answer length,153 characters,10 characters,142 characters
prompt template,Nach dem Gesetz können berechtigte Antragsteller maximal $500.000 pro Projekt als Förderung für kabellose Elektrofahrzeug-Ladestationen erhalten.,Ich kann keine spezifische Summe für 2025 nennen. Bitte konsultieren Sie die offizielle Gesetzgebung.,"Die maximale Zuschussbetrag, den eine förderfähige Stelle im Rahmen des Wireless Electric Vehicle Charging Grant Program Act von 2025 erhalten kann, beträgt 5 Millionen Dollar."
answer length,171 characters,50 characters,180 characters


## Classification

In [7]:

# Define output schema for structured model responses
class Classification(BaseModel):
    """Information about a funding program."""
    amount: int = Field(description=amount_description)
    share: int = Field(description=share_description)
    language: str = Field(description=language_description)

# Wrap models to return structured output matching Classification
structured_openai = model_openai.with_structured_output(Classification)
structured_anthropic = model_anthropic.with_structured_output(Classification)
structured_google = model_google.with_structured_output(Classification)

# Create prompt to extract structured data from input text
tagging_prompt = ChatPromptTemplate.from_template(
    """
Extract the desired information from the following passage.
Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

# Build structured prompt from input text
prompt = tagging_prompt.invoke({"input": program_1})

# Get structured outputs from all models
model_classification = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(prompt),
        "Anthropic": structured_anthropic.invoke(prompt),
        "Google": structured_google.invoke(prompt)
    },
], index=["classification"])

# Append results to DataFrame and display
df = pd.concat([df_models.head(2), model_classification])
styled = df.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])
display(HTML(styled.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
classification,amount=25000000 share=80 language='en',amount=25000000 share=80 language='en',amount=25000000 share=80 language='en'


## Extraction

### One Entity

In [8]:
# Define output schema for structured model responses
class Extraction(BaseModel):
    """Information about a funding program."""
    amount: Optional[int] = Field(default=None, description=amount_description)
    share: Optional[int] = Field(default=None, description=share_description)
    language: Optional[str] = Field(default=None, description=language_description)

# Wrap models to return structured output matching Extraction
structured_openai = model_openai.with_structured_output(schema=Extraction)
structured_anthropic = model_anthropic.with_structured_output(schema=Extraction)
structured_google = model_google.with_structured_output(schema=Extraction)

# Create prompt to extract structured data from input text
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        ("human", "{text}"),
    ]
)

# Define input text containing federal share and grant amount
text = program_1

# Build structured prompt from input text
prompt = prompt_template.invoke({"text": text})

# Get structured outputs from all models
model_extraction = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(prompt),
        "Anthropic": structured_anthropic.invoke(prompt),
        "Google": structured_google.invoke(prompt)
    },
], index=["extraction"])

# Append results to DataFrame and display
df = pd.concat([df_models.head(2), model_classification, model_extraction])
styled = df.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])
display(HTML(styled.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
classification,amount=25000000 share=80 language='en',amount=25000000 share=80 language='en',amount=25000000 share=80 language='en'
extraction,amount=25000000 share=80 language='en',amount=25000000 share=80 language=None,amount=25000000 share=80 language=None


### Multiple Entities

In [9]:
# Define output schema for structured model responses
class Program(BaseModel):
    """Information about a program."""
    amount: Optional[int] = Field(default=None, description=amount_description)
    share: Optional[int] = Field(default=None, description=share_description)
    language: Optional[str] = Field(default=None, description=language_description)

# Define container schema to extract a list of program entities
class Data(BaseModel):
    """Extracted data about programs."""
    # Create a model to extract multiple entities
    program: List[Program]

# Wrap models to return structured output matching Data
structured_openai = model_openai.with_structured_output(schema=Data)
structured_anthropic = model_anthropic.with_structured_output(schema=Data)
structured_google = model_google.with_structured_output(schema=Data)

# Define input text containing federal share and grant amount
text = program_1_and_2

# Build structured prompt from input text
prompt = prompt_template.invoke({"text": text})

# Get structured outputs from all models
model_muliple_entities = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(prompt),
        "Anthropic": structured_anthropic.invoke(prompt),
        "Google": structured_google.invoke(prompt)
    },
], index=["multiple entities"])

# Append results to DataFrame and display
df = pd.concat([df_models.head(2), model_muliple_entities])
styled = df.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])
display(HTML(styled.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
multiple entities,"program=[Program(amount=25000000, share=80, language='en'), Program(amount=20000, share=60, language='de')]","program=[Program(amount=25000000, share=80, language='en'), Program(amount=20000, share=60, language='de')]","program=[Program(amount=25000000, share=80, language='en'), Program(amount=20000, share=60, language='de')]"


### Reference Examples

In [10]:
# Define input-output examples for model grounding and tool call validation
examples = [
    (
        no_program_1,
        Data(program=[]),
    ),
    (
        program_1,
        Data(program=[{"share": 80, "amount": 25000000, "language": "English"}]),
    ),
]

# Initialize empty message list for storing converted examples
messages = []

# Convert input-output examples into formatted tool call messages
for txt, tool_call in examples:
    if tool_call.program:
        ai_response = "Detected program."
    else:
        ai_response = "Detected no program."
    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))

# Print each generated message in a readable format
for message in messages:
    message.pretty_print()


Congress.gov is the official website for U.S. federal legislative information. 
        The site provides access to accurate, timely, and complete legislative information 
        for Members of Congress, legislative agencies, and the general public.
Tool Calls:
  Data (8bd076b3-ba57-4f7b-9dd1-5c85b1c71ba1)
 Call ID: 8bd076b3-ba57-4f7b-9dd1-5c85b1c71ba1
  Args:
    program: []

You have correctly called this tool.

Detected no program.


4 SEC. 6. COST SHARE.
5 (a) FEDERAL SHARE.—The Federal share of a grant
6 awarded under the Program may not exceed 80 percent
7 of total cost of the project.
8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
9 grant awarded to an eligible entity under the Program
10 may not exceed $25,000,000.

Tool Calls:
  Data (dea80155-1cf9-414f-a322-8036c0b49d45)
 Call ID: dea80155-1cf9-414f-a322-8036c0b49d45
  Args:
    program: [{'amount': 25000000, 'share': 80, 'language': 'English'}]

You have correctly called this tool.

Detected program.


  messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))


In [11]:
# Define a user message with text containing no relevant program info
message_no_extraction = {
    "role": "user",
    "content":  no_program_2,
}

# Invoke all models to extract structured data from irrelevant input
model_without_examples = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke([message_no_extraction]),
        "Anthropic": structured_anthropic.invoke([message_no_extraction]),
        "Google": structured_google.invoke([message_no_extraction])
    },
], index=["without examples"])

# Append results to DataFrame and display to check for false positives
df = pd.concat([df_models.head(2), model_without_examples])
styled = df.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])
display(HTML(styled.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
without examples,"program=[Program(amount=None, share=None, language='en')]",program=[],"program=[Program(amount=0, share=0, language='en')]"


In [12]:
# Invoke models with example messages plus a no-extraction message for context
model_with_examples = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(messages + [message_no_extraction]),
        "Anthropic": structured_anthropic.invoke(messages + [message_no_extraction]),
        "Google": structured_google.invoke(messages + [message_no_extraction])
    },
], index=["with examples"])

# Append new results to DataFrame 
df = pd.concat([df_models.head(2), model_without_examples, model_with_examples])
styled = df.style.set_table_styles([
    {'selector': 'th.col_heading', 'props': [('width', '180px'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('width', '120px'), ('text-align', 'left')]}
])
display(HTML(styled.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
without examples,"program=[Program(amount=None, share=None, language='en')]",program=[],"program=[Program(amount=0, share=0, language='en')]"
with examples,program=[],program=[],"program=[Program(amount=25000000, share=80, language='English')]"


## Summary

In [13]:
df = pd.concat([df_models, model_chat, model_prompt, model_classification, model_extraction, model_muliple_entities, model_without_examples, model_with_examples])

# Save the cumulative model comparison results to CSV
df_no_logo = df.drop("logo", axis=0)
df_no_logo.to_excel("./data/output/model-comparison.xlsx", index=False)

# Display with left-aligned text and centered headers
html = f"""
<style>
    table {{
        border: 1px solid #ddd;
        border-collapse: collapse;
        table-layout: fixed;
    }}
    th {{
        text-align: center;
        border: 1px solid #ddd;
        padding: 8px;
    }}
    td {{
        text-align: left;
        border: 1px solid #ddd;
        padding: 8px;
    }}
    /* Target all columns except first (index) */
    th:nth-child(n+2),
    td:nth-child(n+2) {{
        width: 300px;
        min-width: 300px;  /* Prevent column shrinking */
        max-width: 300px;  /* Prevent column expansion */
        overflow: visible;  /* Handle overflow content */
        white-space: normal;
        word-break: break-word;
    }}
</style>

{df.to_html(escape=False, index=True, border=0)}
"""

display(HTML(html))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
training cutoff,August 2023,August 2024,October 2023
context window,"200,000 tokens","1,000,000 tokens","128,000 tokens"
chat model,"$500,000 is the maximum grant amount that an eligible entity can receive under the Wireless Electric Vehicle Charging Grant Program Act of 2025.","$1,000,000","Under the Wireless Electric Vehicle Charging Grant Program Act of 2025, the maximum grant amount an eligible entity can receive is $5 million."
answer length,153 characters,10 characters,142 characters
prompt template,Nach dem Gesetz können berechtigte Antragsteller maximal $500.000 pro Projekt als Förderung für kabellose Elektrofahrzeug-Ladestationen erhalten.,Ich kann keine spezifische Summe für 2025 nennen. Bitte konsultieren Sie die offizielle Gesetzgebung.,"Die maximale Zuschussbetrag, den eine förderfähige Stelle im Rahmen des Wireless Electric Vehicle Charging Grant Program Act von 2025 erhalten kann, beträgt 5 Millionen Dollar."
answer length,171 characters,50 characters,180 characters
classification,amount=25000000 share=80 language='en',amount=25000000 share=80 language='en',amount=25000000 share=80 language='en'
extraction,amount=25000000 share=80 language='en',amount=25000000 share=80 language=None,amount=25000000 share=80 language=None



# References 
- LangChain. Chat Model and Prompts: https://python.langchain.com/docs/tutorials/llm_chain/
- LangChain. Classification: https://python.langchain.com/docs/tutorials/classification/
- LangChain. Extraction: https://python.langchain.com/docs/tutorials/extraction/
