# Table of Contents
- [Introduction to LangChain](#introduction-to-langchain)
    - [Chat Model](#chat-model)
    - [Prompt Template](#prompt-template)
    - [Classification](#classification)
    - [Extraction](#extraction)
    - [Multiple Entities](#multiple-entities)
    - [Reference Examples](#reference-examples)
- [References](#references)

# Introduction to LangChain 🦜🔗
Date: 17.05.2025

![agent_langchain-intro.png](assets/agent_langchain-intro.png)


In [1]:
# Import and configure LangChain models, tools, and environment
from langchain.chat_models import init_chat_model
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.utils.function_calling import tool_example_to_messages
from pydantic import BaseModel, Field
from typing import List, Optional
from IPython.display import display, HTML
import pandas as pd
pd.set_option('display.max_colwidth', None)
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Load LLMs and print model names
model_anthropic = init_chat_model("claude-3-5-sonnet-latest", model_provider="anthropic")
print(model_anthropic.model)

model_google = ChatGoogleGenerativeAI(model="gemini-2.0-flash",
    google_api_key=os.getenv("GOOGLE_GEMINI_KEY"))
print(model_google.model)
model_google_name = model_google.model.split("/")[-1]

model_openai = init_chat_model("gpt-4o-mini", model_provider="openai")
print(model_openai.model_name)

claude-3-5-sonnet-latest
models/gemini-2.0-flash
gpt-4o-mini


In [15]:
# Column names
columns = ["Anthropic", "Google", "OpenAI"]

# Model names
model_names = [
    model_anthropic.model,
    model_google_name,
    model_openai.model_name
]

# Logos file paths
svg_files = [
    './assets/claude.svg', 
    './assets/gemini.svg',
    './assets/openai.svg', 
]

# First row: SVG images as HTML <img> tags
svg_row = [
    f'<div style="display:inline-block; background:linear-gradient(45deg, #e6e2fe, #ffdcf2); border-radius:50%; width:48px; height:48px; '
    f'display:flex; align-items:center; justify-content:center;">'
    f'<img src="{svg}" style="width:32px; height:32px; display:block; margin:auto;" /></div>'
    for svg in svg_files
]

# Create DataFrame
df_models = pd.DataFrame([svg_row, model_names], columns=columns, index=["logo", "model"])

# Display with HTML rendering for images
display(HTML(df_models.to_html(escape=False)))


Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini


## Chat Model

Sends hardcoded System and Human messages directly to the models

In [None]:
# Query all models using the chat model and compare their answers
messages = [
    SystemMessage("Provide the answer limited to 280 characters"),
    HumanMessage("What is the federal share and the maximum grant amount under the Wireless Electric Vehicle Charging Grant Program?"),
]

model_answer = pd.DataFrame([
    {
        "OpenAI": model_openai.invoke(messages).content,
        "Anthropic": model_anthropic.invoke(messages).content,
        "Google": model_google.invoke(messages).content
    },
    {
        "OpenAI": f"{len(model_openai.invoke(messages).content)} characters",
        "Anthropic": f"{len(model_anthropic.invoke(messages).content)} characters",
        "Google": f"{len(model_google.invoke(messages).content)} characters"
    },
], index=["chat model", "length answer_1"])

df = pd.concat([df_models, model_answer])
display(HTML(df.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
chat model,Federal share is 80% of total project costs. Maximum grant amount is $15 million per project. The program has $100 million in funding from 2026-2028.,Federal share is 80%. Max grant is $15 million.,"The federal share under the Wireless Electric Vehicle Charging Grant Program is up to 50% of project costs. The maximum grant amount can reach $2 million per project. Please verify with the latest guidelines, as specifics may change."
length answer_1,146 characters,106 characters,263 characters


## Prompt Template

Builds a prompt dynamically using a template with custom variables

In [None]:
# Generate and compare model responses using a dynamic prompt template
system_template = "Provide the answer within 280 characters in {language}"

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{text}")]
)

prompt = prompt_template.invoke({"language": "German", "text": "What is the federal share and the maximum grant amount under the Wireless Electric Vehicle Charging Grant Program?"})

model_answer = pd.DataFrame([
    {
        "OpenAI": model_openai.invoke(prompt).content,
        "Anthropic": model_anthropic.invoke(prompt).content,
        "Google": model_google.invoke(prompt).content
    },
    {
        "OpenAI": f"{len(model_openai.invoke(prompt).content)} characters",
        "Anthropic": f"{len(model_anthropic.invoke(prompt).content)} characters",
        "Google": f"{len(model_google.invoke(prompt).content)} characters"
    },
], index=["prompt template", "length answer_2"])

df = pd.concat([df, model_answer])

display(HTML(df.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
chat model,Federal share is 80% of total project costs. Maximum grant amount is $15 million per project. The program has $100 million in funding from 2026-2028.,Federal share is 80%. Max grant is $15 million.,"The federal share under the Wireless Electric Vehicle Charging Grant Program is up to 50% of project costs. The maximum grant amount can reach $2 million per project. Please verify with the latest guidelines, as specifics may change."
length answer_1,146 characters,106 characters,263 characters
prompt template,Der Bundesanteil beträgt 80% der förderfähigen Kosten. Die maximale Fördersumme pro Projekt liegt bei 15 Millionen US-Dollar im Rahmen des Förderprogramms für kabelloses Laden von Elektrofahrzeugen.,Bundesanteil: 80%. Max. Zuschuss: 15 Mio. $ pro Projekt.,Unter dem Wireless Electric Vehicle Charging Grant Program beträgt der Bundesanteil 80 % und der maximale Zuschuss 5 Millionen US-Dollar pro Projekt.
length answer_2,198 characters,55 characters,170 characters


## Classification

In [19]:
# Define output schema for structured model responses
class Classification(BaseModel):
    """Information about a funding program."""
    share: int = Field(description="What is the federal share in percent under this funding program?")
    amount: int = Field(description="What is the maximum grant amount under this funding program?")
    language: str = Field(description="The language the text is written in")

# Wrap models to return structured output matching Classification
structured_openai = model_openai.with_structured_output(Classification)
structured_anthropic = model_anthropic.with_structured_output(Classification)
structured_google = model_google.with_structured_output(Classification)

# Create prompt to extract structured data from input text
tagging_prompt = ChatPromptTemplate.from_template(
    """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

# Define input text containing federal share and grant amount
inp = """
4 SEC. 6. COST SHARE.
5 (a) FEDERAL SHARE.—The Federal share of a grant
6 awarded under the Program may not exceed 80 percent
7 of total cost of the project.
8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
9 grant awarded to an eligible entity under the Program
10 may not exceed $25,000,000.
"""

# Build structured prompt from input text
prompt = tagging_prompt.invoke({"input": inp})

# Get structured outputs from all models
model_answer = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(prompt),
        "Anthropic": structured_anthropic.invoke(prompt),
        "Google": structured_google.invoke(prompt)
    },
], index=["classification"])

# Append results to DataFrame and display
df_classification = pd.concat([df_models, model_answer])
display(HTML(df_classification.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
classification,share=80 amount=25000000 language='en',share=80 amount=25000000 language='english',share=80 amount=25000000 language='English'


## Extraction

In [20]:
# Define output schema for structured model responses
class Extraction(BaseModel):
    """Information about a funding program."""
    share: Optional[int] = Field(default=None, description="What is the federal share in percent under this funding program?")
    amount: Optional[int] = Field(default=None, description="What is the maximum grant amount under this funding program?")
    language: Optional[str] = Field(default=None, description="The language the text is written in")

# Wrap models to return structured output matching Extraction
structured_openai = model_openai.with_structured_output(schema=Extraction)
structured_anthropic = model_anthropic.with_structured_output(schema=Extraction)
structured_google = model_google.with_structured_output(schema=Extraction)

# Create prompt to extract structured data from input text
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        ("human", "{text}"),
    ]
)

# Define input text containing federal share and grant amount
text = """
4 SEC. 6. COST SHARE.
5 (a) FEDERAL SHARE.—The Federal share of a grant
6 awarded under the Program may not exceed 80 percent
7 of total cost of the project.
8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
9 grant awarded to an eligible entity under the Program
10 may not exceed $25,000,000.
"""

# Build structured prompt from input text
prompt = prompt_template.invoke({"text": text})

# Get structured outputs from all models
model_answer = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(prompt),
        "Anthropic": structured_anthropic.invoke(prompt),
        "Google": structured_google.invoke(prompt)
    },
], index=["extraction"])

# Append results to DataFrame and display
df_extraction = pd.concat([df_classification, model_answer])
display(HTML(df_extraction.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
classification,share=80 amount=25000000 language='en',share=80 amount=25000000 language='english',share=80 amount=25000000 language='English'
extraction,share=80 amount=25000000 language='en',share=80 amount=25000000 language=None,share=80 amount=25000000 language=None


## Multiple Entities

In [21]:
# Define output schema for structured model responses
class Program(BaseModel):
    """Information about a program."""
    share: Optional[int] = Field(default=None, description="What is the federal share in percent under this program?")
    amount: Optional[int] = Field(default=None, description="What is the maximum grant amount under this program?")
    language: Optional[str] = Field(default=None, description="The language the text is written in")

# Define container schema to extract a list of program entities
class Data(BaseModel):
    """Extracted data about programs."""

    # Create a model to extract multiple entities
    program: List[Program]

# Wrap models to return structured output matching Data
structured_openai = model_openai.with_structured_output(schema=Data)
structured_anthropic = model_anthropic.with_structured_output(schema=Data)
structured_google = model_google.with_structured_output(schema=Data)

# Define input text containing federal share and grant amount
text = """Wireless Electric Vehicle Charging Grant Program: 
4 SEC. 6. COST SHARE.
5 (a) FEDERAL SHARE.—The Federal share of a grant
6 awarded under the Program may not exceed 80 percent
7 of total cost of the project.
8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
9 grant awarded to an eligible entity under the Program
10 may not exceed $25,000,000.

Die Förderrichtlinie „Öffentlich zugängliche Ladeinfrastruktur für Elektrofahrzeuge in Deutschland“ 
(2021–2025) des Bundesministeriums für Digitales und Verkehr (BMDV) stellt bis Ende 2025 
insgesamt 500 Millionen Euro im Rahmen mehrerer Förderaufrufe zur Verfügung.  
Rechtspersonen und Privatpersonen können Fördermittel von bis zu 60 % der förderfähigen Kosten erhalten.
"""

# Build structured prompt from input text
prompt = prompt_template.invoke({"text": text})

# Get structured outputs from all models
model_answer = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(prompt),
        "Anthropic": structured_anthropic.invoke(prompt),
        "Google": structured_google.invoke(prompt)
    },
], index=["multiple entities"])

# Append results to DataFrame and display
df_multiple_entities = pd.concat([df_models, model_answer])
display(HTML(df_multiple_entities.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
multiple entities,"program=[Program(share=80, amount=25000000, language='en'), Program(share=60, amount=None, language='de')]","program=[Program(share=80, amount=25000000, language='en'), Program(share=60, amount=500000000, language='de')]","program=[Program(share=80, amount=25000000, language='English'), Program(share=60, amount=500000000, language='German')]"


## Reference Examples

In [22]:
# Define input-output examples for model grounding and tool call validation
examples = [
    (
        """Congress.gov is the official website for U.S. federal legislative information. 
        The site provides access to accurate, timely, and complete legislative information 
        for Members of Congress, legislative agencies, and the general public.""",
        Data(program=[]),
    ),
    (
        """Wireless Electric Vehicle Charging Grant Program: 
        4 SEC. 6. COST SHARE.
        5 (a) FEDERAL SHARE.—The Federal share of a grant
        6 awarded under the Program may not exceed 80 percent
        7 of total cost of the project.
        8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
        9 grant awarded to an eligible entity under the Program
        10 may not exceed $25,000,000.""",
        Data(program=[{"share": 80, "amount": 25000000, "language": "English"}]),
    ),
]

# Initialize empty message list for storing converted examples
messages = []

# Convert input-output examples into formatted tool call messages
for txt, tool_call in examples:
    if tool_call.program:
        ai_response = "Detected program."
    else:
        ai_response = "Detected no program."
    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))

# Print each generated message in a readable format
for message in messages:
    message.pretty_print()


Congress.gov is the official website for U.S. federal legislative information. 
        The site provides access to accurate, timely, and complete legislative information 
        for Members of Congress, legislative agencies, and the general public.
Tool Calls:
  Data (2363c4ab-f5fd-4cc0-a05d-7ed59bdfa704)
 Call ID: 2363c4ab-f5fd-4cc0-a05d-7ed59bdfa704
  Args:
    program: []

You have correctly called this tool.

Detected no program.

Wireless Electric Vehicle Charging Grant Program: 
        4 SEC. 6. COST SHARE.
        5 (a) FEDERAL SHARE.—The Federal share of a grant
        6 awarded under the Program may not exceed 80 percent
        7 of total cost of the project.
        8 (b) MAXIMUM GRANT AMOUNT.—The amount of a
        9 grant awarded to an eligible entity under the Program
        10 may not exceed $25,000,000.
Tool Calls:
  Data (9cbc9856-c85f-4361-8391-51adebefd0aa)
 Call ID: 9cbc9856-c85f-4361-8391-51adebefd0aa
  Args:
    program: [{'share': 80, 'amount': 25000000, '

In [23]:
# Define a user message with text containing no relevant program info
message_no_extraction = {
    "role": "user",
    "content":  """Article I of the U.S. Constitution grants all legislative powers to a bicameral Congress: 
    a House of Representatives and a Senate that are the result of a “Great Compromise” 
    seeking to balance the effects of popular majorities with the interests of the states""",
}

# Invoke all models to extract structured data from irrelevant input
model_answer = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke([message_no_extraction]),
        "Anthropic": structured_anthropic.invoke([message_no_extraction]),
        "Google": structured_google.invoke([message_no_extraction])
    },
], index=["without examples"])

# Append results to DataFrame and display to check for false positives
df_reference_examples = pd.concat([df_models, model_answer])
display(HTML(df_reference_examples.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
without examples,"program=[Program(share=None, amount=None, language='en')]",program=[],"program=[Program(share=50, amount=1000000, language='en')]"


In [24]:
# Invoke models with example messages plus a no-extraction message for context
model_answer = pd.DataFrame([
    {
        "OpenAI": structured_openai.invoke(messages + [message_no_extraction]),
        "Anthropic": structured_anthropic.invoke(messages + [message_no_extraction]),
        "Google": structured_google.invoke(messages + [message_no_extraction])
    },
], index=["with examples"])

# Save the cumulative model comparison results to CSV
df.to_csv("./data/output/model-comparison.csv")

# Append new results to DataFrame 
df_reference_examples = pd.concat([df_reference_examples, model_answer])
display(HTML(df_reference_examples.to_html(escape=False)))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
without examples,"program=[Program(share=None, amount=None, language='en')]",program=[],"program=[Program(share=50, amount=1000000, language='en')]"
with examples,program=[],program=[],"program=[Program(share=80, amount=25000000, language='English')]"


In [27]:
# Display with left-aligned text and centered headers
html = f"""
<style>
    table {{
        border: 1px solid #ddd;
        border-collapse: collapse;
    }}
    th {{
        text-align: center;
        border: 1px solid #ddd;
        padding: 8px;
    }}
    td {{
        text-align: left;
        border: 1px solid #ddd;
        padding: 8px;
    }}
</style>
{df_reference_examples.to_html(escape=False, index=True, border=0)}
"""

display(HTML(html))

Unnamed: 0,Anthropic,Google,OpenAI
logo,,,
model,claude-3-5-sonnet-latest,gemini-2.0-flash,gpt-4o-mini
without examples,"program=[Program(share=None, amount=None, language='en')]",program=[],"program=[Program(share=50, amount=1000000, language='en')]"
with examples,program=[],program=[],"program=[Program(share=80, amount=25000000, language='English')]"



# References 
- LangChain. Chat Model and Prompts: https://python.langchain.com/docs/tutorials/llm_chain/
- LangChain. Classification: https://python.langchain.com/docs/tutorials/classification/
- LangChain. Extraction: https://python.langchain.com/docs/tutorials/extraction/
