# 3. Sentiment Analysis

Structured outputs, i.e. mapping the output of an LLM onto a Python class.

## Setup

In [2]:
import os
from typing import Literal

try:
    # load environment variables from .env file (requires `python-dotenv`)
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    # python-dotenv is optional: without it the variables must already be
    # exported in the environment that started the kernel.
    pass

# Fail fast with one readable message that lists every missing setting.
# `os.environ[name]` raises KeyError for an unset name and never returns None,
# so an `is not None` assertion cannot detect anything; `.get()` lets us report
# all problems at once and also reject variables that are set but empty.
REQUIRED_ENV_VARS = (
    "LANGSMITH_TRACING",
    "LANGSMITH_API_KEY",
    "LANGSMITH_PROJECT",
    "OPENAI_API_KEY",
)
missing_env_vars = [name for name in REQUIRED_ENV_VARS if not os.environ.get(name)]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Export them or add them to a .env file."
    )

In [3]:
from langchain.chat_models import init_chat_model
# Chat model reused by later cells; the provider is passed explicitly.
model = init_chat_model("gpt-4o-mini", model_provider="openai")

## 3.1 Tagging

Tagging means labeling a document with classes such as:

- Sentiment
- Language
- Style (formal, informal, etc.)
- Covered topics
- Political tendency

Tagging has a few components:

- `function`: Like extraction, tagging uses functions to specify how the model should tag a document
- `schema`: defines how we want to tag the document


In [5]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

# Text of the tagging prompt. `{input}` is its only placeholder and receives
# the passage to classify when the prompt is invoked.
_tagging_template = """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""

tagging_prompt = ChatPromptTemplate.from_template(_tagging_template)


# Output schema for the tagger: the structured model's reply is parsed into
# this class.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# would presumably be forwarded to the LLM as the schema description; confirm
# before adding one.
class Classification(BaseModel):
    # Free-form sentiment label; no fixed set of values is declared here.
    sentiment: str = Field(description="The sentiment of the text")
    # Integer score; the 1-10 range is only stated in the description text.
    aggressiveness: int = Field(
        description="How aggressive the text is on a scale from 1 to 10"
    )
    # Free-form language name.
    language: str = Field(description="The language the text is written in")

# Structured LLM: `model` bound to the `Classification` schema, so invoking it
# yields a `Classification` instance (see the output of the next cell).
structured_llm = model.with_structured_output(Classification)

In [6]:
# Sample passage (Spanish) used to exercise the tagger.
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
# Fill the `{input}` placeholder, then ask the structured model to classify it.
prompt = tagging_prompt.invoke({"input": inp})
response = structured_llm.invoke(prompt)

# Last expression of the cell, so the parsed result is displayed.
response

Classification(sentiment='positive', aggressiveness=1, language='Spanish')

## 3.2 Finer Control over the output

Careful schema definition gives us more control over the model's output.

Specifically, we can define:
- Possible values for each property
- A description for each property, so that the model understands what it means
- Required properties to be returned


In [24]:
# Tagging schema with a closed set of allowed values for every field.
# The allowed values are expressed as `Literal` types instead of an extra
# `enum=` keyword on `Field`: Pydantic v2 deprecates arbitrary extra `Field`
# keywords and never validates against them, whereas `Literal` both emits an
# `enum` in the generated JSON schema and rejects any other value when the
# model's reply is parsed.
# Kept as `#` comments on purpose: a class docstring would become part of the
# schema description that is sent to the model.
class Classification(BaseModel):
    # `...` marks each field as required (no default value).
    sentiment: Literal["happy", "neutral", "sad"] = Field(...)
    aggressiveness: Literal[1, 2, 3, 4, 5] = Field(
        ...,
        description="describes how aggressive the statement is, the higher the number the more aggressive",
    )
    language: Literal["spanish", "english", "french", "german", "italian"] = Field(
        ...
    )

# Same template text as the tagging prompt from the earlier tagging cell,
# declared again here; `{input}` is filled with the passage to classify.
tagging_prompt = ChatPromptTemplate.from_template(
    """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

# Tagger built directly from `ChatOpenAI` with temperature=0 and bound to the
# `Classification` schema defined at the top of this cell.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini").with_structured_output(
    Classification
)

# Same sample passage as in the first tagging example, now run through the
# schema with restricted values.
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

# Display the parsed result as a dict.
response.model_dump()

{'sentiment': 'happy', 'aggressiveness': 1, 'language': 'spanish'}

In [38]:
# Sample passages, one per line: several languages, a mixed-language sentence,
# gibberish and digits-only text, to see how the restricted schema handles each.
passages = [
    "muy amigo esta muerto",
    "donde esta la bibliotecha",
    "merde!",
    "ich habe kein mehr Gelt. Oh well, i can always rob a bank",
    "Will you marry me?",
    "I'd marry you over my dead body you ugly 4-eyed piece of human excrement",
    "All the other kids with the pumped up kicks better run, better run, outrun my gun",
    "Jojo mogo roko wowo mukaka bamo",
    "23423 324342 2342290 00",
]

# Tag each passage in turn. The comprehension keeps its loop variable local, so
# the notebook-level `inp`, `prompt` and `response` set by other cells are not
# overwritten as a side effect of running this cell.
results = [
    llm.invoke(tagging_prompt.invoke({"input": passage})).model_dump()
    for passage in passages
]

results

[{'sentiment': 'sad', 'aggressiveness': 1, 'language': 'spanish'},
 {'sentiment': 'neutral', 'aggressiveness': 1, 'language': 'spanish'},
 {'sentiment': 'sad', 'aggressiveness': 5, 'language': 'french'},
 {'sentiment': 'sad', 'aggressiveness': 3, 'language': 'german'},
 {'sentiment': 'happy', 'aggressiveness': 1, 'language': 'english'},
 {'sentiment': 'sad', 'aggressiveness': 5, 'language': 'english'},
 {'sentiment': 'neutral', 'aggressiveness': 3, 'language': 'english'},
 {'sentiment': 'neutral', 'aggressiveness': 1, 'language': 'english'},
 {'sentiment': 'neutral', 'aggressiveness': 1, 'language': 'english'}]

## 3.3 Playground

In [19]:
from pydantic import BaseModel, Field

# Schema for one budget line extracted from free text.
# Closed value sets use `Literal` rather than an extra `enum=` keyword on
# `Field`: Pydantic v2 deprecates extra `Field` keywords and does not validate
# against them, while `Literal` puts the enum into the generated JSON schema
# and rejects any other value when the reply is parsed.
# The "defaults to ..." wording in the descriptions is guidance for the model
# only; no field declares a Python default, so every field is required.
# Kept as `#` comments on purpose: a class docstring would become part of the
# schema description that is sent to the model.
class BudgetEntry(BaseModel):
    amount: float = Field(description="The income or expense amount")
    currency: str = Field(description="The currency of the amount, defaults to AED")
    # "C" = credit (money received), "D" = debit (money spent).
    creditOrDebit: Literal["C", "D"] = Field(
        description="Credit or Debit. Debit if the amount was debited/spent. credit if the amount was received. Defaults to credit"
    )
    memo: str = Field(description="Short description of the credit/debit event e.g. Shopping")
    category: Literal["Salary", "Bills", "Rent", "Shopping", "Car", "Home"] = Field(
        description="The category of the credit/debit event e.g. Bills"
    )

# Prompt pieces first: a fixed system instruction plus the user's free-text
# budget entry, which fills the `{user_input}` placeholder.
system_template = """
Extract the properties of the 'BudgetEntry' function from the following input. 
"""
user_input = "20 aed on a haircut"
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_template),
        ("user", "{user_input}"),
    ]
)

# `structured_llm` is rebound here to the `BudgetEntry` schema; then render the
# prompt and run the extraction.
structured_llm = model.with_structured_output(BudgetEntry)
prompt = prompt_template.invoke({"user_input": user_input})
response = structured_llm.invoke(prompt)

In [20]:
# Show the parsed budget entry as a plain dict.
response.model_dump()

{'amount': 20.0,
 'currency': 'AED',
 'creditOrDebit': 'D',
 'memo': 'Haircut',
 'category': 'Shopping'}