**Import libraries**

In [1]:
from typing import Optional, Dict, List, TypedDict
import uuid
import json

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)

import os
from dotenv import load_dotenv

# import warnings
# warnings.filterwarnings("ignore")

**Define schema**

In [2]:
class Property(BaseModel):
    """Information about a specific property, including its location, features, and amenities."""

    # NOTE: this model is handed to `llm.with_structured_output(schema=Property, ...)`
    # further down, so the docstring and the field descriptions are presumably
    # part of what the model sees -- treat them as prompt text, not just comments.
    #
    # Every field is Optional with an explicit `default=None`, so an attribute
    # that is not mentioned in the source text can simply be left unset.

    # --- Location ---
    location: Optional[str] = Field(
        default=None, description="The location of the property"
    )

    # --- Layout ---
    number_of_bedrooms: Optional[int] = Field(
        default=None, description="The total count of bedrooms in the property. This should be a non-negative integer."
    )
    number_of_bathrooms: Optional[int] = Field(
        default=None, description="The total count of bathrooms in the property, including both full and half bathrooms."
    )
    floor_number: Optional[int] = Field(
        default=None, description="The specific floor on which the property is located, if applicable. For single-story properties, this can be omitted."
    )

    # --- Amenities (True / False when stated, None when not mentioned) ---
    elevator_access: Optional[bool] = Field(
        default=None, description="Indicates whether the property can be accessed via an elevator."
    )
    parking: Optional[bool] = Field(
        default=None, description="Indicates whether parking facilities are available at the property."
    )
    air_conditioning: Optional[bool] = Field(
        default=None, description="Whether the property has air conditioning"
    )


**Prompt Template**

In [3]:
# System instructions: describe the extraction task and how to report
# attributes that the text does not mention.
system_instructions = (
    "You are a specialized extraction algorithm trained to analyze real estate descriptions. "
    "Your task is to identify and extract specific property details from the provided text accurately. "
    "Extract attributes like location, number of bedrooms, bathrooms, floor number, elevator access, and parking availability. "
    "If an attribute is not mentioned in the text, you should return 'null' for that attribute. "
    "Focus on the precision and relevance of the information extracted."
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instructions),
        # Few-shot example messages are inserted here; pass an empty list for none.
        MessagesPlaceholder("examples"),
        # The text to extract from, supplied as the "text" input at invoke time.
        ("human", "{text}"),
    ]
)

**Model**

(A temperature of 0 makes the output as deterministic as possible; identical results across runs are still not guaranteed.)

In [4]:
# Load variables from a local `.env` file (if one exists) into the process
# environment first, so the key can be kept there instead of being exported
# in the shell. Without this call only an already-exported variable is seen.
load_dotenv()

# None when the variable is not set anywhere.
api_key = os.getenv("OPENAI_API_KEY")

In [5]:
# Chat model used for the extraction.
llm = ChatOpenAI(
    openai_api_key=api_key,  # read from the environment in the previous cell
    model_name="gpt-4",
    temperature=0.0,  # no sampling randomness requested
)

**Create extractor**

In [6]:
# Wrap the chat model so that its answer is returned as a `Property` instance,
# using the function-calling mechanism to carry the structured data.
structured_llm = llm.with_structured_output(
    schema=Property,
    method="function_calling",
    include_raw=False,
)

# Full pipeline: fill in the prompt, then call the structured-output model.
runnable = prompt | structured_llm

**Extract** (without examples)

In [7]:
# Load the transcript and run the model

transcript_path = "transcripts/transcript_1.txt"

# Read with an explicit encoding so the result does not depend on the
# platform's default locale encoding.
with open(transcript_path, "r", encoding="utf-8") as transcript_file:
    transcript_string = transcript_file.read()

# Zero-shot run: the "examples" placeholder is filled with an empty list.
property_schema = runnable.invoke({"text": transcript_string, "examples": []})

property_schema

Property(location='near the beach', number_of_bedrooms=3, number_of_bathrooms=2, floor_number=None, elevator_access=None, parking=True, air_conditioning=None)

**Save as json**

In [8]:
# `.json()` already returns the model serialized as a JSON *string*.
property_schema_json = property_schema.json()

file_path = "property_schema.json"
# Write the string as-is. Passing it through `json.dump` would encode it a
# second time, leaving the file with a single quoted, escaped string instead
# of a JSON object.
with open(file_path, "w", encoding="utf-8") as file:
    file.write(property_schema_json)


### Using examples

**Define example object and adapter**

In [9]:

class Example(TypedDict):
    """One few-shot sample: a piece of input text plus the tool calls expected for it.

    In the extraction setting each expected tool call is given as an instance
    of a pydantic model.

    Note: `tool_example_to_messages` additionally looks up an optional
    "tool_outputs" entry, which is not declared here.
    """

    # The example text the information is extracted from.
    input: str
    # Pydantic model instances that should be extracted from `input`.
    tool_calls: List[BaseModel]


def tool_example_to_messages(example: Example) -> List[BaseMessage]:
    """Turn one few-shot example into the chat messages that demonstrate it.

    The returned list contains, in order:

    1) a HumanMessage holding the example's input text,
    2) an AIMessage with empty content whose ``additional_kwargs["tool_calls"]``
       holds one function call per expected extraction,
    3) one ToolMessage per tool call, confirming that the call was made correctly.

    The ToolMessages are included because some chat models are hyper-optimized
    for agents rather than for an extraction use case.

    The confirmation texts come from an optional ``"tool_outputs"`` entry of
    ``example``; when that entry is missing or empty, the same fixed sentence
    is used for every tool call.
    """
    conversation: List[BaseMessage] = [HumanMessage(content=example["input"])]

    # One OpenAI-style function-call payload per expected extraction; the
    # function name is the model's class name, the arguments its JSON dump.
    openai_tool_calls = [
        {
            "id": str(uuid.uuid4()),
            "type": "function",
            "function": {
                "name": type(extraction).__name__,
                "arguments": extraction.json(),
            },
        }
        for extraction in example["tool_calls"]
    ]
    conversation.append(
        AIMessage(content="", additional_kwargs={"tool_calls": openai_tool_calls})
    )

    confirmations = example.get("tool_outputs")
    if not confirmations:
        confirmations = ["You have correctly called this tool."] * len(
            openai_tool_calls
        )

    # Pair each confirmation with the id of the tool call it answers.
    conversation.extend(
        ToolMessage(content=text, tool_call_id=call["id"])
        for text, call in zip(confirmations, openai_tool_calls)
    )

    return conversation

**Examples**

In [10]:
# Few-shot examples as (input text, expected extraction) pairs.
# `air_conditioning` is never passed, so it keeps its default of None.
examples = [
    # Fully specified listing: every passed attribute is stated in the text.
    (
        "Beautiful two-bedroom, two-bathroom apartment located in downtown Chicago on the 15th floor, with elevator access and a secure parking spot.",
        Property(
            location="downtown Chicago",
            number_of_bedrooms=2,
            number_of_bathrooms=2,
            floor_number=15,
            elevator_access=True,
            parking=True
        ),
    ),
    # Partially specified listing: "no garage" is recorded as parking=False,
    # while attributes that are not mentioned stay None.
    (
        "Cozy cabin in the woods with one bedroom and a bathroom but no garage. Located near Lake Tahoe.",
        Property(
            location="near Lake Tahoe",
            number_of_bedrooms=1,
            number_of_bathrooms=1,
            floor_number=None,
            elevator_access=None,
            parking=False
        ),
    ),
    # Listing without room counts: only location and parking are filled in.
    (
        "Luxury villa with multiple balconies, located in Beverly Hills. Comes with private parking and exclusive amenities.",
        Property(
            location="Beverly Hills",
            number_of_bedrooms=None,  # Assume bedrooms not specified
            number_of_bathrooms=None,  # Assume bathrooms not specified
            floor_number=None,
            elevator_access=None,
            parking=True
        ),
    ),
    # Text that does not describe a property: every passed field is None.
    (
        "The weather is sunny and the beach is crowded today.",
        Property(
            location=None,
            number_of_bedrooms=None,
            number_of_bathrooms=None,
            floor_number=None,
            elevator_access=None,
            parking=None
        ),
    ),
    # Text that mentions "parking" but is about a car, not a property:
    # every passed field is None, including parking.
    (
        "The latest model of this car features an innovative parking assistant.",
        Property(
            location=None,
            number_of_bedrooms=None,
            number_of_bathrooms=None,
            floor_number=None,
            elevator_access=None,
            parking=None
        ),
    ),
]

In [11]:
# Flatten all examples into a single list of chat messages, keeping the
# order of the examples and of the messages within each example.
messages = [
    message
    for example_text, expected_property in examples
    for message in tool_example_to_messages(
        {"input": example_text, "tool_calls": [expected_property]}
    )
]

messages

[HumanMessage(content='Beautiful two-bedroom, two-bathroom apartment located in downtown Chicago on the 15th floor, with elevator access and a secure parking spot.'),
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '88dec37d-172f-47bd-ad72-9ffe56a975bc', 'type': 'function', 'function': {'name': 'Property', 'arguments': '{"location": "downtown Chicago", "number_of_bedrooms": 2, "number_of_bathrooms": 2, "floor_number": 15, "elevator_access": true, "parking": true, "air_conditioning": null}'}}]}, tool_calls=[{'name': 'Property', 'args': {'location': 'downtown Chicago', 'number_of_bedrooms': 2, 'number_of_bathrooms': 2, 'floor_number': 15, 'elevator_access': True, 'parking': True, 'air_conditioning': None}, 'id': '88dec37d-172f-47bd-ad72-9ffe56a975bc'}]),
 ToolMessage(content='You have correctly called this tool.', tool_call_id='88dec37d-172f-47bd-ad72-9ffe56a975bc'),
 HumanMessage(content='Cozy cabin in the woods with one bedroom and a bathroom but no garage. Located near

In [12]:
# Few-shot run: same transcript as before, now with the example messages
# filling the "examples" placeholder. This rebinds `property_schema`.
few_shot_inputs = {"text": transcript_string, "examples": messages}
property_schema = runnable.invoke(few_shot_inputs)

In [13]:
# Display the extraction obtained with the few-shot examples in the prompt.
property_schema

Property(location=None, number_of_bedrooms=3, number_of_bathrooms=2, floor_number=None, elevator_access=None, parking=True, air_conditioning=None)