In [1]:
from kor.extraction import create_extraction_chain
from kor.nodes import Object, Text, Number
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI

In [2]:
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
    max_tokens=2000,
)

In [8]:
from_address = Object(
    id="from_address",
    description="Person moved away from this address",
    attributes=[
        Text(id="street"),
        Text(id="city"),
        Text(id="state"),
        Text(id="zipcode"),
        Text(id="country", description="A country in the world; e.g., France."),
    ],
    examples=[
        (
            "100 Main St, Boston, MA, 23232, USA",
            {
                "street": "100 Marlo St",
                "city": "Boston",
                "state": "MA",
                "zipcode": "23232",
                "country": "USA",
            },
        )
    ],
)
print(from_address)
to_address = from_address.replace(
    id="to_address", description="Address to which the person is moving"
)
print(to_address)
schema = Object(
    id="information",
    attributes=[
        Text(
            id="person_name",
            description="The full name of the person or partial name",
            examples=[("John Smith was here", "John Smith")],
        ),
        from_address,
        to_address,
    ],
    many=True,
)

id='from_address' description='Person moved away from this address' many=False attributes=[Text(id='street', description='', many=False, examples=()), Text(id='city', description='', many=False, examples=()), Text(id='state', description='', many=False, examples=()), Text(id='zipcode', description='', many=False, examples=()), Text(id='country', description='A country in the world; e.g., France.', many=False, examples=())] examples=[('100 Main St, Boston, MA, 23232, USA', {'street': '100 Marlo St', 'city': 'Boston', 'state': 'MA', 'zipcode': '23232', 'country': 'USA'})]
id='to_address' description='Address to which the person is moving' many=False attributes=[Text(id='street', description='', many=False, examples=()), Text(id='city', description='', many=False, examples=()), Text(id='state', description='', many=False, examples=()), Text(id='zipcode', description='', many=False, examples=()), Text(id='country', description='A country in the world; e.g., France.', many=False, examples=(

In [9]:
chain = create_extraction_chain(
    llm, schema, encoder_or_encoder_class="json", input_formatter=None
)

In [10]:
chain.predict_and_parse(
    text="Alice Doe moved from New York to Boston, MA while Bob Smith did the opposite."
)["data"]

{'information': [{'person_name': 'Alice Doe',
   'to_address': {'city': 'Boston', 'state': 'MA'}},
  {'person_name': 'Bob Smith', 'to_address': {'city': 'New York'}}]}

In [11]:
chain.predict_and_parse(
    text=(
        "Alice Doe and Bob Smith moved from New York to Boston. Andrew was 12 years"
        " old. He also moved to Boston. So did Joana and Paul. Betty did the opposite."
    )
)["data"]

{'information': [{'person_name': 'Alice Doe',
   'to_address': {'city': 'Boston', 'state': 'MA'}},
  {'person_name': 'Bob Smith',
   'to_address': {'city': 'Boston', 'state': 'MA'}},
  {'person_name': 'Andrew', 'to_address': {'city': 'Boston', 'state': 'MA'}},
  {'person_name': 'Joana', 'to_address': {'city': 'Boston', 'state': 'MA'}},
  {'person_name': 'Paul', 'to_address': {'city': 'Boston', 'state': 'MA'}}]}

In [12]:
from_address = Object(
    id="from_address",
    description="Person moved away from this address",
    attributes=[
        Text(id="street"),
        Text(id="city"),
        Text(id="state"),
        Text(id="zipcode"),
        Text(id="country", description="A country in the world; e.g., France."),
    ],
    examples=[
        (
            "100 Main St, Boston,MA, 23232, USA",
            {
                "street": "100 Marlo St",
                "city": "Boston",
                "state": "MA",
                "zipcode": "23232",
                "country": "USA",
            },
        )
    ],
    many=True,  # <-- PLEASE NOTE THIS CHANGE
)

to_address = from_address.replace(
    id="to_address", description="Address to which the person is moving"
)

schema = Object(
    id="information",
    attributes=[
        Text(
            id="person_name",
            description="The full name of the person or partial name",
            examples=[("John Smith was here", "John Smith")],
        ),
        from_address,
        to_address,
    ],
    many=True,
)

In [13]:
chain = create_extraction_chain(llm, schema, encoder_or_encoder_class="json")

In [16]:
print(chain.prompt.format_prompt(text="[user input]").to_string())

Your goal is to extract structured information from the user's input that matches the form described below. When extracting information please make sure it matches the type information exactly. Do not add any attributes that do not appear in the schema shown below.

```TypeScript

information: Array<{ // 
 person_name: string // The full name of the person or partial name
 to_address: Array<{ // Address to which the person is moving
  street: string // 
  city: string // 
  state: string // 
  zipcode: string // 
  country: string // A country in the world; e.g., France.
 }>
 to_address: Array<{ // Address to which the person is moving
  street: string // 
  city: string // 
  state: string // 
  zipcode: string // 
  country: string // A country in the world; e.g., France.
 }>
}>
```


Please output the extracted information in JSON format. Do not output anything except for the extracted information. Do not add any clarifying information. Do not add any fields that are not in the sche

In [14]:
chain.predict_and_parse(
    text=(
        "Alice Doe and Bob Smith moved from New York to Boston. Bob later moved to LA."
    )
)["data"]

{'information': [{'person_name': 'Alice Doe',
   'to_address': [{'street': None,
     'city': 'Boston',
     'state': None,
     'zipcode': None,
     'country': None}]},
  {'person_name': 'Bob Smith',
   'to_address': [{'street': None,
     'city': 'LA',
     'state': None,
     'zipcode': None,
     'country': None}]},
  {'person_name': None,
   'to_address': [{'street': None,
     'city': 'New York',
     'state': None,
     'zipcode': None,
     'country': None}]},
  {'person_name': None,
   'to_address': [{'street': None,
     'city': 'Boston',
     'state': None,
     'zipcode': None,
     'country': None}]}]}