In [1]:
from kor.extraction import create_extraction_chain
from kor.nodes import Object, Text, Number
from langchain.chat_models import ChatOpenAI

# Schema
Kor requires you to specify a schema describing what you want parsed, optionally accompanied by some examples.

We’ll start off by specifying a very simple schema.

In [2]:
# Schema for "person" records; the only attribute extracted is a first name.
schema = Object(
    id="person",
    description="Personal information",
    attributes=[
        Text(id="first_name", description="The first name of a person."),
    ],
    # One worked example: an input sentence paired with the records expected from it.
    examples=[
        (
            "Alice and Bob are friends",
            [{"first_name": "Alice"}, {"first_name": "Bob"}],
        ),
    ],
    # many=True: as in the example above, one input can yield several person records.
    many=True,
)

# Langchain
Instantiate a LangChain chat model and use it to create an extraction chain.

In [3]:
from langchain.llms import OpenAI

In [7]:
# Chat model that the extraction chain will call.
llm = ChatOpenAI(
    # NOTE(review): temperature=0 is presumably chosen to keep outputs as
    # repeatable as possible -- confirm against the OpenAI API documentation.
    temperature=0,
    max_tokens=2000,
    model_name="gpt-3.5-turbo",
)

In [8]:
# Build the extraction chain from the chat model and the schema defined above.
chain = create_extraction_chain(llm, schema)

# Extract
With a chain and a schema defined, we’re ready to extract data.

In [9]:
# Run the extraction on a sample sentence; ["data"] selects only the parsed records.
chain.predict_and_parse(text="My name is Bobby. My brother's name Joe.")["data"]

{'person': [{'first_name': 'Bobby'}, {'first_name': 'Joe'}]}

# The Prompt
And here’s the prompt that the chain sends to the LLM, rendered with `[user input]` as a placeholder for the input text.

In [12]:
# Render the chain's prompt with a placeholder instead of real input text and print it.
print(chain.prompt.format_prompt(text="[user input]").to_string())

Your goal is to extract structured information from the user's input that matches the form described below. When extracting information please make sure it matches the type information exactly. Do not add any attributes that do not appear in the schema shown below.

```TypeScript

person: Array<{ // Personal information
 first_name: string // The first name of a person.
}>
```


Please output the extracted information in CSV format in Excel dialect. Please use a | as the delimiter. 
 Do NOT add any clarifying information. Output MUST follow the schema above. Do NOT add any additional columns that do not appear in the schema.



Input: Alice and Bob are friends
Output: first_name
Alice
Bob

Input: [user input]
Output:


# With pydantic

In [13]:
from kor import from_pydantic
from typing import List, Optional
from pydantic import BaseModel, Field

In [14]:
# Pydantic model describing a single person; it is handed to from_pydantic
# below to derive the extraction schema and a validator.
class Person(BaseModel):
    # The only field; its description is supplied through Field(...).
    first_name: str = Field(description="The first name of a person")

In [15]:
# Derive a schema and a validator from the Person model.
# NOTE: this rebinds `schema` (and, below, `chain`) from the earlier cells.
schema, validator = from_pydantic(
    Person,
    many=True,  # as in the example below, one input can yield several records
    description="Personal Information",  # object-level description
    examples=[
        # Object-level example: input text paired with the expected records.
        (
            "Alice and Bob are friends",
            [{"first_name": "Alice"}, {"first_name": "Bob"}],
        ),
    ],
)

# Recreate the chain with the validator attached.
chain = create_extraction_chain(llm, schema, validator=validator)

In [16]:
# Show the complete result dictionary this time (no ["data"] indexing).
chain.predict_and_parse(text="My name is Bobby. My brother's name Joe.")

{'data': {'person': [{'first_name': 'Bobby'}, {'first_name': 'Joe'}]},
 'raw': 'first_name\nBobby\nJoe',
 'errors': [],
 'validated_data': [Person(first_name='Bobby'), Person(first_name='Joe')]}