### Resolving Data Quality Issues

In [4]:
import os
from dotenv import load_dotenv
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

In [2]:
# Runs before the os.environ lookup below; presumably this is what makes
# OPENAI_API_KEY available from a local .env file -- TODO confirm.
load_dotenv()

# Indexing os.environ (instead of .get) raises KeyError in this cell when the
# key is missing, rather than failing later at request time.
openai_api_key = os.environ["OPENAI_API_KEY"]
# Model identifier and completion-length cap handed to ChatOpenAI when the
# client is constructed.
model_name = "gpt-3.5-turbo-0125"
max_tokens = 1000

In [3]:
# Chat model client used by the later cells. Credentials, model name and the
# token cap come from the configuration cell; only the temperature is set here.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model=model_name,
    max_tokens=max_tokens,
    temperature=0.5,
)

In [5]:
# Fields expected in the model's structured reply: "bad_string" (the poorly
# formatted input) and "good_string" (the reformatted version).
response_schema = [
    ResponseSchema(
        name="bad_string",
        description="This is poorly formatted user response",
    ),
    ResponseSchema(
        name="good_string",
        description="This is your response, after properly formatted.",
    ),
]

In [6]:
# Build the parser from the schema list; the same parser is used later to
# parse the model's reply.
output_parser = StructuredOutputParser.from_response_schemas(response_schema)
# Instruction text generated by the parser; it is bound to the prompt's
# {format_instructions} placeholder when the PromptTemplate is created.
format_instructions = output_parser.get_format_instructions()

In [7]:
# Prompt skeleton with two placeholders: {format_instructions} (filled from the
# output parser) and {user_input} (the string to clean up). The empty first and
# last entries keep the leading and trailing newline of the prompt text.
template = "\n".join(
    [
        "",
        "You shall be given a poorly formatted string from a user.",
        "Reformat it and make sure all the words are spelled correctly.",
        "",
        "{format_instructions}",
        "",
        "% USER INPUT:",
        "{user_input}",
        "",
        "YOUR RESPONSE:",
        "",
    ]
)

In [8]:
# The format instructions are bound as a partial variable, so the only value
# left to supply when formatting is user_input.
prompt = PromptTemplate(
    template=template,
    input_variables=["user_input"],
    partial_variables={"format_instructions": format_instructions},
)


In [9]:
# Render the prompt with a deliberately misspelled sample sentence.
prompt_value = prompt.format(user_input="Welcom to Califonya")

# Send the rendered prompt to the chat model; the reply's .content is parsed
# in the next cell.
llm_output = llm.invoke(prompt_value)

In [10]:
# Parse the reply text with the schema-based parser; the parsed result is
# indexable by the schema field names.
result = output_parser.parse(llm_output.content)
# Bare last expression: displays only the corrected string as the cell output.
result["good_string"]

'Welcome to California'

In [11]:
# Display the full parsed result to compare the original and corrected strings.
result

{'bad_string': 'Welcom to Califonya', 'good_string': 'Welcome to California'}