In [2]:
import textwrap
from lyon_common import create_chain, report_on_message
from question_generator_model_Copy1 import SingleSelection, Code, AnyQuestion
from langchain.output_parsers import PydanticOutputParser

**NOTE** I have updated `lyon_common.py` so that the `create_chain` function accepts a few more arguments:

- `model_name` now defaults to the GPT-3.5 Turbo model released on 2023-11-06.
- `model_kwargs`: a dictionary of OpenAI-specific parameters we can set. By default it now instructs OpenAI to use the [new `json_object` response format](https://platform.openai.com/docs/guides/text-generation/json-mode), which constrains the tokens available at each generation step so that only valid JSON can be produced.
- `verbose` is a new keyword argument that is set to `False` by default.

This notebook shows how we can use these updates to generate valid, Pydantic-ready JSON.

In [5]:
def create_system_prompt(pydantic_object):
    common_system_prompt = textwrap.dedent("""
    You are a smart, helpful teaching assistant chatbot named Callisto.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to graduate students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor you will be given a topic (string) and an
    expected difficulty level (integer)
    
    The difficulty will be a number between 1 and 3, with 1 corresponding to a request 
    for an easy question, and 3 for the most difficult question.
    
    If the professor asks you for another question and does not specify either a new topic 
    or a new difficulty, you must use the previous topic or difficulty.

    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified JSON format with no extra words or content.

    You must always produce exactly one JSON object.
    
    {format_instructions}
    """)

    parser = PydanticOutputParser(pydantic_object=pydantic_object)
    return common_system_prompt.format(format_instructions=parser.get_format_instructions())
    

In [5]:
def generate_and_parse_question(
    pydantic_model,
    query,
    temperature=0.1,
    model_name="gpt-4-1106-preview",
    verbose=True,
):
    """Generate one practice question and parse it into ``pydantic_model``.

    A chain is created with the system prompt for ``pydantic_model``, called
    with ``query``, summarised via ``report_on_message``, and the ``"output"``
    entry of the response is parsed with a ``PydanticOutputParser``.

    Args:
        pydantic_model: Schema handed to both ``create_system_prompt`` and
            ``PydanticOutputParser`` (e.g. ``Code``).
        query: Request text sent to the chain, e.g.
            ``"topic: pandas time series rolling moments\\ndifficulty: 2"``.
        temperature: Forwarded to ``create_chain``. Defaults to ``0.1``.
        model_name: Forwarded to ``create_chain``. Defaults to
            ``"gpt-4-1106-preview"``.
        verbose: Forwarded to ``create_chain``. Defaults to ``True``.

    Returns:
        Whatever ``PydanticOutputParser.parse`` returns for the response
        output (presumably an instance of ``pydantic_model`` -- confirm
        against the langchain documentation).

    Raises:
        Any exception raised by the chain call or by the parser when the
        output does not match the schema propagates unchanged.
    """
    # Build the parser before the chain call so an unusable schema fails
    # before any model request is made.
    parser = PydanticOutputParser(pydantic_object=pydantic_model)

    rag_chain = create_chain(
        create_system_prompt(pydantic_model),
        temperature=temperature,
        verbose=verbose,
        model_name=model_name,
    )
    response = rag_chain(query)
    report_on_message(response)  # print a summary of what was produced

    return parser.parse(response["output"])

In [4]:
# Run the cell that defines `generate_and_parse_question` before this one;
# executing this cell first raises NameError.
generate_and_parse_question(
    Code,
    "topic: pandas time series rolling moments\ndifficulty: 2",
)

NameError: name 'generate_and_parse_question' is not defined