In [1]:
import textwrap
from lyon_common import create_chain
from question_generator_model import SingleSelection, Code, ManySingleSelections
from langchain.output_parsers import PydanticOutputParser

In [2]:
def create_system_prompt(pydantic_object):
    """Return the Callisto system prompt for a given output schema.

    The prompt describes the teaching-assistant persona and the
    topic/difficulty protocol, and ends with the format instructions
    produced by a ``PydanticOutputParser`` built around ``pydantic_object``.

    Args:
        pydantic_object: Passed unchanged to ``PydanticOutputParser`` as its
            ``pydantic_object`` argument.

    Returns:
        str: The dedented prompt with ``{format_instructions}`` substituted.
    """
    # Ask the parser how the model should format its answer.
    format_instructions = PydanticOutputParser(
        pydantic_object=pydantic_object
    ).get_format_instructions()

    # The leading newline and trailing spaces inside the literal are part of
    # the prompt text; only the common 4-space margin is stripped by dedent.
    prompt_template = textwrap.dedent("""
    You are a smart, helpful teaching assistant chatbot named Callisto.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to graduate students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor you will be given a topic (string) and an
    expected difficulty level (integer)
    
    The difficulty will be a number between 1 and 3, with 1 corresponding to a request 
    for an easy question, and 3 for the most difficult question.
    
    If the professor asks you for another question and does not specify either a new topic 
    or a new difficulty, you must use the previous topic or difficulty.

    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified format with no extra words or content.
    
    {format_instructions}
    """)

    return prompt_template.format(format_instructions=format_instructions)
    

In [7]:
# Build the chain with a system prompt whose format instructions come from
# the ManySingleSelections schema; temperature is set to 0.1.
chat = create_chain(create_system_prompt(ManySingleSelections), temperature=0.1)

In [8]:
def generate_and_parse_question(rag_chain, query, pydantic_model):
    """Run a query through the chain and parse the result.

    Args:
        rag_chain: Callable invoked as ``rag_chain(query)``.
        query: Value passed unchanged to ``rag_chain``.
        pydantic_model: Object exposing ``parse(response)``; it receives
            whatever ``rag_chain`` returned.

    Returns:
        Whatever ``pydantic_model.parse`` returns for the chain response.
    """
    # The result was previously bound to the misspelled name ``reponse``, so
    # the next line always raised NameError.
    response = rag_chain(query)
    # NOTE(review): elsewhere in this notebook the chain returns a mapping with
    # an "output" key, and the question classes are built with ``Code(**dict)``.
    # Confirm that the classes passed here really provide ``parse`` and accept
    # the raw chain response.
    return pydantic_model.parse(response)
    

In [9]:
# Ask for 4 questions on DataFrame.set_index at difficulty 2; the whole
# request goes in under the "input" key.
res = chat({"input": "topic: DataFrame.set_index\ndifficulty: 2\nplease produce 4 questions"})

In [10]:
# Show the text stored under the "output" key of the result.
print(res["output"])

{
  "questions": [
    {
      "question_text": "What is the purpose of the index in pandas?",
      "difficulty": 2,
      "topics": [
        "DataFrame.set_index",
        "pandas",
        "indexing"
      ],
      "choices": [
        "The index in pandas is only used for labeling the rows",
        "The index in pandas is used for data alignment and ensuring proper alignment of data",
        "The index in pandas is used for aggregating data"
      ],
      "solution": 1
    },
    {
      "question_text": "What happens when the reset_index method is called without any arguments?",
      "difficulty": 2,
      "topics": [
        "DataFrame.set_index",
        "pandas",
        "indexing"
      ],
      "choices": [
        "The index is reset to go from zero to the total number of rows in the data frame",
        "The columns that were previously used for the index are returned back to be columns alongside the existing ones",
        "The data alignment is broken and the index i

OK, that's not quite the format we wanted. Let's see what happened in the intermediate steps.

In [21]:
# Dump every entry of the chat history as its repr, preceded by a run of
# blank lines so consecutive entries are easy to tell apart.
for message in res["chat_history"]:
    print("\n\n\n", repr(message), sep="\n")





AIMessage(content='', additional_kwargs={'function_call': {'name': 'python', 'arguments': '{\n  "question_text": "How would you set the \'id\' column as the index of a DataFrame called \'df\'?",\n  "difficulty": 2,\n  "topics": ["pandas", "DataFrame.set_index"],\n  "starting_code": "import pandas as pd\\n\\ndf = pd.DataFrame({\'id\': [1, 2, 3], \'name\': [\'Alice\', \'Bob\', \'Charlie\']})\\n\\n# Set the \'id\' column as the index\\n",\n  "solution": "df.set_index(\'id\', inplace=True)",\n  "setup_code": "import pandas as pd",\n  "test_code": "assert df.index.name == \'id\'\\nassert df.index.tolist() == [1, 2, 3]"\n}'}})




FunctionMessage(content='python is not a valid tool, try one of [search_course_content].', name='python')




AIMessage(content='', additional_kwargs={'function_call': {'name': 'search_course_content', 'arguments': '{\n  "__arg1": "DataFrame.set_index"\n}'}})




FunctionMessage(content='[Document(page_content="And we\'ll have a few words of advice for how you 

Looks like the AI did return an appropriate question first

But then it tried to call an agent tool called `python` on it

Then langchain told the AI the only tool it could use is `search_course_content`.

It did use `search_course_content` to do a retrieval

Then it generated a question in the wrong format.

We can access the original correctly formatted question as follows

In [22]:
# tool_input of the first element of the first intermediate step.
res["intermediate_steps"][0][0].tool_input

{'question_text': "How would you set the 'id' column as the index of a DataFrame called 'df'?",
 'difficulty': 2,
 'topics': ['pandas', 'DataFrame.set_index'],
 'starting_code': "import pandas as pd\n\ndf = pd.DataFrame({'id': [1, 2, 3], 'name': ['Alice', 'Bob', 'Charlie']})\n\n# Set the 'id' column as the index\n",
 'solution': "df.set_index('id', inplace=True)",
 'setup_code': 'import pandas as pd',
 'test_code': "assert df.index.name == 'id'\nassert df.index.tolist() == [1, 2, 3]"}

This is a dict (already parsed JSON)

We can load it up as a `Code` (question type) Pydantic object as follows

In [26]:
# Unpack the tool_input dict as keyword arguments to build a Code question.
Code(**res["intermediate_steps"][0][0].tool_input)

How would you set the 'id' column as the index of a DataFrame called 'df'?

```python
import pandas as pd

df = pd.DataFrame({'id': [1, 2, 3], 'name': ['Alice', 'Bob', 'Charlie']})

# Set the 'id' column as the index

```

**Solution**

```python
df.set_index('id', inplace=True)
```

**Test Suite**

```python
import pandas as pd

df.set_index('id', inplace=True)

assert df.index.name == 'id'
assert df.index.tolist() == [1, 2, 3]
```