In [197]:
%pip install --user --quiet psycopg2-binary pgvector

Note: you may need to restart the kernel to use updated packages.


In [4]:
from dotenv import load_dotenv
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.messages import SystemMessage
from langchain.vectorstores.pgvector import PGVector
import textwrap
from langchain.output_parsers import PydanticOutputParser
from question_generator_model import SingleSelection, Code, AnyQuestion, FillInBlank, MultipleSelection

load_dotenv("/home/jupyteach-msda/jupyteach-ai/.env")

COLLECTION_NAME = "documents"
DB_CONNECTION = "postgresql://postgres:supa-jupyteach@192.168.0.77:54328/postgres"


def get_vectorstore():
    embeddings = OpenAIEmbeddings()

    db = PGVector(embedding_function=embeddings,
        collection_name=COLLECTION_NAME,
        connection_string=DB_CONNECTION,
    )
    return db

In [7]:
#Function that takes the input and returns the output from the retreival agent
def create_chain(
        system_message_text, 
        temperature=0, 
        model_name="gpt-3.5-turbo-1106", 
        model_kwargs={"response_format": {"type": "json_object"}},
        verbose=False,
    ):
    # step 1: create llm
    retriever = get_vectorstore().as_retriever()
    llm = ChatOpenAI(temperature=temperature, model_name=model_name, model_kwargs=model_kwargs, verbose=verbose)
    
    # step 2: create retriever tool
    tool = create_retriever_tool(
        retriever,
        "search_course_content",
        "Searches and returns documents regarding the contents of the course and notes from the instructor.",
    )
    tools = [tool]

    # step 3: create system message from the text passed in as an argument
    system_message = SystemMessage(content=system_message_text)

    # return the chain
    return create_conversational_retrieval_agent(
        llm=llm, 
        tools=tools, 
        verbose=False, 
        system_message=system_message
    )

In [219]:
#Function to check if the retrieval is happening
def report_on_message(msg):
    print("any intermediate_steps?: ", len(msg["intermediate_steps"]) > 0)
    print("output:\n", msg["output"])
    print("\n\n")

In [201]:
#Fucntion that returns the system prompt with the format of the question requested 
def create_system_prompt(pydantic_object):
    common_system_prompt = textwrap.dedent("""
    You are a smart, helpful teaching assistant chatbot named AcademiaGPT.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to college students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor you  will be given a topic (string) and an
    expected difficulty level (integer)

    
    
    The difficulty will be a number between 1 and 3, with 1 corresponding to a request 
    for an easy question, and 3 for the most difficult question.
    
    If the professor asks you for a question and does not specify either a new topic 
    or a new difficulty or both, you must use the previous topic or difficulty or both.

    Occasionaly the professor may ask you to do something like produce a similar question,
    or try again and make it more difficult or easy. You need to assist the professor with the same.
    
    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified JSON format with no extra words or content.

    You must always produce exactly one JSON object.
    
    {format_instructions}
    """)

    parser = PydanticOutputParser(pydantic_object=pydantic_object)
    return common_system_prompt.format(format_instructions=parser.get_format_instructions())

In [202]:
#Fucntion that takes the input, call the retreiver agent, and returns the parsed output
def generate_and_parse_question(pydantic_model, query):
    rag_chain = create_chain(create_system_prompt(pydantic_model), temperature=0.1, verbose=True, model_name="gpt-4-1106-preview")
    response = rag_chain(query)
    report_on_message(response)  # print a summary of what was produced
    parser = PydanticOutputParser(pydantic_object=pydantic_model)
    return parser.parse(response["output"])

In [203]:
from pydantic import ValidationError
import json
from json.decoder import JSONDecodeError

# Function that takes the input, calls the retriever agent, and returns the parsed output
def generate_and_parse_question(pydantic_model, query):
    rag_chain = create_chain(create_system_prompt(pydantic_model), temperature=0.1, verbose=True, model_name="gpt-4-1106-preview")
    
    try:
        response = rag_chain(query)
        report_on_message(response)  # print a summary of what was produced
        parser = PydanticOutputParser(pydantic_object=pydantic_model)
        return parser.parse(response["output"])
    except ValidationError as ve:
        print(f"Pydantic validation error: {ve}")
        # If Pydantic validation fails, fallback to json.loads
        return json.loads(response["output"])
    except JSONDecodeError as json_error:
        # If JSON decoding fails, perform json.loads and inform the caller about the error
        result_output = json.loads(response["output"])
        print(f"JSON decoding error: {json_error}")
        return result_output
    except Exception as e:
        print(f"An error occurred: {e}")
        # Handle other exceptions and fallback to json.loads
        return json.loads(response["output"])

'''# Example usage
try:
    result = generate_and_parse_question(YourPydanticModel, "Your Query")
    # Continue processing the result as needed
except Exception as e:
    print(f"Error processing question: {e}")
    # Handle the error, log, or notify the caller
'''

'# Example usage\ntry:\n    result = generate_and_parse_question(YourPydanticModel, "Your Query")\n    # Continue processing the result as needed\nexcept Exception as e:\n    print(f"Error processing question: {e}")\n    # Handle the error, log, or notify the caller\n'

In [204]:
try:
    generate_and_parse_question(FillInBlank, "Make the question more difficult")
except Exception as e:
    print(f"An error occurred: {e}")

any intermediate_steps?:  False
output:
 {
  "error": "No previous question provided to increase difficulty."
}



Pydantic validation error: 7 validation errors for FillInBlank
question_text
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
difficulty
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
topics
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
starting_code
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
solution
  F

In [205]:
generate_and_parse_question(FillInBlank, "topic: pandas groupby\ndifficulty: 2")

any intermediate_steps?:  True
output:
 {
  "question_text": "Given the following DataFrame `df`, use the `groupby` method to calculate the mean of column 'B' for each unique value in column 'A'. Fill in the blanks to complete the code.\n\n```python\ndf = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 1, 2], 'B': [4, 5, 6, 7, 8, 9, 10, 11]})\n```\n",
  "difficulty": 2,
  "topics": ["pandas", "groupby", "data analysis"],
  "starting_code": "grouped = df.groupby('___X')\nmean_values = grouped['___X'].___X()",
  "solution": ["A", "B", "mean"],
  "setup_code": "import pandas as pd\ndf = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 1, 2], 'B': [4, 5, 6, 7, 8, 9, 10, 11]})",
  "test_code": "assert mean_values.tolist() == [7.0, 8.0]"
}





Given the following DataFrame `df`, use the `groupby` method to calculate the mean of column 'B' for each unique value in column 'A'. Fill in the blanks to complete the code.

```python
df = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 1, 2], 'B': [4, 5, 6, 7, 8, 9, 10, 11]})
```


```python
grouped = df.groupby('___X')
mean_values = grouped['___X'].___X()
```

**Solution**

[A, B, mean]
```

**Rendered Solution**

```python
grouped = df.groupby('A')
mean_values = grouped['B'].mean()
```

**Test Suite**

```python
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1, 2, 1, 2, 1, 2], 'B': [4, 5, 6, 7, 8, 9, 10, 11]})

grouped = df.groupby('A')
mean_values = grouped['B'].mean()

assert mean_values.tolist() == [7.0, 8.0]
```

In [206]:
generate_and_parse_question(FillInBlank, "Give me one more question on the same")

any intermediate_steps?:  False
output:
 {
  "question_text": "Suppose you have already executed the following code:\n\n```python\nimport numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n```\n\nFill in the blanks below to solve the matrix equation $Ax = b$ for $x$",
  "difficulty": 2,
  "topics": ["linear algebra", "regression", "numpy"],
  "starting_code": "from scipy.linalg import ___X\n\nx = ___X(A, ___X)",
  "solution": ["solve", "solve", "b"],
  "setup_code": "import numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n",
  "test_code": "assert np.allclose(x, [22, -6])"
}





Suppose you have already executed the following code:

```python
import numpy as np

A = np.array([[1, 2], [3, 4]])
b = np.array([10, 42])
```

Fill in the blanks below to solve the matrix equation $Ax = b$ for $x$

```python
from scipy.linalg import ___X

x = ___X(A, ___X)
```

**Solution**

[solve, solve, b]
```

**Rendered Solution**

```python
from scipy.linalg import solve

x = solve(A, b)
```

**Test Suite**

```python
import numpy as np

A = np.array([[1, 2], [3, 4]])
b = np.array([10, 42])


from scipy.linalg import solve

x = solve(A, b)

assert np.allclose(x, [22, -6])
```

In [207]:
generate_and_parse_question(FillInBlank, "But I asked for the question on pandas groupby")

any intermediate_steps?:  True
output:
 {
    "question_text": "Given the following DataFrame `df`, use the `groupby` method to calculate the mean value of the `score` column for each unique `group`.\n\n```python\ndf = pd.DataFrame({\n    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],\n    'score': [23, 45, 56, 67, 89, 90, 34, 56]\n})\n```\n\nFill in the blanks to complete the task:\n\n```python\ndf.groupby('___X').___X.mean()\n```",
    "difficulty": 1,
    "topics": ["pandas", "data analysis", "groupby"],
    "starting_code": "df.groupby('___X').___X.mean()",
    "solution": ["group", "score"],
    "setup_code": "import pandas as pd\ndf = pd.DataFrame({\n    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],\n    'score': [23, 45, 56, 67, 89, 90, 34, 56]\n})",
    "test_code": "assert df.groupby('group')['score'].mean().equals(pd.Series([50.5, 64.5], index=['A', 'B']))"
}





Given the following DataFrame `df`, use the `groupby` method to calculate the mean value of the `score` column for each unique `group`.

```python
df = pd.DataFrame({
    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    'score': [23, 45, 56, 67, 89, 90, 34, 56]
})
```

Fill in the blanks to complete the task:

```python
df.groupby('___X').___X.mean()
```

```python
df.groupby('___X').___X.mean()
```

**Solution**

[group, score]
```

**Rendered Solution**

```python
df.groupby('group').score.mean()
```

**Test Suite**

```python
import pandas as pd
df = pd.DataFrame({
    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    'score': [23, 45, 56, 67, 89, 90, 34, 56]
})

df.groupby('group').score.mean()

assert df.groupby('group')['score'].mean().equals(pd.Series([50.5, 64.5], index=['A', 'B']))
```

In [208]:
try:
    generate_and_parse_question(FillInBlank, "give me one more questions ")
except Exception as e:
    print(f"An error occurred: {e}")

any intermediate_steps?:  False
output:
 {
  "error": "No previous topic or difficulty level provided. Please specify a topic and a difficulty level for the question."
}



Pydantic validation error: 7 validation errors for FillInBlank
question_text
  Field required [type=missing, input_value={'error': 'No previous to...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
difficulty
  Field required [type=missing, input_value={'error': 'No previous to...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
topics
  Field required [type=missing, input_value={'error': 'No previous to...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
starting_code
  Field required [type=missing, input_value={'error': 'No previous to...evel for the question.'}, input_type=dict]
    For further information vi

In [209]:
try:
    generate_and_parse_question(MultipleSelection, "Give me 2 more questions on the previous topic with diffculty 1 ")
except Exception as e:
    print(f"An error occurred: {e}")


any intermediate_steps?:  True
output:
 {
  "question_text": "What are the three main components of computational social science as discussed in the lecture?",
  "difficulty": 1,
  "topics": ["computational social science"],
  "choices": [
    "Data, Model, and Prior Beliefs",
    "Hypothesis, Experiment, and Conclusion",
    "Observation, Theory, and Law"
  ],
  "solution": [0]
}
{
  "question_text": "In the context of computational social science, what role do prior beliefs play when considering model parameters?",
  "difficulty": 1,
  "topics": ["computational social science"],
  "choices": [
    "They are used to validate the final results of the model.",
    "They guide the selection of parameters that match both the data and our understanding of the world.",
    "They are irrelevant to the model and should be ignored."
  ],
  "solution": [1]
}



An error occurred: Failed to parse MultipleSelection from completion {
  "question_text": "What are the three main components of comput

In [210]:
generate_and_parse_question(MultipleSelection, "Give me an easier question ")

any intermediate_steps?:  False
output:
 {
  "description": "Please provide the topic and the expected difficulty level for the question."
}



Pydantic validation error: 5 validation errors for MultipleSelection
question_text
  Field required [type=missing, input_value={'description': 'Please p...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
difficulty
  Field required [type=missing, input_value={'description': 'Please p...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
topics
  Field required [type=missing, input_value={'description': 'Please p...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
choices
  Field required [type=missing, input_value={'description': 'Please p...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.d

{'description': 'Please provide the topic and the expected difficulty level for the question.'}

## Prompt 2

In [220]:
#Fucntion that returns the system prompt with the format of the question requested 
def create_system_prompt1(pydantic_object):
    common_system_prompt = textwrap.dedent("""level
    You are a smart, helpful teaching assistant chatbot named AcademiaGPT.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to college students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor, you  will be given a topic and an
    expected difficulty level (integer) or (string). 

    If the difficulty is not given assume the difficulty level to be the previously used difficulty level.
    
    If you are asked to give similar, easier, or another question, the user wants you to use the same topic and difficulty 
    level that you used to generate the previous question.

    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified JSON format with no extra words or content.

    You must always produce exactly one JSON object.
    
    {format_instructions}
    """)

    parser = PydanticOutputParser(pydantic_object=pydantic_object)
    return common_system_prompt.format(format_instructions=parser.get_format_instructions())

In [221]:
from pydantic import ValidationError
import json
from json.decoder import JSONDecodeError

# Function that takes the input, calls the retriever agent, and returns the parsed output
def generate_and_parse_question1(pydantic_model, query):
    rag_chain = create_chain(create_system_prompt1(pydantic_model), temperature=0.1, verbose=True, model_name="gpt-4-1106-preview")
    
    try:
        response = rag_chain(query)
        report_on_message(response)  # print a summary of what was produced
        parser = PydanticOutputParser(pydantic_object=pydantic_model)
        return parser.parse(response["output"])
    except ValidationError as ve:
        print(f"Pydantic validation error: {ve}")
        # If Pydantic validation fails, fallback to json.loads
        return json.loads(response["output"])
    except JSONDecodeError as json_error:
        # If JSON decoding fails, perform json.loads and inform the caller about the error
        result_output = json.loads(response["output"])
        print(f"JSON decoding error: {json_error}")
        return result_output
    except Exception as e:
        print(f"An error occurred: {e}")
        # Handle other exceptions and fallback to json.loads
        return json.loads(response["output"])

'''# Example usage
try:
    result = generate_and_parse_question(YourPydanticModel, "Your Query")
    # Continue processing the result as needed
except Exception as e:
    print(f"Error processing question: {e}")
    # Handle the error, log, or notify the caller
'''

'# Example usage\ntry:\n    result = generate_and_parse_question(YourPydanticModel, "Your Query")\n    # Continue processing the result as needed\nexcept Exception as e:\n    print(f"Error processing question: {e}")\n    # Handle the error, log, or notify the caller\n'

In [213]:
generate_and_parse_question1(FillInBlank, "But I asked for the question on pandas groupby")

any intermediate_steps?:  True
output:
 {
  "question_text": "Given the following DataFrame `df`, fill in the blanks to group the data by the 'A' column and calculate the mean of the 'B' column for each group.",
  "difficulty": 2,
  "topics": ["pandas", "data analysis", "groupby"],
  "starting_code": "import pandas as pd\ndf = pd.DataFrame({'A': [1, 2, 1, 2], 'B': [5, 3, 4, 8]})\ngrouped = df.groupby('___X')\nmean_b = grouped['___X'].___X()",
  "solution": ["A", "B", "mean"],
  "setup_code": "import pandas as pd\ndf = pd.DataFrame({'A': [1, 2, 1, 2], 'B': [5, 3, 4, 8]})",
  "test_code": "assert mean_b.tolist() == [4.5, 5.5]"
}





Given the following DataFrame `df`, fill in the blanks to group the data by the 'A' column and calculate the mean of the 'B' column for each group.

```python
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1, 2], 'B': [5, 3, 4, 8]})
grouped = df.groupby('___X')
mean_b = grouped['___X'].___X()
```

**Solution**

[A, B, mean]
```

**Rendered Solution**

```python
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1, 2], 'B': [5, 3, 4, 8]})
grouped = df.groupby('A')
mean_b = grouped['B'].mean()
```

**Test Suite**

```python
import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1, 2], 'B': [5, 3, 4, 8]})

import pandas as pd
df = pd.DataFrame({'A': [1, 2, 1, 2], 'B': [5, 3, 4, 8]})
grouped = df.groupby('A')
mean_b = grouped['B'].mean()

assert mean_b.tolist() == [4.5, 5.5]
```

generate_and_parse_question1(ManySingleSelections, "Generate a question on plotting ")

In [170]:
generate_and_parse_question1(SingleSelection, "Generate a question on plotting with a difficulty of one ")

any intermediate_steps?:  False
output:
 {
  "question_text": "Which Python library is commonly used for creating simple plots and visualizations?",
  "difficulty": 1,
  "topics": ["plotting", "python", "libraries"],
  "choices": [
    "NumPy",
    "Pandas",
    "Matplotlib",
    "TensorFlow"
  ],
  "solution": 2
}





Which Python library is commonly used for creating simple plots and visualizations?

- [ ] NumPy
- [ ] Pandas
- [x] Matplotlib
- [ ] TensorFlow


In [224]:
generate_and_parse_question1(MultipleSelection,"Give a question on web scrapping and difficulty 2")

any intermediate_steps?:  True
output:
 {
  "question_text": "When performing web scraping, which of the following steps are typically involved in the process?",
  "difficulty": 2,
  "topics": ["web scraping"],
  "choices": [
    "Identifying the data you want to scrape by visually inspecting the website",
    "Writing a Python function to randomly guess the structure of the data",
    "Using browser developer tools to inspect the elements containing the data",
    "Sending an email to the website's support team to request the data"
  ],
  "solution": [0, 2]
}





When performing web scraping, which of the following steps are typically involved in the process?

- [x] Identifying the data you want to scrape by visually inspecting the website
- [ ] Writing a Python function to randomly guess the structure of the data
- [x] Using browser developer tools to inspect the elements containing the data
- [ ] Sending an email to the website's support team to request the data


In [172]:
generate_and_parse_question1(FillInBlank,"Give another question on the same topic")

any intermediate_steps?:  False
output:
 {
  "question_text": "Suppose you have already executed the following code:\n\n```python\nimport numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n```\n\nFill in the blanks below to compute the determinant of matrix $A$ using NumPy\n",
  "difficulty": 2,
  "topics": ["linear algebra", "regression", "numpy"],
  "starting_code": "det_A = np.linalg.___X(A)",
  "solution": ["det"],
  "setup_code": "import numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n",
  "test_code": "assert np.isclose(det_A, -2)"
}





Suppose you have already executed the following code:

```python
import numpy as np

A = np.array([[1, 2], [3, 4]])
b = np.array([10, 42])
```

Fill in the blanks below to compute the determinant of matrix $A$ using NumPy


```python
det_A = np.linalg.___X(A)
```

**Solution**

[det]
```

**Rendered Solution**

```python
det_A = np.linalg.det(A)
```

**Test Suite**

```python
import numpy as np

A = np.array([[1, 2], [3, 4]])
b = np.array([10, 42])


det_A = np.linalg.det(A)

assert np.isclose(det_A, -2)
```

In [173]:
generate_and_parse_question1(FillInBlank,"but the topic was on webscraping")

any intermediate_steps?:  True
output:
 {
    "description": "    Question type where the student is given a main question and then\n    a code block with \"blanks\" (represented by `___X` in the source).\n    The student must provide one string per blank. Correctness is evaluated\n    based on a Python test suite based on the following template:\n\n    \n    ```python\n    {setup_code}\n\n    {code_block_with_blanks_filled_in}\n\n    {test_code}\n    ```\n\n    There must be at least one `___X` (one blank) in `starting_code`\n\n\n    Examples\n    --------\n    {\n      \"question_text\": \"Suppose you have already executed the following code:\n\n```python\nimport numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n```\n\nFill in the blanks below to solve the matrix equation $Ax = b$ for $x$\n\",\n      \"difficulty\": 2,\n      \"topics\": [\"linear algebra\", \"regression\", \"numpy\"],\n      \"starting_code\": \"from scipy.linalg import ___X\n\nx = ___X(A, ___X)

JSONDecodeError: Extra data: line 56 column 1 (char 3175)

In [174]:
generate_and_parse_question1(FillInBlank,"Can you generate another question on the same topic?")

any intermediate_steps?:  False
output:
 {
  "description": "    Question type where the student is given a main question and then\n    a code block with \"blanks\" (represented by `___X` in the source).\n    The student must provide one string per blank. Correctness is evaluated\n    based on a Python test suite based on the following template:\n\n    \n    ```python\n    {setup_code}\n\n    {code_block_with_blanks_filled_in}\n\n    {test_code}\n    ```\n\n    There must be at least one `___X` (one blank) in `starting_code`\n\n\n    Examples\n    --------\n    {\n      \"question_text\": \"Suppose you have already executed the following code:\n\n```python\nimport numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n```\n\nFill in the blanks below to solve the matrix equation $Ax = b$ for $x$\n\",\n      \"difficulty\": 2,\n      \"topics\": [\"linear algebra\", \"regression\", \"numpy\"],\n      \"starting_code\": \"from scipy.linalg import ___X\n\nx = ___X(A, ___X)\

JSONDecodeError: Extra data: line 56 column 1 (char 2945)

## Prompt with Dr. Lyon

In [11]:
#Fucntion that returns the system prompt with the format of the question requested 
def create_system_prompt2(pydantic_object):
    common_system_prompt = textwrap.dedent("""level
    You are a smart, helpful teaching assistant chatbot named AcademiaGPT.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to college students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor, you  will be given a topic and an
    expected difficulty level (integer) or (string). 

    If the difficulty is not given assume the difficulty level to be the previously used difficulty level.

    

    Here is an example question with difficulty 1

      {{
        "question_text":"How would you reverse the order of the following list in python\n\n```python\na = [1, 'hi', 3, 'there']\n```\n\nand save the result in an object `b`",
        "starting_code":"a = [1, 'hi', 3, 'there']\n# Reverse the order of the list and save the result in an object called b",
        "solution":"a = [1, 'hi', 3, 'there']\nb = a[::-1]",
        "topics":["python","programming","lists"],
        "difficulty":1,
        "setup_code":"# none",
        "test_code":"assert b == ['there', 3, 'hi', 1]"
      }}


    Here is an example question with difficulty 2

    {{"question_text": "Given a list of stock prices `prices` for consecutive days, write a for loop that calculates the total return of the stock over the period. The total return is defined as the percentage change from the first day to the last day. Store the result in a variable named `total_return`.",
    "starting_code": "prices = [100, 102, 105, 110, 108]\n# Calculate the total return and store it in total_return",
    "solution": "prices = [100, 102, 105, 110, 108]\nfirst_price = prices[0]\nlast_price = prices[-1]\ntotal_return = ((last_price - first_price) / first_price) * 100",
    "topics": ["for loops", "asset pricing"],
    "difficulty": 2,
    "setup_code": "# No setup code required",
    "test_code": "assert abs(total_return - ((prices[-1] - prices[0]) / prices[0]) * 100) < 1e-6"
    }}



    Here is an example question with difficulty 3

    {{
        "question_text":"You are given a 3 dimensional numpy array as specified below:\n\n```\nA = np.array([[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]])\n```\n\nCreate a variable `idx` (define as a tuple) that you could use to select the `4.0` element of this array.\n\nFor example,\n\n```\nidx = (0, 0, 0)\n```\n\nwould select the `0.0` element of the array.",
        "starting_code":"idx = (0, 0, 0)  # Fill this in with the correct index",
        "solution":"x = (1, 0, 0)",
        "topics":["numpy"],
        "difficulty":3,
        "setup_code":"import numpy as np\n\nA = np.array([[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]])",
        "test_code":"assert A[idx] == A[1, 0, 0]",
      }}
    
    If you are asked to give similar, easier, or another question, the user wants you to use the same topic and difficulty 
    level that you used to generate the previous question.

    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified JSON format with no extra words or content.

    You must always produce exactly one JSON object.
    
    {format_instructions}
    """)

    parser = PydanticOutputParser(pydantic_object=pydantic_object)
    return common_system_prompt.format(format_instructions=parser.get_format_instructions())

In [9]:
from pydantic import ValidationError
import json
from json.decoder import JSONDecodeError

# Function that takes the input, calls the retriever agent, and returns the parsed output
def generate_and_parse_question2(pydantic_model, query):
    rag_chain = create_chain(create_system_prompt2(pydantic_model), temperature=0.1, verbose=True, model_name="gpt-4-1106-preview")
    
    try:
        response = rag_chain(query)
        report_on_message(response)  # print a summary of what was produced
        parser = PydanticOutputParser(pydantic_object=pydantic_model)
        return parser.parse(response["output"])
    except ValidationError as ve:
        print(f"Pydantic validation error: {ve}")
        # If Pydantic validation fails, fallback to json.loads
        return json.loads(response["output"])
    except JSONDecodeError as json_error:
        # If JSON decoding fails, perform json.loads and inform the caller about the error
        result_output = json.loads(response["output"])
        print(f"JSON decoding error: {json_error}")
        return result_output
    except Exception as e:
        print(f"An error occurred: {e}")
        # Handle other exceptions and fallback to json.loads
        return json.loads(response["output"])


In [12]:
generate_and_parse_question2(Code, "topic: for loops and asset pricing and difficulty:2")

An error occurred: name 'report_on_message' is not defined


{'question_text': 'Given a list of stock prices `prices` for consecutive days, write a for loop that calculates the total return of the stock over the period. The total return is defined as the percentage change from the first day to the last day. Store the result in a variable named `total_return`.',
 'difficulty': 2,
 'topics': ['for loops', 'asset pricing'],
 'starting_code': 'prices = [100, 102, 105, 110, 108]\n# Calculate the total return and store it in total_return',
 'solution': 'prices = [100, 102, 105, 110, 108]\nfirst_price = prices[0]\nlast_price = prices[-1]\ntotal_return = ((last_price - first_price) / first_price) * 100',
 'setup_code': '# No setup code required',
 'test_code': 'assert abs(total_return - ((prices[-1] - prices[0]) / prices[0]) * 100) < 1e-6'}

In [36]:
generate_and_parse_question2(Code, "topic: for loops and asset pricing and difficulty 1")

any intermediate_steps?:  False
output:
 {
    "question_text": "Given a list of stock prices `prices = [100, 101, 102, 103, 104]`, write a for loop that calculates the daily return of the stock and stores the results in a list called `returns`. The daily return is calculated as `(current_price - previous_price) / previous_price`. Assume the first day's return is 0.",
    "difficulty": 1,
    "topics": ["for loops", "asset pricing"],
    "starting_code": "prices = [100, 101, 102, 103, 104]\nreturns = []\n# Write your for loop here",
    "solution": "prices = [100, 101, 102, 103, 104]\nreturns = [0]\nfor i in range(1, len(prices)):\n    daily_return = (prices[i] - prices[i-1]) / prices[i-1]\n    returns.append(daily_return)",
    "setup_code": "# none",
    "test_code": "assert returns == [0, 0.01, 0.009900990099009901, 0.00980392156862745, 0.009708737864077669]"
}





Given a list of stock prices `prices = [100, 101, 102, 103, 104]`, write a for loop that calculates the daily return of the stock and stores the results in a list called `returns`. The daily return is calculated as `(current_price - previous_price) / previous_price`. Assume the first day's return is 0.

```python
prices = [100, 101, 102, 103, 104]
returns = []
# Write your for loop here
```

**Solution**

```python
prices = [100, 101, 102, 103, 104]
returns = [0]
for i in range(1, len(prices)):
    daily_return = (prices[i] - prices[i-1]) / prices[i-1]
    returns.append(daily_return)
```

**Test Suite**

```python
# none

prices = [100, 101, 102, 103, 104]
returns = [0]
for i in range(1, len(prices)):
    daily_return = (prices[i] - prices[i-1]) / prices[i-1]
    returns.append(daily_return)

assert returns == [0, 0.01, 0.009900990099009901, 0.00980392156862745, 0.009708737864077669]
```