In [1]:
import os
import textwrap

from dotenv import load_dotenv
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.output_parsers import PydanticOutputParser
from langchain.schema.messages import SystemMessage
from langchain.vectorstores.pgvector import PGVector

from question_generator_model import SingleSelection, Code, AnyQuestion, FillInBlank, MultipleSelection

# Load environment variables from the project's .env file.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# silently load nothing on a machine where the file does not exist.
load_dotenv("/home/jupyteach-msda/jupyteach-ai/.env")

# Name of the PGVector collection opened by get_vectorstore().
COLLECTION_NAME = "documents"

# Connection string for the Postgres/pgvector database used by get_vectorstore().
# It can be overridden through the PGVECTOR_CONNECTION_STRING environment
# variable (for example from the .env file loaded above).
# SECURITY: the fallback literal embeds a database password in the notebook;
# it is kept only so existing setups keep working. Move it into the
# environment and rotate the password before sharing this file.
DB_CONNECTION = os.getenv(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql://postgres:supa-jupyteach@192.168.0.77:54328/postgres",
)


def get_vectorstore():
    """Open the PGVector store configured by the module-level constants.

    Returns:
        A ``PGVector`` instance built from ``DB_CONNECTION`` and
        ``COLLECTION_NAME``, using a freshly created ``OpenAIEmbeddings``
        object as its embedding function.
    """
    return PGVector(
        connection_string=DB_CONNECTION,
        collection_name=COLLECTION_NAME,
        embedding_function=OpenAIEmbeddings(),
    )

In [2]:
# Build the conversational retrieval agent used to answer a professor's request.
def create_chain(
        system_message_text,
        temperature=0,
        model_name="gpt-3.5-turbo-1106",
        model_kwargs=None,
        verbose=False,
    ):
    """Create a conversational retrieval agent backed by the course vector store.

    Args:
        system_message_text: Text used as the agent's system message.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        model_name: Chat model name passed to ``ChatOpenAI``.
        model_kwargs: Extra keyword arguments passed to ``ChatOpenAI``. When
            ``None`` (the default), ``{"response_format": {"type": "json_object"}}``
            is used. A new dict is built on every call so the default is never
            shared between calls.
        verbose: Verbosity flag, passed to both the chat model and the agent.

    Returns:
        The object returned by ``create_conversational_retrieval_agent``,
        configured with a single ``search_course_content`` retriever tool.
    """
    if model_kwargs is None:
        model_kwargs = {"response_format": {"type": "json_object"}}

    # step 1: create the retriever and the chat model
    retriever = get_vectorstore().as_retriever()
    llm = ChatOpenAI(
        temperature=temperature,
        model_name=model_name,
        model_kwargs=model_kwargs,
        verbose=verbose,
    )

    # step 2: wrap the retriever in a tool the agent can call
    tool = create_retriever_tool(
        retriever,
        "search_course_content",
        "Searches and returns documents regarding the contents of the course and notes from the instructor.",
    )
    tools = [tool]

    # step 3: create system message from the text passed in as an argument
    system_message = SystemMessage(content=system_message_text)

    # Forward the caller's verbosity choice; it used to be hard-coded to False
    # here, so the `verbose` argument had no effect on the agent itself.
    return create_conversational_retrieval_agent(
        llm=llm,
        tools=tools,
        verbose=verbose,
        system_message=system_message,
    )

In [3]:
# Print a short summary of an agent response, including whether retrieval happened.
def report_on_message(msg):
    """Print whether ``msg`` recorded any intermediate steps, then its output.

    Args:
        msg: Mapping with an ``"intermediate_steps"`` entry that supports
            ``len()`` and an ``"output"`` entry to print.
    """
    steps_taken = msg["intermediate_steps"]
    used_any_step = len(steps_taken) > 0
    print("any intermediate_steps?: ", used_any_step)

    answer = msg["output"]
    print("output:\n", answer)

    # Blank lines separating this report from whatever is printed next.
    print("\n\n")

In [4]:
# Build the system prompt, including the output format for the requested question type.
def create_system_prompt(pydantic_object):
    """Return the AcademiaGPT system prompt for one question model.

    Args:
        pydantic_object: Model class handed to ``PydanticOutputParser``; the
            parser's format instructions are appended to the shared prompt.

    Returns:
        The dedented prompt text with ``{format_instructions}`` filled in.
    """
    format_instructions = PydanticOutputParser(
        pydantic_object=pydantic_object
    ).get_format_instructions()

    prompt_template = textwrap.dedent("""
    You are a smart, helpful teaching assistant chatbot named AcademiaGPT.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to college students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor you  will be given a topic (string) and an
    expected difficulty level (integer)

    
    
    The difficulty will be a number between 1 and 3, with 1 corresponding to a request 
    for an easy question, and 3 for the most difficult question.
    
    If the professor asks you for a question and does not specify either a new topic 
    or a new difficulty or both, you must use the previous topic or difficulty or both.

    Occasionaly the professor may ask you to do something like produce a similar question,
    or try again and make it more difficult or easy. You need to assist the professor with the same.
    
    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified JSON format with no extra words or content.

    You must always produce exactly one JSON object.
    
    {format_instructions}
    """)

    return prompt_template.format(format_instructions=format_instructions)

In [19]:
# Send a query to a freshly built retrieval agent and return the parsed question.
# NOTE(review): a later cell (In [20]) defines a function with this same name;
# whichever cell ran last is the one in effect.
def generate_and_parse_question(pydantic_model, query):
    """Generate a question for ``query`` and parse it into ``pydantic_model``.

    Args:
        pydantic_model: Question model class used both to build the system
            prompt and to parse the agent's output.
        query: Request text passed to the agent.

    Returns:
        Whatever ``PydanticOutputParser.parse`` returns for the agent's
        ``"output"`` text. Parsing errors are not caught here.
    """
    system_prompt = create_system_prompt(pydantic_model)
    agent = create_chain(
        system_prompt,
        temperature=0.1,
        verbose=True,
        model_name="gpt-4-1106-preview",
    )

    result = agent(query)
    report_on_message(result)  # print a summary of what was produced

    output_parser = PydanticOutputParser(pydantic_object=pydantic_model)
    return output_parser.parse(result["output"])

In [20]:
from pydantic import ValidationError
import json
from json.decoder import JSONDecodeError

# Function that takes the input, calls the retriever agent, and returns the parsed output
def generate_and_parse_question(pydantic_model, query, rag_chain=None):
    """Generate a question for ``query`` and parse the agent's JSON answer.

    Args:
        pydantic_model: Question model class used to build the system prompt
            and to parse the agent's output.
        query: Request text passed to the agent.
        rag_chain: Optional agent previously returned by ``create_chain``.
            When ``None`` (the default) a new agent is built for this call, so
            the call starts without any earlier conversation. Pass the same
            agent to consecutive calls so follow-up requests such as
            "make it more difficult" are sent to the agent that handled the
            earlier request.
            NOTE(review): presumably the agent keeps its own chat memory
            between invocations -- confirm against the langchain docs.

    Returns:
        The parsed ``pydantic_model`` instance when the output validates.
        If parsing fails for any reason other than malformed JSON, the raw
        ``dict`` produced by ``json.loads`` is returned instead.

    Raises:
        json.JSONDecodeError: The agent's output is not a single valid JSON
            document (including when the fallback ``json.loads`` fails).
        Exception: Anything raised while building or invoking the agent
            propagates unchanged.
    """
    if rag_chain is None:
        rag_chain = create_chain(
            create_system_prompt(pydantic_model),
            temperature=0.1,
            verbose=True,
            model_name="gpt-4-1106-preview",
        )

    # Invoke the agent outside the try block: if this call fails there is no
    # response to fall back on, and the handlers below used to hide the real
    # error behind an unbound `response` variable.
    response = rag_chain(query)
    raw_output = response["output"]

    try:
        report_on_message(response)  # print a summary of what was produced
        parser = PydanticOutputParser(pydantic_object=pydantic_model)
        return parser.parse(raw_output)
    except JSONDecodeError as json_error:
        # The output is not valid JSON, so calling json.loads on the same text
        # again cannot succeed; report the problem and let the caller decide.
        print(f"JSON decoding error: {json_error}")
        raise
    except ValidationError as ve:
        # Valid JSON that does not match the model: fall through to the raw dict.
        print(f"Pydantic validation error: {ve}")
    except Exception as e:
        # Any other parsing/reporting problem: fall through to the raw dict.
        print(f"An error occurred: {e}")

    # Fallback shared by the non-JSON-error handlers above; it may itself raise
    # JSONDecodeError when the output is not valid JSON.
    return json.loads(raw_output)

# Disabled usage example kept as a bare string literal: nothing inside it is
# executed. Because it is the last expression in the cell, its repr is echoed
# as the cell output. `YourPydanticModel` and "Your Query" are placeholders.
'''# Example usage
try:
    result = generate_and_parse_question(YourPydanticModel, "Your Query")
    # Continue processing the result as needed
except Exception as e:
    print(f"Error processing question: {e}")
    # Handle the error, log, or notify the caller
'''

'# Example usage\ntry:\n    result = generate_and_parse_question(YourPydanticModel, "Your Query")\n    # Continue processing the result as needed\nexcept Exception as e:\n    print(f"Error processing question: {e}")\n    # Handle the error, log, or notify the caller\n'

In [6]:
# Ask for a harder question without supplying a topic or an earlier question.
# The return value is discarded; any exception escaping the call is printed
# instead of being re-raised, so the cell always completes.
try:
    generate_and_parse_question(FillInBlank, "Make the question more difficult")
except Exception as e:
    print(f"An error occurred: {e}")

any intermediate_steps?:  False
output:
 {
  "error": "No previous question provided to increase difficulty."
}



Pydantic validation error: 7 validation errors for FillInBlank
question_text
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
difficulty
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
topics
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
starting_code
  Field required [type=missing, input_value={'error': 'No previous qu...o increase difficulty.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
solution
  F

In [7]:
# Request a fill-in-the-blank question with an explicit topic and difficulty;
# the returned object is the cell's last expression and is displayed below.
generate_and_parse_question(FillInBlank, "topic: pandas groupby\ndifficulty: 2")

any intermediate_steps?:  True
output:
 {
  "question_text": "Given the following DataFrame `df`, use the `groupby` method to calculate the mean of the 'scores' for each unique 'group'. Fill in the blanks to complete the code:",
  "difficulty": 2,
  "topics": ["pandas", "groupby", "data analysis"],
  "starting_code": "import pandas as pd\n\ndf = pd.DataFrame({\n    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],\n    'scores': [88, 59, 90, 81, 76, 45, 95, 82]\n})\n\ngrouped = df.groupby('___X')\nmean_scores = grouped['___X'].___X()",
  "solution": ["group", "scores", "mean"],
  "setup_code": "import pandas as pd\n\ndf = pd.DataFrame({\n    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],\n    'scores': [88, 59, 90, 81, 76, 45, 95, 82]\n})",
  "test_code": "assert mean_scores.to_dict() == {'A': 87.25, 'B': 66.75}"
}





Given the following DataFrame `df`, use the `groupby` method to calculate the mean of the 'scores' for each unique 'group'. Fill in the blanks to complete the code:

```python
import pandas as pd

df = pd.DataFrame({
    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    'scores': [88, 59, 90, 81, 76, 45, 95, 82]
})

grouped = df.groupby('___X')
mean_scores = grouped['___X'].___X()
```

**Solution**

[group, scores, mean]
```

**Rendered Solution**

```python
import pandas as pd

df = pd.DataFrame({
    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    'scores': [88, 59, 90, 81, 76, 45, 95, 82]
})

grouped = df.groupby('group')
mean_scores = grouped['scores'].mean()
```

**Test Suite**

```python
import pandas as pd

df = pd.DataFrame({
    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    'scores': [88, 59, 90, 81, 76, 45, 95, 82]
})

import pandas as pd

df = pd.DataFrame({
    'group': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
    'scores': [88, 59, 90, 81, 76, 45, 95, 82]
})

grouped = df.groupby('group')
mean_scores = grouped['scores'].mean()

assert mean_scores.to_dict() == {'A': 87.25, 'B': 66.75}
```

In [8]:
# Follow-up that relies on the topic of the previous request.
# NOTE(review): the recorded output is a numpy linear-algebra question rather
# than pandas groupby -- presumably because generate_and_parse_question builds
# a new agent on every call, so no earlier conversation is available; confirm.
generate_and_parse_question(FillInBlank, "Give me one more question on the same")

any intermediate_steps?:  False
output:
 {
  "question_text": "Suppose you have already executed the following code:\n\n```python\nimport numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n```\n\nFill in the blanks below to solve the matrix equation $Ax = b$ for $x$ using a different method than the previous question.\n",
  "difficulty": 2,
  "topics": ["linear algebra", "regression", "numpy"],
  "starting_code": "x = np.linalg.___X(A, b)",
  "solution": ["lstsq"],
  "setup_code": "import numpy as np\n\nA = np.array([[1, 2], [3, 4]])\nb = np.array([10, 42])\n",
  "test_code": "assert np.allclose(x[0], [22, -6])"
}





Suppose you have already executed the following code:

```python
import numpy as np

A = np.array([[1, 2], [3, 4]])
b = np.array([10, 42])
```

Fill in the blanks below to solve the matrix equation $Ax = b$ for $x$ using a different method than the previous question.


```python
x = np.linalg.___X(A, b)
```

**Solution**

[lstsq]
```

**Rendered Solution**

```python
x = np.linalg.lstsq(A, b)
```

**Test Suite**

```python
import numpy as np

A = np.array([[1, 2], [3, 4]])
b = np.array([10, 42])


x = np.linalg.lstsq(A, b)

assert np.allclose(x[0], [22, -6])
```

In [9]:
# Restate the intended topic explicitly after the off-topic answer above.
generate_and_parse_question(FillInBlank, "But I asked for the question on pandas groupby")

any intermediate_steps?:  True
output:
 {
  "question_text": "Given a DataFrame `df` with a column 'A' containing categorical data, write a Python code snippet using pandas to group the data by column 'A' and calculate the mean of each group for the 'B' column.",
  "difficulty": 2,
  "topics": ["pandas", "data manipulation", "groupby"],
  "starting_code": "import pandas as pd\n\ndf = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],\n                   'B': [1, 2, 3, 4, 5, 6, 7, 8],\n                   'C': [9, 10, 11, 12, 13, 14, 15, 16]})\n\ngrouped_means = df.groupby('___X')['___X'].___X()",
  "solution": ["A", "B", "mean"],
  "setup_code": "",
  "test_code": "assert grouped_means.to_dict() == {'B': {'bar': 4.0, 'foo': 4.8}}"
}





Given a DataFrame `df` with a column 'A' containing categorical data, write a Python code snippet using pandas to group the data by column 'A' and calculate the mean of each group for the 'B' column.

```python
import pandas as pd

df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': [1, 2, 3, 4, 5, 6, 7, 8],
                   'C': [9, 10, 11, 12, 13, 14, 15, 16]})

grouped_means = df.groupby('___X')['___X'].___X()
```

**Solution**

[A, B, mean]
```

**Rendered Solution**

```python
import pandas as pd

df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': [1, 2, 3, 4, 5, 6, 7, 8],
                   'C': [9, 10, 11, 12, 13, 14, 15, 16]})

grouped_means = df.groupby('A')['B'].mean()
```

**Test Suite**

```python


import pandas as pd

df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': [1, 2, 3, 4, 5, 6, 7, 8],
                   'C': [9, 10, 11, 12, 13, 14, 15, 16]})

grouped_means = df.groupby('A')['B'].mean()

assert grouped_means.to_dict() == {'B': {'bar': 4.0, 'foo': 4.8}}
```

In [24]:
# Another follow-up without an explicit topic or difficulty. The return value
# is discarded and any escaping exception is printed instead of re-raised.
# NOTE(review): execution counts in this notebook are out of order
# (In [24] appears before In [21] and In [11]); re-run top to bottom on a
# fresh kernel before trusting the recorded outputs.
try:
    generate_and_parse_question(FillInBlank, "give me one more questions ")
except Exception as e:
    print(f"An error occurred: {e}")

any intermediate_steps?:  False
output:
 {
  "description": "Please provide the topic and the current difficulty level to increase the difficulty of the question."
}



An error occurred: 7 validation errors for FillInBlank
question_text
  Field required [type=missing, input_value={'description': 'Please p...culty of the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
difficulty
  Field required [type=missing, input_value={'description': 'Please p...culty of the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
topics
  Field required [type=missing, input_value={'description': 'Please p...culty of the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
starting_code
  Field required [type=missing, input_value={'description': 'Please p...culty of the question.'}, input_type=dict]
    For further information visit https://

In [21]:
# Ask for two multiple-selection questions in a single request.
# NOTE(review): the system prompt requires exactly one JSON object per
# response, while the recorded output contains two objects back to back,
# which cannot be parsed as a single JSON document.
try:
    generate_and_parse_question(MultipleSelection, "Give me 2 more questions on the previous topic with diffculty 1 ")
except Exception as e:
    print(f"An error occurred: {e}")


any intermediate_steps?:  True
output:
 {
  "question_text": "Which of the following are main components of computational social science as discussed in the lecture?",
  "difficulty": 1,
  "topics": ["computational social science", "modeling"],
  "choices": [
    "Data and real-world observations",
    "Statistical models and parameter adjustment",
    "Prior beliefs and parameter plausibility",
    "Physical experiments and laboratory tests"
  ],
  "solution": [0, 1, 2]
}
{
  "question_text": "In the context of computational social science, what role do prior beliefs play in model parameter selection?",
  "difficulty": 1,
  "topics": ["computational social science", "modeling"],
  "choices": [
    "They are used to validate the final results of the model.",
    "They guide the selection of parameters that match both the data and our understanding of the world.",
    "They are irrelevant and should not influence the model.",
    "They are used to replace real-world data when it is not 

In [11]:
# Ask for an easier question without naming a topic. In the recorded run the
# model's answer failed validation and the raw dict from the JSON fallback
# was returned and displayed.
generate_and_parse_question(MultipleSelection, "Give me an easier question ")

any intermediate_steps?:  False
output:
 {
  "error": "Please provide a topic and a difficulty level for the question."
}



Pydantic validation error: 5 validation errors for MultipleSelection
question_text
  Field required [type=missing, input_value={'error': 'Please provide...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
difficulty
  Field required [type=missing, input_value={'error': 'Please provide...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
topics
  Field required [type=missing, input_value={'error': 'Please provide...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
choices
  Field required [type=missing, input_value={'error': 'Please provide...evel for the question.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.4/v/missing
so

{'error': 'Please provide a topic and a difficulty level for the question.'}

In [None]:
# Build the system prompt, including the output format for the requested question type.
# NOTE(review): this cell repeats the create_system_prompt definition from the
# In [4] cell; keep a single copy so edits to the prompt cannot drift apart.
def create_system_prompt(pydantic_object):
    """Return the AcademiaGPT system prompt for one question model.

    Args:
        pydantic_object: Model class handed to ``PydanticOutputParser``; the
            parser's format instructions are appended to the shared prompt.

    Returns:
        The dedented prompt text with ``{format_instructions}`` filled in.
    """
    shared_prompt = textwrap.dedent("""
    You are a smart, helpful teaching assistant chatbot named AcademiaGPT.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to college students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor you  will be given a topic (string) and an
    expected difficulty level (integer)

    
    
    The difficulty will be a number between 1 and 3, with 1 corresponding to a request 
    for an easy question, and 3 for the most difficult question.
    
    If the professor asks you for a question and does not specify either a new topic 
    or a new difficulty or both, you must use the previous topic or difficulty or both.

    Occasionaly the professor may ask you to do something like produce a similar question,
    or try again and make it more difficult or easy. You need to assist the professor with the same.
    
    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified JSON format with no extra words or content.

    You must always produce exactly one JSON object.
    
    {format_instructions}
    """)

    output_parser = PydanticOutputParser(pydantic_object=pydantic_object)
    instructions = output_parser.get_format_instructions()
    return shared_prompt.format(format_instructions=instructions)