In [9]:
from dotenv import load_dotenv
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.messages import SystemMessage
from langchain.vectorstores.pgvector import PGVector
import textwrap
from langchain.output_parsers import PydanticOutputParser
from question_generator_model import SingleSelection, Code, AnyQuestion, FillInBlank, MultipleSelection

load_dotenv("/home/jupyteach-msda/jupyteach-ai/.env")

COLLECTION_NAME = "documents"
DB_CONNECTION = "postgresql://postgres:supa-jupyteach@192.168.0.77:54328/postgres"


def get_vectorstore():
    embeddings = OpenAIEmbeddings()

    db = PGVector(embedding_function=embeddings,
        collection_name=COLLECTION_NAME,
        connection_string=DB_CONNECTION,
    )
    return db

In [2]:
def create_chain(
        system_message_text, 
        temperature=0, 
        model_name="gpt-3.5-turbo-1106", 
        model_kwargs={"response_format": {"type": "json_object"}},
        verbose=False,
    ):
    # step 1: create llm
    retriever = get_vectorstore().as_retriever()
    llm = ChatOpenAI(temperature=temperature, model_name=model_name, model_kwargs=model_kwargs, verbose=verbose)
    
    # step 2: create retriever tool
    tool = create_retriever_tool(
        retriever,
        "search_course_content",
        "Searches and returns documents regarding the contents of the course and notes from the instructor.",
    )
    tools = [tool]

    # step 3: create system message from the text passed in as an argument
    system_message = SystemMessage(content=system_message_text)

    # return the chain
    return create_conversational_retrieval_agent(
        llm=llm, 
        tools=tools, 
        verbose=False, 
        system_message=system_message
    )

In [3]:
#Function to check if the retrieval is happening
def report_on_message(msg):
    print("any intermediate_steps?: ", len(msg["intermediate_steps"]) > 0)
    print("output:\n", msg["output"])
    print("\n\n")

In [4]:
from pydantic import ValidationError
import json
from json.decoder import JSONDecodeError

# Function that takes the input, calls the retriever agent, and returns the parsed output
def generate_and_parse_question(pydantic_model, query):
    rag_chain = create_chain(create_system_prompt(pydantic_model), temperature=0.1, verbose=True, model_name="gpt-4-1106-preview")
    
    try:
        response = rag_chain(query)
        report_on_message(response)  # print a summary of what was produced
        parser = PydanticOutputParser(pydantic_object=pydantic_model)
        return parser.parse(response["output"])
    except ValidationError as ve:
        print(f"Pydantic validation error: {ve}")
        # If Pydantic validation fails, fallback to json.loads
        return json.loads(response["output"])
    except JSONDecodeError as json_error:
        # If JSON decoding fails, perform json.loads and inform the caller about the error
        result_output = json.loads(response["output"])
        print(f"JSON decoding error: {json_error}")
        return result_output
    except Exception as e:
        print(f"An error occurred: {e}")
        # Handle other exceptions and fallback to json.loads
        return json.loads(response["output"])

In [5]:
#Fucntion that returns the system prompt with the format of the question requested 
def create_system_prompt(pydantic_object):
    common_system_prompt = textwrap.dedent("""level
    You are a smart, helpful teaching assistant chatbot named AcademiaGPT.

    You are an expert Python programmer and have used all the most popular
    libraries for data analysis, machine learning, and artificial intelligence.

    You assist professors that teach courses about Python, data science, and machine learning
    to college students.

    Your task is to help professors produce practice questions to help students solidify 
    their understanding of specific topics

    In your conversations with a professor, you  will be given a topic and an
    expected difficulty level (integer) or (string). 

    If the difficulty is not given assume the difficulty level to be the previously used difficulty level.

    

    Here is an example question with difficulty 1

      {{
        "question_text":"How would you reverse the order of the following list in python\n\n```python\na = [1, 'hi', 3, 'there']\n```\n\nand save the result in an object `b`",
        "starting_code":"a = [1, 'hi', 3, 'there']\n# Reverse the order of the list and save the result in an object called b",
        "solution":"a = [1, 'hi', 3, 'there']\nb = a[::-1]",
        "topics":["python","programming","lists"],
        "difficulty":1,
        "setup_code":"# none",
        "test_code":"assert b == ['there', 3, 'hi', 1]"
        }}


    Here is an example question with difficulty 2

    {{"question_text": "Given a list of stock prices `prices` for consecutive days, write a for loop that calculates the total return of the stock over the period. The total return is defined as the percentage change from the first day to the last day. Store the result in a variable named `total_return`.",
    "starting_code": "prices = [100, 102, 105, 110, 108]\n# Calculate the total return and store it in total_return",
    "solution": "prices = [100, 102, 105, 110, 108]\nfirst_price = prices[0]\nlast_price = prices[-1]\ntotal_return = ((last_price - first_price) / first_price) * 100",
    "topics": ["for loops", "asset pricing"],
    "difficulty": 2,
    "setup_code": "# No setup code required",
    "test_code": "assert abs(total_return - ((prices[-1] - prices[0]) / prices[0]) * 100) < 1e-6"
    }}



    Here is an example question with difficulty 3

    {{
        "question_text":"You are given a 3 dimensional numpy array as specified below:\n\n```\nA = np.array([[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]])\n```\n\nCreate a variable `idx` (define as a tuple) that you could use to select the `4.0` element of this array.\n\nFor example,\n\n```\nidx = (0, 0, 0)\n```\n\nwould select the `0.0` element of the array.",
        "starting_code":"idx = (0, 0, 0)  # Fill this in with the correct index",
        "solution":"x = (1, 0, 0)",
        "topics":["numpy"],
        "difficulty":3,
        "setup_code":"import numpy as np\n\nA = np.array([[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]]])",
        "test_code":"assert A[idx] == A[1, 0, 0]"
      }}
    
    If you are asked to give similar, easier, or another question, the user wants you to use the same topic and difficulty 
    level that you used to generate the previous question.

    You are encouraged to use any tools available to look up relevant information, only
    if necessary.

    Your responses must always exactly match the specified JSON format with no extra words or content.

    You must always produce exactly one JSON object.
    
    {format_instructions}
    """)

    parser = PydanticOutputParser(pydantic_object=pydantic_object)
    return common_system_prompt.format(format_instructions=parser.get_format_instructions())

In [6]:
generate_and_parse_question(Code, "topic: for loops and asset pricing and difficulty:2")

any intermediate_steps?:  False
output:
 {
    "question_text": "Write a Python function `calculate_max_drawdown` that takes a list of daily stock prices and returns the maximum drawdown. The maximum drawdown is defined as the maximum loss from a peak to a trough of a portfolio, before a new peak is attained. It is often expressed as a percentage of the peak value.",
    "starting_code": "def calculate_max_drawdown(prices):\n    # Your code here\n    return max_drawdown\n\nprices = [100, 102, 105, 103, 97, 95, 99, 110]\nmax_drawdown = calculate_max_drawdown(prices)",
    "solution": "def calculate_max_drawdown(prices):\n    max_drawdown = 0\n    peak = prices[0]\n    for price in prices:\n        if price > peak:\n            peak = price\n        drawdown = (peak - price) / peak\n        if drawdown > max_drawdown:\n            max_drawdown = drawdown\n    return max_drawdown * 100\n\nprices = [100, 102, 105, 103, 97, 95, 99, 110]\nmax_drawdown = calculate_max_drawdown(prices)",
    "

Write a Python function `calculate_max_drawdown` that takes a list of daily stock prices and returns the maximum drawdown. The maximum drawdown is defined as the maximum loss from a peak to a trough of a portfolio, before a new peak is attained. It is often expressed as a percentage of the peak value.

```python
def calculate_max_drawdown(prices):
    # Your code here
    return max_drawdown

prices = [100, 102, 105, 103, 97, 95, 99, 110]
max_drawdown = calculate_max_drawdown(prices)
```

**Solution**

```python
def calculate_max_drawdown(prices):
    max_drawdown = 0
    peak = prices[0]
    for price in prices:
        if price > peak:
            peak = price
        drawdown = (peak - price) / peak
        if drawdown > max_drawdown:
            max_drawdown = drawdown
    return max_drawdown * 100

prices = [100, 102, 105, 103, 97, 95, 99, 110]
max_drawdown = calculate_max_drawdown(prices)
```

**Test Suite**

```python
# No setup code required

def calculate_max_drawdown(prices):
    max_drawdown = 0
    peak = prices[0]
    for price in prices:
        if price > peak:
            peak = price
        drawdown = (peak - price) / peak
        if drawdown > max_drawdown:
            max_drawdown = drawdown
    return max_drawdown * 100

prices = [100, 102, 105, 103, 97, 95, 99, 110]
max_drawdown = calculate_max_drawdown(prices)

assert abs(calculate_max_drawdown([100, 102, 105, 103, 97, 95, 99, 110]) - 10.0) < 1e-6
```

In [10]:
generate_and_parse_question(MultipleSelection, "topic: for loops and asset pricing and difficulty 1")

any intermediate_steps?:  False
output:
 {
  "question_text": "Consider a list `prices` representing the closing stock prices for a company over 5 consecutive days. Write a for loop to calculate the difference between each day's price and the previous day's price. Store the results in a list called `price_changes`.",
  "difficulty": 1,
  "topics": ["for loops", "asset pricing"],
  "choices": [
    "Using a for loop starting from the second element in `prices`",
    "Using a while loop and manually incrementing the index",
    "Using list comprehension with a for loop inside",
    "Using a for loop with the `range` function starting from 1"
  ],
  "solution": [0, 3]
}





Consider a list `prices` representing the closing stock prices for a company over 5 consecutive days. Write a for loop to calculate the difference between each day's price and the previous day's price. Store the results in a list called `price_changes`.

- [x] Using a for loop starting from the second element in `prices`
- [ ] Using a while loop and manually incrementing the index
- [ ] Using list comprehension with a for loop inside
- [x] Using a for loop with the `range` function starting from 1


In [11]:
generate_and_parse_question(SingleSelection, "topic: probability and difficulty 1")

any intermediate_steps?:  True
output:
 {
  "question_text": "What is the expected waiting time for a geometric distribution if the probability of success on any given trial is `p`?",
  "difficulty": 1,
  "topics": ["probability", "geometric distribution"],
  "choices": [
    "The expected waiting time is `1/p`",
    "The expected waiting time is `p/(1-p)`",
    "The expected waiting time is `1/(1-p)`",
    "The expected waiting time is `p`"
  ],
  "solution": 0
}





What is the expected waiting time for a geometric distribution if the probability of success on any given trial is `p`?

- [x] The expected waiting time is `1/p`
- [ ] The expected waiting time is `p/(1-p)`
- [ ] The expected waiting time is `1/(1-p)`
- [ ] The expected waiting time is `p`


In [12]:
generate_and_parse_question(FillInBlank, "topic: probability and difficulty 1")

any intermediate_steps?:  False
output:
 {
  "question_text": "Suppose you have a list of outcomes from a fair six-sided die roll: `[1, 2, 3, 4, 5, 6]`. Write a Python function `calculate_probability` that takes a list of outcomes and a specific outcome, and returns the probability of that outcome. Assume each outcome is equally likely.",
  "difficulty": 1,
  "topics": ["probability", "functions"],
  "starting_code": "def calculate_probability(outcomes, specific_outcome):\n    # Your code here\n    probability = ___X\n    return probability",
  "solution": ["1 / len(outcomes)"],
  "setup_code": "# No setup code required",
  "test_code": "assert calculate_probability([1, 2, 3, 4, 5, 6], 4) == 1/6"
}





Suppose you have a list of outcomes from a fair six-sided die roll: `[1, 2, 3, 4, 5, 6]`. Write a Python function `calculate_probability` that takes a list of outcomes and a specific outcome, and returns the probability of that outcome. Assume each outcome is equally likely.

```python
def calculate_probability(outcomes, specific_outcome):
    # Your code here
    probability = ___X
    return probability
```

**Solution**

[1 / len(outcomes)]
```

**Rendered Solution**

```python
def calculate_probability(outcomes, specific_outcome):
    # Your code here
    probability = 1 / len(outcomes)
    return probability
```

**Test Suite**

```python
# No setup code required

def calculate_probability(outcomes, specific_outcome):
    # Your code here
    probability = 1 / len(outcomes)
    return probability

assert calculate_probability([1, 2, 3, 4, 5, 6], 4) == 1/6
```

In [13]:
generate_and_parse_question(FillInBlank, "Give more difficult question on the same")

any intermediate_steps?:  False
output:
 {
    "question_text": "Given a 3x3 numpy array `matrix` representing a square matrix, write a function `determinant` that computes the determinant of the matrix without using any built-in numpy functions. Use the formula for the determinant of a 3x3 matrix and store the result in a variable named `det`.",
    "difficulty": 3,
    "topics": ["numpy", "linear algebra"],
    "starting_code": "def determinant(matrix):\n    # Calculate the determinant of the matrix\n    det = ___X\n    return det\n\nmatrix = np.array([[1, 2, 3], [0, 1, 4], [5, 6, 0]])\n# Call the function and store the result in det\n___X",
    "solution": [
        "matrix[0,0] * (matrix[1,1]*matrix[2,2] - matrix[1,2]*matrix[2,1]) - matrix[0,1] * (matrix[1,0]*matrix[2,2] - matrix[1,2]*matrix[2,0]) + matrix[0,2] * (matrix[1,0]*matrix[2,1] - matrix[1,1]*matrix[2,0])",
        "det = determinant(matrix)"
    ],
    "setup_code": "import numpy as np\n\nmatrix = np.array([[1, 2, 3], [0,

Given a 3x3 numpy array `matrix` representing a square matrix, write a function `determinant` that computes the determinant of the matrix without using any built-in numpy functions. Use the formula for the determinant of a 3x3 matrix and store the result in a variable named `det`.

```python
def determinant(matrix):
    # Calculate the determinant of the matrix
    det = ___X
    return det

matrix = np.array([[1, 2, 3], [0, 1, 4], [5, 6, 0]])
# Call the function and store the result in det
___X
```

**Solution**

[matrix[0,0] * (matrix[1,1]*matrix[2,2] - matrix[1,2]*matrix[2,1]) - matrix[0,1] * (matrix[1,0]*matrix[2,2] - matrix[1,2]*matrix[2,0]) + matrix[0,2] * (matrix[1,0]*matrix[2,1] - matrix[1,1]*matrix[2,0]), det = determinant(matrix)]
```

**Rendered Solution**

```python
def determinant(matrix):
    # Calculate the determinant of the matrix
    det = matrix[0,0] * (matrix[1,1]*matrix[2,2] - matrix[1,2]*matrix[2,1]) - matrix[0,1] * (matrix[1,0]*matrix[2,2] - matrix[1,2]*matrix[2,0]) + matrix[0,2] * (matrix[1,0]*matrix[2,1] - matrix[1,1]*matrix[2,0])
    return det

matrix = np.array([[1, 2, 3], [0, 1, 4], [5, 6, 0]])
# Call the function and store the result in det
det = determinant(matrix)
```

**Test Suite**

```python
import numpy as np

matrix = np.array([[1, 2, 3], [0, 1, 4], [5, 6, 0]])

def determinant(matrix):
    # Calculate the determinant of the matrix
    det = matrix[0,0] * (matrix[1,1]*matrix[2,2] - matrix[1,2]*matrix[2,1]) - matrix[0,1] * (matrix[1,0]*matrix[2,2] - matrix[1,2]*matrix[2,0]) + matrix[0,2] * (matrix[1,0]*matrix[2,1] - matrix[1,1]*matrix[2,0])
    return det

matrix = np.array([[1, 2, 3], [0, 1, 4], [5, 6, 0]])
# Call the function and store the result in det
det = determinant(matrix)

assert determinant(np.array([[1, 2, 3], [0, 1, 4], [5, 6, 0]])) == 1*(1*0-4*6) - 2*(0*0-4*5) + 3*(0*6-1*5)
```