In [29]:
## Import Necessary Libraries
import os
from typing import List

from dotenv import load_dotenv
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.output_parsers import PydanticOutputParser
from langchain.schema.messages import SystemMessage
from langchain.vectorstores.pgvector import PGVector

from question_generator_model import (
    MultipleSelection,
    SingleSelection,
    Code,
    FillInBlank
)

In [30]:
## Create Output Parsers
# One PydanticOutputParser per question model imported from
# question_generator_model. The models are listed in the same order as the
# names on the left, so each parser is paired with its own model.
ms_parser, code_parser, ss_parser, fib_parser = (
    PydanticOutputParser(pydantic_object=question_model)
    for question_model in (MultipleSelection, Code, SingleSelection, FillInBlank)
)

In [2]:
## Load Vector Store
# Load variables from a local .env file (if one exists) into the environment
# before any setting below is read.
load_dotenv()

# Connection string for the Postgres database backing the vector store.
# It is taken from the DB_CONNECTION environment variable so credentials can
# be supplied without editing the notebook.
# NOTE(review): the fallback is the value that used to be hard-coded here, kept
# so existing runs behave the same. It embeds a password; remove it (and rotate
# the password) once every environment defines DB_CONNECTION.
DB_CONNECTION = os.environ.get(
    "DB_CONNECTION",
    "postgresql://postgres:supa-jupyteach@192.168.0.77:54328/postgres",
)

# Name of the collection passed to PGVector in get_vectorstore().
COLLECTION_NAME = "documents"

def get_vectorstore():
    """Return a PGVector store for the configured collection.

    The store is constructed from the notebook-level ``DB_CONNECTION`` and
    ``COLLECTION_NAME`` settings, and is handed a default-constructed
    ``OpenAIEmbeddings`` instance as its embedding function.
    """
    return PGVector(
        connection_string=DB_CONNECTION,
        collection_name=COLLECTION_NAME,
        embedding_function=OpenAIEmbeddings(),
    )

In [3]:
## Initialize Retriever
# Build the vector store once and keep it available as `db`.
db = get_vectorstore()
# Wrap the store in a retriever. `create_chain` reads this notebook-level
# `retriever` when it builds its search tool, so this cell must run first.
retriever = db.as_retriever()

In [13]:
## Define the create_chain Helper
def create_chain(system_message_text, temperature=0, verbose=False):
    """Build a conversational retrieval agent driven by a custom system prompt.

    The agent is given a single tool, ``search_course_content``, which wraps
    the notebook-level ``retriever``.

    Parameters
    ----------
    system_message_text : str
        Text used as the content of the agent's system message.
    temperature : float, optional
        Temperature passed to ``ChatOpenAI``. Defaults to 0, the value that
        was previously hard-coded.
    verbose : bool, optional
        Forwarded to ``create_conversational_retrieval_agent``. Defaults to
        False, the value that was previously hard-coded.

    Returns
    -------
    The object returned by ``create_conversational_retrieval_agent``. In this
    notebook it is called with ``{"input": <text>}`` and the reply is read
    from the ``"output"`` key of the result.
    """
    ## Step 1: Create LLM
    llm = ChatOpenAI(temperature=temperature)

    ## Step 2: Create Retriever Tool
    tool = create_retriever_tool(
        retriever,
        "search_course_content",
        "Searches and returns documents regarding the contents of the course and notes from the instructor.",
    )
    tools = [tool]

    ## Step 3: Create System Message from the Text Passed in as an Argument
    system_message = SystemMessage(content=system_message_text)

    ## Return the Chain
    return create_conversational_retrieval_agent(
        llm=llm,
        tools=tools,
        verbose=verbose,
        system_message=system_message,
    )

In [55]:
# System prompt for the first question-generator agent. It sets the
# AcademiaGPT persona, describes the three inputs each message carries
# (topic, type, difficulty), and asks for a five-part output layout:
# question type, text, answer choices, difficulty, correct answer.
# The {…} placeholders are literal text for the model; this string is not
# formatted before being passed to create_chain.
example_system_prompt_text = """\
You are a smart, helpful teaching assistant chatbot named AcademiaGPT.

You assist college professors that teach courses about Python and pandas to students. 

You have 25+ years of experience writing pandas code to do a variety of data analysis tasks. 

Your responses typically include examples of datasets or code snippets.

For each message you will be given three inputs:

topic: string
type: string
difficulty: integer

Your task is to produce a question of a specified type to help students solidify their understanding of the given python and/or pandas topic.

The difficulty will be a number between 1 and 3, with 1 corresponding to a request for an easy question, and 3 for the most difficult question.

The type will be a string that specifies the type of question to be generated, such as single selection, multiple selection, fill in code snippet, fill in code, etc.

Please format the question output in the following way:

{Question Type}
{Question Text}
{Answer Choices}
{Question Difficulty}
{Correct Answer}

When outputting the answer choices for the questions, please only print one answer choice per line.

If outputting multiple questions, please number them, always starting at 1.

Unless specifically specified, only give one correct answer to multiple choice questions.

Occasionaly the user may ask you to do something like produce a similar question, or try again but change the difficultly level, produce 5 more questions on that topic, produce a question on the same topic but as a different question format. 

You should follow the user request in these situations, but make sure you always output the questions in a consistent format.

"""

# Agent built from the prompt above. Re-running this cell creates a new agent
# object and rebinds `example_chat` to it.
example_chat = create_chain(example_system_prompt_text)

## Single Selection Question

In [17]:
# Ask for an easy (difficulty 1) single-selection question on merging datasets.
result2 = example_chat({"input": "topic: Merging Datasets\ntype:single selection\ndifficulty: 1"})

In [18]:
# Show the generated text, which is read from the "output" key of the result.
# NOTE(review): in the saved run the reply is a fill-in-the-blank question,
# not the single-selection question that was requested.
print(result2["output"])

Fill in the Blank

To merge two datasets in pandas, we can use the _______ function.

Question Difficulty: 1
Correct Answer: merge()


## Multiple Selection Question

In [56]:
# Ask for a medium (difficulty 2) multiple-selection question on pandas groupby.
result = example_chat({"input": "topic: Pandas Groupby\ntype:multiple selection\ndifficulty: 2"})

In [57]:
# Show the generated question text from the "output" key of the result.
print(result["output"])

Multiple Selection

Which of the following statements about the pandas groupby function are true? Select all that apply.

1. The groupby function is used to split a DataFrame into groups based on one or more columns.
2. The groupby function returns a GroupBy object.
3. The groupby function can be used to perform aggregation operations on the grouped data.
4. The groupby function can only be applied to numerical columns.
5. The groupby function can be used to apply custom functions to the grouped data.

Difficulty: 2

Correct Answers: 
1. The groupby function is used to split a DataFrame into groups based on one or more columns.
2. The groupby function returns a GroupBy object.
3. The groupby function can be used to perform aggregation operations on the grouped data.
5. The groupby function can be used to apply custom functions to the grouped data.


## Code Question

In [19]:
# Ask for an easy (difficulty 1) "fill in code snippet" question on DataFrames and Series.
result3 = example_chat({"input": "topic: Pandas Dataframes and Series\ntype:fill in code snippet\ndifficulty: 1"})

In [20]:
# Show the generated question text from the "output" key of the result.
# NOTE(review): in the saved run the reply is labelled "Predict Code Output"
# rather than the requested fill-in-code-snippet type.
print(result3["output"])

Predict Code Output

What will be the output of the following code?

```python
import pandas as pd

data = {'Name': ['John', 'Emma', 'Michael'],
        'Age': [25, 28, 30],
        'City': ['New York', 'Los Angeles', 'Chicago']}

df = pd.DataFrame(data)
series = df['Age']

print(series)
```

Question Difficulty: 1
Correct Answer:
```
0    25
1    28
2    30
Name: Age, dtype: int64
```


## Fill in Blank Question

In [21]:
# Ask for a hard (difficulty 3) fill-in-the-blank question on Markov chains.
result4 = example_chat({"input": "topic: Markov Chains\ntype:fill in blank\ndifficulty: 3"})

In [22]:
# Show the generated question text from the "output" key of the result.
# NOTE(review): in the saved run the reply is labelled "Fill in Code Snippet"
# and the recorded text stops partway through the correct answer.
print(result4["output"])

Fill in Code Snippet

Complete the code snippet below to create a Markov chain transition matrix using the pandas library:

```python
import pandas as pd

# Define the transition matrix
transition_matrix = pd.DataFrame(_____, index=_____, columns=_____)

# Fill in the transition matrix with the probabilities
transition_matrix.loc[_____, _____] = _____
transition_matrix.loc[_____, _____] = _____
transition_matrix.loc[_____, _____] = _____
transition_matrix.loc[_____, _____] = _____
transition_matrix.loc[_____, _____] = _____
transition_matrix.loc[_____, _____] = _____

print(transition_matrix)
```

Question Difficulty: 3
Correct Answer:
```python
import pandas as pd

# Define the transition matrix
transition_matrix = pd.DataFrame(0, index=['A', 'B', 'C'], columns=['A', 'B', 'C'])

# Fill in the transition matrix with the probabilities
transition_matrix.loc['A', 'A'] = 0.2
transition_matrix.loc['A', 'B'] = 0.4
transition_matrix.loc['A', 'C'] = 0.4
transition_matrix.loc['B', 'A'] = 0.3
tr

## Conversing with Question Generator

In [60]:
# Follow-up request that names no topic ("the same topic"), so the result
# depends on what this `example_chat` object was asked earlier.
# NOTE(review): the execution counts suggest this ran after the groupby request
# (In [56]) on the agent re-created in In [55], not after the Markov chain
# request shown above. Presumably that is why the saved reply is about
# groupby; confirm with Restart & Run All.
result5 = example_chat({"input": "please give 2 more fill in blank questions on the same topic, give some variety for each question, changing the difficulty to 3"})

In [61]:
# Show the follow-up questions from the "output" key of the result.
print(result5["output"])

Fill in the Blank

1. The `groupby` function in pandas is often used in combination with the `______` method to split a DataFrame into groups based on one or more columns.

Difficulty: 3

Correct Answer: `split`

2. The `groupby` function returns a ______ object, which can be used to perform aggregation operations on the grouped data.

Difficulty: 3

Correct Answer: `GroupBy`


## Prompt 2

In [75]:
# Second system prompt. Compared with example_system_prompt_text it has no
# persona paragraphs, its output layout lists only question text, answer
# choices and correct answer, and it adds the instruction
# "Always output correct answer after question choices."
# The {…} placeholders are literal text for the model; this string is not
# formatted before being passed to create_chain.
example_system_prompt_text2 = """\
For each message you will be given three inputs:

topic: string
type: string
difficulty: integer

Your task is to produce a question of a specified type to help students solidify their understanding of the given python and/or pandas topic.

The difficulty will be a number between 1 and 3, with 1 corresponding to a request for an easy question, and 3 for the most difficult question.

The type will be a string that specifies the type of question to be generated, such as single selection, multiple selection, fill in code snippet, fill in code, etc.

Please format the question output in the following way:

{Question Text}
{Answer Choices}
{Correct Answer}

When outputting the answer choices for the questions, please only print one answer choice per line.

Always output correct answer after question choices.

If outputting multiple questions, please number them, always starting at 1.

Unless specifically specified, only give one correct answer to multiple choice questions.

Occasionaly the user may ask you to do something like produce a similar question, or try again but change the difficultly level, produce 5 more questions on that topic, produce a question on the same topic but as a different question format. 

You should follow the user request in these situations, but make sure you always output the questions in a consistent format.

"""

# Separate agent built from the second prompt; it is a different object from
# `example_chat`.
example_chat2 = create_chain(example_system_prompt_text2)

In [76]:
# Ask the second agent for a medium (difficulty 2) single-selection question on pandas groupby.
result2_1 = example_chat2({"input": "topic: Pandas Groupby\ntype:single selection\ndifficulty: 2"})

In [77]:
# Show the generated question text from the "output" key of the result.
print(result2_1["output"])

What does the `groupby` function in pandas do?

a) Sorts the data in ascending order based on a specified column
b) Aggregates data based on a specified column or columns
c) Filters the data based on a specified condition
d) Merges two or more dataframes based on a common column

Correct Answer: b) Aggregates data based on a specified column or columns


In [78]:
# Ask the second agent for an easy (difficulty 1) multiple-selection question on merging datasets.
result2_2 = example_chat2({"input": "topic: Merging Datasets\ntype:multiple selection\ndifficulty: 1"})

In [79]:
# Show the generated question text from the "output" key of the result.
print(result2_2["output"])

What are some ways to merge datasets in pandas? (Select all that apply)

a) `merge` function
b) `concat` function
c) `join` function
d) `append` function

Correct Answers:
a) `merge` function
b) `concat` function
c) `join` function


In [80]:
# Ask the second agent for an easy (difficulty 1) "fill in code snippet" question on DataFrames and Series.
result2_3 = example_chat2({"input": "topic: Pandas Dataframes and Series\ntype:fill in code snippet\ndifficulty: 1"})

In [81]:
# Show the generated question text from the "output" key of the result.
print(result2_3["output"])

Complete the code snippet to create a pandas Series from a list:

```
import pandas as pd

data = [10, 20, 30, 40, 50]
series = pd._____(data)
```

Correct Answer:
```
series = pd.Series(data)
```


In [82]:
# Ask the second agent for a hard (difficulty 3) fill-in-the-blank question on Markov chains.
result2_4 = example_chat2({"input": "topic: Markov Chains\ntype:fill in blank\ndifficulty: 3"})

In [83]:
# Show the generated question text from the "output" key of the result.
# NOTE(review): in the saved run the reply contains only the question; no
# "Correct Answer" line was produced despite the prompt asking for one.
print(result2_4["output"])

In Markov Chains, the probability of transitioning from one state to another is represented by the _________.


In [84]:
# Follow-up request that names no topic ("the same topic"), so the result
# depends on what `example_chat2` was asked earlier.
result2_5 = example_chat2({"input": "please give 2 more fill in blank questions on the same topic, give some variety for each question, changing the difficulty to 3"})

In [85]:
# Show the follow-up questions from the "output" key of the result.
print(result2_5["output"])

1. In a Markov Chain, the probability of transitioning from one state to another is represented by the _________ matrix.

Correct Answer:
transition

2. The steady-state distribution of a Markov Chain is the ________ vector.

Correct Answer:
eigenvector
