In [1]:
import os
from typing import List

from dotenv import load_dotenv
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema.messages import SystemMessage
from langchain.vectorstores.pgvector import PGVector


In [2]:
## Load Vector Store
# Load variables from a local .env file into the process environment before
# any setting below is read.
load_dotenv()

# The connection string embeds the database password, so it must not live in
# the notebook. Define it in .env (or the shell environment) instead, e.g.
#   DB_CONNECTION=postgresql://<user>:<password>@<host>:<port>/<database>
# NOTE(review): the password previously committed here should be rotated.
DB_CONNECTION = os.environ.get("DB_CONNECTION")
if not DB_CONNECTION:
    raise RuntimeError(
        "DB_CONNECTION is not set. Add it to your .env file, e.g. "
        "DB_CONNECTION=postgresql://<user>:<password>@<host>:<port>/<database>"
    )

# Name of the PGVector collection that get_vectorstore() opens.
COLLECTION_NAME = "documents"

def get_vectorstore():
    """Return a PGVector store for the configured collection.

    The store is built with a fresh ``OpenAIEmbeddings`` instance as its
    embedding function and uses the module-level ``COLLECTION_NAME`` and
    ``DB_CONNECTION`` settings.
    """
    store_settings = dict(
        embedding_function=OpenAIEmbeddings(),
        collection_name=COLLECTION_NAME,
        connection_string=DB_CONNECTION,
    )
    return PGVector(**store_settings)

In [11]:
# Open the vector store once at notebook level and expose it as a retriever;
# create_chain() below reads this module-level `retriever`.
db = get_vectorstore()
retriever = db.as_retriever()
def create_chain(
    system_message_text,
    *,
    course_retriever=None,
    model_name="gpt-3.5-turbo-1106",
    temperature=0.3,
    verbose=False,
):
    """Build a conversational retrieval agent over the course content.

    Args:
        system_message_text: Text used as the agent's system message.
        course_retriever: Retriever wrapped by the ``search_course_content``
            tool. When omitted, the notebook-level ``retriever`` is used, so
            existing ``create_chain(prompt)`` calls behave exactly as before.
        model_name: Model name passed to ``ChatOpenAI``.
        temperature: Sampling temperature passed to ``ChatOpenAI``.
        verbose: Forwarded to ``create_conversational_retrieval_agent``.

    Returns:
        Whatever ``create_conversational_retrieval_agent`` returns. In this
        notebook that object is called with a user message string and yields
        a dict whose ``"output"`` entry is printed.
    """
    # Resolve the default at call time so the function picks up whichever
    # `retriever` the notebook currently has, instead of failing at definition.
    if course_retriever is None:
        course_retriever = retriever

    ## Step 1: Create LLM
    llm = ChatOpenAI(temperature=temperature, model_name=model_name)

    ## Step 2: Create Retriever Tool
    tool = create_retriever_tool(
        course_retriever,
        "search_course_content",
        "Searches and returns documents regarding the contents of the course and notes from the instructor.",
    )
    tools = [tool]

    ## Step 3: Create System Message from the Text Passed in as an Argument
    system_message = SystemMessage(content=system_message_text)

    ## Return the Chain
    return create_conversational_retrieval_agent(
        llm=llm,
        tools=tools,
        verbose=verbose,
        system_message=system_message,
    )

In [16]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory

# System prompt passed to create_chain(): it names the assistant AcademiaGPT and
# describes the three per-message inputs (topic, difficulty, type) plus the
# question types and the 1-3 difficulty scale.
# NOTE(review): this text is sent to the model verbatim, so any wording change
# here changes agent behaviour.
system_prompt= """ You are a very smart, helpful, respectful and kind question generator named AcademiaGPT.
You assist the professors teaching computer science, data analysis and data science courses in creating test and practice questions along with it's answers.
You have 10+ years experience of coding and is proficient in all the libraries in the programming language python.
Your responses typically includes dataset and python code snippets.
For each message from the user, you will have three inputs:
topic: string
difficulty: integer
type: string

You will generate questions of type Single Selection, Multiple selection, Fill in the blank and code.
You will generate questions of different difficulty for the mentioned topic and type.
You are going to generate questions only based on the course materials.
If the user gives a topic or type that is unavailable, you can inform the user about it.
The difficulty will be an integer on a scale of 1 to 3 with 1 being an easy question and 3 being the most difficult question.


"""
# Build the agent that the chat cells below call with user requests.
example_chat = create_chain(system_prompt)

In [18]:
# First request: a difficulty-2 coding question on pandas aggregate functions.
result = example_chat(
    "For the topic pandas aggregate functions, give me question of difficulty 2 which is a code"
)
# Show what kind of object the agent call returns.
type(result)

dict

In [32]:
import yaml
#result= example_chat("For the same topic give me question of difficulty 3 which is multiple selection")
#print(result["output"])

In [9]:
# Follow-up request that only changes the difficulty level.
result = example_chat(
    "Give me difficulty 2 then"
)
print(result["output"])

### Question

What are some commonly used built-in aggregations in pandas?

### Choices

1. Mean, Median, Variance
2. Sum, Standard Deviation, Minimum
3. Maximum, Count, Mode

### Correct Answer
1. Mean, Median, Variance


In [10]:
# Ask for one more question of the multiple-selection type.
result = example_chat(
    "Give another multiple selection question"
)
print(result["output"])

### Question

What are the three steps involved in the split-apply-combine process in pandas groupby operations?

### Choices

1. Split, Apply, Combine
2. Extract, Transform, Load
3. Filter, Aggregate, Visualize

### Correct Answer
1. Split, Apply, Combine


In [11]:
# Corrective feedback: the previous answer was single selection, not multiple.
result = example_chat(
    "The above wuestion is a single selection. I need multiple selection."
)
print(result["output"])

### Question

What are some operations that can be performed after obtaining a GroupBy object in pandas?

### Choices

1. Get group, Count, Sum
2. Mean, Median, Mode
3. Filter, Sort, Visualize

### Correct Answer
1. Get group, Count, Sum


In [12]:
# Second correction, this time spelling out what "multiple selection" means.
result = example_chat(
    "This is also a single selection. A multiple selection is the one where the answer choices are more than one"
)
print(result["output"])

### Question

What are some operations that can be performed after obtaining a GroupBy object in pandas?

### Choices

1. Get group, Count, Sum
2. Mean, Median, Mode
3. Filter, Sort, Visualize

### Correct Answer
1. Get group, Count, Sum


In [13]:
# Switch the requested type to fill in the blank and ask the agent to explain itself.
result = example_chat(
    "This is not what I wanted. I need a fill in the blank question. Why did you give me this question"
)
print(result["output"])

### Question

Suppose you have obtained a GroupBy object in pandas. You can use the method `___X` to get the subset of data for a particular group.

### Choices

1. get_group
2. filter_group
3. select_group

### Correct Answer
1. get_group


In [27]:
# Probe with a topic to see how the agent responds when asked about scale-free networks.
result = example_chat(
    "Give me a question on scale-free network"
)
print(result["output"])

I'm sorry, but it seems that there are no specific documents available on the topic of "scale-free network" in the course content. If you have another topic in mind or if there's anything else I can assist you with, please feel free to let me know!


In [13]:
# Fully specified request: topic, difficulty and type given in one message.
result = example_chat(
    "Give me a question on pandas mean function with difficulty 2 and should be a single selection question"
)
print(result["output"])

### Single Selection Question

**Question**

When using the `mean` function in Pandas, what does the function calculate by default?

A. Mean of each column in the DataFrame  
B. Mean of each row in the DataFrame  
C. Mean of all the values in the DataFrame  
D. Mean of the specified column in the DataFrame  

**Correct Answer**
- D. Mean of the specified column in the DataFrame


In [14]:
# Fully specified request for a coding question on pandas groupby.
result = example_chat(
    "Give me a question on pandas groupby with difficulty 2 and should be a coding question"
)
print(result["output"])

### Coding Question

**Question**

Write a Pandas code to calculate the mean of the "value" column in the DataFrame `df` after grouping the data based on the "category" column.

### Solution
```python
# Import pandas
import pandas as pd

# Sample DataFrame
data = {'category': ['A', 'B', 'A', 'B', 'A', 'B'],
        'value': [10, 20, 30, 40, 50, 60]}
df = pd.DataFrame(data)

# Calculate mean after grouping
grouped_mean = df.groupby('category')['value'].mean()
```
