In [18]:
import json
from pathlib import Path

import pandas as pd
from dotenv import load_dotenv
from langchain.callbacks import get_openai_callback
from langchain.chains import LLMChain, SequentialChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

# Read variables from a .env file into the environment before any client is built.
# NOTE(review): presumably this is where the OpenAI credentials come from — confirm.
load_dotenv()  # take environment variables from .env.

True

In [4]:
# Single chat model instance; both the quiz chain and the review chain are
# built on top of it.
llm = ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo")

In [12]:
# Example of the response shape requested from the model: an "mcqs" list whose
# items carry a question, four lettered options and the letter of the answer.
# It is serialized with json.dumps and passed as the "response_json" input.
# NOTE(review): the "{number} MCQs in total" remark below is a Python comment,
# so it is not part of the serialized JSON that ends up in the prompt.
RESPONSE_JSON = {
    "mcqs": [
        {
            "question": "Example question 1?",
            "options": {
                "A": "Option A",
                "B": "Option B",
                "C": "Option C",
                "D": "Option D",
            },
            "answer": "A",
        },
        # //... {number} MCQs in total
    ],
}

In [13]:
# Prompt text for the quiz-generation step.
# Placeholders: {text}, {number}, {subject}, {difficulty_level}, {response_json}.
# The string is sent to the model as written, so any wording change alters the
# prompt itself.
TEMPLATE = """
Text: {text}
You are an expert MCQ maker. Given the above text, your job is to
create {number} multiple choice questions for {subject} students.
The questions should be:
- Appropriate for the {difficulty_level} level
- Non-repetitive and directly relevant to the text
- Accurate and educational

Format your response according to the RESPONSE_JSON structure provided below.
Ensure the MCQs are clearly numbered and include a question with four options
(A, B, C, D), one of which is the correct answer.

### RESPONSE_JSON
{response_json}
"""

In [11]:
# Prompt object for the generation step; the declared variables are the five
# placeholders that appear in TEMPLATE.
quiz_generation_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "difficulty_level", "response_json"],
)

In [14]:
# Generation step: formats quiz_generation_prompt, sends it to `llm`, and
# publishes the model's answer under the "quiz" key.
quiz_chain = LLMChain(
    prompt=quiz_generation_prompt,
    llm=llm,
    verbose=True,
    output_key="quiz",
)

In [15]:
# Prompt text for the review step.
# Placeholders: {subject} and {quiz}; the review prompt declares both as inputs,
# and "quiz" is the key under which the generation chain publishes its output.
# The string is sent to the model as written, so any wording change alters the
# prompt itself.
TEMPLATE2 = """
You are an expert English grammarian and writer. Your task is to evaluate a
Multiple Choice Quiz designed for {subject} students.
Please provide:
1. A brief analysis (max 50 words) on the complexity of the quiz, focusing on
its suitability for the intended cognitive and analytical abilities of the students.
2. Recommendations for modifications if the quiz does not align well with the
students' abilities. This includes updating any questions and adjusting the
difficulty level to better fit the students' capabilities.

### Quiz_MCQs:
{quiz}

### Expert English Writer's Evaluation:
"""

In [16]:
# Prompt object for the review step; it needs the subject and the quiz text.
quiz_evaluation_prompt = PromptTemplate(
    template=TEMPLATE2,
    input_variables=["subject", "quiz"],
)

In [17]:
# Review step: formats quiz_evaluation_prompt, sends it to `llm`, and publishes
# the model's answer under the "review" key.
review_chain = LLMChain(
    prompt=quiz_evaluation_prompt,
    llm=llm,
    verbose=True,
    output_key="review",
)

In [29]:
# Two-step pipeline: quiz_chain runs first, review_chain second. The caller
# supplies the five generation inputs and gets back both "quiz" and "review".
generate_evaluate_chain = SequentialChain(
    input_variables=["text", "number", "subject", "difficulty_level", "response_json"],
    output_variables=["quiz", "review"],
    chains=[quiz_chain, review_chain],
    verbose=True,
)

In [21]:
# Source text that is later passed to the chain as the "text" input.
# NOTE(review): this is an absolute, machine-specific path, so the cell only
# runs on the original author's machine; consider a path relative to a
# configurable data directory.
file_path = "/Users/valentinmonney/Desktop/Generative_ai/mcqgen/data.txt"

# Decode explicitly as UTF-8 so the loaded text does not depend on the
# platform's default locale encoding. read_text opens, reads and closes the
# file in one call.
TEXT = Path(file_path).read_text(encoding="utf-8")

In [23]:
# Serialize the Python dictionary into a JSON-formatted string.
# This cell only displays the result; the chain-invocation cell calls
# json.dumps(RESPONSE_JSON) again to build the actual "response_json" input.
json.dumps(RESPONSE_JSON)

'{"mcqs": [{"question": "Example question 1?", "options": {"A": "Option A", "B": "Option B", "C": "Option C", "D": "Option D"}, "answer": "A"}]}'

In [27]:
# Run parameters; they are passed to the chain as the "number", "subject" and
# "difficulty_level" inputs and fill the matching placeholders in TEMPLATE.
NUMBER = 5
SUBJECT = "machine learning"
DIFFICULTY_LEVEL = "easy"

In [32]:
# Token usage tracking in LangChain, reference:
# https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

# Run the whole pipeline inside the OpenAI callback context. `cb` is kept
# around because the next cell reads its token and cost counters.
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        dict(
            text=TEXT,
            number=NUMBER,
            subject=SUBJECT,
            difficulty_level=DIFFICULTY_LEVEL,
            response_json=json.dumps(RESPONSE_JSON),
        ),
    )



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text: The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence. The synonym self-teaching computers was also used in this time period.

Although the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes. In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells. Hebb's model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neuron

In [33]:
# Report the counters collected by the callback, one per line.
print(
    f"Total Tokens:{cb.total_tokens}",
    f"Prompt Tokens:{cb.prompt_tokens}",
    f"Completion Tokens:{cb.completion_tokens}",
    f"Total Cost:{cb.total_cost}",
    sep="\n",
)

Total Tokens:1697
Prompt Tokens:1226
Completion Tokens:471
Total Cost:0.002781


In [34]:
# Display the chain's full result mapping. It echoes the inputs (the whole
# source text appears under "text"), so the repr is long.
response

{'text': 'The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence. The synonym self-teaching computers was also used in this time period.\n\nAlthough the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes. In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells. Hebb\'s model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data. Other researchers who have studied human cognitive systems contributed to the modern

In [38]:
def _strip_code_fence(raw: str) -> str:
    """Return ``raw`` without a surrounding Markdown code fence.

    Chat models sometimes wrap JSON answers in a fenced block (three backticks,
    optionally followed by a language tag such as ``json``). ``json.loads``
    rejects that wrapper, so it is removed before parsing. Text that does not
    start with a fence is returned unchanged apart from outer whitespace.

    Args:
        raw: The text produced by the model.

    Returns:
        The text between the opening fence line and the closing fence, or the
        stripped input when no opening fence is present.
    """
    body = raw.strip()
    if not body.startswith("```"):
        return body
    # Discard the entire opening fence line, including any language tag.
    _, _, body = body.partition("\n")
    body = body.rstrip()
    if body.endswith("```"):
        body = body[: -len("```")]
    return body.strip()


# Index with ["quiz"] instead of .get(): a missing key then fails right here
# with a KeyError rather than passing None on to the JSON parser.
quiz = json.loads(_strip_code_fence(response["quiz"]))

In [43]:
# Top-level keys of the parsed quiz.
quiz.keys()

dict_keys(['mcqs'])

In [49]:
# Full (key, value) view of the parsed quiz, i.e. every generated MCQ.
quiz.items()

dict_items([('mcqs', [{'question': "Who coined the term 'machine learning'?", 'options': {'A': 'Arthur Samuel', 'B': 'Donald Hebb', 'C': 'Walter Pitts', 'D': 'Warren McCulloch'}, 'answer': 'A'}, {'question': "In which year was the term 'machine learning' coined?", 'options': {'A': '1950', 'B': '1959', 'C': '1960', 'D': '1973'}, 'answer': 'B'}, {'question': 'Which book introduced a theoretical neural structure formed by certain interactions among nerve cells?', 'options': {'A': 'The Organization of Behavior', 'B': 'Learning Machines', 'C': 'Computing Machinery and Intelligence', 'D': 'Research into Machine Learning'}, 'answer': 'A'}, {'question': 'What was the purpose of the Cybertron machine developed by Raytheon Company?', 'options': {'A': 'To analyze sonar signals', 'B': 'To analyze electrocardiograms', 'C': 'To analyze speech patterns', 'D': 'All of the above'}, 'answer': 'D'}, {'question': 'What are the two objectives of modern-day machine learning?', 'options': {'A': 'To classify 

In [51]:
# Flatten every MCQ into one record: the question, all options joined as
# "A: ... | B: ..." in their original order, and the letter of the answer.
quiz_table_data = [
    {
        "MCQ": item["question"],
        "Choices": " | ".join(
            f"{letter}: {text}" for letter, text in item["options"].items()
        ),
        "Correct": item["answer"],
    }
    for item in quiz["mcqs"]
]

In [52]:
# Show the flattened records before they are turned into a DataFrame.
quiz_table_data

[{'MCQ': "Who coined the term 'machine learning'?",
  'Choices': 'A: Arthur Samuel | B: Donald Hebb | C: Walter Pitts | D: Warren McCulloch',
  'Correct': 'A'},
 {'MCQ': "In which year was the term 'machine learning' coined?",
  'Choices': 'A: 1950 | B: 1959 | C: 1960 | D: 1973',
  'Correct': 'B'},
 {'MCQ': 'Which book introduced a theoretical neural structure formed by certain interactions among nerve cells?',
  'Choices': 'A: The Organization of Behavior | B: Learning Machines | C: Computing Machinery and Intelligence | D: Research into Machine Learning',
  'Correct': 'A'},
 {'MCQ': 'What was the purpose of the Cybertron machine developed by Raytheon Company?',
  'Choices': 'A: To analyze sonar signals | B: To analyze electrocardiograms | C: To analyze speech patterns | D: All of the above',
  'Correct': 'D'},
 {'MCQ': 'What are the two objectives of modern-day machine learning?',
  'Choices': 'A: To classify data and make predictions | B: To analyze sonar signals and electrocardiogr

In [55]:
# Tabular form of the quiz: one row per MCQ with the MCQ / Choices / Correct
# columns built in the records above.
# NOTE(review): this rebinds `quiz`, which until now held the parsed JSON dict.
# Re-running the cell that reads quiz["mcqs"] after this one will no longer see
# that dict; a separate name for the DataFrame would keep the cells re-runnable.
quiz = pd.DataFrame(quiz_table_data)

In [56]:
# Persist the table as CSV without the row index. The file name is relative, so
# it is written to the kernel's current working directory.
quiz.to_csv("machinelearning.csv", index=False)

In [9]:
# NOTE(review): these imports sit in the last cell rather than in the import
# cell at the top of the notebook.
from datetime import datetime
import pytz

# Current Zurich wall-clock time formatted as "YYYY_MM_DD_HH_MM_SS".
# The value is only displayed here; no visible cell assigns or uses it.
datetime.now(pytz.timezone("Europe/Zurich")).strftime("%Y_%m_%d_%H_%M_%S")

'2024_01_02_19_22_25'