In [4]:
import json
import time

import torch
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

In [8]:
# Hugging Face checkpoint identifiers to test, in the order they are listed.
# NOTE(review): "microsoft/phi-2" and "tiiuae/falcon-7b-instruct" look like
# decoder-only checkpoints -- confirm which loader class the consumer uses.
MODEL_NAMES = [
    "Salesforce/codet5-base",
    "describeai/gemini",
    "Babelscape/rebel-large",
    "KETI-AIR/ke-t5-base-ko",
    "microsoft/phi-2",
    "tiiuae/falcon-7b-instruct",
    # Currently disabled candidates (kept for reference):
    #   "facebook/blenderbot-400M-distill"
    #   "google/t5-v1_1-xxl"
    #   "CohereForAI/aya-101"
]

In [9]:
# Source passage the questions are generated from. Written line by line so the
# trailing spaces and line breaks of the passage are explicit.
TEXT = (
    "Electromagnetic induction is the process of generating an electromotive force (EMF) across a conductor \n"
    "when it is exposed to a changing magnetic field. This principle was discovered by Michael Faraday in 1831 \n"
    "and is the basis for many electrical devices."
)

# Quiz parameters substituted into the prompt.
NUMBER = 5
SUBJECT = "Physics"
TONE = "simple"

# Example of the expected answer layout: two placeholder entries keyed "1" and
# "2", each with a question, four lettered options and the correct answer.
RESPONSE_JSON = {
    str(question_no): {
        "mcq": "multiple choice question",
        "options": {letter: "choice here" for letter in "abcd"},
        "correct": "correct answer",
    }
    for question_no in (1, 2)
}

# Prompt template, spelled out one line at a time so the leading newline, the
# trailing space after "questions" and the blank line are all explicit.
TEMPLATE = (
    "\n"
    "Text:{text}\n"
    "You are an expert MCQ maker. Given the above text, create {number} multiple-choice questions \n"
    "for {subject} students at a difficulty level of {tone}. Ensure questions are unique and relevant.\n"
    "\n"
    "### RESPONSE_JSON\n"
    "{response_json}\n"
)

# Prompt object exposing the five placeholders used in TEMPLATE.
quiz_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "tone", "response_json"],
)

In [15]:
# Generate MCQs with Mistral-7B-Instruct through a LangChain LLMChain and time the call.
# Reads TEXT, NUMBER, SUBJECT, TONE, RESPONSE_JSON and quiz_prompt from earlier cells.

# Load Mistral model and tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # Use float16 for efficiency
    device_map="auto",  # Automatically assign model to GPU if available
)

# Define pipeline (the stray `f` after the opening parenthesis was a syntax error)
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=1024,
    temperature=0.7,
    do_sample=True,
)

# Wrap in LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)

# Create LangChain chain
quiz_chain = LLMChain(llm=llm, prompt=quiz_prompt, output_key="quiz", verbose=False)

# Measure execution time of the single chain invocation
start_time = time.time()
response = quiz_chain.run({
    "text": TEXT,
    "number": NUMBER,
    "subject": SUBJECT,
    "tone": TONE,
    # The example layout is serialized so it is inserted into the prompt as JSON text.
    "response_json": json.dumps(RESPONSE_JSON),
})
end_time = time.time()

# Print results
print("Generated MCQs:", response)
print("Execution Time (seconds):", round(end_time - start_time, 2))

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   1%|1         | 62.9M/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

ChunkedEncodingError: ('Connection broken: IncompleteRead(2963335684 bytes read, 2036483652 more expected)', IncompleteRead(2963335684 bytes read, 2036483652 more expected))

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Phi-4 mini (instruct) checkpoint, loaded as float32 with device_map="cpu".
model_name = "microsoft/Phi-4-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
)

# Prompt to complete.
prompt = "Explain the theory of relativity in simple terms."

# Encode the prompt as PyTorch tensors and move them to the CPU.
inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

# Run generation with gradient tracking disabled; max_length=200 is passed to generate().
with torch.no_grad():
    output = model.generate(**inputs, max_length=200)

# Decode the first returned sequence without special tokens and show it.
response = tokenizer.decode(output[0], skip_special_tokens=True)
print(response)

tokenizer_config.json:   0%|          | 0.00/2.93k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/3.91M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/15.5M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/249 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/587 [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   4%|4         | 210M/4.90G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.77G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]