In [1]:
import spacy
import random
from collections import Counter


In [59]:
# input text
text = """
The Greek historian knew what he was talking about. The Nile River fed Egyptian civilization for hundreds of years. 
The Longest River the Nile is 4,160 miles long—the world’s longest river. It begins near the equator in Africa and 
flows north to the Mediterranean Sea. In the south the Nile churns with cataracts. A cataract is a waterfall. Near the 
sea the Nile branches into a delta. A delta is an area near a river’s mouth where the water deposits fine soil called silt. 
In the delta, the Nile divides into many streams. The river is called the upper Nile in the south and the lower Nile in the
north. For centuries, heavy rains in Ethiopia caused the Nile to flood every summer. The floods deposited rich soil along the 
Nile’s shores. This soil was fertile, which means it was good for growing crops. Unlike the Tigris and Euphrates,
the Nile River flooded at the same time every year, so farmers could predict when to plant their crops.
"""

num_questions = 5

In [60]:
nlp = spacy.load('en_core_web_sm')
# Process the text with spaCy
doc = nlp(text)

# Extract sentences from the text
sentences = [sent.text for sent in doc.sents]


In [61]:
# doc
sentences

['\nThe Greek historian knew what he was talking about.',
 'The Nile River fed Egyptian civilization for hundreds of years. \n',
 'The Longest River the Nile is 4,160 miles long—the world’s longest river.',
 'It begins near the equator in Africa and \nflows north to the Mediterranean Sea.',
 'In the south the Nile churns with cataracts.',
 'A cataract is a waterfall.',
 'Near the \nsea the Nile branches into a delta.',
 'A delta is an area near a river’s mouth where the water deposits fine soil called silt. \n',
 'In the delta, the Nile divides into many streams.',
 'The river is called the upper Nile in the south and the lower Nile in the\nnorth.',
 'For centuries, heavy rains in Ethiopia caused the Nile to flood every summer.',
 'The floods deposited rich soil along the \nNile’s shores.',
 'This soil was fertile, which means it was good for growing crops.',
 'Unlike the Tigris and Euphrates,\nthe Nile River flooded at the same time every year, so farmers could predict when to plant t

In [62]:
# Randomly select sentences to from questions
selected_sentences = random.sample(sentences, (min(num_questions, len(sentences))))

In [68]:
mcqs = []

# Generate MCQs for each selected 
for sentence in selected_sentences:
    sentence = sentence.lower()
    sent_doc = nlp(sentence)
    
    # Extract entities (nouns) from sentence
    nouns = [token.text for token in sent_doc if token.pos_=='NOUN']
    
    
    
    if len(nouns)<2:
        continue
    noun_counts = Counter(nouns)
    
              
    if noun_counts:
        subject = noun_counts.most_common(1)[0][0]
        
        
        answer_choice = [subject]
        question_stem = sentence.replace(subject, "________")
        
        
        for _ in range(3):
            distractor = random.choice(list(set(nouns) - set([subject])))
            answer_choice.append(distractor)
        
        random.shuffle(answer_choice)
        
        correct_answer = chr(64 + answer_choice.index(subject) +1)
        print(correct_answer)
        
        mcqs.append((question_stem, answer_choice, correct_answer))
        

A
B
B
D
B


In [69]:
print(mcqs)

[('a ________ is a waterfall.', ['cataract', 'waterfall', 'waterfall', 'waterfall'], 'A'), ('near the \n________ the nile branches into a delta.', ['branches', 'sea', 'delta', 'delta'], 'B'), ('for ________, heavy rains in ethiopia caused the nile to flood every summer.', ['summer', 'centuries', 'rains', 'summer'], 'B'), ('a ________ is an area near a river’s mouth where the water deposits fine soil called silt. \n', ['water', 'silt', 'silt', 'delta'], 'D'), ('in the ________ the nile churns with cataracts.', ['cataracts', 'south', 'cataracts', 'cataracts'], 'B')]


In [95]:
import spacy
from collections import Counter
import random

# Load English tokenizer, tagger, parser, NER, and word vectors
nlp = spacy.load("en_core_web_sm")

def generate_mcqss(text, num_questions=5):
    # text = clean_text(text)
    if text is None:
        return []

    # Process the text with spaCy
    doc = nlp(text)

    # Extract sentences from the text
    sentences = [sent.text for sent in doc.sents]

    # Ensure that the number of questions does not exceed the number of sentences
    num_questions = min(num_questions, len(sentences))

    # Randomly select sentences to form questions
    selected_sentences = random.sample(sentences, num_questions)

    # Initialize list to store generated MCQs
    mcqs = []

    # Generate MCQs for each selected sentence
    for sentence in selected_sentences:
        # Process the sentence with spaCy
        sent_doc = nlp(sentence)

        # Extract entities (nouns) from the sentence
        nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"]

        # Ensure there are enough nouns to generate MCQs
        if len(nouns) < 2:
            continue

        # Count the occurrence of each noun
        noun_counts = Counter(nouns)

        # Select the most common noun as the subject of the question
        if noun_counts:
            subject = noun_counts.most_common(1)[0][0]

            # Generate the question stem
            question_stem = sentence.replace(subject, "______")

            # Generate answer choices
            answer_choices = [subject]

            # Add some random words from the text as distractors
            distractors = list(set(nouns) - {subject})

            # Ensure there are at least three distractors
            while len(distractors) < 3:
                distractors.append("[Distractor]")  # Placeholder for missing distractors

            random.shuffle(distractors)
            for distractor in distractors[:3]:
                answer_choices.append(distractor)

            # Shuffle the answer choices
            random.shuffle(answer_choices)

            # Append the generated MCQ to the list
            correct_answer = chr(64 + answer_choices.index(subject) + 1)  # Convert index to letter
            mcqs.append((question_stem, answer_choices, correct_answer))

    return mcqs

In [101]:
# Test the function with the provided text

tech_text = """
The universe is vast and filled with mysteries that continue to captivate scientists and astronomers alike. From the depths of space to the farthest reaches of distant galaxies, the cosmos holds countless wonders waiting to be explored.

One of the fundamental concepts in astrophysics is the Big Bang theory, which posits that the universe originated from a singular, infinitely dense point nearly 13.8 billion years ago. Over time, the universe expanded and cooled, giving rise to the formation of galaxies, stars, and planets.

Galaxies are immense systems containing billions or even trillions of stars, as well as various types of interstellar matter such as gas, dust, and dark matter. The Milky Way, our home galaxy, is a spiral galaxy containing hundreds of billions of stars, including our own Sun.

Stars are the celestial objects that shine brightly in the night sky, fueled by nuclear fusion reactions occurring in their cores. They come in a variety of sizes, colors, and temperatures, with some stars being much larger and hotter than others. The life cycle of a star depends on its mass, with massive stars undergoing supernova explosions at the end of their lives, while smaller stars like our Sun eventually evolve into white dwarfs.

Planets orbit stars and come in different types, including terrestrial planets like Earth, gas giants like Jupiter, and icy worlds like Neptune. In our solar system, eight planets revolve around the Sun, each with its own unique characteristics and features.

Space exploration has allowed humanity to venture beyond Earth and explore the cosmos firsthand. Missions to the Moon, Mars, and beyond have expanded our understanding of the universe and laid the groundwork for future exploration and colonization of other worlds.

The search for extraterrestrial life is a central focus of space exploration, driven by the desire to uncover whether life exists beyond Earth. Scientists study the conditions on other planets and moons in our solar system, as well as exoplanets orbiting distant stars, in the hope of finding signs of life elsewhere in the universe.

The study of black holes, mysterious regions of spacetime where gravity is so strong that nothing, not even light, can escape, is another area of active research in astrophysics. Black holes come in various sizes, from stellar-mass black holes formed from the collapse of massive stars to supermassive black holes that lurk at the centers of galaxies.

Cosmology, the scientific study of the origin, evolution, and eventual fate of the universe, seeks to answer some of the most profound questions about our existence. By analyzing cosmic microwave background radiation, the distribution of galaxies, and the structure of the universe on the largest scales, cosmologists aim to unravel the mysteries of the cosmos and our place within it.

"""



mcqs = generate_mcqss(tech_text, num_questions=10)  # Pass the selected number of questions
# Ensure each MCQ is formatted correctly as (question_stem, answer_choices, correct_answer)
mcqs_with_index = [(i + 1, mcq) for i, mcq in enumerate(mcqs)]

for question in mcqs_with_index:
    print("Question", question[0], ":", question[1][0])
    print("Options:")
    options = question[1][1]
    for i, option in enumerate(options):
        print(f"{chr(97 + i)}) {option}")
    print("Correct Answer:", question[1][2])
    print("\n")
    
    

Question 1 : 
The ______ is vast and filled with mysteries that continue to captivate scientists and astronomers alike.
Options:
a) astronomers
b) universe
c) mysteries
d) scientists
Correct Answer: B


Question 2 : Over ______, the universe expanded and cooled, giving rise to the formation of galaxies, stars, and planets.


Options:
a) universe
b) stars
c) time
d) planets
Correct Answer: C


Question 3 : The life cycle of a star depends on its mass, with massive ______ undergoing supernova explosions at the end of their lives, while smaller ______ like our Sun eventually evolve into white dwarfs.


Options:
a) cycle
b) end
c) explosions
d) stars
Correct Answer: D


Question 4 : In our solar ______, eight planets revolve around the Sun, each with its own unique characteristics and features.


Options:
a) planets
b) system
c) features
d) characteristics
Correct Answer: B


Question 5 : Galaxies are immense systems containing billions or even trillions of stars, as well as various types 

In [97]:
print(mcqs_with_index[1][1])

('near the \n________ the nile branches into a delta.', ['branches', 'sea', 'delta', 'delta'], 'B')
