### Import Libraries

In [14]:
import nltk
import string
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from flashtext import KeywordProcessor
from pywsd.lesk import adapted_lesk
from nltk.corpus import wordnet as wn
import requests
import random
import re
import PyPDF2
from PyPDF2 import PdfReader

from rake_nltk import Rake

r = Rake()

In [15]:
file = open(r"C:\Users\vivek\Downloads\MCQ Generator Project\Articles\ahistoryofdepression.txt", encoding='utf-8')
text = file.read()

In [16]:
text

'When is sorrow sickness? So begins Jonathan Sadowsky’s The Empire of Depression, a history riven with professional turf wars around where to draw the line between normal sadness and something more serious — now, across much of the world, called depression. He argues against reductionism and dogma. Instead of getting stuck in old disagreements about whether depression is caused by a chemical imbalance or by social inequality, Sadowsky urges that depression can be psychological, biological and social, just as it can be a real illness even if it is cultural.\n\nGiven that the World Health Organization names depression as a major contributor to the global burden of disease, tracing its history is a significant task. And it is an important one, given the mental-health crisis attending the COVID-19 pandemic. It is no mean feat to characterize something that has ever-shifting and contested boundaries dependent on time and place. Sadowsky, a historian of medicine, offers three possible reason

In [17]:
# Function to load Lottie animations
def load_lottie_url(url: str):
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()


In [18]:
# Download necessary NLTK data
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('popular')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\vivek\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\vivek\AppData\Roaming\nltk

True

In [19]:
#Step 2- Extract the important words(keywords) from the text article that can be used to create MCQ using RAKE

# Define the function to extract important words
def getImportantWords(art):
    r = Rake(stopwords=stopwords.words('english') + list(string.punctuation))
    r.extract_keywords_from_text(art)
    keyphrases = r.get_ranked_phrases_with_scores()
    result = [keyphrase for _, keyphrase in keyphrases[:25]]  # Get the top 25 keyphrases
    return result

In [20]:
# Split the text into sentences
def splitTextToSents(art):
    s = [sent_tokenize(art)]
    s = [y for x in s for y in x]
    s = [sent.strip() for sent in s if len(sent) > 15]
    return s

In [21]:
# Map sentences to keywords
def mapSents(impWords, sents):
    processor = KeywordProcessor()
    keySents = {}
    for word in impWords:
        keySents[word] = []
        processor.add_keyword(word)
    for sent in sents:
        found = processor.extract_keywords(sent)
        for each in found:
            keySents[each].append(sent)
    for key in keySents.keys():
        temp = keySents[key]
        temp = sorted(temp, key=len, reverse=True)
        keySents[key] = temp
    return keySents

In [22]:
# Get the sense of the word
def getWordSense(sent, word):
    word = word.lower()
    if len(word.split()) > 0:
        word = word.replace(" ", "_")
    synsets = wn.synsets(word, 'n')
    if synsets:
        wup = max_similarity(sent, word, 'wup', pos='n')
        adapted_lesk_output = adapted_lesk(sent, word, pos='n')
        lowest_index = min(synsets.index(wup), synsets.index(adapted_lesk_output))
        return synsets[lowest_index]
    else:
        return None

In [23]:
#Step 6- Get distractor from WordNet. These distractors work on the basis of hypernym and hyponym explained in detail in the documentation.

def getDistractors(syn, word):
    dists = []
    word = word.lower()
    actword = word
    if len(word.split()) > 0:
        word.replace(" ", "_")
    hypernym = syn.hypernyms()
    if len(hypernym) == 0:
        return dists
    for each in hypernym[0].hyponyms():
        name = each.lemmas()[0].name()
        if name == actword:
            continue
        name = name.replace("_", " ")
        name = " ".join(w.capitalize() for w in name.split())
        if name is not None and name not in dists:
            dists.append(name)
    return dists

In [24]:
# Get distractors from ConceptNet
def getDistractors2(word):
    word = word.lower()
    actword = word
    if len(word.split()) > 0:
        word = word.replace(" ", "_")
    dists = []
    url = f"http://api.conceptnet.io/query?node=/c/en/{word}/n&rel=/r/PartOf&start=/c/en/{word}&limit=5"
    obj = requests.get(url).json()
    for edge in obj['edges']:
        link = edge['end']['term']
        url2 = f"http://api.conceptnet.io/query?node={link}&rel=/r/PartOf&end={link}&limit=10"
        obj2 = requests.get(url2).json()
        for edge in obj2['edges']:
            word2 = edge['start']['label']
            if word2 not in dists and actword.lower() not in word2.lower():
                dists.append(word2)
    return dists


In [12]:
# Step 8 (continued): Find and map the distractors to the keywords
mappedDists = {}
correctAnswers = {}  # Dictionary to store correct answers

for each in mappedSents:
    wordsense = getWordSense(mappedSents[each][0], each)
    if wordsense:
        dists = getDistractors(wordsense, each)
        if len(dists) == 0:
            dists = getDistractors2(each)
        if len(dists) != 0:
            mappedDists[each] = dists
            correctAnswers[each] = each.capitalize()  # Store correct answer
    else:
        dists = getDistractors2(each)
        if len(dists) > 0:
            mappedDists[each] = dists
            correctAnswers[each] = each.capitalize()

In [13]:
import re 

In [14]:
# Step 9- The final step is to present our MCQ in a nice and readable manner.

num_mcqs = int(input("Enter the number of questions you want : "))  # control the number of MCQs generated

print("**************************************        Multiple Choice Questions        *******************************")
print()

import random
iterator = 1  # To keep the count of the questions
for each in mappedDists:
    if iterator > num_mcqs:
        break  # exit the loop if the desired number of MCQs has been reached
    sent = mappedSents[each][0]
    p = re.compile(each, re.IGNORECASE)  # Converts into regular expression for pattern matching
    op = p.sub("________", sent)  # Replaces the keyword with underscores(blanks)
    correct_answer = each.capitalize()  # The correct answer
    print("Question %s-> "%(iterator), op)  # Prints the question along with a question number
    options = [each.capitalize()] + mappedDists[each]  # Capitalizes the options
    options = options[:4]  # Selects only 4 options
    opts = ['a', 'b', 'c', 'd']
    random.shuffle(options)  # Shuffle the options so that order is not always same
    for i, ch in enumerate(options):
        print("\t", opts[i], ") ", ch)  # Print the options
    print("Correct Answer: ", correct_answer)  # Print the correct answer
    print()
    iterator += 1  # Increase the counter

**************************************        Multiple Choice Questions        *******************************

Question 1->  The power to say who’s rational and who isn’t, and to detain people or treat them without consent, is perhaps the starkest reminder of why treating ________ is not just like administering insulin for diabetes, and of why stigma looms large despite (or because of) the understandable appeals to biochemistry.
	 a )  Aquifer
	 b )  Beach
	 c )  Cave
	 d )  Depression
Correct Answer:  Depression

Question 2->  It is alarming, then, that apart from using “________” as an analogy for the global dominance of Western psychiatry in interpreting distress, Sadowsky devotes little attention here to power and politics — especially given his previous work on colonialism (in the 1999 book Imperial Bedlam).
	 a )  Archduchy
	 b )  Duchy
	 c )  Barony
	 d )  Empire
Correct Answer:  Empire

Question 3->  In fact, many forms of resistance have been deemed symptoms of mental illness