### Import Libraries

In [1]:
import nltk
import string
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from flashtext import KeywordProcessor
from pywsd.lesk import adapted_lesk
from nltk.corpus import wordnet as wn
import requests
import random
import re
import PyPDF2
from PyPDF2 import PdfReader

from rake_nltk import Rake

r = Rake()

Warming up PyWSD (takes ~10 secs)... took 9.757792949676514 secs.


In [2]:
# Download necessary NLTK data
#nltk.download('stopwords')
#nltk.download('punkt')
#nltk.download('popular')

In [3]:
#Step 2- Extract the important words(keywords) from the text article that can be used to create MCQ using RAKE

# Define the function to extract important words
def getImportantWords(art):
    r = Rake(stopwords=stopwords.words('english') + list(string.punctuation))
    r.extract_keywords_from_text(art)
    keyphrases = r.get_ranked_phrases_with_scores()
    result = [keyphrase for _, keyphrase in keyphrases[:25]]  # Get the top 25 keyphrases
    return result

In [4]:
# Split the text into sentences
def splitTextToSents(art):
    s = [sent_tokenize(art)]
    s = [y for x in s for y in x]
    s = [sent.strip() for sent in s if len(sent) > 15]
    return s

In [5]:
# Map sentences to keywords
def mapSents(impWords, sents):
    processor = KeywordProcessor()
    keySents = {}
    for word in impWords:
        keySents[word] = []
        processor.add_keyword(word)
    for sent in sents:
        found = processor.extract_keywords(sent)
        for each in found:
            keySents[each].append(sent)
    for key in keySents.keys():
        temp = keySents[key]
        temp = sorted(temp, key=len, reverse=True)
        keySents[key] = temp
    return keySents

In [6]:
# Get the sense of the word
def getWordSense(sent, word):
    word = word.lower()
    if len(word.split()) > 0:
        word = word.replace(" ", "_")
    synsets = wn.synsets(word, 'n')
    if synsets:
        wup = max_similarity(sent, word, 'wup', pos='n')
        adapted_lesk_output = adapted_lesk(sent, word, pos='n')
        lowest_index = min(synsets.index(wup), synsets.index(adapted_lesk_output))
        return synsets[lowest_index]
    else:
        return None

In [7]:
# Get distractor from WordNet. These distractors work on the basis of hypernym and hyponym explained in detail in the documentation.

def getDistractors(syn, word):
    dists = []
    word = word.lower()
    actword = word
    if len(word.split()) > 0:
        word.replace(" ", "_")
    hypernym = syn.hypernyms()
    if len(hypernym) == 0:
        return dists
    for each in hypernym[0].hyponyms():
        name = each.lemmas()[0].name()
        if name == actword:
            continue
        name = name.replace("_", " ")
        name = " ".join(w.capitalize() for w in name.split())
        if name is not None and name not in dists:
            dists.append(name)
    return dists

In [8]:
# Get distractors from ConceptNet
def getDistractors2(word):
    word = word.lower()
    actword = word
    if len(word.split()) > 0:
        word = word.replace(" ", "_")
    dists = []
    url = f"http://api.conceptnet.io/query?node=/c/en/{word}/n&rel=/r/PartOf&start=/c/en/{word}&limit=5"
    obj = requests.get(url).json()
    for edge in obj['edges']:
        link = edge['end']['term']
        url2 = f"http://api.conceptnet.io/query?node={link}&rel=/r/PartOf&end={link}&limit=10"
        obj2 = requests.get(url2).json()
        for edge in obj2['edges']:
            word2 = edge['start']['label']
            if word2 not in dists and actword.lower() not in word2.lower():
                dists.append(word2)
    return dists


### Load and Process Text File

In [9]:
# Function to load and read text file
def load_text_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

# Example usage (replace 'path_to_text_file.txt' with the actual path to your text file)
text_file_path =   r"C:\Users\vivek\Downloads\MCQ Generator Project\Articles\ahistoryofdepression.txt" # Change this to the path of your text file
text = load_text_file(text_file_path)

# Process the text
impWords = getImportantWords(text)
sents = splitTextToSents(text)
mappedSents = mapSents(impWords, sents)

# Example output
print("Important Words:", impWords)
print("Sentences:", sents[:5])  # Display first 5 sentences
print("Mapped Sentences:", {k: v[:2] for k, v in mappedSents.items()})  # Display first 2 sentences for each important word


Important Words: ['1999 book imperial bedlam ).', '2006 ); emily martin ’', 'world health organization names depression', 'stigma looms large despite', 'rural north america testify', 'professional turf wars around', 'enslaved africans fleeing brutality', 'precarious living situations —', 'offers three possible reasons', 'see jackie orr ’', 'life presents many reasons', 'using “ empire ”', 'sadowsky devotes little attention', 'yet also diagnosing anticolonialism', 'begins jonathan sadowsky ’', 'help people find others', 'unequal power relations —', 'disqualifying many people ’', 'politics — especially given', 'health crisis attending', 'depressed — poverty', 'wider societal factors', 'share similar experiences', 'fluoxetine ), quantification', 'duped en masse']
Sentences: ['When is sorrow sickness?', 'So begins Jonathan Sadowsky’s The Empire of Depression, a history riven with professional turf wars around where to draw the line between normal sadness and something more serious — now, a

### Generate MCQs

In [11]:
# Function to generate MCQs
def generate_mcqs(impWords, mappedSents, num_mcqs=5):
    mappedDists = {}
    correctAnswers = {}
    for each in impWords:
        if each not in mappedSents or not mappedSents[each]:
            continue
        distractors = random.sample([k for k in impWords if k != each], 3)
        mappedDists[each] = distractors

    mcqs = []
    iterator = 1
    for each in mappedDists:
        if iterator > num_mcqs:
            break
        if each not in mappedSents or not mappedSents[each]:
            continue
        sent = mappedSents[each][0]
        p = re.compile(each, re.IGNORECASE)
        op = p.sub("________", sent)
        correct_answer = each.capitalize()
        options = [each.capitalize()] + mappedDists[each]
        options = options[:4]
        opts = ['a', 'b', 'c', 'd']
        random.shuffle(options)
        question = {
            "question": f"**Question {iterator}**: {op}",
            "options": {opts[i]: ch for i, ch in enumerate(options)},
            "correct_answer": correct_answer
        }
        mcqs.append(question)
        iterator += 1
    return mcqs

# Example usage
num_mcqs = 10
mcqs = generate_mcqs(impWords, mappedSents, num_mcqs)

# Display MCQs
for mcq in mcqs:
    print(mcq["question"])
    for opt, val in mcq["options"].items():
        print(f"\t {opt}) {val}")
    print(f"**Correct Answer**: {mcq['correct_answer']}\n")


**Question 1**: Given that the ________ as a major contributor to the global burden of disease, tracing its history is a significant task.
	 a) wider societal factors
	 b) World health organization names depression
	 c) 1999 book imperial bedlam ).
	 d) depressed — poverty
**Correct Answer**: World health organization names depression

**Question 2**: The power to say who’s rational and who isn’t, and to detain people or treat them without consent, is perhaps the starkest reminder of why treating depression is not just like administering insulin for diabetes, and of why ________ (or because of) the understandable appeals to biochemistry.
	 a) wider societal factors
	 b) Stigma looms large despite
	 c) disqualifying many people ’
	 d) offers three possible reasons
**Correct Answer**: Stigma looms large despite

**Question 3**: This is more than theoretical, as increasing prescriptions of antidepressants in austerity Britain and opioids in ________.
	 a) health crisis attending
	 b) Rura