<a href="https://colab.research.google.com/github/vrundaa30/Online-voting-system/blob/master/Examination_Checker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Text summarizer and Word Count Code of the Student Answer
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize
from collections import Counter
import pandas as pd

# Fetch the NLTK data packages this cell relies on (tokenizer models,
# stop-word lists and WordNet). In the recorded run they were saved under
# /root/nltk_data.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def count_words(input_text):
    """Return how many whitespace-separated tokens ``input_text`` contains."""
    return len(input_text.split())

def lemmatize_text(text):
    """Tokenize ``text`` and lemmatize every token that is not a stop word.

    Returns a list with one entry per sentence found by ``sent_tokenize``;
    each entry is the list of lemmatized tokens of that sentence, with
    English stop words (compared case-insensitively) left out.
    """
    lemmatizer = WordNetLemmatizer()
    english_stop_words = set(stopwords.words('english'))

    result = []
    for sentence in sent_tokenize(text):
        kept_tokens = []
        for token in word_tokenize(sentence):
            # Stop words are matched on the lower-cased token, but the
            # lemmatizer receives the token with its original casing.
            if token.lower() in english_stop_words:
                continue
            kept_tokens.append(lemmatizer.lemmatize(token))
        result.append(kept_tokens)

    return result

def variable_length_summary(text, summary_length_percentage=20):
    """Build an extractive summary from the highest-scoring sentences of ``text``.

    The text is split into token lists by ``lemmatize_text``. Every sentence
    is scored with the sum of the document-wide frequencies of its tokens,
    and the best-scoring sentences are kept.

    Args:
        text: Text to summarize. Anything that is not a non-blank string
            (``None``, a NaN from an empty spreadsheet cell, ``''``) yields
            an empty summary instead of an error.
        summary_length_percentage: Share of sentences to keep. Values outside
            0-100 are clamped. Any positive share keeps at least one
            sentence, so short answers are not truncated to nothing.

    Returns:
        str: The tokens of the selected sentences joined by single spaces,
        highest-scoring sentence first; ``''`` when there is nothing to
        summarize.
    """
    if not isinstance(text, str) or not text.strip():
        return ''

    lemmatized_sentences = lemmatize_text(text)
    if not lemmatized_sentences:
        return ''

    # Frequency of every token across the whole text
    word_freq = Counter(word for sentence in lemmatized_sentences for word in sentence)

    # A sentence is as important as the summed frequency of its tokens
    sentence_importance = [
        sum(word_freq[word] for word in sentence) for sentence in lemmatized_sentences
    ]

    # Clamp the share so that a negative value cannot turn into a negative
    # slice bound, and round up to one sentence when int() truncates to zero
    # (e.g. a single-sentence answer at 90%).
    percentage = min(max(summary_length_percentage, 0), 100)
    num_sentences = int(len(lemmatized_sentences) * percentage / 100)
    if percentage > 0:
        num_sentences = max(num_sentences, 1)

    # Sentence indices ordered from most to least important
    ranked_indices = sorted(
        range(len(sentence_importance)),
        key=sentence_importance.__getitem__,
        reverse=True,
    )
    summary_sentences = [lemmatized_sentences[i] for i in ranked_indices[:num_sentences]]

    # Join the summary sentences to form the final summary
    return ' '.join(' '.join(sentence) for sentence in summary_sentences)

# Load the graded answers from the Excel workbook
data = pd.read_excel('/content/Dataset-Original.xlsx')

# Summarize every entry of the "student answer" column
# (summary_length_percentage=90)
summaries = [
    variable_length_summary(answer_text, summary_length_percentage=90)
    for answer_text in data['student answer']
]

# Word count of each summary, in the same row order
word_counts = [count_words(summary_text) for summary_text in summaries]

# Attach both results to the DataFrame as new columns
data['summary'] = summaries
data['word count'] = word_counts

# Preview the first rows, now including the summaries and word counts
data.head()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


FileNotFoundError: [Errno 2] No such file or directory: '/content/Dataset-Original.xlsx'

In [None]:
# Cosine_Similarity Code

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
import numpy as np

# Select necessary columns
df = data[['student answer', 'answer key', 'max marks', 'summary', 'word count']]

# Split dataset into train and test. Each part is copied so that adding the
# 'match_percentage' column below writes to an independent frame rather than
# to a slice of `df` (which can trigger pandas' SettingWithCopyWarning).
train_df, test_df = (
    part.copy() for part in train_test_split(df, test_size=0.2, random_state=42)
)

# Define TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit on the training student answers, then project the answer keys into the
# same vocabulary
tfidf_matrix_train = tfidf_vectorizer.fit_transform(train_df['student answer'].values.astype('U'))
tfidf_matrix_answer_key_train = tfidf_vectorizer.transform(train_df['answer key'].values.astype('U'))

# Cosine similarity between each student answer and the answer key OF THE SAME
# ROW. TfidfVectorizer L2-normalises every row by default (norm='l2'), so the
# row-wise sum of the element-wise product is that cosine similarity. Taking
# the maximum over an all-pairs similarity matrix would instead score an
# answer against whichever key in the split resembles it most.
cosine_similarities_train = np.asarray(
    tfidf_matrix_train.multiply(tfidf_matrix_answer_key_train).sum(axis=1)
).ravel()

# Calculate match percentage
match_percentage_train = cosine_similarities_train * 100

# Assign match percentage to training data
train_df['match_percentage'] = match_percentage_train

# Vectorize the test data with the vocabulary learned from the training data
tfidf_matrix_test = tfidf_vectorizer.transform(test_df['student answer'].values.astype('U'))
tfidf_matrix_answer_key_test = tfidf_vectorizer.transform(test_df['answer key'].values.astype('U'))

# Row-wise cosine similarity for the test data (same reasoning as above)
cosine_similarities_test = np.asarray(
    tfidf_matrix_test.multiply(tfidf_matrix_answer_key_test).sum(axis=1)
).ravel()

# Calculate match percentage for test data
match_percentage_test = cosine_similarities_test * 100

# Assign match percentage to test data
test_df['match_percentage'] = match_percentage_test

In [None]:
# Define TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit on the student answers, then project the answer keys into the same
# vocabulary.
# NOTE(review): words that occur only in answer keys are absent from this
# vocabulary and therefore ignored in the similarity — confirm that is intended.
tfidf_matrix_new_student_answer = tfidf_vectorizer.fit_transform(data['student answer'].astype('U'))
tfidf_matrix_new_answer_key = tfidf_vectorizer.transform(data['answer key'].astype('U'))

# Cosine similarity between each student answer and the answer key OF THE SAME
# ROW. TfidfVectorizer L2-normalises every row by default (norm='l2'), so the
# row-wise sum of the element-wise product is that cosine similarity. Taking
# the maximum over an all-pairs similarity matrix would instead score an
# answer against whichever key in the dataset resembles it most.
cosine_similarities = np.asarray(
    tfidf_matrix_new_student_answer.multiply(tfidf_matrix_new_answer_key).sum(axis=1)
).ravel()

# Calculate match percentage
match_percentages = cosine_similarities * 100

# Add match percentage to the dataframe
data['match_percentage'] = match_percentages



data.head()

Unnamed: 0,question,student answer,marks,max marks,answer key,summary,word count,match_percentage
0,What is the difference between abstraction and...,Abstraction and encapsulation are both essenti...,1,5,"In a comprehensive view, abstraction and encap...",Abstraction process simplifying complex system...,25,34.813211
1,What is the difference between abstraction and...,Abstraction focuses on hiding the unnecessary ...,2,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , involves bundling data ...",47,34.962813
2,What is the difference between abstraction and...,Abstraction and encapsulation work hand-in-han...,3,5,"In a comprehensive view, abstraction and encap...","Together , contribute principle data abstracti...",51,50.58575
3,What is the difference between abstraction and...,Abstraction and encapsulation are fundamental ...,4,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , bundling data method si...",110,53.771002
4,What is the difference between abstraction and...,"In a comprehensive view, abstraction and encap...",5,5,"In a comprehensive view, abstraction and encap...","Together , abstraction encapsulation form foun...",114,100.0


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize TF-IDF vectorizer
# stop_words='english' selects scikit-learn's built-in English stop-word list.
# NOTE: extract_keywords calls fit_transform on this shared instance for every
# answer, so its fitted vocabulary always reflects the most recent call only.
vectorizer = TfidfVectorizer(stop_words='english')

# Define the function to extract keywords based on max marks
def extract_keywords(answer, max_marks):
    """Return the highest-weighted TF-IDF terms of ``answer``.

    The module-level ``vectorizer`` is re-fitted on ``answer`` alone, so the
    weights rank the terms within this single text.

    Args:
        answer: Text to extract keywords from. Values that are not non-blank
            strings (``None``, NaN, ``''``) produce an empty list.
        max_marks: Maximum marks of the question; selects how many keywords
            are returned (1 -> 5, 2 -> 10, 3 -> 20, 4 -> 30, 5 -> 50; any
            other value -> 50).

    Returns:
        list: Up to the selected number of terms, highest weight first. Empty
        when the text has no usable terms.
    """
    # Define the mapping between max marks and top N
    top_n_mapping = {1: 5, 2: 10, 3: 20, 4: 30, 5: 50}
    keyword_limit = top_n_mapping.get(max_marks, 50)  # Default to 50 if max_marks not in mapping

    # Empty spreadsheet cells arrive as NaN/None; there is nothing to extract
    if not isinstance(answer, str) or not answer.strip():
        return []

    try:
        # Fit the vectorizer and transform the text
        tfidf_matrix = vectorizer.fit_transform([answer])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when the text
        # holds nothing but stop words or characters that are not tokens
        return []

    # Pair every term with its TF-IDF score
    feature_names = vectorizer.get_feature_names_out()
    tfidf_scores = tfidf_matrix.toarray()[0]

    # Sort the terms by score, highest first (ties keep vocabulary order)
    sorted_words = sorted(
        zip(feature_names, tfidf_scores), key=lambda item: item[1], reverse=True
    )

    # Slicing already copes with texts that have fewer terms than the limit
    return [word for word, _ in sorted_words[:keyword_limit]]

# 'data' is expected to contain the columns 'student answer', 'answer key' and 'max marks'.
# Keywords of the student answer go to 'sa_keywords', keywords of the answer
# key go to 'ak_keywords'; both use the row's 'max marks' to size the list.
for text_column, keyword_column in (
    ('student answer', 'sa_keywords'),
    ('answer key', 'ak_keywords'),
):
    data[keyword_column] = data.apply(
        lambda row, column=text_column: extract_keywords(row[column], row['max marks']),
        axis=1,
    )

data.head()


Unnamed: 0,question,student answer,marks,max marks,answer key,summary,word count,match_percentage,sa_keywords,ak_keywords
0,What is the difference between abstraction and...,Abstraction and encapsulation are both essenti...,1,5,"In a comprehensive view, abstraction and encap...",Abstraction process simplifying complex system...,25,34.813211,"[abstraction, data, encapsulation, essential, ...","[encapsulation, abstraction, code, software, a..."
1,What is the difference between abstraction and...,Abstraction focuses on hiding the unnecessary ...,2,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , involves bundling data ...",47,34.962813,"[data, details, hiding, implementation, abstra...","[encapsulation, abstraction, code, software, a..."
2,What is the difference between abstraction and...,Abstraction and encapsulation work hand-in-han...,3,5,"In a comprehensive view, abstraction and encap...","Together , contribute principle data abstracti...",51,50.58575,"[abstraction, code, create, encapsulation, han...","[encapsulation, abstraction, code, software, a..."
3,What is the difference between abstraction and...,Abstraction and encapsulation are fundamental ...,4,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , bundling data method si...",110,53.771002,"[abstraction, data, encapsulation, bundling, c...","[encapsulation, abstraction, code, software, a..."
4,What is the difference between abstraction and...,"In a comprehensive view, abstraction and encap...",5,5,"In a comprehensive view, abstraction and encap...","Together , abstraction encapsulation form foun...",114,100.0,"[encapsulation, abstraction, code, software, a...","[encapsulation, abstraction, code, software, a..."


In [None]:
# Share of the student-answer (SA) keywords that also occur among the answer-key (AK) keywords
def calculate_keyword_overlap(sa_keywords, ak_keywords):
    """Return the percentage of distinct SA keywords that also appear in the AK keywords.

    Duplicates are ignored on both sides. The result is 0 when there are no
    SA keywords.
    """
    student_terms = set(sa_keywords)
    key_terms = set(ak_keywords)

    # Guard against dividing by zero for an answer without keywords
    if not student_terms:
        return 0

    shared_terms = student_terms & key_terms
    return (len(shared_terms) / len(student_terms)) * 100

# Compute the keyword overlap of every row from its two keyword lists
def _row_keyword_overlap(row):
    return calculate_keyword_overlap(row['sa_keywords'], row['ak_keywords'])

data['keyword_overlap_percentage'] = data.apply(_row_keyword_overlap, axis=1)

# Show the first rows including the new column
data.head()


Unnamed: 0,question,student answer,marks,max marks,answer key,summary,word count,match_percentage,sa_keywords,ak_keywords,keyword_overlap_percentage
0,What is the difference between abstraction and...,Abstraction and encapsulation are both essenti...,1,5,"In a comprehensive view, abstraction and encap...",Abstraction process simplifying complex system...,25,34.813211,"[abstraction, data, encapsulation, essential, ...","[encapsulation, abstraction, code, software, a...",16.666667
1,What is the difference between abstraction and...,Abstraction focuses on hiding the unnecessary ...,2,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , involves bundling data ...",47,34.962813,"[data, details, hiding, implementation, abstra...","[encapsulation, abstraction, code, software, a...",23.684211
2,What is the difference between abstraction and...,Abstraction and encapsulation work hand-in-han...,3,5,"In a comprehensive view, abstraction and encap...","Together , contribute principle data abstracti...",51,50.58575,"[abstraction, code, create, encapsulation, han...","[encapsulation, abstraction, code, software, a...",31.818182
3,What is the difference between abstraction and...,Abstraction and encapsulation are fundamental ...,4,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , bundling data method si...",110,53.771002,"[abstraction, data, encapsulation, bundling, c...","[encapsulation, abstraction, code, software, a...",40.0
4,What is the difference between abstraction and...,"In a comprehensive view, abstraction and encap...",5,5,"In a comprehensive view, abstraction and encap...","Together , abstraction encapsulation form foun...",114,100.0,"[encapsulation, abstraction, code, software, a...","[encapsulation, abstraction, code, software, a...",100.0


In [None]:
!pip install language_tool_python


Collecting language_tool_python
  Downloading language_tool_python-2.8-py3-none-any.whl (35 kB)
Installing collected packages: language_tool_python
Successfully installed language_tool_python-2.8


In [None]:
# Grammar Check

from language_tool_python import LanguageTool

# Initialize LanguageTool for US English ('en-US').
# The recorded run of this cell downloaded the LanguageTool 6.4 distribution
# (246 MB) into /root/.cache/language_tool_python.
tool = LanguageTool('en-US')

# Function to calculate grammar accuracy
def calculate_grammar_accuracy(text):
    """Return the percentage of words in ``text`` not matched by a LanguageTool issue.

    The score is ``(words - issues) / words * 100``, where words are the
    whitespace-separated tokens of ``text`` and issues are the matches
    reported by the module-level ``tool``.

    Args:
        text: Text to check. Values that are not strings (``None``, NaN from
            an empty spreadsheet cell) and texts without any word score 0.0.

    Returns:
        float: A value between 0.0 and 100.0.
    """
    if not isinstance(text, str):
        return 0.0

    total_words = len(text.split())
    if total_words == 0:
        # Nothing was written: avoid dividing by zero
        return 0.0

    num_errors = len(tool.check(text))

    # More reported issues than words would give a negative percentage
    return max(0.0, (total_words - num_errors) / total_words * 100)

# Score the grammar of every student answer and store it as a new column
student_answers = data['student answer']
data['Grammar Accuracy'] = student_answers.apply(calculate_grammar_accuracy)


Downloading LanguageTool 6.4: 100%|██████████| 246M/246M [00:05<00:00, 47.4MB/s]
INFO:language_tool_python.download_lt:Unzipping /tmp/tmpf17x9b96.zip to /root/.cache/language_tool_python.
INFO:language_tool_python.download_lt:Downloaded https://www.languagetool.org/download/LanguageTool-6.4.zip to /root/.cache/language_tool_python.


In [None]:
data.head()

Unnamed: 0,question,student answer,marks,max marks,answer key,summary,word count,match_percentage,sa_keywords,ak_keywords,keyword_overlap_percentage,Grammar Accuracy
0,What is the difference between abstraction and...,Abstraction and encapsulation are both essenti...,1,5,"In a comprehensive view, abstraction and encap...",Abstraction process simplifying complex system...,25,34.813211,"[abstraction, data, encapsulation, essential, ...","[encapsulation, abstraction, code, software, a...",16.666667,100.0
1,What is the difference between abstraction and...,Abstraction focuses on hiding the unnecessary ...,2,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , involves bundling data ...",47,34.962813,"[data, details, hiding, implementation, abstra...","[encapsulation, abstraction, code, software, a...",23.684211,100.0
2,What is the difference between abstraction and...,Abstraction and encapsulation work hand-in-han...,3,5,"In a comprehensive view, abstraction and encap...","Together , contribute principle data abstracti...",51,50.58575,"[abstraction, code, create, encapsulation, han...","[encapsulation, abstraction, code, software, a...",31.818182,100.0
3,What is the difference between abstraction and...,Abstraction and encapsulation are fundamental ...,4,5,"In a comprehensive view, abstraction and encap...","Encapsulation , hand , bundling data method si...",110,53.771002,"[abstraction, data, encapsulation, bundling, c...","[encapsulation, abstraction, code, software, a...",40.0,100.0
4,What is the difference between abstraction and...,"In a comprehensive view, abstraction and encap...",5,5,"In a comprehensive view, abstraction and encap...","Together , abstraction encapsulation form foun...",114,100.0,"[encapsulation, abstraction, code, software, a...","[encapsulation, abstraction, code, software, a...",100.0,100.0
