In [29]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ai-powered-study-buddy-sample-pdf/sample-input.pdf


In [30]:
import nltk

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


True

# AI-Powered Study Buddy



## Project Overview
This project aims to develop an AI-powered Study Buddy that assists students in understanding concepts, summarizing study materials, answering questions, generating quizzes, and creating flashcards to support effective exam preparation.

## Project Workflow

1. Load educational dataset from Kaggle  
2. Preprocess text data  
3. Explain concepts using AI  
4. Summarize study content  
5. Generate quiz questions  
6. Create flashcards  
7. Display outputs for students

In [31]:
# Sample study content for initial testing
study_text = """
Electromagnetic induction is a fundamental concept in physics and electrical engineering.
It explains how a changing magnetic field can induce an electric current in a conductor.
This principle was discovered by Michael Faraday and forms the basis of many electrical devices.
Generators, transformers, and inductors operate using electromagnetic induction.
Understanding this concept is very important for students studying electrical and electronics engineering.
"""

print(study_text)


Electromagnetic induction is a fundamental concept in physics and electrical engineering.
It explains how a changing magnetic field can induce an electric current in a conductor.
This principle was discovered by Michael Faraday and forms the basis of many electrical devices.
Generators, transformers, and inductors operate using electromagnetic induction.
Understanding this concept is very important for students studying electrical and electronics engineering.



In [32]:
# Basic text preprocessing
def preprocess_text(text):
    text = text.lower()              # convert to lowercase
    text = text.replace("\n", " ")   # remove line breaks
    return text

clean_text = preprocess_text(study_text)
print(clean_text)

 electromagnetic induction is a fundamental concept in physics and electrical engineering. it explains how a changing magnetic field can induce an electric current in a conductor. this principle was discovered by michael faraday and forms the basis of many electrical devices. generators, transformers, and inductors operate using electromagnetic induction. understanding this concept is very important for students studying electrical and electronics engineering. 


In [33]:
# Concept explanation function
def explain_concept(text):
    explanation = (
        "Simple Explanation:\n"
        + text
        + "\n\nIn simple words, this concept explains how changes in physical conditions "
          "can produce useful effects in real-world applications. It is widely used in "
          "engineering and technology."
    )
    return explanation

In [34]:
# Test concept explanation
concept_output = explain_concept(clean_text)
print(concept_output)

Simple Explanation:
 electromagnetic induction is a fundamental concept in physics and electrical engineering. it explains how a changing magnetic field can induce an electric current in a conductor. this principle was discovered by michael faraday and forms the basis of many electrical devices. generators, transformers, and inductors operate using electromagnetic induction. understanding this concept is very important for students studying electrical and electronics engineering. 

In simple words, this concept explains how changes in physical conditions can produce useful effects in real-world applications. It is widely used in engineering and technology.


In [35]:
# Keyword-based summarization (improved)
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def summarize_text_tfidf(text, max_sentences=2):
    sentences = [s.strip() for s in text.split('.') if s.strip()]
    
    if len(sentences) <= max_sentences:
        return text
    
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(sentences)
    
    sentence_scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
    top_sentence_indices = sentence_scores.argsort()[-max_sentences:][::-1]
    
    summary = '. '.join([sentences[i] for i in sorted(top_sentence_indices)])
    return summary + '.'

In [36]:
# Test TF-IDF based summarization
summary_output = summarize_text_tfidf(clean_text)
print("Summary:")
print(summary_output)

Summary:
it explains how a changing magnetic field can induce an electric current in a conductor. this principle was discovered by michael faraday and forms the basis of many electrical devices.


In [37]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

def answer_question(text, question, top_n=2):
    sentences = [s.strip() for s in text.split('.') if s.strip()]
    
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(sentences + [question])
    
    similarity_scores = (tfidf_matrix[:-1] * tfidf_matrix[-1].T).toarray().flatten()
    
    top_indices = similarity_scores.argsort()[-top_n:][::-1]
    
    answer = ". ".join([sentences[i] for i in sorted(top_indices)])
    return answer + "."

In [38]:
# Test Question Answering
question = "What is electromagnetic induction?"
answer = answer_question(clean_text, question)

print("Question:", question)
print("Answer:", answer)

Question: What is electromagnetic induction?
Answer: electromagnetic induction is a fundamental concept in physics and electrical engineering. generators, transformers, and inductors operate using electromagnetic induction.


In [39]:
import random

def generate_quiz(text, num_questions=3):
    sentences = [s.strip() for s in text.split('.') if s.strip()]
    
    templates = [
        "What does the following statement explain?",
        "Identify the concept described below:",
        "What is being referred to in this statement?"
    ]
    
    quiz_questions = []
    for i, sentence in enumerate(sentences[:num_questions]):
        template = random.choice(templates)
        question = f"Q{i+1}. {template}\n{sentence}"
        quiz_questions.append(question)
    
    return quiz_questions

In [40]:
# Test quiz generation
quiz = generate_quiz(clean_text)

for q in quiz:
    print(q)
    print()

Q1. What is being referred to in this statement?
electromagnetic induction is a fundamental concept in physics and electrical engineering

Q2. What is being referred to in this statement?
it explains how a changing magnetic field can induce an electric current in a conductor

Q3. What is being referred to in this statement?
this principle was discovered by michael faraday and forms the basis of many electrical devices



In [74]:
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
from nltk.corpus import stopwords

stop_words = set(stopwords.words("english"))

def generate_flashcards(text, num_cards=5):
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 30]

    vectorizer = TfidfVectorizer(
        stop_words='english',
        ngram_range=(1, 2),
        max_features=10
    )

    tfidf = vectorizer.fit_transform(sentences)
    keywords = vectorizer.get_feature_names_out()

    flashcards = []

    for kw in keywords[:num_cards]:
        for sent in sentences:
            if kw in sent:
                flashcards.append({
                    "Front": kw.title(),
                    "Back": sent[:120]  # LIMIT length
                })
                break

    return flashcards

In [76]:
import nltk
from nltk import word_tokenize, pos_tag
from nltk.chunk import RegexpParser

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


def extract_noun_phrases(sentence):
    tokens = word_tokenize(sentence)
    tagged = pos_tag(tokens)

    grammar = r"""
        NP: {<JJ>*<NN|NNS|NNP|NNPS>+}
    """
    chunker = RegexpParser(grammar)
    tree = chunker.parse(tagged)

    noun_phrases = []
    for subtree in tree.subtrees(filter=lambda t: t.label() == 'NP'):
        phrase = " ".join(word for word, tag in subtree.leaves())
        if len(phrase.split()) > 1:
            noun_phrases.append(phrase)

    return noun_phrases


def generate_flashcards(text):
    sentences = [s.strip() for s in text.split('.') if len(s.strip()) > 25]
    flashcards = []

    for sentence in sentences:
        noun_phrases = extract_noun_phrases(sentence)

        if noun_phrases:
            flashcards.append({
                "Front": noun_phrases[0].title(),
                "Back": sentence
            })

    return flashcards

[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [77]:
# Test flashcard generation
flashcards = generate_flashcards(clean_text)

for card in flashcards:
    print("Front:", card["Front"])
    print("Back:", card["Back"])
    print()

Front: Electromagnetic Induction
Back: electromagnetic induction is a fundamental concept in physics and electrical engineering

Front: Changing Magnetic Field
Back: it explains how a changing magnetic field can induce an electric current in a conductor

Front: Michael Faraday
Back: this principle was discovered by michael faraday and forms the basis of many electrical devices

Front: Electromagnetic Induction
Back: generators, transformers, and inductors operate using electromagnetic induction

Front: Electronics Engineering
Back: understanding this concept is very important for students studying electrical and electronics engineering



In [78]:
!pip install PyPDF2



In [79]:
import PyPDF2

In [80]:
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            text += page.extract_text()
    return text

In [81]:
pdf_text = extract_text_from_pdf(
    "/kaggle/input/ai-powered-study-buddy-sample-pdf/sample-input.pdf"
)

In [82]:
# Simple cleaning
clean_pdf_text = pdf_text.lower()

# Use existing pipeline (already built earlier)
summary = summarize_text_tfidf(clean_pdf_text)
answer = answer_question(clean_pdf_text, "Explain the concept")
flashcards = generate_flashcards(clean_pdf_text)

print("SUMMARY:\n", summary)
print("\nANSWER:\n", answer)
print("\nFLASHCARDS:\n")
for i, card in enumerate(flashcards, 1):
    print(f"{i}. Front: {card['Front']}")
    print(f"   Back : {card['Back']}\n")

SUMMARY:
 objectives of artificial intelligence
the main objectives of artificial intelligence are:
• to develop intelligent systems that can perform human-like tasks
• to enable machines to learn from experience
• to improve efficiency and accuracy in problem solving
• to automate repetitive and time-consuming tasks
• to assist humans in decision making
ai helps reduce human effort and improves productivity in various fields. applications of artificial intelligence
artificial intelligence is used in many areas:
• healthcare – disease diagnosis, medical imaging, and treatment
planning
• education – personalized learning, virtual tutors, and automated
grading
• finance – fraud detection, credit scoring, and customer support
• transportation – self-driving cars and traffic management
• entertainment – movie and music recommendation systems
6.

ANSWER:
 c) super ai
super ai is a theoretical concept where machines surpass human
intelligence in all aspects, including creativity and emotiona