<a href="https://colab.research.google.com/github/yasirsid2004/Bank-Customer-Churn-Prediction/blob/main/text_summarizer_and_analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
# Step 1: Install required libraries quietly.
# NOTE: the leading "!" is IPython/Colab shell-escape syntax, so this line is
# only valid inside a notebook cell, not in a plain Python script.
!pip install transformers torch sentencepiece nltk --quiet

import nltk
from transformers import pipeline
import textwrap
import re
from collections import Counter

# Step 2: Download necessary NLTK data
# Maps each downloadable package name to the path nltk.data.find() expects.
# Note that stopwords live under "corpora/", not "corpus/"; with the wrong
# category the lookup always fails and everything is re-downloaded each run.
_NLTK_RESOURCES = {
    'punkt': 'tokenizers/punkt',
    'stopwords': 'corpora/stopwords',
    'punkt_tab': 'tokenizers/punkt_tab',
}

# Check every resource on its own so that only the missing ones are fetched.
_missing_packages = []
for _package, _resource_path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        _missing_packages.append(_package)

if _missing_packages:
    print("Downloading NLTK data (one-time setup)...")
    for _package in _missing_packages:
        nltk.download(_package, quiet=True)
    print("NLTK data downloaded.")


# Step 3: Define the core functions
def analyze_text(text):
    """Compute simple descriptive statistics for a piece of text.

    Args:
        text: The raw text to analyze.

    Returns:
        A dict with the keys:
          * "word_count": number of ``\\w+`` tokens in the lower-cased text.
          * "char_count": ``len(text)``, whitespace included.
          * "sentence_count": number of sentences found by
            ``nltk.sent_tokenize``.
          * "reading_time_minutes": ``round(word_count / 200)`` as an int, or
            the string "less than 1" when that value is below 1.
          * "keywords": up to five most frequent tokens that are longer than
            two characters and are not English stop words.
    """
    # Tokenize once; both the word count and the keyword ranking use it.
    tokens = re.findall(r'\b\w+\b', text.lower())
    total_words = len(tokens)

    total_sentences = len(nltk.sent_tokenize(text))

    # Reading speed is assumed to be 200 words per minute.
    minutes = round(total_words / 200)
    reading_estimate = minutes if minutes >= 1 else "less than 1"

    # Rank the remaining tokens by frequency after dropping stop words and
    # very short tokens.
    ignored = set(nltk.corpus.stopwords.words('english'))
    frequencies = Counter(
        token for token in tokens
        if len(token) > 2 and token not in ignored
    )
    top_keywords = [token for token, _count in frequencies.most_common(5)]

    return {
        "word_count": total_words,
        "char_count": len(text),
        "sentence_count": total_sentences,
        "reading_time_minutes": reading_estimate,
        "keywords": top_keywords
    }

# Summarization pipeline shared by all calls; created lazily so the model is
# loaded once per session instead of once per summarized text.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _summarizer
    if _summarizer is None:
        print("\nInitializing summarization model (this may take a moment on first run)...")
        # Using a distilled, faster model perfect for Colab
        _summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    return _summarizer


def summarize_text(text):
    """Summarize the text using a pre-trained Hugging Face model.

    Only the first 1024 characters of ``text`` are summarized; a more advanced
    approach would be to chunk the text.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary string. If anything goes wrong (model loading or
        generation), a message starting with
        "An error occurred during summarization:" is returned instead of
        raising, so callers can print the result directly.
    """
    # Character cap on the input. Characters are only a rough proxy for the
    # model's token limit, hence the additional truncation=True below.
    max_chunk_length = 1024

    try:
        summarizer = _get_summarizer()

        # Summarize with min/max length constraints. truncation=True makes the
        # tokenizer clip the input to the model's maximum length if the
        # character-capped text still produces too many tokens.
        summary = summarizer(
            text[:max_chunk_length],
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred during summarization: {e}"

def chatbot():
    """Run the interactive analyze-and-summarize loop on standard input.

    Repeatedly reads a multi-line text (terminated by an empty line), prints
    the statistics from ``analyze_text`` and the summary from
    ``summarize_text``, then asks whether to continue. The loop ends when the
    user answers "no"/"n" or when standard input is closed.
    """
    wrapper = textwrap.TextWrapper(width=80)
    farewell = "\nThank you for using the chatbot. Goodbye! 👋"

    print("="*80)
    print("🚀 Welcome to the Text Analyzer and Summarizer Chatbot! 🚀")
    print("="*80)

    while True:
        print("\nPaste the text you want to analyze and summarize below.")
        print("When you're finished, press Enter on an empty line.")

        # Collect multi-line input from the user
        user_text_lines = []
        input_closed = False
        while True:
            try:
                line = input()
            except EOFError:
                # stdin was closed: process what was collected, then stop
                # instead of crashing or prompting forever.
                input_closed = True
                break
            if line == "":
                break
            user_text_lines.append(line)

        user_text = "\n".join(user_text_lines)

        if not user_text.strip():
            if input_closed:
                print(farewell)
                break
            print("\nIt looks like you didn't enter any text. Let's try again!")
            continue

        # --- Analysis ---
        print("\n" + "-"*35 + " ANALYSIS " + "-"*35)
        analysis_results = analyze_text(user_text)
        print(f"   Word Count: {analysis_results['word_count']}")
        print(f"   Character Count: {analysis_results['char_count']}")
        print(f"   Sentence Count: {analysis_results['sentence_count']}")
        print(f"   Estimated Reading Time: {analysis_results['reading_time_minutes']} minute(s)")
        print(f"   Top Keywords: {', '.join(analysis_results['keywords'])}")
        print("-" * 80)

        # --- Summarization ---
        summary = summarize_text(user_text)
        print("\n" + "*"*35 + " SUMMARY " + "*"*36)
        print(wrapper.fill(text=summary))
        print("*" * 80)

        # --- Continue or Exit ---
        # Defaults to "no" so that a closed stdin ends the session.
        another = "no"
        while not input_closed:
            try:
                # strip() so that answers such as "yes " or " n" are accepted
                another = input("\nWould you like to analyze another text? (yes/no): ").strip().lower()
            except EOFError:
                another = "no"
                break
            if another in ["yes", "y", "no", "n"]:
                break
            print("Invalid input. Please enter 'yes' or 'no'.")

        if another in ["no", "n"]:
            print(farewell)
            break

# Start the chatbot only when this code is executed directly (a notebook cell
# or a script both run as "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    chatbot()

noDownloading NLTK data (one-time setup)...
NLTK data downloaded.
🚀 Welcome to the Text Analyzer and Summarizer Chatbot! 🚀

Paste the text you want to analyze and summarize below.
When you're finished, press Enter on an empty line.
I’m an aspiring AI and Machine Learning engineer driven by curiosity, creativity, and a desire to turn data into meaningful impact. With a strong foundation in Python and hands-on experience exploring intelligent systems, I aim to design solutions that blend innovation with purpose. My goal is to grow as a problem-solver who not only codes but also creates — developing technologies that inspire progress and make everyday life smarter.


----------------------------------- ANALYSIS -----------------------------------
   Word Count: 71
   Character Count: 438
   Sentence Count: 3
   Estimated Reading Time: less than 1 minute(s)
   Top Keywords: aspiring, machine, learning, engineer, driven
-----------------------------------------------------------------------

Device set to use cpu
Your max_length is set to 150, but your input_length is only 85. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)



*********************************** SUMMARY ************************************
 I'm an aspiring AI and Machine Learning engineer driven by curiosity,
creativity, and a desire to turn data into meaningful impact . My goal is to
grow as a problem-solver who not only codes but also creates — developing
technologies that inspire progress and make everyday life smarter .
********************************************************************************

Would you like to analyze another text? (yes/no): no

Thank you for using the chatbot. Goodbye! 👋


In [10]:
# Verify that the NLTK resources used by analyze_text are installed, fetching
# only those that are missing. Stopwords are stored under "corpora/" (not
# "corpus/"); punkt_tab is included because newer NLTK releases use it for
# sentence tokenization.
_required_nltk_data = {
    'punkt': 'tokenizers/punkt',
    'stopwords': 'corpora/stopwords',
    'punkt_tab': 'tokenizers/punkt_tab',
}

_to_download = []
for _name, _location in _required_nltk_data.items():
    try:
        nltk.data.find(_location)
    except LookupError:
        _to_download.append(_name)

if not _to_download:
    print("✅ NLTK data is already downloaded.")
else:
    print("Downloading NLTK data...")
    for _name in _to_download:
        nltk.download(_name, quiet=True)
    print("✅ NLTK data downloaded successfully.")

Downloading NLTK data...
✅ NLTK data downloaded successfully.


In [11]:
def analyze_text(text):
    """Return basic statistics and keywords for ``text``.

    The result is a dict containing, in this order: "word_count" (number of
    ``\\w+`` tokens in the lower-cased text), "char_count" (``len(text)``),
    "sentence_count" (sentences from ``nltk.sent_tokenize``),
    "reading_time_minutes" (``round(word_count / 200)``, or the string
    "less than 1" when that is below 1) and "keywords" (up to five most
    common tokens longer than two characters that are not English stop words).
    """
    lowered_words = re.findall(r'\b\w+\b', text.lower())

    report = {
        "word_count": len(lowered_words),
        "char_count": len(text),
        "sentence_count": len(nltk.sent_tokenize(text)),
    }

    # Estimated reading time at 200 words per minute.
    estimate = round(report["word_count"] / 200)
    if estimate < 1:
        estimate = "less than 1"
    report["reading_time_minutes"] = estimate

    # Count only meaningful words: not a stop word and longer than two chars.
    english_stop_words = set(nltk.corpus.stopwords.words('english'))
    tally = Counter()
    for candidate in lowered_words:
        if candidate in english_stop_words or len(candidate) <= 2:
            continue
        tally[candidate] += 1
    report["keywords"] = [entry[0] for entry in tally.most_common(5)]

    return report

# Summarization pipeline shared by all calls; created lazily so the model is
# loaded once per session instead of once per summarized text.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _summarizer
    if _summarizer is None:
        print("\nInitializing summarization model (this may take a moment on the first run)...")
        _summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
    return _summarizer


def summarize_text(text):
    """Summarize the text using a pre-trained Hugging Face model.

    Only the first 1024 characters of ``text`` are passed to the model.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary string, or a message starting with
        "An error occurred during summarization:" if model loading or
        generation fails (no exception is propagated).
    """
    # Character cap on the input. Characters are only a rough proxy for the
    # model's token limit, hence the additional truncation=True below.
    max_chunk_length = 1024

    try:
        summarizer = _get_summarizer()
        # truncation=True clips the tokenized input to the model's maximum
        # length if the character-capped text still yields too many tokens.
        summary = summarizer(
            text[:max_chunk_length],
            max_length=150,
            min_length=40,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred during summarization: {e}"

# Confirmation message printed once the definitions in this cell have run.
print("✅ Core functions (analyze_text, summarize_text) are defined.")


✅ Core functions (analyze_text, summarize_text) are defined.
