In [1]:
import csv
import os
import re
import tkinter as tk
from collections import Counter
from datetime import datetime
from tkinter import filedialog

import nltk
from spellchecker import SpellChecker

# Download the tokenizer models that nltk.word_tokenize() depends on.
# Older NLTK releases load the 'punkt' resource; NLTK 3.9 and later load
# 'punkt_tab' instead, so both are requested to keep tokenisation working
# regardless of which NLTK version is installed.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)

def select_input_folder():
    """Ask the user to pick the input folder with a native directory dialog.

    Returns:
        Whatever ``filedialog.askdirectory`` returns: the chosen path, or a
        falsy value when the dialog is dismissed (``main`` treats a falsy
        result as "nothing selected").
    """
    root = tk.Tk()
    root.withdraw()  # Hide the empty main window; only the dialog is wanted
    try:
        return filedialog.askdirectory(title="Select Input Folder")
    finally:
        # Tear down the hidden root so each call does not leak a Tk
        # interpreter and its invisible window.
        root.destroy()

def select_output_folder():
    """Ask the user to pick the output folder with a native directory dialog.

    Returns:
        Whatever ``filedialog.askdirectory`` returns: the chosen path, or a
        falsy value when the dialog is dismissed (``main`` treats a falsy
        result as "nothing selected").
    """
    root = tk.Tk()
    root.withdraw()  # Hide the empty main window; only the dialog is wanted
    try:
        return filedialog.askdirectory(title="Select Output Folder")
    finally:
        # Tear down the hidden root so each call does not leak a Tk
        # interpreter and its invisible window.
        root.destroy()

def analyze_spelling(text, spell_checker):
    """Find misspelled words in *text* and look up a correction for each.

    Args:
        text: The text to analyse.
        spell_checker: Object providing ``unknown(words)`` and
            ``correction(word)``; ``main`` passes a ``SpellChecker``.

    Returns:
        A ``(corrections, error_count)`` tuple. ``corrections`` maps every
        word reported by ``spell_checker.unknown`` to the value returned by
        ``spell_checker.correction`` for it (which may be ``None`` when the
        checker has no suggestion). ``error_count`` is the size of the
        collection returned by ``unknown``.
    """
    # word_tokenize also emits punctuation, quote markers and contraction
    # fragments (e.g. "``", "''", "...", "n't", "'s"). Those are not words,
    # and passing them on would report them as spelling errors, so only
    # purely alphabetic tokens are checked.
    words = [token for token in nltk.word_tokenize(text) if token.isalpha()]
    misspelled = spell_checker.unknown(words)
    corrections = {word: spell_checker.correction(word) for word in misspelled}
    return corrections, len(misspelled)

def correct_spelling(text, spell_checker):
    """Return *text* with each word replaced by its suggested correction.

    Words are substituted in place, so everything that is not a word
    (whitespace, line breaks, punctuation, digits) is returned exactly as
    it appeared in the input. Tokenising and re-joining with single spaces
    would instead collapse the layout and put a space before punctuation.

    Args:
        text: The text to correct.
        spell_checker: Object providing ``correction(word)``; a falsy
            result (e.g. ``None``) means "no suggestion" and the original
            word is kept.

    Returns:
        The corrected text.
    """
    # A word is a standalone run of letters, optionally joined by
    # apostrophes ("don't"). The \b anchors keep letter runs embedded in
    # identifiers such as "h2o" or "snake_case" from being matched.
    word_pattern = r"\b[^\W\d_]+(?:['\u2019][^\W\d_]+)*\b"

    def _replace(match):
        word = match.group(0)
        if not word.isalpha():
            # Contractions are left untouched rather than being split into
            # fragments and "corrected" piece by piece.
            return word
        return spell_checker.correction(word) or word

    return re.sub(word_pattern, _replace, text)

def export_to_csv(error_summary, output_folder):
    """Write the collected spelling data to two timestamped CSV files.

    Args:
        error_summary: Mapping of filename to a dict with the keys
            ``'original_text'``, ``'error_count'`` and ``'errors'``
            (misspelled word -> correction).
        output_folder: Directory in which both CSV files are created.

    Returns:
        A ``(summary_csv_path, detailed_csv_path)`` tuple.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    def _summary_row(name, info):
        # One statistics row per file; the rate is 0 for a file with no tokens.
        word_total = len(nltk.word_tokenize(info['original_text']))
        if word_total > 0:
            rate = info['error_count'] / word_total * 100
        else:
            rate = 0
        return [name, word_total, info['error_count'], f"{rate:.2f}"]

    # Per-file statistics
    summary_csv_path = os.path.join(
        output_folder, f"spelling_error_summary_{stamp}.csv"
    )
    with open(summary_csv_path, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.writer(out)
        sheet.writerow(['Filename', 'Total Word Count', 'Error Count', 'Error Rate (%)'])
        sheet.writerows(
            _summary_row(name, info) for name, info in error_summary.items()
        )

    # One row per (file, misspelled word) pair
    detailed_csv_path = os.path.join(
        output_folder, f"spelling_error_details_{stamp}.csv"
    )
    with open(detailed_csv_path, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.writer(out)
        sheet.writerow(['Filename', 'Misspelled Word', 'Correction'])
        sheet.writerows(
            [name, wrong, fixed]
            for name, info in error_summary.items()
            for wrong, fixed in info['errors'].items()
        )

    return summary_csv_path, detailed_csv_path

def _write_text_report(error_summary, report_path):
    """Write the plain-text per-file list of errors and corrections."""
    with open(report_path, 'w', encoding='utf-8') as report:
        report.write("Spelling Error Summary Report\n")
        report.write("=" * 30 + "\n\n")
        for filename, data in error_summary.items():
            report.write(f"File: {filename}\n")
            report.write(f"Total errors: {data['error_count']}\n")
            report.write("Corrections:\n")
            for word, correction in data['errors'].items():
                report.write(f"  {word} -> {correction}\n")
            report.write("\n" + "-" * 30 + "\n\n")


def _print_top_misspellings(error_summary, limit=5):
    """Print the misspelled words that occur in the largest number of files."""
    # Each file's 'errors' dict holds a word at most once, so these counts
    # are "number of files containing the word", not total occurrences.
    file_counts = Counter(
        word for data in error_summary.values() for word in data['errors']
    )

    print(f"\nTop {limit} Most Frequent Misspelled Words:")
    for word, freq in file_counts.most_common(limit):
        print(f"  {word} (misspelled in {freq} file(s))")
    print("-" * 50)


def main():
    """Run the interactive spell-check workflow.

    Asks for an input and an output folder, spell-checks every ``.txt`` file
    in the input folder, writes a corrected copy of each file to the output
    folder under the same name, and produces two CSV files plus a text
    report there. Progress and a short summary are printed to stdout.

    The run is aborted when no folder is chosen, or when both folders are
    the same: corrected copies reuse the input filenames, so writing into
    the input folder would overwrite the original files.
    """
    # Initialize spell checker
    spell = SpellChecker()

    # Get folder paths through GUI
    print("Please select input folder...")
    input_folder = select_input_folder()
    if not input_folder:
        print("No input folder selected. Exiting...")
        return

    print("Please select output folder...")
    output_folder = select_output_folder()
    if not output_folder:
        print("No output folder selected. Exiting...")
        return

    # Refuse to write corrected copies on top of the originals.
    same_folder = (
        os.path.normcase(os.path.realpath(input_folder))
        == os.path.normcase(os.path.realpath(output_folder))
    )
    if same_folder:
        print("Output folder must be different from the input folder. Exiting...")
        return

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Process each file in the input folder. os.listdir returns names in
    # arbitrary order; sorting keeps the console output and the reports
    # reproducible between runs.
    error_summary = {}

    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.txt'):
            continue

        input_path = os.path.join(input_folder, filename)
        output_path = os.path.join(output_folder, filename)

        # Read the file
        with open(input_path, 'r', encoding='utf-8') as file:
            content = file.read()

        # Analyze spelling errors
        errors, error_count = analyze_spelling(content, spell)
        error_summary[filename] = {
            'error_count': error_count,
            'errors': errors,
            'original_text': content  # Store original text for word count
        }

        # Correct spelling
        corrected_content = correct_spelling(content, spell)

        # Write corrected content to new file
        with open(output_path, 'w', encoding='utf-8') as file:
            file.write(corrected_content)

        print(f"Processed: {filename}")
        print(f"Number of spelling errors found: {error_count}")
        print("Misspelled words and their corrections:")
        for word, correction in errors.items():
            print(f"  {word} -> {correction}")
        print("-" * 50)

    # Export results to CSV files
    summary_csv, detailed_csv = export_to_csv(error_summary, output_folder)

    # Write traditional error report
    report_path = os.path.join(output_folder, "_spelling_error_report.txt")
    _write_text_report(error_summary, report_path)

    # Show the top 5 most widespread misspellings
    _print_top_misspellings(error_summary)

    print("\nSpelling correction completed!")
    print(f"Corrected files and reports are in: {output_folder}")
    print(f"Summary CSV: {os.path.basename(summary_csv)}")
    print(f"Detailed CSV: {os.path.basename(detailed_csv)}")

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

<class 'ModuleNotFoundError'>: No module named 'spellchecker'