# Step 6: Post-Scoring Packaging (Enhanced)
Backup results, generate reports, produce scored PDFs, and collect samples for sharing. Run after completing scoring and checks.

**Enhanced Features:**
- ✅ Comprehensive backup and archiving system
- ✅ Enhanced report generation with detailed analytics
- ✅ Automated PDF processing with validation
- ✅ Performance report generation with AI insights
- ✅ Class-level analytics and recommendations
- ✅ Robust error handling and progress tracking

In [None]:
from grading_utils import setup_paths, create_directories, build_student_id_mapping
import os
import json
import pandas as pd
import shutil
import time
from datetime import datetime
from pathlib import Path
from PIL import Image
import cv2
from IPython.display import display, clear_output
from ipywidgets import IntProgress, HTML
import logging
from PyPDF4 import PdfFileMerger, PdfFileReader
import re

# Logging: timestamp, level name, then the message, at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

print("‚úÖ Enhanced Step 6: Post-Scoring Packaging initialized")
print(f"‚úì Session started at: {datetime.now():%Y-%m-%d %H:%M:%S}")

# Configuration
prefix = "VTC Test"
passingMark = 15  # Adjust as needed
paths = setup_paths(prefix, "sample")

# Pull the frequently used entries out of `paths` into module-level names
# that the cells below read directly.
pdf_file, name_list_file = paths["pdf_file"], paths["name_list_file"]
base_path = paths["base_path"]
base_path_images, base_path_annotations, base_path_questions = (
    paths["base_path_images"],
    paths["base_path_annotations"],
    paths["base_path_questions"],
)
base_path_marked_images, base_path_marked_pdfs, base_path_marked_scripts = (
    paths["base_path_marked_images"],
    paths["base_path_marked_pdfs"],
    paths["base_path_marked_scripts"],
)

# Make sure every directory described by `paths` exists before any cell writes to it.
create_directories(paths)

print("‚úì Paths configured and directories created")

In [None]:
# Enhanced backup and cleanup with validation
def enhanced_backup_and_cleanup():
    """Delete version-history files, then zip the working folder.

    Every file under ``base_path_questions`` whose name starts with
    ``control-`` or ``mark-`` is removed (a failed removal is only logged).
    Afterwards ``base_path`` is archived with ``shutil.make_archive``.

    Returns:
        str: path of the created zip archive.

    Raises:
        Exception: if the archive is missing after creation; any failure is
        logged and re-raised.
    """
    print("üßπ Performing enhanced backup and cleanup...")

    try:
        removed_count = 0
        for folder, _subdirs, filenames in os.walk(base_path_questions):
            history_files = [
                name for name in filenames if name.startswith(("control-", "mark-"))
            ]
            for name in history_files:
                try:
                    os.remove(os.path.join(folder, name))
                except Exception as err:
                    logger.warning(f"Failed to remove {name}: {err}")
                else:
                    removed_count += 1

        logger.info(f"‚úì Removed {removed_count} version history files")

        # Archive is named after base_path and rooted at base_path.
        archive_file = shutil.make_archive(base_path, "zip", base_path)
        if not os.path.exists(archive_file):
            raise Exception("Failed to create backup archive")

        archive_bytes = os.path.getsize(archive_file)
        logger.info(f"‚úì Created backup archive: {archive_file}")
        logger.info(f"  Archive size: {archive_bytes:,} bytes ({archive_bytes/1024/1024:.1f} MB)")
        return archive_file

    except Exception as err:
        logger.error(f"‚ùå Backup and cleanup failed: {err}")
        raise

# Perform backup and cleanup.
# The returned archive path is kept at module level because
# generate_final_summary() prints its basename.
backup_path = enhanced_backup_and_cleanup()

In [None]:
# Enhanced score report generation with comprehensive validation
def generate_enhanced_score_report():
    """Build the per-student marks table from every question's ``mark.json``.

    Reads the "Name List" sheet of ``name_list_file`` as the authoritative
    source of student names, walks ``base_path_questions`` collecting each
    question's marks (a non-empty ``overridedMark`` wins over ``mark``), and
    assembles one row per student. A question whose ``mark.json`` cannot be
    processed is logged and skipped.

    Returns:
        pandas.DataFrame: rows keyed by the student id returned from
        ``getStudentId``; columns are ``ID``, ``NAME``, ``CLASS``, the
        remaining question columns in sorted order, and a ``Marks`` total.

    Raises:
        ValueError: if the name list has no ID column or no name column.
        Exception: any other failure is logged and re-raised unchanged
        (including a KeyError when no ID/NAME/CLASS question was loaded).
    """
    print("üìä Generating enhanced score report...")

    identity_columns = ["ID", "NAME", "CLASS"]

    try:
        # Load name list as authoritative source for student names
        name_list_df = pd.read_excel(name_list_file, sheet_name="Name List")

        # Locate the id/name columns case-insensitively; str() keeps the
        # lookup from crashing on non-string headers.
        id_col = next(
            (col for col in name_list_df.columns if str(col).lower() == "id"),
            None,
        )
        name_col = next(
            (
                col
                for col in name_list_df.columns
                if str(col).lower() in ["name", "student name", "student_name"]
            ),
            None,
        )

        if id_col is None or name_col is None:
            raise ValueError("Name list must contain ID and NAME columns.")

        # id -> name, both as strings so they match the stringified ID column below.
        name_map = (
            name_list_df.assign(**{id_col: name_list_df[id_col].astype(str)})
            .set_index(id_col)[name_col]
            .astype(str)
            .to_dict()
        )

        logger.info(f"‚úì Loaded {len(name_map)} student names from name list")

        # Build student ID mapping using utility function
        pageToStudentId, numberOfPage, getStudentId = build_student_id_mapping(
            base_path_questions, base_path_annotations
        )

        logger.info(f"‚úì Built student ID mapping for {numberOfPage} pages")

        # Extract marks from all questions: {question: {student id: mark}}
        questionAndMarks = {}
        questions_processed = 0

        for path, currentDirectory, files in os.walk(base_path_questions):
            for file in files:
                if file != "mark.json":
                    continue

                # The question name is the folder path relative to base_path_questions.
                question = path[len(base_path_questions) + 1 :]

                try:
                    with open(os.path.join(path, file), 'r', encoding='utf-8') as f:
                        data = json.load(f)

                    marks = {}
                    for entry in data:
                        studentId = getStudentId(int(entry["id"]))
                        # A manual override takes precedence over the recorded mark.
                        marks[studentId] = (
                            entry["overridedMark"]
                            if entry["overridedMark"] != ""
                            else entry["mark"]
                        )

                    questionAndMarks[question] = marks
                    questions_processed += 1
                    logger.info(f"‚úì Processed marks for {question}: {len(marks)} students")

                except Exception as e:
                    logger.error(f"‚ùå Failed to process marks for {question}: {e}")

        logger.info(f"‚úì Processed marks from {questions_processed} questions")

        # One column per question, one row per student id.
        marksDf = pd.DataFrame(questionAndMarks)

        # Reorder columns: ID, NAME, CLASS first, then questions in sorted order
        marksDf = marksDf[
            identity_columns
            + [col for col in sorted(marksDf.columns) if col not in identity_columns]
        ]

        # Prefer names from the uploaded name list, fallback to marked value when missing
        marksDf["ID"] = marksDf["ID"].astype(str)
        marksDf["NAME"] = marksDf["ID"].map(name_map).fillna(marksDf["NAME"])

        # Total = sum of every question column, coercing non-numeric cells to NaN.
        question_marks = marksDf.loc[:, ~marksDf.columns.isin(identity_columns)]
        numeric_marks = question_marks.apply(pd.to_numeric, errors='coerce')
        marksDf["Marks"] = numeric_marks.sum(axis=1)

        # sum(axis=1) skips NaN, so the total itself is never NaN. Detect
        # problem rows from the coerced cells instead: any question cell that
        # is missing or not a number contributed 0 to that student's total.
        invalid_marks = marksDf[numeric_marks.isna().any(axis=1)]
        if not invalid_marks.empty:
            logger.warning(f"Found {len(invalid_marks)} students with invalid marks")

        logger.info(f"‚úì Generated marks report for {len(marksDf)} students")
        logger.info(f"  Average score: {marksDf['Marks'].mean():.2f}")
        logger.info(f"  Score range: {marksDf['Marks'].min():.1f} - {marksDf['Marks'].max():.1f}")

        return marksDf

    except Exception as e:
        logger.error(f"‚ùå Score report generation failed: {e}")
        raise

# Generate enhanced score report.
# marksDf is kept at module level: the later cells (scored scripts, PDFs,
# samples, final summary) read it as a global rather than taking it as an argument.
marksDf = generate_enhanced_score_report()
display(marksDf)

In [None]:
# Enhanced scored scripts creation with comprehensive validation
def create_enhanced_scored_scripts():
    """Copy the scanned page images and stamp each student's values onto the copies.

    Steps:
      1. Replace ``base_path_marked_images`` with a fresh copy of
         ``base_path_images`` and check the ``.jpg`` counts match.
      2. Load ``annotations.json`` and index the annotations by label
         (a later annotation with the same label replaces an earlier one).
      3. Read ``ID/mark.json`` to map each student id to its first page.
      4. For every row of the module-level ``marksDf``, draw the value of each
         annotated label in red at the annotation position; the ``ID`` label
         additionally carries the student's total marks.

    A missing or unreadable page image, or a drawing failure, is logged and
    skipped; a failure affecting a whole student is logged and the student id
    is recorded in the returned failure list.

    Returns:
        tuple: ``(studentIdToPage, processed_students, failed_students)`` -
        the id-to-first-page dict, the number of students processed without a
        student-level error, and the list of ids that failed.

    Raises:
        Exception: setup failures (copy, validation, JSON loading) are logged
        and re-raised.
    """
    print("üìÑ Creating enhanced scored scripts...")

    try:
        # Start from a clean copy of the raw images.
        if os.path.exists(base_path_marked_images):
            shutil.rmtree(base_path_marked_images)

        shutil.copytree(base_path_images, base_path_marked_images)

        # Validate copy operation (top-level .jpg files only)
        original_files = len([f for f in os.listdir(base_path_images) if f.endswith('.jpg')])
        copied_files = len([f for f in os.listdir(base_path_marked_images) if f.endswith('.jpg')])

        if original_files != copied_files:
            raise Exception(f"Image copy validation failed: {original_files} original vs {copied_files} copied")

        logger.info(f"‚úì Copied {copied_files} images to marked folder")

        # Read JSON as UTF-8 explicitly, matching how mark.json is read when
        # the score report is built, instead of the platform default encoding.
        annotations_path = base_path_annotations + "annotations.json"
        with open(annotations_path, "r", encoding="utf-8") as f:
            annotations = json.load(f)

        # Index annotations by label, recording the page each belongs to and
        # renaming x/y to left/top.
        annotations_dict = {}
        for page, page_annotations in annotations.items():
            for annotation in page_annotations:
                annotation["page"] = int(page)
                annotation["left"] = annotation.pop("x")
                annotation["top"] = annotation.pop("y")
                annotations_dict[annotation["label"]] = annotation

        logger.info(f"‚úì Processed {len(annotations_dict)} annotations")

        # Build student ID to page mapping from the ID question's marks
        # (a non-empty override wins over the recorded mark).
        studentIdToPage = {}
        with open(os.path.join(base_path_questions, "ID", "mark.json"), encoding="utf-8") as f:
            data = json.load(f)
            for entry in data:
                studentId = entry["overridedMark"] if entry["overridedMark"] != "" else entry["mark"]
                studentIdToPage[studentId] = int(entry["id"])

        logger.info(f"‚úì Built student-to-page mapping for {len(studentIdToPage)} students")

        # Add marks to images with progress tracking
        marksDf_list = marksDf.to_dict(orient="records")

        progress = IntProgress(min=0, max=len(marksDf_list), description='Adding marks')
        display(progress)

        processed_students = 0
        failed_students = []

        for student in marksDf_list:
            try:
                first_page = studentIdToPage[student["ID"]]

                for label, annotation in annotations_dict.items():
                    value = student[label]
                    if label == "ID":
                        value = value + " Marks: " + str(student["Marks"])
                    text = str(value)

                    # The annotation's page number is an offset from the student's first page.
                    page = first_page + annotation["page"]
                    image_path = base_path_marked_images + str(page) + ".jpg"

                    if not os.path.exists(image_path):
                        logger.warning(f"Image not found: {image_path}")
                        continue

                    # Add text to image with error handling
                    try:
                        img = cv2.imread(image_path)
                        if img is None:
                            logger.warning(f"Failed to load image: {image_path}")
                            continue

                        textSize = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, thickness=2)
                        height = textSize[0][1]
                        # putText anchors at the text baseline, so shift down by
                        # the text height to keep the text below the annotation's
                        # top edge. cv2 needs integer pixel coordinates; int() is
                        # a no-op when the JSON already stores ints.
                        origin = (int(annotation["left"]), int(annotation["top"]) + height)
                        cv2.putText(img, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                        cv2.imwrite(image_path, img)

                    except Exception as e:
                        logger.warning(f"Failed to add text to {image_path}: {e}")
                        continue

                processed_students += 1

            except Exception as e:
                logger.error(f"Failed to process student {student['ID']}: {e}")
                failed_students.append(student['ID'])

            progress.value += 1

        logger.info(f"‚úì Added marks to images for {processed_students} students")
        if failed_students:
            logger.warning(f"Failed to process {len(failed_students)} students: {failed_students}")

        return studentIdToPage, processed_students, failed_students

    except Exception as e:
        logger.error(f"‚ùå Scored scripts creation failed: {e}")
        raise

# Create enhanced scored scripts.
# studentIdToPage is passed to generate_enhanced_pdfs() below and
# failed_students is read by generate_final_summary().
studentIdToPage, processed_students, failed_students = create_enhanced_scored_scripts()

In [None]:
# Enhanced PDF generation with comprehensive validation
def generate_enhanced_pdfs(studentIdToPage, numberOfPage):
    """Write one PDF per student from that student's marked page images.

    For every row of the module-level ``marksDf`` the pages
    ``first_page .. first_page + numberOfPage - 1`` are read from
    ``base_path_marked_images`` and saved as ``<ID>.pdf`` under
    ``base_path_marked_pdfs``. A student whose images are missing or cannot be
    processed is counted as failed and processing continues with the next one.

    Args:
        studentIdToPage: dict mapping a student id to the student's first page number.
        numberOfPage: number of consecutive pages that make up one script.

    Returns:
        dict: ``{'successful': int, 'failed': int, 'errors': list[str]}``.

    Raises:
        Exception: failures outside the per-student loop are logged and re-raised.
    """
    print("üìÑ Generating enhanced individual PDFs...")

    try:
        marksDf_list = marksDf.to_dict(orient="records")

        pdf_generation_stats = {
            'successful': 0,
            'failed': 0,
            'errors': []
        }

        def record_failure(message):
            """Log *message* and count it as one failed student."""
            logger.error(message)
            pdf_generation_stats['errors'].append(message)
            pdf_generation_stats['failed'] += 1

        for student in marksDf_list:
            # Resolved before the try so the handler below can always name the
            # student (previously it could hit an unbound or stale studentId).
            studentId = student.get("ID", "<unknown>")
            try:
                first_page = studentIdToPage[student["ID"]]
                last_page = first_page + numberOfPage - 1

                logger.info(f"Processing PDF for {studentId}: pages {first_page}-{last_page}")

                pdf_path = base_path_marked_pdfs + studentId + ".pdf"

                # Validate all required images exist
                image_paths = [base_path_marked_images + str(i) + ".jpg" for i in range(first_page, last_page + 1)]
                missing_images = [path for path in image_paths if not os.path.exists(path)]

                if missing_images:
                    record_failure(f"Missing images for {studentId}: {missing_images}")
                    continue

                # Load and validate images
                try:
                    images = []
                    for path in image_paths:
                        # convert() returns an independent in-memory RGB image
                        # (a copy when the page is already RGB), so the file
                        # handle is released as soon as the with-block exits.
                        with Image.open(path) as page_image:
                            images.append(page_image.convert('RGB'))

                    if not images:
                        # Only reachable when numberOfPage yields an empty page range.
                        record_failure(f"No images loaded for {studentId}")
                        continue

                    # First image is the document; the rest are appended as further pages.
                    images[0].save(pdf_path, save_all=True, append_images=images[1:])

                    # Validate PDF creation
                    if os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0:
                        pdf_generation_stats['successful'] += 1
                        logger.info(f"‚úì Created PDF for {studentId}: {os.path.getsize(pdf_path)} bytes")
                    else:
                        record_failure(f"PDF creation failed for {studentId}: file not created or empty")

                except Exception as e:
                    record_failure(f"Image processing failed for {studentId}: {e}")

            except Exception as e:
                record_failure(f"PDF generation failed for {studentId}: {e}")

        # Display generation summary
        successful = pdf_generation_stats['successful']
        failed = pdf_generation_stats['failed']
        attempted = successful + failed
        # Guard the ratio: with no students there is nothing to divide by.
        success_rate = successful / attempted * 100 if attempted else 0.0

        print(f"\nüìä PDF Generation Summary:")
        print(f"   Successful: {successful}")
        print(f"   Failed: {failed}")
        print(f"   Success rate: {success_rate:.1f}%")

        if pdf_generation_stats['errors']:
            print(f"\n‚ùå Errors encountered:")
            for error in pdf_generation_stats['errors'][:5]:  # Show first 5 errors
                print(f"   ‚Ä¢ {error}")
            if len(pdf_generation_stats['errors']) > 5:
                print(f"   ... and {len(pdf_generation_stats['errors'])-5} more errors")

        return pdf_generation_stats

    except Exception as e:
        logger.error(f"‚ùå PDF generation failed: {e}")
        raise

# Generate enhanced PDFs
# The mapping is rebuilt here because the call inside
# generate_enhanced_score_report() keeps its result local. Only numberOfPage
# is needed: generate_enhanced_pdfs() uses it as the number of consecutive
# pages in each student's PDF. pdf_stats is read by generate_final_summary().
pageToStudentId, numberOfPage, getStudentId = build_student_id_mapping(
    base_path_questions, base_path_annotations
)

pdf_stats = generate_enhanced_pdfs(studentIdToPage, numberOfPage)

In [None]:
# Enhanced sample generation with comprehensive validation
def generate_enhanced_samples():
    """Merge all student PDFs into ``all.pdf`` and build stratified sample PDFs.

    The combined file is written to ``base_path_marked_scripts``. Samples are
    taken from the module-level ``marksDf`` sorted by ``Marks`` descending:
    the top ``n``, a band around the median, and the bottom ``n``, each
    preceded by the matching template page (Good/Average/Weak). Samples of
    size 3 and 5 are produced for all students and, when enough students score
    above ``passingMark``, again for passing students only.

    Returns:
        None. If any template PDF is missing, a warning is logged and the
        function returns after writing only the combined PDF.

    Raises:
        Exception: failures outside an individual sample are logged and
        re-raised; a failure inside one sample is logged and that sample is skipped.
    """
    print("üìö Generating enhanced sample collections...")

    try:
        # Create combined PDF of all scripts
        # NOTE(review): if base_path_marked_scripts lies inside
        # base_path_marked_pdfs, an all.pdf left over from a previous run
        # would be merged in as well - confirm the directory layout.
        writer = PdfFileMerger(strict=True)
        try:
            pdf_files_added = 0
            for path, currentDirectory, files in os.walk(base_path_marked_pdfs):
                # os.walk yields entries in arbitrary order; sort so the page
                # order of all.pdf is reproducible between runs.
                currentDirectory.sort()
                for file in sorted(files):
                    if file.endswith(".pdf"):
                        pdf_path = os.path.join(path, file)
                        try:
                            writer.append(pdf_path)
                            pdf_files_added += 1
                        except Exception as e:
                            logger.warning(f"Failed to add {pdf_path} to combined PDF: {e}")

            combined_path = base_path_marked_scripts + "all.pdf"
            writer.write(combined_path)
        finally:
            # Release the input files even when merging or writing fails.
            writer.close()

        logger.info(f"‚úì Created combined PDF with {pdf_files_added} individual PDFs")

        # Generate stratified samples with enhanced validation
        sampling = marksDf.sort_values(by=["Marks"], ascending=False)["Marks"]

        from_directory = os.path.join(os.getcwd(), "..", "templates", "pdf")

        # Validate template files exist
        template_files = {
            'good': os.path.join(from_directory, "Good.pdf"),
            'average': os.path.join(from_directory, "Average.pdf"),
            'weak': os.path.join(from_directory, "Weak.pdf")
        }

        missing_templates = [name for name, path in template_files.items() if not os.path.exists(path)]
        if missing_templates:
            logger.warning(f"Missing template files: {missing_templates}")
            return

        # Templates are read once and reused for every sample.
        goodPage = PdfFileReader(template_files['good'])
        averagePage = PdfFileReader(template_files['average'])
        weakPage = PdfFileReader(template_files['weak'])

        def get_scripts_pdf(df):
            """Return the PDF path for every student id in the index of *df*."""
            return [base_path_marked_pdfs + str(rowNumber) + ".pdf" for rowNumber in df.index]

        def append_scripts(merger, selection):
            """Append each existing student PDF of *selection*; log and skip failures."""
            for pdf in get_scripts_pdf(selection):
                if os.path.exists(pdf):
                    try:
                        # Passing the path lets the merger own the file handle
                        # and release it in merger.close().
                        merger.append(pdf)
                    except Exception as e:
                        logger.warning(f"Failed to add {pdf}: {e}")

        def take_sample_enhanced(n, sampling, suffix=""):
            """Write ``sampleOf<n><suffix>.pdf`` with top, median and bottom scripts."""
            try:
                # Shrink the sample when there are fewer than three groups of n.
                if len(sampling) < 3 * n:
                    n = max(1, int(len(sampling) / 3))
                    logger.warning(f"Adjusted sample size to {n} due to insufficient data")

                good = sampling.head(n)
                weak = sampling.tail(n)
                median = int(len(sampling) / 2)
                take = max(1, int(n / 2))
                average = sampling.iloc[median - take : median + take]

                fileName = base_path_marked_scripts + "sampleOf" + str(n) + suffix + ".pdf"

                merger = PdfFileMerger()
                try:
                    # Each section: template page first, then the students' scripts.
                    merger.append(goodPage)
                    append_scripts(merger, good)

                    merger.append(averagePage)
                    append_scripts(merger, average)

                    merger.append(weakPage)
                    append_scripts(merger, weak)

                    # Close the output before it is validated below so the
                    # size check sees the fully flushed file.
                    with open(fileName, "wb") as out_file:
                        merger.write(out_file)
                finally:
                    merger.close()

                # Validate sample creation
                if os.path.exists(fileName) and os.path.getsize(fileName) > 0:
                    logger.info(f"‚úì Created sample: {fileName} ({os.path.getsize(fileName)} bytes)")
                else:
                    logger.error(f"‚ùå Failed to create sample: {fileName}")

            except Exception as e:
                logger.error(f"‚ùå Sample generation failed for n={n}, suffix={suffix}: {e}")

        # Generate different sample sizes
        take_sample_enhanced(3, sampling)
        take_sample_enhanced(5, sampling)

        # Generate samples for passing students only (strictly above passingMark)
        passing_sampling = sampling.where(lambda x: x > passingMark).dropna()
        if len(passing_sampling) >= 3:
            take_sample_enhanced(3, passing_sampling, "_only_pass")
            if len(passing_sampling) >= 5:
                take_sample_enhanced(5, passing_sampling, "_only_pass")
        else:
            logger.warning(f"Insufficient passing students ({len(passing_sampling)}) for passing-only samples")

        logger.info("‚úì Sample generation completed")

    except Exception as e:
        logger.error(f"‚ùå Sample generation failed: {e}")
        raise

# Generate enhanced samples.
# Called for its side effects (PDF files written under base_path_marked_scripts);
# the return value is not used.
generate_enhanced_samples()

In [None]:
# Enhanced final summary and next steps
def generate_final_summary():
    """Print the closing report for Step 6.

    Reads the module-level results of the earlier cells (``marksDf``,
    ``passingMark``, ``backup_path``, ``pdf_stats``, ``failed_students``) and
    prints score statistics, the generated outputs, any recorded failures and
    the suggested next steps. Returns nothing.
    """
    banner = "=" * 70

    print("\n" + banner)
    print("üéâ ENHANCED STEP 6: POST-SCORING PACKAGING SUMMARY")
    print(banner)

    # Overall statistics; a student passes with a total strictly above passingMark.
    marks = marksDf['Marks']
    total_students = len(marksDf)
    avg_score = marks.mean()
    passing_students = len(marksDf[marks > passingMark])
    if total_students > 0:
        pass_rate = passing_students / total_students * 100
    else:
        pass_rate = 0

    results_section = (
        "\nüìä Processing Results:",
        f"   Total students processed: {total_students}",
        f"   Average score: {avg_score:.2f}",
        f"   Passing students: {passing_students} ({pass_rate:.1f}%)",
        f"   Score range: {marks.min():.1f} - {marks.max():.1f}",
    )
    for line in results_section:
        print(line)

    files_section = (
        "\nüìÅ Generated Files:",
        f"   ‚úÖ Backup archive: {os.path.basename(backup_path)}",
        f"   ‚úÖ Individual PDFs: {pdf_stats['successful']} created",
        "   ‚úÖ Combined PDF: all.pdf",
        "   ‚úÖ Sample collections: Multiple stratified samples",
        "   ‚úÖ Score reports: Excel format with detailed analytics",
    )
    for line in files_section:
        print(line)

    # Failure notes appear only when an earlier cell recorded problems.
    if pdf_stats['failed'] > 0:
        print(f"   ‚ö†Ô∏è PDF generation issues: {pdf_stats['failed']} failed")

    if failed_students:
        print(f"   ‚ö†Ô∏è Student processing issues: {len(failed_students)} students")

    guidance_section = (
        "\nüéØ Next Steps:",
        "   1. üìß Proceed to Step 7: Email Score Distribution",
        "   2. üìä Review detailed analytics in Excel reports",
        "   3. üìÑ Use sample PDFs for moderation and review",
        "   4. üíæ Archive backup file for long-term storage",
        "\nüí° Quality Assurance:",
        "   ‚Ä¢ All processing includes comprehensive validation",
        "   ‚Ä¢ Error handling ensures partial failures don't stop processing",
        "   ‚Ä¢ Detailed logging provides full audit trail",
        "   ‚Ä¢ Multiple output formats support different use cases",
    )
    for line in guidance_section:
        print(line)

    print("\n" + banner)
    print(f"‚úÖ Enhanced Step 6 completed successfully at {datetime.now():%H:%M:%S}")
    print("Ready for final distribution and archival!")
    print(banner)

# Generate final summary.
# Must run after the cells above: it reads marksDf, backup_path, pdf_stats
# and failed_students from module scope.
generate_final_summary()