## Step 1: Install Required Libraries

In [None]:
# Install the third-party packages used by the cells below.
# The leading `!` is IPython/Colab syntax that runs the line as a shell
# command; `-q` asks pip for quiet output.
!pip install tensorflow matplotlib seaborn scikit-learn pillow -q

# NOTE(review): this message is printed unconditionally; the pip command's
# exit status is not inspected here.
print("‚úÖ All libraries installed successfully!")

## Step 2: Import Libraries

In [None]:
# Core libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import warnings
warnings.filterwarnings('ignore')

# TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Sklearn for metrics
from sklearn.metrics import (
    classification_report, confusion_matrix,
    accuracy_score, precision_score, recall_score, f1_score
)

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Check GPU availability
print("TensorFlow Version:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))
print("\n‚úÖ All libraries imported successfully!")

## Step 3: Setup Kaggle API & Download Dataset

**Note:** This step needs your own Kaggle API token. Keep it private — do not share or commit a notebook that contains a real token.

In [None]:
import getpass
import json

# SECURITY: a real API token must never be hard-coded in the notebook --
# anyone who can read the file (or its history) could use it. The token is
# read from the KAGGLE_API_TOKEN environment variable when set; otherwise it
# is requested interactively without being echoed.
KAGGLE_API_TOKEN = (
    os.environ.get("KAGGLE_API_TOKEN")
    or getpass.getpass("Enter your Kaggle API token: ")
).strip()

if not KAGGLE_API_TOKEN:
    raise ValueError("A Kaggle API token is required to download the dataset.")

# Contents of kaggle.json.
# NOTE(review): the username stays empty unless KAGGLE_USERNAME is set --
# confirm the Kaggle CLI accepts an empty username for your token type.
kaggle_credentials = {
    "username": os.environ.get("KAGGLE_USERNAME", ""),
    "key": KAGGLE_API_TOKEN,
}

# Create .kaggle directory
os.makedirs(os.path.expanduser("~/.kaggle"), exist_ok=True)

# Write credentials. The file is created with owner-only permissions from the
# start, so the secret is never readable by others between write and chmod.
kaggle_path = os.path.expanduser("~/.kaggle/kaggle.json")
credentials_fd = os.open(kaggle_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(credentials_fd, 'w') as f:
    json.dump(kaggle_credentials, f)

# The mode passed to os.open only applies to newly created files; tighten
# permissions explicitly in case kaggle.json already existed.
os.chmod(kaggle_path, 0o600)

print("‚úÖ Kaggle API credentials configured!")

In [None]:
# Download the ChestX6 dataset
print("‚¨áÔ∏è Downloading ChestX6 dataset from Kaggle...\n")

!kaggle datasets download -d mohamedasak/chest-x-ray-6-classes-dataset

# Unzip the dataset
print("\nüì¶ Extracting dataset...")
!unzip -q chest-x-ray-6-classes-dataset.zip -d dataset

print("\n‚úÖ Dataset downloaded and extracted!")

## Step 4: Auto-Detect Dataset Structure

In [None]:
# Auto-detect the correct dataset structure
print("üîç Auto-detecting dataset paths...")

# Candidate layouts, in priority order:
# (base directory, train subdir, validation subdir, test subdir)
possible_structures = [
    ('dataset/chest-xray', 'train', 'val', 'test'),
    ('dataset/chest-xray', 'train', 'validation', 'test'),
    ('dataset', 'train', 'val', 'test'),
    ('dataset', 'train', 'validation', 'test'),
]


def _existing_or_none(path):
    """Return *path* if it exists on disk, otherwise None."""
    return path if os.path.exists(path) else None


def _detect_dataset_dirs(structures):
    """Pick the most complete candidate layout that is present on disk.

    Every candidate whose train directory exists is considered. The one with
    the most existing val/test directories wins; ties go to the earliest
    entry in *structures*. Stopping at the first existing train directory
    would make the 'validation' variants unreachable, because they share
    their base and train names with the 'val' variants listed before them.

    Returns:
        (base, train_dir, val_dir, test_dir), where val_dir/test_dir are None
        when missing, or None if no candidate has a train directory.
    """
    candidates = []
    for base, train, val, test in structures:
        train_path = os.path.join(base, train)
        if not os.path.exists(train_path):
            continue
        candidates.append((
            base,
            train_path,
            _existing_or_none(os.path.join(base, val)),
            _existing_or_none(os.path.join(base, test)),
        ))

    if not candidates:
        return None
    # max() returns the first maximal element, which preserves priority order.
    return max(candidates, key=lambda c: sum(p is not None for p in c[2:]))


BASE_DIR = None
TRAIN_DIR = None
VAL_DIR = None
TEST_DIR = None

detected = _detect_dataset_dirs(possible_structures)
if detected is not None:
    BASE_DIR, TRAIN_DIR, VAL_DIR, TEST_DIR = detected
    print("‚úÖ Found dataset structure!")
    print(f"   Base: '{BASE_DIR}'")
    print(f"   Train: {TRAIN_DIR} {'‚úì' if os.path.exists(TRAIN_DIR) else '‚úó'}")
    print(f"   Val: {VAL_DIR} {'‚úì' if VAL_DIR and os.path.exists(VAL_DIR) else '‚úó'}")
    print(f"   Test: {TEST_DIR} {'‚úì' if TEST_DIR and os.path.exists(TEST_DIR) else '‚úó'}")

# The rest of the notebook evaluates on the test split, so it is mandatory.
# FileNotFoundError is a subclass of Exception, so existing handlers still work.
if TEST_DIR is None or not os.path.exists(TEST_DIR):
    raise FileNotFoundError("‚ùå ERROR: Test directory not found!")

print(f"\nüìä Test Set Classes: {os.listdir(TEST_DIR)}")

## Step 5: Upload Your Trained Model

Upload `best_model_finetuned.h5` using the file browser on the left 📁

In [None]:
# Wait for the user to upload the model file
import time

model_file = 'best_model_finetuned.h5'

POLL_SECONDS = 5    # pause between checks while the file is not ready
SETTLE_SECONDS = 3  # how long the file size must stay unchanged


def _file_size(path):
    """Return the size of *path* in bytes, or None if it cannot be read."""
    try:
        return os.path.getsize(path)
    except OSError:
        return None


def _upload_finished(path, settle_seconds=SETTLE_SECONDS):
    """Return True once *path* exists, is non-empty and has stopped growing.

    Checking existence alone is not enough: a file can appear on disk before
    its upload has finished, and loading it then would fail on a truncated
    file. Comparing two size readings is a heuristic for "upload complete".
    """
    first_size = _file_size(path)
    if not first_size:  # missing (None) or still empty (0)
        return False
    time.sleep(settle_seconds)
    return _file_size(path) == first_size


print("‚è≥ Waiting for model upload...")
print(f"Please upload '{model_file}' using the file browser (üìÅ) on the left")
print("\nChecking every 5 seconds...\n")

while not _upload_finished(model_file):
    time.sleep(POLL_SECONDS)
    print("‚è≥ Still waiting for model file...")

print(f"\n‚úÖ Model file '{model_file}' detected!")
print(f"File size: {os.path.getsize(model_file) / (1024*1024):.2f} MB")

## Step 6: Load the Model

In [None]:
# Restore the fine-tuned network from the uploaded HDF5 file
print("üì• Loading the fine-tuned model...\n")

model = load_model('best_model_finetuned.h5')

print("‚úÖ Model loaded successfully!")

# Report the parameter count (with thousands separators), followed by the
# layer-by-layer architecture summary
print("\nTotal Parameters: {:,}".format(model.count_params()))
model.summary()

## Step 7: Create Test Data Generator

In [None]:
# Evaluation settings
IMG_SIZE, BATCH_SIZE = 224, 32
CLASS_MODE = 'categorical'

# Test images are only rescaled by 1/255 -- no augmentation is applied
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream images from TEST_DIR; shuffle=False keeps a fixed order across passes
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    shuffle=False,
    class_mode=CLASS_MODE,
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
)

# Class names, in the order of the class_indices mapping
class_labels = list(test_generator.class_indices)

print("\nüìã Class Indices:", test_generator.class_indices, sep="\n")
print(f"\nTotal Test Images: {test_generator.samples}")
print(f"Number of Classes: {len(class_labels)}")

## Step 8: Evaluate Model on Test Set 🎯

In [None]:
def _f1_from(precision, recall):
    """Return the harmonic mean of precision and recall.

    Returns 0.0 when both inputs are zero instead of raising
    ZeroDivisionError (e.g. for a model that never predicts a positive).
    """
    denominator = precision + recall
    if denominator <= 0:
        return 0.0
    return 2 * (precision * recall) / denominator


# Evaluate the model
print("üìä Evaluating model on test set...\n")
print("This may take 2-5 minutes...\n")

# NOTE(review): this assumes the model was compiled with exactly three
# metrics in the order accuracy, precision, recall -- confirm against the
# training notebook. A mismatch is reported explicitly rather than through
# an opaque tuple-unpacking error.
evaluation_results = np.atleast_1d(model.evaluate(test_generator))
if len(evaluation_results) != 4:
    raise ValueError(
        "Expected model.evaluate() to return [loss, accuracy, precision, recall], "
        f"but got {len(evaluation_results)} value(s). "
        f"Compiled metric names: {model.metrics_names}"
    )
test_loss, test_accuracy, test_precision, test_recall = (
    float(value) for value in evaluation_results
)

# Calculate F1 Score
test_f1 = _f1_from(test_precision, test_recall)

print("\n" + "="*60)
print("üéØ TEST SET PERFORMANCE METRICS")
print("="*60)
print(f"‚úÖ Accuracy:  {test_accuracy*100:.2f}%")
print(f"‚úÖ Precision: {test_precision*100:.2f}%")
print(f"‚úÖ Recall:    {test_recall*100:.2f}%")
print(f"‚úÖ F1-Score:  {test_f1*100:.2f}%")
print(f"‚úÖ Loss:      {test_loss:.4f}")
print("="*60)

# Compare with the accuracy this notebook expects (in percent); `difference`
# is the absolute gap in percentage points.
expected_accuracy = 86.07
difference = abs(test_accuracy*100 - expected_accuracy)

if difference < 1.0:
    print(f"\nüéâ VERIFIED! Accuracy matches expected {expected_accuracy}% ¬±1%")
elif difference < 3.0:
    print(f"\n‚úÖ CLOSE! Accuracy is within {expected_accuracy}% ¬±3% (acceptable)")
else:
    print(f"\n‚ö†Ô∏è WARNING: Accuracy differs from expected {expected_accuracy}% by {difference:.2f}%")

## Step 9: Generate Confusion Matrix

In [None]:
# Generate predictions
print("üîÆ Generating predictions...\n")

# Rewind the generator so the predictions start from its first batch and can
# be compared position-by-position with test_generator.classes.
test_generator.reset()
predictions = model.predict(test_generator, verbose=1)
predicted_classes = np.argmax(predictions, axis=1)

# Get true labels
true_classes = test_generator.classes

# Confusion Matrix.
# The label set is pinned to every known class index. Without `labels`,
# scikit-learn only uses the labels that occur in the data, so a class that
# is neither present nor predicted would shrink the matrix and misalign the
# tick labels below.
class_ids = np.arange(len(class_labels))
cm = confusion_matrix(true_classes, predicted_classes, labels=class_ids)

# Plot confusion matrix (rows = true class, columns = predicted class)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels,
            cbar_kws={'label': 'Count'},
            annot_kws={'fontsize': 10})
plt.title('Confusion Matrix - Test Set Performance', fontsize=16, fontweight='bold', pad=20)
plt.ylabel('True Label', fontsize=12, fontweight='bold')
plt.xlabel('Predicted Label', fontsize=12, fontweight='bold')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
# Save before show(): the file is written while the figure is still current.
plt.savefig('test_confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Confusion matrix saved as 'test_confusion_matrix.png'")

## Step 10: Detailed Classification Report

In [None]:
def _tier_marker(accuracy):
    """Map an accuracy in [0, 1] to the coloured marker shown in the summary."""
    if accuracy >= 0.95:
        return "üü¢"
    if accuracy >= 0.85:
        return "üü°"
    if accuracy >= 0.75:
        return "üü†"
    return "üî¥"


# Classification Report
print("\nüìã DETAILED CLASSIFICATION REPORT:")
print("="*70)
# `labels` pins the report to every known class index so it always lines up
# with `target_names`, even if a class is absent from the data.
report = classification_report(true_classes, predicted_classes,
                               labels=np.arange(len(class_labels)),
                               target_names=class_labels,
                               digits=4)
print(report)
print("="*70)

# Per-class accuracy breakdown: the share of each class's own images that
# were predicted correctly.
print("\nüéØ PER-CLASS PERFORMANCE SUMMARY:\n")
for i, label in enumerate(class_labels):
    mask = true_classes == i
    class_count = np.sum(mask)

    # A class without test images has no defined accuracy; report that
    # explicitly instead of scoring empty arrays.
    if class_count == 0:
        print(f"   {label:20s}: No samples in test set")
        continue

    class_accuracy = accuracy_score(true_classes[mask], predicted_classes[mask])
    emoji = _tier_marker(class_accuracy)

    print(f"{emoji} {label:20s}: {class_accuracy*100:6.2f}% ({class_count:4d} images)")

## Step 11: Visualize Sample Predictions

In [None]:
# Show images from the first test batch with their true and predicted labels
test_generator.reset()
x_batch, y_batch = next(test_generator)
predictions_batch = model.predict(x_batch)

fig, axes = plt.subplots(4, 4, figsize=(16, 16))
fig.suptitle('Sample Predictions on Test Set', fontsize=18, fontweight='bold', y=0.995)

# zip() stops at the shortest input: at most 16 images are drawn, and any
# axes beyond the batch size are left untouched.
for ax, image, one_hot, probabilities in zip(axes.flat, x_batch, y_batch, predictions_batch):
    ax.imshow(image)

    # Predicted class, ground-truth class and the predicted class's score
    pred_class = np.argmax(probabilities)
    true_class = np.argmax(one_hot)
    confidence = probabilities[pred_class] * 100

    pred_label = class_labels[pred_class]
    true_label = class_labels[true_class]

    # Green with a check mark when correct, red with a cross otherwise
    if pred_class == true_class:
        color, marker = 'green', "‚úì"
    else:
        color, marker = 'red', "‚úó"

    ax.set_title(f'{marker} True: {true_label}\nPred: {pred_label}\nConf: {confidence:.1f}%',
                 color=color, fontsize=10, fontweight='bold')
    ax.axis('off')

plt.tight_layout()
plt.savefig('test_sample_predictions.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Sample predictions saved as 'test_sample_predictions.png'")

## Step 12: Per-Class Confidence Analysis

In [None]:
# Summarise, per class, the confidence of the correctly classified images
print("üìä CONFIDENCE SCORE ANALYSIS\n")
print("="*70)

for class_id, class_name in enumerate(class_labels):
    belongs_to_class = true_classes == class_id

    # Nothing to analyse when the test set has no image of this class
    if not belongs_to_class.any():
        print(f"{class_name:20s}: No samples in test set")
        continue

    # Scores (in percent) that the model gave to this class on the images
    # of this class it classified correctly
    correctly_classified = belongs_to_class & (predicted_classes == class_id)
    hit_confidences = predictions[correctly_classified, class_id] * 100

    if hit_confidences.size == 0:
        print(f"{class_name:20s}: No correct predictions")
        continue

    mean_c = np.mean(hit_confidences)
    low_c = np.min(hit_confidences)
    high_c = np.max(hit_confidences)
    print(f"{class_name:20s}: Avg={mean_c:6.2f}%  Min={low_c:6.2f}%  Max={high_c:6.2f}%")

print("="*70)

## Step 13: Download Test Results

In [None]:
from google.colab import files
import time

# Result images produced by the earlier cells
files_to_download = [
    'test_confusion_matrix.png',
    'test_sample_predictions.png',
]

print("‚¨áÔ∏è Downloading test result files...\n")

for result_path in files_to_download:
    # Skip (with a warning) anything that was never generated
    if not os.path.exists(result_path):
        print(f"‚ö†Ô∏è Not found: {result_path}\n")
        continue

    size_mb = os.path.getsize(result_path) / (1024*1024)
    print(f"üì• {result_path} ({size_mb:.2f} MB)")
    try:
        files.download(result_path)
        print("‚úÖ Downloaded!\n")
        # Short pause between consecutive downloads
        time.sleep(3)
    except Exception as err:
        # Best effort: report the failure and carry on with the next file
        print(f"‚ùå Failed: {err}\n")

print("‚úÖ Download complete!")

## 🎉 Testing Complete!

### Summary:
- ✅ Model loaded successfully
- ✅ Tested on complete test dataset
- ✅ Generated confusion matrix
- ✅ Detailed classification report created
- ✅ Sample predictions visualized

### Expected Results:
- **Overall Accuracy:** ~86%
- **Best Classes:** Tuberculosis (100%), COVID-19 (94%), Emphysema (94%)
- **Challenging Classes:** Pneumonia-Viral (55-60%)

### If accuracy matches:
Your model is **production-ready**! Place `best_model_finetuned.h5` in your backend as `chest_xray_model.h5` and start making predictions.

### If accuracy differs significantly:
- Check if model file was corrupted during download
- Verify file size (~100-120 MB)
- Re-download from Google Colab