# Develop a program to implement the Naive Bayesian classifier considering Olivetti Face Data set for training. Compute the accuracy of the classifier, considering a few test data sets.

In [1]:
from sklearn.datasets import fetch_olivetti_faces
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

## Load data:
- Loads the Olivetti faces dataset
- Prints basic information about the dataset structure
- Dataset contains 400 images (40 subjects with 10 images each)

In [4]:
# Fetch the Olivetti faces; shuffling uses a fixed seed so the sample order
# is the same on every run.
faces = fetch_olivetti_faces(random_state=42, shuffle=True)
X, y = faces.data, faces.target

# Summarise the dataset: row count, column count, and distinct target labels.
print(
    "Dataset Information:",
    f"Number of samples: {X.shape[0]}",
    f"Number of features: {X.shape[1]}",
    f"Number of classes: {len(np.unique(y))}",
    sep="\n",
)

Dataset Information:
Number of samples: 400
Number of features: 4096
Number of classes: 40


## Data Splitting:
- Splits data into 80% training and 20% testing sets
- Uses stratification to ensure balanced class distribution
- Prints information about the split sizes

In [10]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Report how many samples landed in each partition.
print(
    "\nSplit Information:",
    f"Training samples: {X_train.shape[0]}",
    f"Testing samples: {X_test.shape[0]}",
    sep="\n",
)


Split Information:
Training samples: 320
Testing samples: 80


## Model Training:
- Creates and trains a Gaussian Naive Bayes classifier
- Uses the training data to learn, for each subject, a Gaussian distribution (mean and variance) over every pixel feature

In [6]:
# Build a Gaussian Naive Bayes model with default settings and fit it on the
# training partition only (the test partition stays unseen until evaluation).
nb_classifier = GaussianNB()
nb_classifier.fit(X=X_train, y=y_train)

## Evaluation:
- Makes predictions on the test set
- Calculates and prints overall accuracy
- Generates a detailed classification report

In [7]:
# Predict a label for every held-out sample.
y_pred = nb_classifier.predict(X_test)

# Overall accuracy: fraction of test samples whose predicted label matches.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("\nModel Evaluation:")
print(f"Accuracy: {accuracy:.4f}")

# Per-class precision / recall / F1 / support breakdown.
print("\nClassification Report:")
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)


Model Evaluation:
Accuracy: 0.9125

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       1.00      0.50      0.67         2
           2       0.50      0.50      0.50         2
           3       0.50      1.00      0.67         2
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         2
           6       1.00      1.00      1.00         2
           7       0.67      1.00      0.80         2
           8       1.00      1.00      1.00         2
           9       1.00      0.50      0.67         2
          10       1.00      1.00      1.00         2
          11       1.00      1.00      1.00         2
          12       1.00      0.50      0.67         2
          13       1.00      1.00      1.00         2
          14       1.00      1.00      1.00         2
          15       1.00      1.00      1.00         2
          16       0.

## Individual Testing
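- Tests accuracy separately for the first 5 individuals (subject labels 0–4)
- Helps understand how well the model performs on different subjects; with only 2 test images per subject, each per-person accuracy can only be 0.0, 0.5, or 1.0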

In [8]:
# Report accuracy separately for the subjects labelled 0 through 4.
for person_id in range(5):
    # Boolean mask selecting this subject's rows in the test partition.
    person_mask = (y_test == person_id)

    # Skip subjects that have no images in the test partition.
    if not person_mask.any():
        continue

    # Score the classifier on this subject's test images only.
    person_pred = nb_classifier.predict(X_test[person_mask])
    person_accuracy = accuracy_score(y_test[person_mask], person_pred)
    print(f"\nAccuracy for person {person_id}: {person_accuracy:.4f}")


Accuracy for person 0: 0.5000

Accuracy for person 1: 0.5000

Accuracy for person 2: 0.5000

Accuracy for person 3: 1.0000

Accuracy for person 4: 1.0000
