## Katse mõelda

In [2]:
from __future__ import absolute_import, division, print_function
from tensorflow.keras import Model, layers
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist

rng = np.random

In [None]:
#!pip install kaggle

Collecting kaggle
  Downloading kaggle-1.7.4.5-py3-none-any.whl.metadata (16 kB)
Collecting python-slugify (from kaggle)
  Downloading python_slugify-8.0.4-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting text-unidecode (from kaggle)
  Downloading text_unidecode-1.3-py2.py3-none-any.whl.metadata (2.4 kB)
Downloading kaggle-1.7.4.5-py3-none-any.whl (181 kB)
Downloading python_slugify-8.0.4-py2.py3-none-any.whl (10 kB)
Downloading text_unidecode-1.3-py2.py3-none-any.whl (78 kB)
Installing collected packages: text-unidecode, python-slugify, kaggle

   ------------- -------------------------- 1/3 [python-slugify]
   ------------- -------------------------- 1/3 [python-slugify]
   ------------- -------------------------- 1/3 [python-slugify]
   ------------- -------------------------- 1/3 [python-slugify]
   -------------------------- ------------- 2/3 [kaggle]
   -------------------------- ------------- 2/3 [kaggle]
   -------------------------- ------------- 2/3 [kaggle]
   -------------

In [3]:
import os
# 2. DOWNLOAD THE DATASET
# Computer must have a unique kaggle.json file downloaded and have this path: "C:\Users\youruser\.kaggle\kaggle.json"
import kaggle

dataset_name = 'samithsachidanandan/human-face-emotions'
target_folder = 'human-face-emotions'

kaggle.api.dataset_download_files(dataset_name, path=target_folder, unzip=True)
print("Download complete!")

Dataset URL: https://www.kaggle.com/datasets/samithsachidanandan/human-face-emotions
Download complete!


## Loading and Preprocessing of Data

In [None]:
data_dir = './human-face-emotions/Data'

In [17]:
import os
from PIL import Image

base_folder = data_dir

summary = {}   # to store removed / kept per class
total_removed = 0
total_kept = 0

for label in os.listdir(base_folder):
    class_path = os.path.join(base_folder, label)
    removed = 0
    kept = 0

    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)

        try:
            img = Image.open(img_path)
            w, h = img.size
            img.close()  # ← IMPORTANT: close file so Windows unlocks it

            if (w, h) != (48, 48):
                os.remove(img_path)
                removed += 1
            else:
                kept += 1

        except:
            # corrupted or unreadable → try closing + remove
            try:
                img.close()
            except:
                pass
            os.remove(img_path)
            removed += 1

            

    summary[label] = {"removed": removed, "kept": kept}
    total_removed += removed
    total_kept += kept

# Print class-by-class report
print("=== Removal Report by Class ===")
for label, stats in summary.items():
    print(f"{label}: removed {stats['removed']}, kept {stats['kept']}")

print("\n=== TOTAL ===")
print("Total removed:", total_removed)
print("Total kept:", total_kept)


=== Removal Report by Class ===
Angry: removed 242, kept 9906
Fear: removed 18, kept 9714
Happy: removed 461, kept 17978
Sad: removed 399, kept 12154
Suprise: removed 223, kept 8004

=== TOTAL ===
Total removed: 1343
Total kept: 57756


In [18]:
# Read data from directory containing separate directories for each label value - anger, fear, happiness, sadness, surprise
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,     # 80/20 split
    subset="training",
    seed=42,                  # ensures same split every run
    label_mode="int",
    image_size=(48, 48),      # 
    color_mode="grayscale",   # or "rgb"
    batch_size=None,          # important: gives Numpy-like tensors
    shuffle=True              # shuffle!!
)

test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=42,
    label_mode="int",
    image_size=(48, 48),
    color_mode="grayscale",
    batch_size=None # on iteration  gives one image at time
    # no shuffle!
)

class_names = train_ds.class_names
number_of_classes = 5

Found 57756 files belonging to 5 classes.
Using 46205 files for training.
Found 57756 files belonging to 5 classes.
Using 11551 files for validation.


In [19]:
train_ds = train_ds.shuffle(10000).batch(128).prefetch(tf.data.AUTOTUNE)
test_ds  = test_ds.batch(128).prefetch(tf.data.AUTOTUNE)

### Building the model

In [28]:
model = tf.keras.Sequential([

    # --- Convolution blocks ---
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', padding='same', input_shape=(48, 48, 1)),
    tf.keras.layers.MaxPooling2D(),
    # 32: number of filters -> 32 different pattern detectors
    # (3×3) = kernel (=filter) size in pixels - like a (3×3) window
    # padding='same' : 48×48 input → 48×48 output
    # MaxPooling : reduces image size by taking the maximum value in each 3x3 window
    # e.g. pooling keeps the strongest signals
    

    tf.keras.layers.Conv2D(64, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Conv2D(128, (3,3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D(),

    tf.keras.layers.Flatten(),

    # --- Dense layers ---
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(number_of_classes, activation='softmax')
])

In [29]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

In [30]:
history = model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=15
)

Epoch 1/15
[1m361/361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 82ms/step - accuracy: 0.3039 - loss: 3.9835 - val_accuracy: 0.4503 - val_loss: 1.3803
Epoch 2/15
[1m361/361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 76ms/step - accuracy: 0.4233 - loss: 1.3820 - val_accuracy: 0.4838 - val_loss: 1.2533
Epoch 3/15
[1m361/361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 75ms/step - accuracy: 0.4683 - loss: 1.2748 - val_accuracy: 0.5260 - val_loss: 1.1514
Epoch 4/15
[1m361/361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 80ms/step - accuracy: 0.5224 - loss: 1.1597 - val_accuracy: 0.5664 - val_loss: 1.0707
Epoch 5/15
[1m361/361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 78ms/step - accuracy: 0.5690 - loss: 1.0665 - val_accuracy: 0.6111 - val_loss: 0.9581
Epoch 6/15
[1m361/361[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 78ms/step - accuracy: 0.6034 - loss: 0.9876 - val_accuracy: 0.6360 - val_loss: 0.9168
Epoch 7/15
[1m3

In [32]:
test_loss, test_accuracy = model.evaluate(test_ds)
print('Test loss: %.4f accuracy: %.4f' % (test_loss, test_accuracy))

[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 35ms/step - accuracy: 0.7713 - loss: 0.6421
Test loss: 0.6481 accuracy: 0.7714


In [27]:
print("Computing Confusion Matrix")

# Initialize lists to store all labels
all_labels = []
all_predictions = []

# Loop over the entire validation dataset to get true vs predicted
for images, labels in val_ds:
    preds = model.predict(images, verbose=0)
    predicted_ids = np.argmax(preds, axis=-1)
    
    all_labels.extend(labels.numpy())
    all_predictions.extend(predicted_ids)

# Generate the matrix
cm = confusion_matrix(all_labels, all_predictions)

# Plot it
plt.figure(figsize=(10, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues, values_format='d')
plt.title("Confusion Matrix")
plt.show()

Computing Confusion Matrix


NameError: name 'val_ds' is not defined