In [33]:
import os
import pickle
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image
from pymongo import MongoClient

In [34]:
def deserialize_image(img):
    """Rebuild the Python object stored in a pickled byte string.

    Parameters
    ----------
    img : bytes-like
        A pickle payload, e.g. the output of ``pickle.dumps``.

    Returns
    -------
    object
        The object that was pickled.

    Notes
    -----
    ``pickle.loads`` can execute arbitrary code while unpickling, so this
    helper must only be given payloads from a trusted source.
    """
    restored = pickle.loads(img)
    return restored

In [35]:
# MongoDB connection settings.
#
# Credentials come from the environment instead of being hardcoded, so the
# secret never ends up in the saved notebook or in version control. Export
# MONGODB_USER / MONGODB_PASSWORD before starting the kernel.
# NOTE(review): the password that used to be committed in this cell should be
# treated as compromised and rotated.
user = os.environ.get("MONGODB_USER", "admin")
# Deliberately no real default: when the variable is unset the URI carries an
# empty password and the login is rejected instead of silently succeeding.
password = os.environ.get("MONGODB_PASSWORD", "")
# Percent-escape both parts so characters such as '@', ':' or '/' inside a
# credential cannot corrupt the URI.
uri = f"mongodb+srv://{quote_plus(user)}:{quote_plus(password)}@rpm.spzvwtw.mongodb.net"
db_name = 'RecognitivePretrainedModels'
collection_name = 'FER2013'

In [36]:
# Connect to the cluster and bind `collection` to db_name / collection_name.
try:
    connection = MongoClient(uri)
    # MongoClient connects lazily: the constructor succeeds even when the
    # server is unreachable or the credentials are wrong. A ping forces a real
    # round trip so the success message below can be trusted.
    connection.admin.command('ping')
    db = connection[db_name]
    collection = db[collection_name]
    print("Database Connected!")
except Exception:
    print("Connection failed.")
    # Re-raise instead of swallowing: the following cells cannot work without
    # `collection`, and the traceback carries the actual cause.
    raise

Database Connected!


# Train Data

In [6]:
# Select the documents of the training split; find() hands back a cursor that
# is consumed by the next cell.
train_filter = {'type': 'train'}
train_data = collection.find(train_filter)

In [7]:
# Drain the cursor and build a DataFrame with one row per document.
train_df = pd.DataFrame([document for document in train_data])

In [8]:
# Unpickle every stored payload into a new "image" column.
train_df["image"] = train_df["serialized_image"].apply(deserialize_image)
# Remove the Mongo id, the raw pickled payload and the split marker.
# (In-place drop: running this cell a second time raises a KeyError.)
obsolete_columns = ['_id', 'serialized_image', 'type']
train_df.drop(columns=obsolete_columns, inplace=True)
train_df

Unnamed: 0,label,image
0,happy,"[[108, 83, 63, 65, 89, 111, 121, 121, 125, 111..."
1,happy,"[[137, 142, 159, 162, 158, 134, 113, 167, 174,..."
2,happy,"[[111, 148, 155, 167, 181, 191, 194, 193, 195,..."
3,happy,"[[151, 156, 121, 100, 80, 116, 151, 169, 183, ..."
4,happy,"[[248, 187, 149, 130, 97, 140, 139, 117, 197, ..."
...,...,...
28704,disgust,"[[27, 28, 23, 25, 30, 30, 26, 32, 34, 47, 64, ..."
28705,disgust,"[[95, 97, 78, 75, 87, 113, 140, 157, 167, 174,..."
28706,disgust,"[[45, 44, 43, 54, 51, 80, 104, 69, 72, 109, 12..."
28707,disgust,"[[61, 39, 61, 61, 28, 25, 30, 24, 38, 66, 87, ..."


In [9]:
# Number of training samples per label, most frequent label first.
class_distribution = train_df.loc[:, 'label'].value_counts()
class_distribution

happy       7215
neutral     4965
sad         4830
fear        4097
angry       3995
surprise    3171
disgust      436
Name: label, dtype: int64

In [10]:
# Pull labels and images out of the frame as two parallel Python lists.
labels = train_df['label'].tolist()
images = train_df['image'].tolist()
# Labels treated as under-represented; only their samples are kept below.
minority_classes = ['disgust']
# Walk both lists in lockstep and keep the samples of the minority classes.
minority_images = [picture for picture, tag in zip(images, labels) if tag in minority_classes]
minority_labels = [tag for tag in labels if tag in minority_classes]

In [11]:
# Augmentation recipe for the minority class: random horizontal flips,
# zoom of up to +/-10% and rotations of up to 10 degrees.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.1,
    rotation_range=10
)

# ImageDataGenerator draws its random transforms from NumPy's global RNG.
# Seeding it makes the augmented set identical on every Restart & Run All.
AUGMENTATION_SEED = 42
np.random.seed(AUGMENTATION_SEED)

# Number of augmented copies generated per minority image (adjust as needed).
num_new_samples = 5

# Apply augmentation to minority class images
augmented_images = []
augmented_labels = []

for img, label in zip(minority_images, minority_labels):
    # flow() requires a rank-4 batch: (samples, height, width, channels).
    img = img.reshape((1,48,48,1))
    aug_iter = datagen.flow(img)

    for _ in range(num_new_samples):
        # Every next() applies a fresh random transform to the single image in
        # the batch; the result is cast back to 8-bit pixel values.
        aug_img = next(aug_iter)[0].astype('uint8')
        augmented_images.append(aug_img)
        augmented_labels.append(label)

In [12]:
# Stack the per-image 2D arrays into one 3D array.
images_array = np.asarray(images)
# Append a trailing channel axis so the originals are 4D like the augmented samples.
images_4d = images_array[..., np.newaxis]
# The augmented samples were collected in a list; turn them into an array as well.
augmented_images = np.array(augmented_images)

In [13]:
# Sanity check: both arrays must agree on every axis except the first
# before they can be concatenated.
for caption, array in (
    ("Images shape:", images_4d),
    ("Augmented images shape:", augmented_images),
):
    print(caption, array.shape)


Images shape: (28709, 48, 48, 1)
Augmented images shape: (2180, 48, 48, 1)


In [14]:
# Append the augmented samples after the originals. Images and labels are
# joined in the same order, so they stay aligned index by index.
combined_images = np.concatenate([images_4d, augmented_images])
combined_labels = np.concatenate([labels, augmented_labels])

In [15]:
# Tally how many samples each label has once the augmented data is included.
unique, counts = np.unique(combined_labels, return_counts=True)
class_distribution = {name: total for name, total in zip(unique, counts)}
print("Class distribution:", class_distribution)

Class distribution: {'angry': 3995, 'disgust': 2616, 'fear': 4097, 'happy': 7215, 'neutral': 4965, 'sad': 4830, 'surprise': 3171}


In [31]:
# Target collection for the augmented training set.
# From here on `collection` no longer refers to the source 'FER2013' collection.
collection_name = 'FER2013_TRAIN_MODIFIED'

try:
    connection = MongoClient(uri)
    # MongoClient connects lazily, so ping the server to prove the connection
    # really works before reporting success.
    connection.admin.command('ping')
    db = connection[db_name]
    collection = db[collection_name]
    print("Database Connected!")
except Exception:
    print("Connection failed.")
    # Re-raise: if this were swallowed, `collection` would still point at the
    # previously opened collection and the insert cell would write into it.
    raise

Database Connected!


In [32]:
# Persist the augmented training set. Documents are sent in batches with
# insert_many instead of paying one insert_one round trip per image.
# NOTE: nothing de-duplicates here; re-running this cell appends a second copy
# of every document to the collection.
batch_size = 1000
for start in range(0, len(combined_images), batch_size):
    stop = start + batch_size
    documents = [
        {
            # Each image array is stored as a pickled byte string.
            'image': pickle.dumps(image),
            # np.concatenate produced NumPy strings; store a plain Python str.
            'label': str(label)
        }
        for image, label in zip(combined_images[start:stop], combined_labels[start:stop])
    ]
    collection.insert_many(documents)

# Test Data

In [45]:
# Read the test split from the source 'FER2013' collection explicitly.
# On a top-to-bottom run `collection` has already been rebound to
# 'FER2013_TRAIN_MODIFIED', whose documents only have 'image' and 'label'
# fields, so filtering it on 'type' would return nothing.
test_data = db['FER2013'].find({'type':'test'})

In [46]:
# Drain the cursor and build a DataFrame with one row per document.
test_df = pd.DataFrame([document for document in test_data])

In [47]:
# Unpickle every stored payload into a new "image" column.
test_df["image"] = test_df["serialized_image"].apply(deserialize_image)
# Remove the Mongo id, the raw pickled payload and the split marker.
# (In-place drop: running this cell a second time raises a KeyError.)
obsolete_columns = ['_id', 'serialized_image', 'type']
test_df.drop(columns=obsolete_columns, inplace=True)
test_df

Unnamed: 0,label,image
0,happy,"[[5, 4, 5, 9, 10, 9, 10, 12, 8, 14, 13, 17, 24..."
1,happy,"[[19, 21, 22, 18, 20, 21, 16, 18, 19, 18, 20, ..."
2,happy,"[[228, 229, 230, 229, 228, 227, 226, 226, 226,..."
3,happy,"[[25, 33, 43, 30, 46, 84, 105, 118, 139, 142, ..."
4,happy,"[[33, 29, 15, 15, 20, 36, 40, 54, 79, 98, 117,..."
...,...,...
7173,disgust,"[[2, 2, 1, 1, 1, 1, 1, 2, 0, 4, 2, 1, 3, 3, 0,..."
7174,disgust,"[[97, 105, 106, 105, 101, 101, 109, 111, 110, ..."
7175,disgust,"[[46, 42, 46, 41, 43, 55, 76, 83, 80, 85, 95, ..."
7176,disgust,"[[22, 26, 19, 14, 29, 30, 52, 112, 143, 160, 1..."


In [48]:
# Number of test samples per label, most frequent label first.
class_distribution = test_df.loc[:, 'label'].value_counts()
class_distribution

happy       1774
sad         1247
neutral     1233
fear        1024
angry        958
surprise     831
disgust      111
Name: label, dtype: int64

In [49]:
# Pull labels and images out of the test frame as two parallel Python lists.
# (These names replace the training lists built earlier in the notebook.)
labels = test_df['label'].tolist()
images = test_df['image'].tolist()
# Labels treated as under-represented; only their samples are kept below.
minority_classes = ['disgust']
# Walk both lists in lockstep and keep the samples of the minority classes.
minority_images = [picture for picture, tag in zip(images, labels) if tag in minority_classes]
minority_labels = [tag for tag in labels if tag in minority_classes]

In [50]:
# Augmentation recipe for the minority class: random horizontal flips,
# zoom of up to +/-10% and rotations of up to 10 degrees.
# NOTE(review): this augments the *test* split, which changes the distribution
# the model is evaluated on. Confirm that this is intended.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.1,
    rotation_range=10
)

# ImageDataGenerator draws its random transforms from NumPy's global RNG.
# Seeding it makes the augmented set identical on every Restart & Run All.
AUGMENTATION_SEED = 42
np.random.seed(AUGMENTATION_SEED)

# Number of augmented copies generated per minority image (adjust as needed).
num_new_samples = 5

# Apply augmentation to minority class images
augmented_images = []
augmented_labels = []

for img, label in zip(minority_images, minority_labels):
    # flow() requires a rank-4 batch: (samples, height, width, channels).
    img = img.reshape((1,48,48,1))
    aug_iter = datagen.flow(img)

    for _ in range(num_new_samples):
        # Every next() applies a fresh random transform to the single image in
        # the batch; the result is cast back to 8-bit pixel values.
        aug_img = next(aug_iter)[0].astype('uint8')
        augmented_images.append(aug_img)
        augmented_labels.append(label)

In [51]:
# Stack the per-image 2D arrays into one 3D array.
images_array = np.asarray(images)
# Append a trailing channel axis so the originals are 4D like the augmented samples.
images_4d = images_array[..., np.newaxis]
# The augmented samples were collected in a list; turn them into an array as well.
augmented_images = np.array(augmented_images)

In [52]:
# Sanity check: both arrays must agree on every axis except the first
# before they can be concatenated.
for caption, array in (
    ("Images shape:", images_4d),
    ("Augmented images shape:", augmented_images),
):
    print(caption, array.shape)

Images shape: (7178, 48, 48, 1)
Augmented images shape: (555, 48, 48, 1)


In [53]:
# Append the augmented samples after the originals. Images and labels are
# joined in the same order, so they stay aligned index by index.
combined_images = np.concatenate([images_4d, augmented_images])
combined_labels = np.concatenate([labels, augmented_labels])

In [54]:
# Tally how many samples each label has once the augmented data is included.
unique, counts = np.unique(combined_labels, return_counts=True)
class_distribution = {name: total for name, total in zip(unique, counts)}
print("Class distribution:", class_distribution)

Class distribution: {'angry': 958, 'disgust': 666, 'fear': 1024, 'happy': 1774, 'neutral': 1233, 'sad': 1247, 'surprise': 831}


In [55]:
# Target collection for the augmented test set.
# From here on `collection` refers to this collection, not to the one the
# test split was read from.
collection_name = 'FER2013_TEST_MODIFIED'

try:
    connection = MongoClient(uri)
    # MongoClient connects lazily, so ping the server to prove the connection
    # really works before reporting success.
    connection.admin.command('ping')
    db = connection[db_name]
    collection = db[collection_name]
    print("Database Connected!")
except Exception:
    print("Connection failed.")
    # Re-raise: if this were swallowed, `collection` would still point at the
    # previously opened collection and the insert cell would write into it.
    raise

Database Connected!


In [56]:
# Persist the augmented test set. Documents are sent in batches with
# insert_many instead of paying one insert_one round trip per image.
# NOTE: nothing de-duplicates here; re-running this cell appends a second copy
# of every document to the collection.
batch_size = 1000
for start in range(0, len(combined_images), batch_size):
    stop = start + batch_size
    documents = [
        {
            # Each image array is stored as a pickled byte string.
            'image': pickle.dumps(image),
            # np.concatenate produced NumPy strings; store a plain Python str.
            'label': str(label)
        }
        for image, label in zip(combined_images[start:stop], combined_labels[start:stop])
    ]
    collection.insert_many(documents)