# importing the libraries

In [9]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Import the data loading function from your data.py file
from data import load_cifar10

# loading the data
<!-- should print:
    Training data shape: (50000, 32, 32, 3)
    Training labels shape: (50000,)
    Test data shape: (10000, 32, 32, 3)
    Test labels shape: (10000,) -->

In [10]:
# Directory handed to load_cifar10 as the location of the CIFAR-10 data
root_dir = "//Users//yanajakhwal//Desktop//Projects//Image_Classification//cifar10_data//cifar-10-batches-py"

# Read the images and labels for the training and test splits
(x_train, y_train, x_test, y_test) = load_cifar10(root_dir)

# Sanity check: report the shape of each array that was returned
for _caption, _array in (
    ("Training data shape:", x_train),
    ("Training labels shape:", y_train),
    ("Test data shape:", x_test),
    ("Test labels shape:", y_test),
):
    print(_caption, _array.shape)

Training data shape: (50000, 32, 32, 3)
Training labels shape: (50000,)
Test data shape: (10000, 32, 32, 3)
Test labels shape: (10000,)


# preprocessing the data

In [11]:
# Normalize the pixel values: cast each image array to float32, then divide by 255.0
x_train, x_test = (
    images.astype('float32') / 255.0
    for images in (x_train, x_test)
)

# one-hot encoding the labels 
<!-- needed because CIFAR-10 is a 10-class problem trained with categorical crossentropy loss, which expects one-hot encoded labels -->

In [12]:
# Convert both label arrays to one-hot form with 10 classes
y_train, y_test = (
    to_categorical(labels, 10)
    for labels in (y_train, y_test)
)

# use data augmentation to increase the dataset's diversity with modified versions of images

In [13]:
# Build an ImageDataGenerator that applies random augmentations to the images
datagen = ImageDataGenerator(
    rotation_range=15,       # random rotations, range given in degrees
    width_shift_range=0.1,   # random horizontal shifts (fraction of total width)
    height_shift_range=0.1,  # random vertical shifts (fraction of total height)
    horizontal_flip=True,    # random left-right flips
)

# NOTE: datagen.fit(x_train) is intentionally not called. Per the Keras docs,
# fit() is only required when featurewise_center, featurewise_std_normalization
# or zca_whitening is enabled. None of them is set here, so the call would only
# copy the training array without changing the generator's behaviour.

# creating a validation set
<!-- done by splitting the training data -->

In [14]:
# Hold out 10% of the training samples as a validation set; the fixed
# random_state keeps the split repeatable across runs
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
)