In [1]:
import h5py
import numpy as np

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load the data
train_dataset = h5py.File('/content/drive/My Drive/deep learning/Lab2/Te.h5', "r")
test_dataset = h5py.File('/content/drive/My Drive/deep learning/Lab2/Tr.h5', "r")

In [4]:
# Print the keys in the datasets
print("Keys in train_dataset:", list(train_dataset.keys()))
print("Keys in test_dataset:", list(test_dataset.keys()))

Keys in train_dataset: ['images', 'labels']
Keys in test_dataset: ['images', 'labels']


In [5]:
train_x_orig = np.array(train_dataset["images"][:])  # original train images
train_y_orig = np.array(train_dataset["labels"][:])  # original train labels
test_x_orig = np.array(test_dataset["images"][:])    # original test images
test_y_orig = np.array(test_dataset["labels"][:])    # original test labels

In [6]:
# Reshape the training and test examples
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T  # (49152, m_train)
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T     # (49152, m_test)

train_x = train_x_flatten / 255.  # Normalize to [0, 1]
test_x = test_x_flatten / 255.

# One-hot encoding for labels
train_y = train_y_orig.astype(int)
test_y = test_y_orig.astype(int)
num_classes = np.max(train_y) + 1

train_y = np.eye(num_classes)[train_y.reshape(-1)].T  # (num_classes, m_train)
test_y = np.eye(num_classes)[test_y.reshape(-1)].T    # (num_classes, m_test)

# Verify shapes
print(f"train_x shape: {train_x.shape}")  # Expected (49152, m_train)
print(f"train_y shape: {train_y.shape}")  # Expected (5, m_train)
print(f"test_x shape: {test_x.shape}")    # Expected (49152, m_test)
print(f"test_y shape: {test_y.shape}")    # Expected (5, m_test)

train_x shape: (49152, 50)
train_y shape: (5, 50)
test_x shape: (49152, 250)
test_y shape: (5, 250)


In [7]:
def initialize_parameters_deep(layer_dims):
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

    return parameters

def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache

def relu(Z):
    A = np.maximum(0, Z)
    cache = Z
    return A, cache

def softmax(Z):
    expZ = np.exp(Z - np.max(Z))
    A = expZ / expZ.sum(axis=0, keepdims=True)
    cache = Z
    return A, cache

def forward_propagation(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2  # number of layers

    # [LINEAR -> RELU]*(L-1)
    for l in range(1, L):
        A_prev = A
        Z, linear_cache = linear_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)])
        A, activation_cache = relu(Z)
        caches.append((linear_cache, activation_cache))

    # LINEAR -> SOFTMAX
    ZL, linear_cache = linear_forward(A, parameters['W' + str(L)], parameters['b' + str(L)])
    AL, activation_cache = softmax(ZL)
    caches.append((linear_cache, activation_cache))

    return AL, caches

def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -1/m * np.sum(Y * np.log(AL + 1e-8))
    cost = np.squeeze(cost)
    return cost

def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = 1./m * np.dot(dZ, A_prev.T)
    db = 1./m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

def relu_backward(dA, cache):
    Z = cache
    dZ = np.array(dA, copy=True)  # converting dz to a correct object.
    dZ[Z <= 0] = 0
    return dZ

def softmax_backward(AL, Y):
    dZL = AL - Y
    return dZL

def backward_propagation(AL, Y, caches):
    grads = {}
    L = len(caches)  # number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    # Initializing the backpropagation
    dZL = softmax_backward(AL, Y)
    current_cache = caches[L-1]
    linear_cache, _ = current_cache
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_backward(dZL, linear_cache)

    # Loop from l=L-2 to l=0
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        linear_cache, activation_cache = current_cache
        dA_prev_temp, dW_temp, db_temp = linear_backward(relu_backward(grads["dA" + str(l+1)], activation_cache), linear_cache)
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l+1)] = dW_temp
        grads["db" + str(l+1)] = db_temp

    return grads

def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2  # number of layers

    for l in range(L):
        parameters["W" + str(l+1)] -= learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] -= learning_rate * grads["db" + str(l+1)]

    return parameters

def predict(X, parameters):
    AL, _ = forward_propagation(X, parameters)
    return np.argmax(AL, axis=0)


In [8]:
# Define the model structure
layer_dims = [train_x.shape[0], 512, 256, 128, num_classes]

# Initialize parameters
parameters = initialize_parameters_deep(layer_dims)

In [9]:
# Model training
def model(X, Y, layers_dims, learning_rate=0.001, num_epochs=3000, print_cost=False):
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layers_dims)

    for i in range(num_epochs):
        # Forward propagation
        AL, caches = forward_propagation(X, parameters)

        # Compute cost
        cost = compute_cost(AL, Y)

        # Backward propagation
        grads = backward_propagation(AL, Y, caches)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print(f"Cost after iteration {i}: {cost}")

    return parameters


In [10]:
learning_rate = 0.001  # Increase learning rate
parameters = model(train_x, train_y, layer_dims, learning_rate=learning_rate, num_epochs=3000, print_cost=True)


Cost after iteration 0: 1.609630723178581
Cost after iteration 100: 1.6095119789906613
Cost after iteration 200: 1.609398752724427
Cost after iteration 300: 1.6092891288751454
Cost after iteration 400: 1.6091780264345834
Cost after iteration 500: 1.6090686386376976
Cost after iteration 600: 1.6089599048980707
Cost after iteration 700: 1.6088518177705182
Cost after iteration 800: 1.608741300488175
Cost after iteration 900: 1.608630770694294
Cost after iteration 1000: 1.608518081419324
Cost after iteration 1100: 1.608402807689046
Cost after iteration 1200: 1.608286476398959
Cost after iteration 1300: 1.6081663056224853
Cost after iteration 1400: 1.6080435072177985
Cost after iteration 1500: 1.6079176274819922
Cost after iteration 1600: 1.6077899114812533
Cost after iteration 1700: 1.6076602366902177
Cost after iteration 1800: 1.6075258553415055
Cost after iteration 1900: 1.6073847732711581
Cost after iteration 2000: 1.6072355175557596
Cost after iteration 2100: 1.6070802096119245
Cost af

In [11]:
# Evaluate the model
def evaluate_model(X, Y, parameters):
    predictions = predict(X, parameters)
    labels = np.argmax(Y, axis=0)
    accuracy = np.mean(predictions == labels)
    return accuracy

train_accuracy = evaluate_model(train_x, train_y, parameters)
test_accuracy = evaluate_model(test_x, test_y, parameters)

print(f"Train accuracy: {train_accuracy * 100:.2f}%")
print(f"Test accuracy: {test_accuracy * 100:.2f}%")

Train accuracy: 44.00%
Test accuracy: 21.60%
