In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from google.colab import drive
import zipfile
import io
from sklearn.model_selection import train_test_split
from PIL import Image,UnidentifiedImageError
import random
from sklearn.metrics import accuracy_score
from tqdm import tqdm
# Mount Google Drive at /content/drive; the dataset archive path used below lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# Location of the dataset archive on Drive and the directory reserved for extraction
zip_file_path = '/content/drive/MyDrive/datasets/archive.zip'
extract_path = '/content/dataset/'

# Inspect the archive: list every member, then keep only the .jpg entries.
# Nothing is extracted to disk here.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    file_list = zip_ref.namelist()
    img_files = list(filter(lambda member: member.endswith('.jpg'), file_list))

In [None]:
# Accumulators for the decoded images and their labels
images = []
labels = []

# Derive the class label from the file name
def get_label(filename):
    """Map a file name to a class id.

    The match is a case-insensitive substring test on the whole name:
    0 when it contains 'cat', otherwise 1 when it contains 'dog',
    otherwise None. 'cat' takes precedence when both substrings occur.
    """
    lowered = filename.lower()
    for keyword, class_id in (('cat', 0), ('dog', 1)):
        if keyword in lowered:
            return class_id
    return None

# Read a random subset of the labelled images directly from the zip archive
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    file_list = zip_ref.namelist()

    # Keep only the JPEG entries
    img_files = [file for file in file_list if file.endswith('.jpg')]

    # Only files whose name yields a label can be used, so filter before
    # sampling; otherwise unlabelled files would silently shrink the sample.
    labelled_files = [file for file in img_files if get_label(file) is not None]

    # Randomly pick up to 100 images. Capping at the number of available files
    # avoids the ValueError random.sample raises when asked for more items
    # than the population holds.
    sample_size = min(100, len(labelled_files))
    random_files = random.sample(labelled_files, sample_size)

    for img_file in random_files:
        try:
            with zip_ref.open(img_file) as file:
                img = Image.open(io.BytesIO(file.read()))

                # Force 3 colour channels (RGB) and a fixed spatial size
                img = img.convert("RGB")
                img = img.resize((150, 150))
                img_np = np.array(img)  # NumPy array of shape (150, 150, 3)

                images.append(img_np)
                labels.append(get_label(img_file))
        except UnidentifiedImageError:
            # Corrupt or non-image payload: report it and move on
            print(f"UnidentifiedImageError: Bỏ qua tệp không hợp lệ {img_file}")
        except Exception as e:
            print(f"Lỗi khác {img_file}: {e}")

# Convert the accumulated lists to NumPy arrays
images = np.array(images)
labels = np.array(labels)

print('Số lượng ảnh đã đọc:', len(images))
print('Kích thước ảnh:', images.shape)


Số lượng ảnh đã đọc: 100
Kích thước ảnh: (100, 150, 150, 3)


In [None]:
# Scale pixel values to the [0, 1] range.
# The integer-dtype guard keeps this cell idempotent: without it, re-running
# the cell would divide the already-normalised float array by 255 again.
if np.issubdtype(images.dtype, np.integer):
    images = images.astype('float32') / 255.0

# Split the data into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

print('Kích thước tập huấn luyện:', X_train.shape)
print('Kích thước tập kiểm tra:', X_test.shape)
print('Kích thước tập huấn luyện:', y_train.shape)
print('Kích thước tập kiểm tra:', y_test.shape)

Kích thước tập huấn luyện: (80, 150, 150, 3)
Kích thước tập kiểm tra: (20, 150, 150, 3)
Kích thước tập huấn luyện: (80,)
Kích thước tập kiểm tra: (20,)


In [None]:
class Conv2D:
    """2-D convolution layer for channels-last input.

    Input is expected as (batch, height, width, channels); the output is
    (batch, out_height, out_width, num_filters). ``backward`` both returns the
    gradient with respect to the input and applies a plain gradient-descent
    update to the layer's weights and biases.

    Args:
        num_filters: Number of output channels.
        kernel_size: Side length of the square kernel.
        input_channels: Number of channels the input must have.
        padding: Zero padding added to each spatial border.
        stride: Step between neighbouring kernel positions.
    """

    def __init__(self, num_filters, kernel_size, input_channels, padding=0, stride=1):
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.padding = padding
        self.stride = stride
        # Weights are laid out as (kernel_h, kernel_w, in_channels, out_channels)
        self.weights = np.random.randn(kernel_size, kernel_size, input_channels, num_filters) * 0.01
        self.biases = np.zeros(num_filters)
        self.input = None

    def _pad(self, array):
        """Zero-pad the two spatial axes of a (batch, h, w, c) array."""
        pad = self.padding
        return np.pad(array, ((0, 0), (pad, pad), (pad, pad), (0, 0)), mode='constant')

    def _window(self, i, j):
        """Return the row and column slices covered by output position (i, j)."""
        h_start = i * self.stride
        w_start = j * self.stride
        return (slice(h_start, h_start + self.kernel_size),
                slice(w_start, w_start + self.kernel_size))

    def forward(self, input):
        """Convolve ``input`` with the layer's filters.

        Args:
            input: Array of shape (batch, height, width, input_channels).

        Returns:
            Array of shape (batch, out_height, out_width, num_filters).

        Raises:
            ValueError: If the channel count of ``input`` differs from
                ``input_channels``. Without this check NumPy would silently
                broadcast a mismatching weight tensor (e.g. 1 weight channel
                against 3 image channels) and produce wrong activations.
        """
        batch_size, height, width, channels = input.shape
        if channels != self.input_channels:
            raise ValueError(
                f"Conv2D expected {self.input_channels} input channel(s) but got {channels}"
            )

        self.input = input
        output_height = (height - self.kernel_size + 2 * self.padding) // self.stride + 1
        output_width = (width - self.kernel_size + 2 * self.padding) // self.stride + 1

        self.output = np.zeros((batch_size, output_height, output_width, self.num_filters))
        padded_input = self._pad(input)

        # Only the spatial positions are looped in Python; the batch and filter
        # dimensions are contracted in one tensordot per position.
        for i in range(output_height):
            for j in range(output_width):
                rows, cols = self._window(i, j)
                patch = padded_input[:, rows, cols, :]  # (batch, k, k, channels)
                self.output[:, i, j, :] = (
                    np.tensordot(patch, self.weights, axes=([1, 2, 3], [0, 1, 2])) + self.biases
                )
        return self.output

    def backward(self, dout, learning_rate):
        """Back-propagate ``dout`` and update the parameters in place.

        Args:
            dout: Gradient of the loss with respect to the layer output,
                shape (batch, out_height, out_width, num_filters).
            learning_rate: Step size of the gradient-descent update.

        Returns:
            Gradient with respect to the layer input, same shape as the input
            given to the last ``forward`` call.
        """
        padded_input = self._pad(self.input)
        padded_dinput = np.zeros_like(padded_input)
        dweights = np.zeros_like(self.weights)
        # Each bias contributes to every output position of its filter
        dbiases = dout.sum(axis=(0, 1, 2))

        for i in range(dout.shape[1]):
            for j in range(dout.shape[2]):
                rows, cols = self._window(i, j)
                patch = padded_input[:, rows, cols, :]  # (batch, k, k, channels)
                grad = dout[:, i, j, :]                 # (batch, filters)

                # Sum over the batch: (k, k, channels, filters)
                dweights += np.tensordot(patch, grad, axes=([0], [0]))
                # Sum over the filters: (batch, k, k, channels)
                padded_dinput[:, rows, cols, :] += np.tensordot(grad, self.weights, axes=([1], [3]))

        # Strip the padding border so the gradient matches the input shape
        if self.padding > 0:
            dinput = padded_dinput[:, self.padding:-self.padding, self.padding:-self.padding, :]
        else:
            dinput = padded_dinput

        # The input gradient above used the pre-update weights; update them now
        self.weights -= learning_rate * dweights
        self.biases -= learning_rate * dbiases

        return dinput


In [None]:
class ReLU:
    """Rectified linear unit: element-wise max(0, x)."""

    def __init__(self):
        # Activations of the most recent forward pass, needed by backward()
        self.output = None

    def forward(self, input):
        """Clamp negative entries of ``input`` to zero and cache the result."""
        activated = np.maximum(0, input)
        self.output = activated
        return activated

    def backward(self, upstream_gradient):
        """Let the gradient through only where the cached activation is positive."""
        is_active = self.output > 0
        return upstream_gradient * is_active


In [None]:
class MaxPooling:
    """Max pooling over the spatial axes of a (batch, height, width, channels) array.

    Args:
        pool_size: Side length of the square pooling window.
        stride: Step between neighbouring windows.
    """

    def __init__(self, pool_size, stride):
        self.pool_size = pool_size
        self.stride = stride
        self.input = None

    def _window(self, i, j):
        """Return the row and column slices of the window for output position (i, j)."""
        h_start = i * self.stride
        w_start = j * self.stride
        return (slice(h_start, h_start + self.pool_size),
                slice(w_start, w_start + self.pool_size))

    def forward(self, input):
        """Return the per-window maximum and cache ``input`` for the backward pass.

        Trailing rows/columns that do not fill a complete window are ignored.
        """
        self.input = input
        batch_size, height, width, channels = input.shape
        out_height = (height - self.pool_size) // self.stride + 1
        out_width = (width - self.pool_size) // self.stride + 1
        output = np.zeros((batch_size, out_height, out_width, channels))

        # Loop over window positions only; batch and channel axes are reduced
        # together by a single vectorised max per window.
        for i in range(out_height):
            for j in range(out_width):
                rows, cols = self._window(i, j)
                output[:, i, j, :] = input[:, rows, cols, :].max(axis=(1, 2))

        return output

    def backward(self, dout):
        """Route ``dout`` back to the positions that held each window's maximum.

        When several entries of a window tie for the maximum, every one of
        them receives the full upstream gradient.

        Returns:
            Gradient with the same shape as the cached forward input.
        """
        dinput = np.zeros_like(self.input)

        for i in range(dout.shape[1]):
            for j in range(dout.shape[2]):
                rows, cols = self._window(i, j)
                window = self.input[:, rows, cols, :]
                mask = window == window.max(axis=(1, 2), keepdims=True)
                # Broadcast the (batch, channels) gradient over the window
                dinput[:, rows, cols, :] += mask * dout[:, i, j, :][:, np.newaxis, np.newaxis, :]

        return dinput

In [None]:
class Flatten:
    """Collapse every axis after the first into a single feature axis."""

    def __init__(self):
        # Shape of the most recent forward input, restored in backward()
        self.input_shape = None

    def forward(self, input_data):
        """Reshape ``input_data`` to (batch, features) and remember its original shape."""
        self.input_shape = input_data.shape
        batch_size = self.input_shape[0]
        return input_data.reshape(batch_size, -1)

    def backward(self, dout):
        """Reshape the upstream gradient back to the remembered input shape."""
        original_shape = self.input_shape
        return dout.reshape(original_shape)



In [None]:
class Dense:
    """Fully connected layer ``activation(x @ weights + biases)``.

    Args:
        input_size: Number of input features.
        output_size: Number of output units.
        activation: ``None`` for a linear layer, one of the strings
            ``'relu'``, ``'sigmoid'`` or ``'softmax'`` (case-insensitive),
            or a callable applied to the pre-activation.

    Raises:
        ValueError: If ``activation`` is a string that is not supported.

    Backward-pass conventions:
        * ``'relu'`` / ``'sigmoid'``: ``dout`` is the gradient with respect to
          the layer output; the activation derivative is applied here.
        * ``'softmax'``: ``dout`` is taken to be the gradient with respect to
          the logits already (the fused softmax + cross-entropy form
          ``predictions - targets``), so it is used unchanged.
        * ``None`` or a callable: ``dout`` is used unchanged, because the
          derivative of an arbitrary callable is unknown.
    """

    _SUPPORTED_ACTIVATIONS = ('relu', 'sigmoid', 'softmax')

    def __init__(self, input_size, output_size, activation=None):
        if isinstance(activation, str):
            activation = activation.lower()
            if activation not in self._SUPPORTED_ACTIVATIONS:
                raise ValueError(
                    f"Unsupported activation '{activation}'; "
                    f"expected one of {self._SUPPORTED_ACTIVATIONS}, a callable, or None."
                )
        self.weights = np.random.randn(input_size, output_size) * 0.01  # Initialize weights
        self.biases = np.zeros((1, output_size))  # Initialize biases
        self.activation = activation
        self.input_data = None
        self.output = None

    def _activate(self, z):
        """Apply the configured activation to the pre-activation ``z``."""
        if self.activation is None:
            return z
        if isinstance(self.activation, str):
            if self.activation == 'relu':
                return np.maximum(0, z)
            if self.activation == 'sigmoid':
                # Clipping keeps np.exp from overflowing
                return 1 / (1 + np.exp(-np.clip(z, -500, 500)))
            # softmax: subtract the row maximum for numerical stability
            exps = np.exp(z - np.max(z, axis=1, keepdims=True))
            return exps / np.sum(exps, axis=1, keepdims=True)
        return self.activation(z)

    def forward(self, input_data):
        """Compute the layer output for ``input_data`` of shape (batch, input_size)."""
        if self.weights is None or self.biases is None:
            raise ValueError("Weights or biases are not initialized.")
        self.input_data = input_data  # Save input for backward pass
        z = np.dot(input_data, self.weights) + self.biases
        self.output = self._activate(z)  # Save output for the activation derivative
        return self.output

    def backward(self, dout, learning_rate):
        """Back-propagate ``dout`` and apply a gradient-descent update.

        Args:
            dout: Upstream gradient of shape (batch, output_size); see the
                class docstring for how it is interpreted per activation.
            learning_rate: Step size of the update.

        Returns:
            Gradient with respect to the layer input, shape (batch, input_size).
        """
        if self.activation == 'relu':
            dout = dout * (self.output > 0)
        elif self.activation == 'sigmoid':
            dout = dout * self.output * (1 - self.output)

        # Calculate gradients (the input gradient uses the pre-update weights)
        grad_input = np.dot(dout, self.weights.T)
        grad_weights = np.dot(self.input_data.T, dout)
        grad_biases = np.sum(dout, axis=0, keepdims=True)

        # Update weights and biases
        self.weights -= learning_rate * grad_weights
        self.biases -= learning_rate * grad_biases

        return grad_input  # Return the gradient to propagate to the previous layer


In [None]:
class Sigmoid:
    """Logistic sigmoid activation, 1 / (1 + exp(-x))."""

    def __init__(self):
        # Activations of the most recent forward pass, needed by backward()
        self.output = None

    def forward(self, input):
        """Apply the sigmoid element-wise and cache the result.

        The input is clipped to [-500, 500] before exponentiation.
        """
        clipped = np.clip(input, -500, 500)
        denominator = 1 + np.exp(-clipped)
        self.output = 1 / denominator
        return self.output

    def backward(self, dout):
        """Scale ``dout`` by the sigmoid derivative ``output * (1 - output)``."""
        scaled = dout * self.output
        return scaled * (1 - self.output)


In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the labels. The categories are pinned to both class ids
# (0 = cat, 1 = dog) so the result always has two columns; inferring them from
# y_train would yield a single column, or fail on y_test, whenever the small
# random sample happens to miss one class in the training split.
encoder = OneHotEncoder(categories=[[0, 1]])
y_train_encoded = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_encoded = encoder.transform(y_test.reshape(-1, 1)).toarray()

print("Kích thước y_train sau one-hot encoding:", y_train_encoded.shape)
print("Kích thước y_test sau one-hot encoding:", y_test_encoded.shape)

Kích thước y_train sau one-hot encoding: (80, 2)
Kích thước y_test sau one-hot encoding: (20, 2)


In [None]:
class CNN:
    """Small convolutional classifier: conv -> pool -> conv -> pool -> dense(128, ReLU) -> dense(softmax).

    The ReLU and softmax activations are applied by this class, so the two
    dense layers themselves are plain linear layers. The dense layers are
    sized from the flattened feature length, either explicitly through
    ``build`` or lazily on the first ``forward`` call.

    Args:
        num_classes: Number of output classes.
        input_channels: Channel count of the input images (3 for RGB).
    """

    def __init__(self, num_classes, input_channels=3):
        self.num_classes = num_classes
        # Each convolution must be told the channel count it actually receives:
        # the image channels for conv1 and conv1's 32 filters for conv2.
        self.conv1 = Conv2D(32, 3, input_channels)
        self.pool1 = MaxPooling(2, 2)
        self.conv2 = Conv2D(64, 3, 32)
        self.pool2 = MaxPooling(2, 2)
        self.flatten = Flatten()
        self.relu = ReLU()
        # Created once the flattened feature size is known
        self.fc1 = None
        self.fc2 = None

    def _features(self, x):
        """Run the convolution/pooling stack and flatten the result."""
        x = self.conv1.forward(x)
        x = self.pool1.forward(x)
        x = self.conv2.forward(x)
        x = self.pool2.forward(x)
        return self.flatten.forward(x)

    def _build_dense(self, flat_size):
        """Create the dense layers for ``flat_size`` flattened features."""
        self.fc1 = Dense(flat_size, 128)
        self.fc2 = Dense(128, self.num_classes)

    @staticmethod
    def _softmax(logits):
        """Row-wise softmax; the row maximum is subtracted for numerical stability."""
        exps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def build(self, input_shape):
        """Size the dense layers for inputs of shape (batch, height, width, channels).

        A single all-zero sample is pushed through the convolutional stack to
        measure the flattened feature length; the batch entry of
        ``input_shape`` is ignored to keep this probe cheap.
        """
        probe = np.zeros((1,) + tuple(input_shape[1:]))
        flat = self._features(probe)
        self._build_dense(flat.shape[1])

    def forward(self, x):
        """Return class probabilities of shape (batch, num_classes)."""
        x = self._features(x)
        if self.fc1 is None:
            # build() was not called: size the dense layers from the real data
            self._build_dense(x.shape[1])
        x = self.fc1.forward(x)
        x = self.relu.forward(x)
        logits = self.fc2.forward(x)
        return self._softmax(logits)

    def backward(self, dout, learning_rate):
        """Back-propagate through every layer, updating parameters as it goes.

        Args:
            dout: Gradient of the loss with respect to the logits (for softmax
                with cross-entropy this has the form ``predictions - targets``).
            learning_rate: Step size passed to the trainable layers.

        Returns:
            Gradient with respect to the network input.
        """
        dout = self.fc2.backward(dout, learning_rate)
        dout = self.relu.backward(dout)
        dout = self.fc1.backward(dout, learning_rate)
        dout = self.flatten.backward(dout)
        dout = self.pool2.backward(dout)
        dout = self.conv2.backward(dout, learning_rate)
        dout = self.pool1.backward(dout)
        dout = self.conv1.backward(dout, learning_rate)
        return dout


In [None]:
class CrossEntropyLoss:
    """Mean categorical cross-entropy for softmax probabilities and one-hot targets."""

    @staticmethod
    def _check_shapes(predictions, targets):
        """Reject targets that do not match the predictions element for element.

        Without this check NumPy may silently broadcast, e.g. a 1-D label
        vector against a (batch, classes) matrix, and return a meaningless value.
        """
        if np.shape(predictions) != np.shape(targets):
            raise ValueError(
                f"predictions shape {np.shape(predictions)} does not match "
                f"targets shape {np.shape(targets)}; targets must be one-hot encoded"
            )

    def compute_loss(self, predictions, targets):
        """Return the cross-entropy averaged over the batch.

        Args:
            predictions: Class probabilities, shape (batch, classes).
            targets: One-hot labels of the same shape.

        Raises:
            ValueError: If the two shapes differ.
        """
        self._check_shapes(predictions, targets)
        predictions = np.clip(predictions, 1e-9, 1 - 1e-9)  # Prevent log(0)
        N = predictions.shape[0]
        loss = -np.sum(targets * np.log(predictions)) / N
        return loss

    def backward(self, predictions, targets):
        """Return the gradient of ``compute_loss`` with respect to the logits.

        This is the fused softmax + cross-entropy gradient. It is divided by
        the batch size because ``compute_loss`` averages over the batch;
        omitting the division would scale the effective learning rate with
        the batch size.

        Raises:
            ValueError: If the two shapes differ.
        """
        self._check_shapes(predictions, targets)
        N = predictions.shape[0]
        return (predictions - targets) / N

In [None]:
def train(model, X_train, y_train, X_test, y_test, epochs, batch_size, learning_rate):
    """Train ``model`` with mini-batch gradient descent and evaluate it after every epoch.

    Args:
        model: Object exposing ``forward(x)`` (class probabilities) and
            ``backward(dout, learning_rate)`` (updates its parameters).
        X_train, X_test: Input arrays whose first axis indexes samples.
        y_train, y_test: One-hot encoded labels of shape (samples, classes).
        epochs: Number of passes over the training data.
        batch_size: Number of samples per mini-batch (must be positive).
        learning_rate: Step size handed to ``model.backward``.

    Returns:
        Four lists with one entry per epoch:
        ``(train_losses, test_losses, train_accuracies, test_accuracies)``.

    Raises:
        ValueError: If ``batch_size`` is not positive, the training set is
            empty, or the labels are not 2-D one-hot arrays.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    num_samples = len(X_train)
    if num_samples == 0:
        raise ValueError("X_train is empty")
    # Fail fast on raw class-id vectors instead of after a long forward pass
    for name, encoded in (("y_train", y_train), ("y_test", y_test)):
        if np.ndim(encoded) != 2:
            raise ValueError(
                f"{name} must be one-hot encoded with shape (samples, classes); "
                f"got shape {np.shape(encoded)}"
            )

    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []
    loss_fn = CrossEntropyLoss()

    # Ceiling division: a trailing partial batch is still a batch. Floor
    # division under-counted the progress total and was zero (division by
    # zero below) when the data set was smaller than one batch.
    num_batches = (num_samples + batch_size - 1) // batch_size

    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")

        # Training
        train_loss = 0
        train_predictions = []

        with tqdm(total=num_batches, desc=f"Training Epoch {epoch + 1}", unit="batch") as pbar:
            for i in range(0, num_samples, batch_size):
                batch_X = X_train[i:i + batch_size]
                batch_y = y_train[i:i + batch_size]

                # Forward pass
                output = model.forward(batch_X)
                loss = loss_fn.compute_loss(output, batch_y)
                # Weight the batch mean by its size so a smaller final batch
                # does not distort the epoch average
                train_loss += loss * len(batch_X)

                # Backward pass
                dout = loss_fn.backward(output, batch_y)
                model.backward(dout, learning_rate)

                train_predictions.extend(np.argmax(output, axis=1))

                pbar.update(1)

        train_loss /= num_samples
        train_accuracy = accuracy_score(np.argmax(y_train, axis=1), train_predictions)
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        # Testing
        test_output = model.forward(X_test)
        test_loss = loss_fn.compute_loss(test_output, y_test)
        test_predictions = np.argmax(test_output, axis=1)
        test_accuracy = accuracy_score(np.argmax(y_test, axis=1), test_predictions)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")
        print("-----------------------------")

    return train_losses, test_losses, train_accuracies, test_accuracies

# Initialize model
num_classes = 2  # For binary classification (cat vs dog)
model = CNN(num_classes)
# Size the dense layers from the real image shape. Without this call fc1 keeps
# a 128-feature input and the first forward pass fails with a shape mismatch
# (82944 flattened features vs 128).
model.build((1,) + tuple(X_train.shape[1:]))


In [None]:
# Pass the one-hot encoded labels: the loss and the accuracy computation inside
# train() operate on (samples, classes) arrays, not on the raw class-id vectors.
train_losses, test_losses, train_accuracies, test_accuracies = train(model, X_train, y_train_encoded, X_test, y_test_encoded, epochs=10, batch_size=8, learning_rate=0.001)


Epoch 1/10


Training Epoch 1:   0%|          | 0/10 [01:33<?, ?batch/s]


ValueError: shapes (8,82944) and (128,128) not aligned: 82944 (dim 1) != 128 (dim 0)