In [4]:
import numpy as np
from keras.datasets import cifar10
import pandas as pd 
import matplotlib.pyplot as plt
import sys

class Tensor:
    def __init__(self, shape):
        self.data = np.zeros(shape=shape, dtype=np.float32) # 存放数据
        self.grad = np.zeros(shape=shape, dtype=np.float32) # 存放梯度

    def clear_grad(self):
        self.grad = np.zeros_like(self.grad)

    def __str__(self):
        return "Tensor shape: {}, data: {}".format(self.data.shape, self.data)

class Initializer:
    """
    基类
    """
    def __init__(self, shape=None, name='initializer'):
        self.shape = shape
        self.name = name

    def __call__(self, *args, **kwargs):
        raise NotImplementedError

    def __str__(self):
        return self.name


class Constant(Initializer):
    def __init__(self, value=0., name='constant initializer', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        self.value = value

    def __call__(self, shape=None, *args, **kwargs):
        if shape:
            self.shape = shape
        assert shape is not None, "the shape of initializer must not be None."
        return self.value + np.zeros(shape=self.shape)


class Normal(Initializer):
    def __init__(self, mean=0., std=0.01, name='normal initializer', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        self.mean = mean
        self.std = std

    def __call__(self, shape=None, *args, **kwargs):
        if shape:
            self.shape = shape
        assert shape is not None, "the shape of initializer must not be None."
        return np.random.normal(self.mean, self.std, size=self.shape)

class Dataset:
    def __init__(self, *args, **kwargs):
        pass

    def __getitem__(self, idx):
        raise NotImplementedError("'{}' not implement in class {}"
                                  .format('__getitem__', self.__class__.__name__))

    def __len__(self):
        raise NotImplementedError("'{}' not implement in class {}"
                                  .format('__len__', self.__class__.__name__))

class BatchSampler:
    def __init__(self, dataset=None, shuffle=False, batch_size=1, drop_last=False):
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.shuffle = shuffle

        self.num_data = len(dataset)
        if self.drop_last or (self.num_data % batch_size == 0):
            self.num_samples = self.num_data // batch_size
        else:
            self.num_samples = self.num_data // batch_size + 1
        indices = np.arange(self.num_data)
        if shuffle:
            np.random.shuffle(indices)
        if drop_last:
            indices = indices[:self.num_samples * batch_size]
        self.indices = indices

    def __len__(self):
        return self.num_samples

    def __iter__(self):
        batch_indices = []
        for i in range(self.num_samples):
            if (i + 1) * self.batch_size <= self.num_data:
                for idx in range(i * self.batch_size, (i + 1) * self.batch_size):
                    batch_indices.append(self.indices[idx])
                yield batch_indices
                batch_indices = []
            else:
                for idx in range(i * self.batch_size, self.num_data):
                    batch_indices.append(self.indices[idx])
        if not self.drop_last and len(batch_indices) > 0:
            yield batch_indices

class DataLoader:
    def __init__(self, dataset, sampler=BatchSampler, shuffle=False, batch_size=1, drop_last=False):
        self.dataset = dataset
        self.batch_sampler = sampler
        self.sampler = self.batch_sampler(dataset, shuffle, batch_size, drop_last)
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.batch_size = batch_size

    def __len__(self):
        return len(self.sampler)

    def __call__(self):
        self.__iter__()

    def __iter__(self):
        for sample_indices in self.sampler:
            data_list = []
            label_list = []
            for indice in sample_indices:
                data, label = self.dataset[indice]
                data_list.append(data)
                label_list.append(label)
            yield np.stack(data_list, axis=0), np.stack(label_list, axis=0)
        self.sampler = self.batch_sampler(self.dataset, self.shuffle, self.batch_size, self.drop_last)
        
class CifarDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx] / 255.0, self.Y[idx] # 图像值缩放到[0, 1]
    
class AverageMeter:
    def __init__(self):
        self.val = 0.
        self.count = 0

    def update(self, value, n=1):
        self.val += value
        self.count += n

    def __call__(self):
        return self.val / self.count

    def reset(self):
        self.val = 0.
        self.count = 0

    def __str__(self):
        return str(self.__call__())
    
class Layer:
    def __init__(self, name='layer', *args, **kwargs):
        self.name = name

    def forward(self, *args, **kwargs):
        raise NotImplementedError

    def backward(self):
        raise NotImplementedError

    def parameters(self):
        return []

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def __str__(self):
        return self.name


class Linear(Layer):
    def __init__(
        self,
        in_features,
        out_features,
        name='linear',
        weight_attr=Normal(),
        bias_attr=Constant(),
        *args,
        **kwargs
        ):
        super().__init__(name=name, *args, **kwargs)
        self.weights = Tensor((in_features, out_features))
        self.weights.data = weight_attr(self.weights.data.shape)
        self.bias = Tensor((1, out_features))
        self.bias.data = bias_attr(self.bias.data.shape)
        self.input = None

    def forward(self, x):
        self.input = x
        output = np.dot(x, self.weights.data) + self.bias.data
        return output

    def backward(self, gradient):
        self.weights.grad += np.dot(self.input.T, gradient)  # dy / dw
        self.bias.grad += np.sum(gradient, axis=0, keepdims=True)  # dy / db 
        input_grad = np.dot(gradient, self.weights.data.T)  # dy / dx
        return input_grad

    def parameters(self):
        return [self.weights, self.bias]

    def __str__(self):
        string = "linear layer, weight shape: {}, bias shape: {}".format(self.weights.data.shape, self.bias.data.shape)
        return string


class ReLU(Layer):
    def __init__(self, name='relu', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        self.activated = None

    def forward(self, x):
        x[x < 0] = 0
        self.activated = x
        return self.activated

    def backward(self, gradient):
        return gradient * (self.activated > 0)
    
class Sequential:
    def __init__(self, *args, **kwargs):
        self.graphs = []
        self._parameters = []
        for arg_layer in args:
            if isinstance(arg_layer, Layer):
                self.graphs.append(arg_layer)
                self._parameters += arg_layer.parameters()

    def add(self, layer):
        assert isinstance(layer, Layer), "The type of added layer must be Layer, but got {}.".format(type(layer))
        self.graphs.append(layer)
        self._parameters += layer.parameters()

    def forward(self, x):
        for graph in self.graphs:
            x = graph(x)
        return x

    def backward(self, grad):
        # grad backward in inverse order of graph
        for graph in self.graphs[::-1]:
            grad = graph.backward(grad)

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def __str__(self):
        string = 'Sequential:\n'
        for graph in self.graphs:
            string += graph.__str__() + '\n'
        return string

    def parameters(self):
        return self._parameters
    
class Optimizer:
    def __init__(self, parameters, learning_rate=0.001, weight_decay=0.0, decay_type='l2'):
        assert decay_type in ['l1', 'l2'], "only support decay_type 'l1' and 'l2', but got {}.".format(decay_type)
        self.parameters = parameters
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.decay_type = decay_type

    def step(self):
        raise NotImplementedError

    def clear_grad(self):
        for p in self.parameters:
            p.clear_grad()

    def get_decay(self, g):
        if self.decay_type == 'l1':
            return self.weight_decay
        elif self.decay_type == 'l2':
            return self.weight_decay * g


class SGD(Optimizer):
    def __init__(self, momentum=0.9, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.momentum = momentum
        self.velocity = []
        for p in self.parameters:
            self.velocity.append(np.zeros_like(p.grad))

    def step(self):
        for p, v in zip(self.parameters, self.velocity):
            decay = self.get_decay(p.grad)
            v = self.momentum * v + p.grad + decay 
            p.data = p.data - self.learning_rate * v
            
class SoftmaxWithLogits(Layer):
    def __init__(self, reduction='mean', name='softamxwithlogits', *args, **kwargs):
        super().__init__(name=name, *args, **kwargs)
        assert reduction in ['mean', 'none', 'sum'], "reduction only support 'mean', 'none' and 'sum', but got {}.".format(reduction)
        self.reduction = reduction
        self.logits = None
        self.target = None

    def forward(self, logits, target):
        assert logits.shape[0] == target.shape[0], "The first fimension of logits and target is not same, logits shape {} cann't match target shape {}.".format(logits.shape, target.shape)
        self.logits = logits
        self.target = target
        loss = []
        for i in range(logits.shape[0]):
            loss_i = -logits[i, target.squeeze(-1)[i]] + np.log(np.sum(np.exp(logits[i])))
            loss.append(loss_i)
        loss = np.array(loss).reshape(target.shape)
        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            return loss

    def backward(self):
        soft_denominator = np.sum(np.exp(self.logits), axis=1, keepdims=True)  # [N, 1]
        eq_grad = np.zeros_like(self.logits)
        for i in range(self.logits.shape[0]):
            eq_grad[i, self.target.squeeze(-1)[i]] = -1
        gradient = np.exp(self.logits) / soft_denominator + eq_grad
        return gradient

epoches = 20   # epoch
batch_size = 4  
learning_rate = 0.001 
num_classes = 10

def load_cifar10(subset_percentage=10):
    # Load the CIFAR-10 dataset
    (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

    # Select a random subset of the data
    num_train_samples = int(len(train_images) * (subset_percentage / 100))
    num_test_samples = int(len(test_images) * (subset_percentage / 100))

    train_indices = np.random.choice(len(train_images), num_train_samples, replace=False)
    test_indices = np.random.choice(len(test_images), num_test_samples, replace=False)

    train_images = train_images[train_indices]
    train_labels = train_labels[train_indices]
    test_images = test_images[test_indices]
    test_labels = test_labels[test_indices]

    # Normalize pixel values to be between 0 and 1
    train_images = train_images.astype('float32') / 255.0
    test_images = test_images.astype('float32') / 255.0

    return (train_images, train_labels), (test_images, test_labels)

(train_images, train_labels), (test_images, test_labels) = load_cifar10()
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

train_dataset = CifarDataset(train_images, train_labels)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

val_dataset = CifarDataset(test_images, test_labels)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, drop_last=False)

model = Sequential(
    Linear(3 * 32 * 32, 64, name='linear1'),
    ReLU(name='relu1'),
    Linear(64, 128, name='linear2'),
    ReLU(name='relu1'),
    Linear(128, 64, name='linear3'),
    ReLU(name='relu1'),
    Linear(64, num_classes, name='linear4'),
)
opt = SGD(parameters=model.parameters(), learning_rate=learning_rate, weight_decay=0.0, decay_type='l2')
loss_fn = SoftmaxWithLogits()

def eval(model, val_dataloader):
    predict_labels = []
    labels = []
    for x, y in val_dataloader:
        x = x.reshape((1, -1))
        logits = model(x)
        pred = np.argmax(logits, axis=1)
        predict_labels.append(pred)
        labels.append(y.squeeze(1))
    pred = np.array(predict_labels)
    labels = np.array(labels)
    acc = np.sum(pred == labels) / len(labels)
    print("val dataset accuracy:", acc)
    return acc

total_acc = []  
loss_avg = AverageMeter()
for epoch in range(1, epoches + 1):
    acc = eval(model, val_dataloader=val_dataloader)  
    total_acc.append(acc)  
    for idx, (x, y) in enumerate(train_dataloader):
        x = x.reshape((batch_size, -1))  
        logits = model(x)
        loss = loss_fn(logits, y)
        loss_avg.update(loss)

        grad = loss_fn.backward()
        model.backward(grad)

        opt.step()
        opt.clear_grad()
    print("epoch: {}. loss: {}".format(epoch, loss_avg))

(5000, 32, 32, 3) (5000, 1) (1000, 32, 32, 3) (1000, 1)
val dataset accuracy: 0.091
epoch: 1. loss: 2.3025733310664696
val dataset accuracy: 0.099
epoch: 2. loss: 2.3023429192946843
val dataset accuracy: 0.099
epoch: 3. loss: 2.3022167604995203
val dataset accuracy: 0.099
epoch: 4. loss: 2.302135124729503
val dataset accuracy: 0.099
epoch: 5. loss: 2.302076574056335
val dataset accuracy: 0.099
epoch: 6. loss: 2.302037018449607
val dataset accuracy: 0.099
epoch: 7. loss: 2.3020084807937833
val dataset accuracy: 0.099
epoch: 8. loss: 2.301988187918079
val dataset accuracy: 0.099
epoch: 9. loss: 2.301972618658357
val dataset accuracy: 0.099
epoch: 10. loss: 2.301961078015659
val dataset accuracy: 0.099
epoch: 11. loss: 2.301950043182075
val dataset accuracy: 0.099
epoch: 12. loss: 2.3019399811331245
val dataset accuracy: 0.099
epoch: 13. loss: 2.301932833563625
val dataset accuracy: 0.099
epoch: 14. loss: 2.3019270361224526
val dataset accuracy: 0.099
epoch: 15. loss: 2.301920958912521
va

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms


class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 128)
        self.relu2 = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        x = x.view(-1, 32 * 16 * 16)  # Flatten the output
        x = self.fc1(x)
        x = self.relu2(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

def load_cifar10(batch_size):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train_dataset = datasets.CIFAR10(root='/home/yhz2023/code_file/data', train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root='/home/yhz2023/code_file/data', train=False, download=True, transform=transform)

    # Loading smaller subset of CIFAR-10
    train_subset = torch.utils.data.Subset(train_dataset, torch.arange(0, 5000))  # 10% of 50000
    test_subset = torch.utils.data.Subset(test_dataset, torch.arange(0, 1000))  # 10% of 10000

    train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_subset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader


def train_model(model, train_loader, test_loader, epochs):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, weight_decay=0.001)  # L2 regularization as weight_decay

    for epoch in range(epochs):
        model.train()
        for data, target in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                output = model(data)
                test_loss += criterion(output, target).item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)
        print(
            f'Epoch {epoch + 1}: Test Loss: {test_loss:.4f}, Accuracy: {100. * correct / len(test_loader.dataset):.2f}%')


if __name__ == "__main__":
    train_loader, test_loader = load_cifar10(batch_size=4)
    model = CNNModel()
    train_model(model, train_loader, test_loader, epochs=20)

Using downloaded and verified file: /home/yhz2023/code_file/data/cifar-10-python.tar.gz
Extracting /home/yhz2023/code_file/data/cifar-10-python.tar.gz to /home/yhz2023/code_file/data
Files already downloaded and verified
Epoch 1: Test Loss: 0.5240, Accuracy: 29.30%
Epoch 2: Test Loss: 0.4879, Accuracy: 36.70%
Epoch 3: Test Loss: 0.4657, Accuracy: 36.00%
Epoch 4: Test Loss: 0.4532, Accuracy: 39.00%
Epoch 5: Test Loss: 0.4404, Accuracy: 39.30%
Epoch 6: Test Loss: 0.4353, Accuracy: 38.10%
Epoch 7: Test Loss: 0.4219, Accuracy: 41.00%
Epoch 8: Test Loss: 0.4146, Accuracy: 41.20%
Epoch 9: Test Loss: 0.4115, Accuracy: 42.00%
Epoch 10: Test Loss: 0.4050, Accuracy: 40.70%
Epoch 11: Test Loss: 0.3985, Accuracy: 42.60%
Epoch 12: Test Loss: 0.3933, Accuracy: 42.60%
Epoch 13: Test Loss: 0.3864, Accuracy: 45.10%
Epoch 14: Test Loss: 0.3820, Accuracy: 45.30%
Epoch 15: Test Loss: 0.3786, Accuracy: 44.80%
Epoch 16: Test Loss: 0.3736, Accuracy: 46.30%
Epoch 17: Test Loss: 0.3709, Accuracy: 46.60%
Epoch 