<a href="https://colab.research.google.com/github/wayu0730/advanced-algorithm/blob/main/hw1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Basic**

In [None]:
import torch
# Use the first CUDA GPU when one is available; otherwise fall back to the
# CPU so the cell still runs on a runtime without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# N: batch size, D_in: input width, H: hidden width, D_out: output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs/targets and randomly initialised weights, all allocated
# directly on `device`.
x = torch.randn(N, D_in, device=device)
y = torch.randn(N, D_out, device=device)
w1 = torch.randn(D_in, H, device=device)
w2 = torch.randn(H, D_out, device=device)

In [None]:
# Plain gradient descent on a two-layer ReLU network, with the gradients
# derived and coded by hand (no autograd).
learning_rate = 1e-6
for t in range(500):
    # Forward pass: linear -> ReLU -> linear, then the summed squared error.
    h = x @ w1
    h_relu = torch.clamp(h, min=0)
    y_pred = h_relu @ w2
    loss = ((y_pred - y) ** 2).sum()

    # Backward pass: apply the chain rule from the loss back to each weight.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t() @ grad_y_pred
    grad_h_relu = grad_y_pred @ w2.t()
    # ReLU passes no gradient where its input was negative.
    grad_h = grad_h_relu.masked_fill(h < 0, 0)
    grad_w1 = x.t() @ grad_h

    # In-place gradient descent step on both weight matrices.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

**Advanced**

In [None]:
import torch
# Every tensor in this cell lives on the CPU.
device = torch.device('cpu')

# Batch size, input width, hidden width and output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and targets; autograd does not track these.
x = torch.randn(N, D_in, device=device)
y = torch.randn(N, D_out, device=device)

# Trainable weights: requires_grad=True makes autograd record the operations
# applied to them so that backward() can fill in w1.grad and w2.grad.
w1 = torch.randn(D_in, H, device=device, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, requires_grad=True)

In [None]:
# Same two-layer network as the manual version, but the gradients come from
# autograd instead of hand-written formulas.
learning_rate = 1e-6
for t in range(500):
    # Forward pass; autograd records it because w1 and w2 require grad.
    h = x @ w1
    h_relu = torch.clamp(h, min=0)
    y_pred = h_relu @ w2
    loss = ((y_pred - y) ** 2).sum()

    # Populate w1.grad and w2.grad.
    loss.backward()

    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for weight in (w1, w2):
            weight -= learning_rate * weight.grad
            # backward() accumulates into .grad, so reset it for the next step.
            weight.grad.zero_()

NN wrapper

In [None]:
import torch

# Batch size, input width, hidden width and output width.
N, D_in, H, D_out = 64, 1000, 100, 10

# Random inputs and regression targets.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Linear -> ReLU -> Linear, assembled from ready-made torch.nn modules.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

learning_rate = 1e-6
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.MSELoss()  # mean squared error with the default reduction

for t in range(500):
    # Forward pass and loss.
    y_pred = model(x)
    loss = loss_fn(y_pred, y)

    # Backward pass, parameter update, then clear the gradients so the next
    # iteration starts from zero.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


class Net(nn.Module):
    """Small CNN classifier: conv -> ReLU -> max-pool -> dropout -> linear.

    The classifier's 5408 input features equal 32 channels * 13 * 13, which
    is what a 28x28 single-channel image yields after the 3x3 convolution
    (26x26) and the 2x2 max-pool (13x13).
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 32, 3)    # 1 input channel, 32 filters, 3x3 kernel
        self.dropout = nn.Dropout2d(0.25)  # drops whole feature maps with p = 0.25
        self.fc = nn.Linear(5408, 10)      # 10 classes

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images."""
        pooled = F.max_pool2d(F.relu(self.conv(x)), 2)
        flat = torch.flatten(self.dropout(pooled), 1)
        # Log-probabilities (rather than probabilities) for numerical stability.
        return F.log_softmax(self.fc(flat), dim=1)


def train(model, train_loader, optimizer, epochs, log_interval):
    """Train `model` for `epochs` passes over `train_loader`.

    Args:
        model: module whose output is fed to `F.nll_loss` (so it is expected
            to produce log-probabilities).
        train_loader: iterable of `(data, target)` batches; it must support
            `len()` and expose a `.dataset` attribute, both used for logging.
        optimizer: optimizer that updates `model`'s parameters.
        epochs: number of passes over `train_loader`.
        log_interval: a progress line is printed every `log_interval` batches.
    """
    model.train()
    for epoch in range(1, epochs + 1):
        for step, (inputs, labels) in enumerate(train_loader):
            # Clear old gradients, run forward + backward, then update.
            optimizer.zero_grad()
            # Negative log-likelihood on log-probabilities is cross entropy.
            loss = F.nll_loss(model(inputs), labels)
            loss.backward()
            optimizer.step()

            if step % log_interval != 0:
                continue

            # Progress is estimated from the current batch's size, so it is
            # exact only while all batches are the same size.
            seen = step * len(inputs)
            percent = 100. * step / len(train_loader)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, len(train_loader.dataset), percent, loss.item()))


def test(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad(): # disable gradient calculation for efficiency
        for data, target in test_loader:
            # Prediction
            output = model(data)

            # Compute loss & accuracy
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item() # how many predictions in this batch are correct

    test_loss /= len(test_loader.dataset)

    # Log testing info
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def main():
    """Train `Net` on MNIST, save and reload its weights, then evaluate it."""
    # Training settings
    batch_size = 64
    num_epochs = 2
    log_every = 10
    checkpoint_path = "mnist_cnn.pt"

    # Convert images to tensors, then standardise them with the mean and
    # std of the MNIST training set.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    # Only the training split requests a download; the test split is read
    # from the same ./data root.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True, transform=preprocess),
        batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=preprocess),
        batch_size=batch_size)

    # Create the network and an Adam optimizer with default hyperparameters.
    model = Net()
    optimizer = optim.Adam(model.parameters())

    # Train
    train(model, train_loader, optimizer, num_epochs, log_every)

    # Save the trained weights and load them into a fresh Net (for reference
    # in case training and testing are split into separate files).
    torch.save(model.state_dict(), checkpoint_path)
    model = Net()
    model.load_state_dict(torch.load(checkpoint_path))

    # Test
    test(model, test_loader)


if __name__ == '__main__':
    main()

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


class TwoLyerNet(nn.Module):
    """Two-layer fully connected classifier: flatten -> linear -> ReLU -> linear.

    The (misspelled) class name is kept so existing references keep working.

    Args:
        input_size: number of features per sample after flattening
            (default 28 * 28, the size of one MNIST image).
        hidden_size: width of the hidden layer.
        num_classes: number of output classes.
    """

    def __init__(self, input_size=28 * 28, hidden_size=128, num_classes=10):
        super(TwoLyerNet, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs."""
        # Collapse everything except the batch dimension into one vector.
        x = torch.flatten(x, 1)
        x = F.relu(self.linear1(x))
        x = self.fc(x)
        # Log-probabilities, so the output can be fed to F.nll_loss directly.
        return F.log_softmax(x, dim=1)


def train(model, train_loader, optimizer, epochs, log_interval):
    """Run mini-batch training for `epochs` passes over `train_loader`.

    Args:
        model: module whose output is passed to `F.nll_loss` (so it is
            expected to return log-probabilities).
        train_loader: iterable of `(data, target)` batches that supports
            `len()` and has a `.dataset` attribute (both used for logging).
        optimizer: optimizer bound to `model`'s parameters.
        epochs: number of passes over the data.
        log_interval: print a progress line every `log_interval` batches.
    """
    model.train()
    for epoch in range(1, epochs + 1):
        for batch_no, batch in enumerate(train_loader):
            features, targets = batch

            # One optimisation step: reset gradients, forward, backward, update.
            optimizer.zero_grad()
            predictions = model(features)
            # Negative log-likelihood on log-probabilities is cross entropy.
            loss = F.nll_loss(predictions, targets)
            loss.backward()
            optimizer.step()

            # Log training info
            is_logging_step = batch_no % log_interval == 0
            if is_logging_step:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch,
                    batch_no * len(features),
                    len(train_loader.dataset),
                    100. * batch_no / len(train_loader),
                    loss.item()))


def test(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad(): # disable gradient calculation for efficiency
        for data, target in test_loader:
            # Prediction
            output = model(data)

            # Compute loss & accuracy
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item() # how many predictions in this batch are correct

    test_loss /= len(test_loader.dataset)

    # Log testing info
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def main():
    """Train a `Net` on MNIST, round-trip its weights through disk, then test it.

    NOTE(review): this instantiates `Net`, which this cell does not define,
    rather than the `TwoLyerNet` class declared above it - confirm which
    model is intended here.
    """
    # Training settings
    batch_size, num_epochs, log_every = 64, 2, 10
    weights_file = "mnist_cnn.pt"

    # Image transform: to tensor, then normalise with the mean and std of
    # the MNIST training set.
    to_tensor = transforms.ToTensor()
    normalise = transforms.Normalize((0.1307,), (0.3081,))
    transform = transforms.Compose([to_tensor, normalise])

    # Load the datasets (only the training split requests a download).
    train_set = datasets.MNIST('./data', train=True, download=True,
                               transform=transform)
    test_set = datasets.MNIST('./data', train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

    # Create network & optimizer
    model = Net()
    optimizer = optim.Adam(model.parameters())

    # Train
    train(model, train_loader, optimizer, num_epochs, log_every)

    # Persist the trained weights and restore them into a new instance (for
    # reference in case training and testing live in separate files).
    torch.save(model.state_dict(), weights_file)
    model = Net()
    model.load_state_dict(torch.load(weights_file))

    # Test
    test(model, test_loader)


if __name__ == '__main__':
    main()

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision import transforms

# Define the neural network model
class TwoLayerNet(nn.Module):
    """Fully connected network: linear -> ReLU -> linear.

    `forward` returns the second linear layer's output unchanged (no softmax
    is applied).

    Args:
        input_size: number of features per input sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of flattened inputs to per-class scores."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Hyperparameters
input_size = 28 * 28   # size of one flattened MNIST image
hidden_size = 128      # number of units in the hidden layer
num_classes = 10       # MNIST has 10 classes
learning_rate = 0.001
batch_size = 64
num_epochs = 10

# Load the MNIST dataset; images are converted to tensors on access.
to_tensor = transforms.ToTensor()
train_dataset = MNIST(root='data/', train=True, download=True, transform=to_tensor)
test_dataset = MNIST(root='data/', train=False, download=True, transform=to_tensor)

# Only the training data is reshuffled each epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Initialise the neural network model
model = TwoLayerNet(input_size, hidden_size, num_classes)

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the neural network model
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        # Flatten each image into a vector
        images = images.reshape(-1, input_size)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimisation step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        if step % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, step, total_steps, loss.item()))

# Evaluate the model on the test set
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        # Flatten each image before feeding it to the fully connected model.
        outputs = model(images.reshape(-1, input_size))
        # torch.max returns (values, indices); the indices are the predictions.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Test Accuracy of the model on the 10000 test images: {} %'.format(accuracy))


Epoch [1/10], Step [100/938], Loss: 0.4718
Epoch [1/10], Step [200/938], Loss: 0.3996
Epoch [1/10], Step [300/938], Loss: 0.3629
Epoch [1/10], Step [400/938], Loss: 0.1389
Epoch [1/10], Step [500/938], Loss: 0.3152
Epoch [1/10], Step [600/938], Loss: 0.1657
Epoch [1/10], Step [700/938], Loss: 0.3751
Epoch [1/10], Step [800/938], Loss: 0.1908
Epoch [1/10], Step [900/938], Loss: 0.0661
Epoch [2/10], Step [100/938], Loss: 0.4095
Epoch [2/10], Step [200/938], Loss: 0.2047
Epoch [2/10], Step [300/938], Loss: 0.1396
Epoch [2/10], Step [400/938], Loss: 0.0507
Epoch [2/10], Step [500/938], Loss: 0.2214
Epoch [2/10], Step [600/938], Loss: 0.1172
Epoch [2/10], Step [700/938], Loss: 0.1014
Epoch [2/10], Step [800/938], Loss: 0.2870
Epoch [2/10], Step [900/938], Loss: 0.0969
Epoch [3/10], Step [100/938], Loss: 0.1921
Epoch [3/10], Step [200/938], Loss: 0.0805
Epoch [3/10], Step [300/938], Loss: 0.0277
Epoch [3/10], Step [400/938], Loss: 0.0403
Epoch [3/10], Step [500/938], Loss: 0.0491
Epoch [3/10

In [None]:
import torch
import torchvision
from torchvision import transforms

# Load the MNIST handwritten-digit dataset and convert the images to tensors
to_tensor = transforms.ToTensor()
train_data = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=to_tensor)
test_data = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=to_tensor)

# Dataset split ratios
train_ratio = 0.8
test_ratio = 0.2

# Compute the split sizes. `test_size` is the number of samples held out of
# the original training set; below they become the validation subset.
train_size = int(train_ratio * len(train_data))
test_size = len(train_data) - train_size

# Randomly split the original training set into training and validation subsets
train_data, val_data = torch.utils.data.random_split(train_data, [train_size, test_size])

# DataLoaders that serve each dataset in shuffled mini-batches
batch_size = 64
loader_options = dict(batch_size=batch_size, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_data, **loader_options)
val_loader = torch.utils.data.DataLoader(val_data, **loader_options)
test_loader = torch.utils.data.DataLoader(test_data, **loader_options)


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

# Preprocessing: convert images to tensors, then normalise them with the
# mean and std of the MNIST training set.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])

# Load the MNIST handwritten-digit dataset. Both splits use `transform`, so
# the normalisation defined above is actually applied to the data.
train_data = torchvision.datasets.MNIST(root='./data', train=True,
                                        transform=transform,
                                        download=True)
test_data = torchvision.datasets.MNIST(root='./data', train=False,
                                       transform=transform,
                                       download=True)

# Dataset split ratios
train_ratio = 0.8
test_ratio = 0.2

# Compute the split sizes. `test_size` is the number of samples held out of
# the original training set; below they become the validation subset.
train_size = int(train_ratio * len(train_data))
test_size = len(train_data) - train_size

# Randomly split the original training set into training and validation subsets
train_data, val_data = torch.utils.data.random_split(train_data, [train_size, test_size])

# DataLoaders that serve each dataset in shuffled mini-batches
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)


# Define the model
class Net(torch.nn.Module):
    """Two-layer perceptron: 784 inputs -> 100 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28*28, 100)
        self.fc2 = torch.nn.Linear(100, 10)

    def forward(self, x):
        """Return the 10 output scores per sample; no softmax is applied."""
        # Reinterpret the input as rows of 784 values.
        flat = x.view(-1, 28*28)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

net = Net()

# Define the loss function and the optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the model
for epoch in range(10):  # train for 10 epochs

    running_loss = 0.0
    # `train_loader` is the training DataLoader built earlier in this cell.
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data

        # Reset gradients, run forward + backward, then update the weights.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print the mean loss of every block of 100 mini-batches.
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

# Evaluate the model
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    # `test_loader` is the test DataLoader built earlier in this cell.
    for data in test_loader:
        images, labels = data
        outputs = net(images)
        # torch.max returns (values, indices); the indices are the predictions.
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import ConcatDataset

# Preprocessing pipeline: to tensor, then normalise.
# NOTE(review): `transform` is built here but not passed to the datasets
# below, which use a plain ToTensor() - confirm whether normalisation was
# intended.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])

# Load the MNIST handwritten-digit dataset and convert the images to tensors
mnist_options = dict(root='./data', download=True)
train_data = torchvision.datasets.MNIST(train=True,
                                        transform=transforms.ToTensor(),
                                        **mnist_options)
test_data = torchvision.datasets.MNIST(train=False,
                                       transform=transforms.ToTensor(),
                                       **mnist_options)


In [None]:
# Number of fields in one dataset item (presumably an (image, label) pair).
len(train_data[0])

2

In [None]:
# Display the test dataset's summary representation.
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [None]:
# Chain the training and test datasets into one dataset.
dataset = ConcatDataset([train_data, test_data])

In [None]:
# Total number of samples in the combined dataset.
len(dataset)

70000

In [None]:
from sklearn.model_selection import train_test_split

# Re-split the combined dataset 80/20; the fixed random_state makes the
# split reproducible.
train_data, test_data = train_test_split(dataset, train_size=0.8, random_state=777)

# Mini-batch loaders over the two splits; both are shuffled.
batch_size = 64
loader_options = dict(batch_size=batch_size, shuffle=True)
train_loader = torch.utils.data.DataLoader(train_data, **loader_options)
test_loader = torch.utils.data.DataLoader(test_data, **loader_options)

In [None]:
# Print the number of samples in the training split, then the test split.
for split in (train_data, test_data):
    print(len(split))

In [None]:
class TwoLayerNet(torch.nn.Module):
    """Two-layer perceptron: 784 inputs -> 100 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(28*28, 100)
        self.fc2 = torch.nn.Linear(100, 10)

    def forward(self, x):
        """Return the 10 output scores per sample; no softmax is applied."""
        # Reinterpret the input as rows of 784 values.
        flat = x.view(-1, 28*28)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
# Fresh network, plus the loss and the SGD-with-momentum optimizer used to
# train it.
model = TwoLayerNet()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

In [None]:
# Train the model for 100 epochs.
for epoch in range(100):

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):

        inputs, label = data  # one mini-batch (the loader's batch size is 64)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report once 100 batches have been accumulated, so dividing by 100
        # yields a true mean; epoch and step are printed 1-based.
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Evaluate the model
correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for data in test_loader:
        images, labels = data
        outputs = model(images)
        # torch.max returns (values, indices); the indices are the predictions.
        _, predicted = torch.max(outputs.data, 1)
        batch_correct = (predicted == labels).sum().item()
        total += labels.size(0)
        print("label size", labels.size(0))
        correct += batch_correct
        # Per-batch debugging output.
        print("predicted",predicted)
        print("labels",labels)
        print("correct", batch_correct)
# Print the number of samples actually evaluated instead of a fixed 10000:
# this test split is not the standard MNIST test set.
print('Accuracy of the network on the %d test images: %d %%' % (
    total, 100 * correct / total))
