In [4]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import ConcatDataset
from sklearn.model_selection import train_test_split
Activation_settings=["tanh","ReLU"]
Initial_settings=["random","Xavier","Kaiming"]
L2_settings=[0.001,0.0001,"none"]
Optimizer_settings=["sgd","Momentum","Adam"]
epochs_settings=[100,200,300]
hidden_nodes=[5,8,11]
LR_scheduler_settings=["none","cosine"]
save={}
# 轉換訓練資料
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])

# 載入MNIST手寫數字資料集，並將資料轉換為Tensor型態
train_data = torchvision.datasets.MNIST(root='./data', train=True, 
                                        transform=transform, 
                                        download=True)
test_data = torchvision.datasets.MNIST(root='./data', train=False, 
                                       transform=transform, 
                                       download=True)
dataset = ConcatDataset([train_data, test_data])




train_data, test_data = train_test_split(dataset, random_state=777, train_size=0.8)


batch_size = 64
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [5]:
class TwoLayerNet(torch.nn.Module):

    def __init__(self, Activation_settings, hidden_nodes):
        super(TwoLayerNet, self,).__init__()
        self.fc1 = torch.nn.Linear(28*28, hidden_nodes)
        self.fc2 = torch.nn.Linear(hidden_nodes, 10)
        self.activation = Activation_settings

    def forward(self, x):
      if self.activation =="ReLU":
        x = x.view(-1, 28*28)
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
      elif self.activation =="tanh":
        x = x.view(-1, 28*28)
        x = torch.nn.functional.tanh(self.fc1(x))
        x = self.fc2(x)
      return x

In [6]:
def init_weight(m, Initial_settings='Xavier', Activation_settings='none'):
  if isinstance(m,nn.Linear):
    if Initial_settings =='Xavier':
      torch.nn.init.xavier_uniform(m.weight)
    elif Initial_settings =='Kaiming':
      torch.nn.init.kaiming_uniform(m.weight, nonlinearity = Activation_settings)

In [9]:
def train_EB(Activation_settings, Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings):
  model = TwoLayerNet(Activation_settings, hidden_nodes)
  if Initial_settings =='Xavier':
      model.apply(lambda model: init_weight(model, 'Xavier'))
  elif Initial_settings =='Kaiming':
      model.apply(lambda model: init_weight(model, 'Kaiming', 'relu'))
  

  if Optimizer_settings =='sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=L2_settings)
  elif Optimizer_settings =='momentum':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=L2_settings)
  else:
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=L2_settings)
  
  if LR_scheduler_settings == 'cosine':
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 32, eta_min=0, last_epoch=-1)
    
  criterion = torch.nn.CrossEntropyLoss()

  for epoch in range(epochs_settings):
    total=0
    correct=0
    best_acc=0
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):

      inputs,label = data #每次64個
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, label)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      if i % 100 == 0:    
              
              running_loss = 0.0
    with torch.no_grad():
      for data in test_loader:
          images, labels = data
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          #print("label size", labels.size(0))
          correct += (predicted == labels).sum().item()
          #print("predicted",predicted)
          #print("labels",labels)
          #print("correct", (predicted == labels).sum().item())
      if (correct/total) > best_acc:
        best_acc=correct / total
    print('Accuracy of the network on the 14000 test images: %.3f %%' % (
        100 * correct / total))
  print('Finished Training')
  print('Best Accuracy of the network on the 14000 test images: %.3f %%' % (
        100 * best_acc))
  torch.save(model, '/content/drive/MyDrive/advanced algo./HW2/EC_{}_{}_{}_{}_{}_{}_{}.pt'.format(Activation_settings,Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings))
  print(save)




In [12]:
def train_EB_LG(Activation_settings, Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings):
  
  model = TwoLayerNet(Activation_settings, hidden_nodes)
  if Initial_settings =='Xavier':
      model.apply(lambda model: init_weight(model, 'Xavier'))
  elif Initial_settings =='Kaiming':
      model.apply(lambda model: init_weight(model, 'Kaiming', 'relu'))
  

  if Optimizer_settings =='sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=L2_settings)
  elif Optimizer_settings =='momentum':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=L2_settings)
  else:
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=L2_settings)
  
  if LR_scheduler_settings == 'cosine':
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 32, eta_min=0, last_epoch=-1)
    
  criterion = torch.nn.CrossEntropyLoss()
  epoch = 0
  LG = 0.9441
  total=0
  correct=0
  best_acc=0
  running_loss = 0.0

  while best_acc < LG:
    epoch = epoch + 1
    for i, data in enumerate(train_loader, 0):

      inputs,label = data #每次64個
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, label)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      if i % 100 == 0:    
              
              running_loss = 0.0
    with torch.no_grad():
      for data in test_loader:
          images, labels = data
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          #print("label size", labels.size(0))
          correct += (predicted == labels).sum().item()
          #print("predicted",predicted)
          #print("labels",labels)
          #print("correct", (predicted == labels).sum().item())
      if (correct/total) > best_acc:
        best_acc=correct / total
        torch.save(model, '/content/drive/MyDrive/advanced algo./HW2/EB_LG_{}_{}_{}_{}_{}_{}_{}.pt'.format(Activation_settings,Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings))
    #print('Accuracy of the network on the 14000 test images: %.3f %%' % (100 * correct / total))

    if epoch >= 200: 
      break

  print('Finished Training in epoch {}'.format(epoch))
  print('Best Accuracy of the network on the 14000 test images: %.3f %%' % (
        100 * best_acc))
  


In [13]:
def train_LG(Activation_settings, Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings):
  
  model = TwoLayerNet(Activation_settings, hidden_nodes)
  if Initial_settings =='Xavier':
      model.apply(lambda model: init_weight(model, 'Xavier'))
  elif Initial_settings =='Kaiming':
      model.apply(lambda model: init_weight(model, 'Kaiming', 'relu'))
  

  if Optimizer_settings =='sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=L2_settings)
  elif Optimizer_settings =='momentum':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=L2_settings)
  else:
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=L2_settings)
  
  if LR_scheduler_settings == 'cosine':
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 32, eta_min=0, last_epoch=-1)
    
  criterion = torch.nn.CrossEntropyLoss()
  epoch = 0
  LG = 0.9441
  total=0
  correct=0
  best_acc=0
  running_loss = 0.0

  while best_acc < LG:
    epoch = epoch + 1
    for i, data in enumerate(train_loader, 0):

      inputs,label = data #每次64個
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, label)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      if i % 100 == 0:    
              
              running_loss = 0.0
    with torch.no_grad():
      for data in test_loader:
          images, labels = data
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          #print("label size", labels.size(0))
          correct += (predicted == labels).sum().item()
          #print("predicted",predicted)
          #print("labels",labels)
          #print("correct", (predicted == labels).sum().item())
      if (correct/total) > best_acc:
        best_acc=correct / total
        torch.save(model, '/content/drive/MyDrive/advanced algo./HW2/LG_{}_{}_{}_{}_{}_{}_{}.pt'.format(Activation_settings,Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings))
    #print('Accuracy of the network on the 14000 test images: %.3f %%' % (100 * correct / total))


  print('Finished Training in epoch {}'.format(epoch))
  print('Best Accuracy of the network on the 14000 test images: %.3f %%' % (
        100 * best_acc))
  


### HW1最好的3個
* ReLU_random_0.001_Adam_200_11_cosine.pt
* ReLU_Kaiming_0.001_Adam_200_11_none.pt
* ReLU_Xavier_0.001_Adam_200_11_cosine.pt

### 先用EB的accuracy作為benchmark
* EB_ReLU_random_0.001_Adam_200_11_cosine.pt : 94.493 %
* EB_ReLU_Kaiming_0.001_Adam_200_11_none.pt : 94.307 %
* EB_ReLU_Xavier_0.001_Adam_200_11_cosine.pt : 94.429 %
* average benchmark: 94.410 %


* Activation_settings=["tanh","ReLU"]
* Initial_settings=["random","Xavier","Kaiming"]
* L2_settings=[0.001,0.0001,"none"]
* Optimizer_settings=["sgd","Momentum","Adam"]
* epochs_settings=[100,200,300]
* hidden_nodes=[5,8,11]
* LR_scheduler_settings=["none","cosine"]


In [10]:
train_EB(Activation_settings[1], Initial_settings[0], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])
train_EB(Activation_settings[1], Initial_settings[1], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[0])
train_EB(Activation_settings[1], Initial_settings[2], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])

[0,     0] loss: 0.023
[0,   100] loss: 1.275
[0,   200] loss: 0.581
[0,   300] loss: 0.465
[0,   400] loss: 0.382
[0,   500] loss: 0.370
[0,   600] loss: 0.345
[0,   700] loss: 0.367
[0,   800] loss: 0.324
Accuracy of the network on the 14000 test images: 90.986 %
[1,     0] loss: 0.002
[1,   100] loss: 0.308
[1,   200] loss: 0.295
[1,   300] loss: 0.293
[1,   400] loss: 0.277
[1,   500] loss: 0.304
[1,   600] loss: 0.291
[1,   700] loss: 0.286
[1,   800] loss: 0.272
Accuracy of the network on the 14000 test images: 91.486 %
[2,     0] loss: 0.002
[2,   100] loss: 0.265
[2,   200] loss: 0.271
[2,   300] loss: 0.271
[2,   400] loss: 0.298
[2,   500] loss: 0.254
[2,   600] loss: 0.245
[2,   700] loss: 0.263
[2,   800] loss: 0.254
Accuracy of the network on the 14000 test images: 92.357 %
[3,     0] loss: 0.002
[3,   100] loss: 0.237
[3,   200] loss: 0.256
[3,   300] loss: 0.259
[3,   400] loss: 0.251
[3,   500] loss: 0.259
[3,   600] loss: 0.260
[3,   700] loss: 0.238
[3,   800] loss: 0

  torch.nn.init.xavier_uniform(m.weight)


[0,   300] loss: 0.489
[0,   400] loss: 0.398
[0,   500] loss: 0.357
[0,   600] loss: 0.333
[0,   700] loss: 0.347
[0,   800] loss: 0.328
Accuracy of the network on the 14000 test images: 91.636 %
[1,     0] loss: 0.003
[1,   100] loss: 0.298
[1,   200] loss: 0.282
[1,   300] loss: 0.281
[1,   400] loss: 0.264
[1,   500] loss: 0.274
[1,   600] loss: 0.258
[1,   700] loss: 0.266
[1,   800] loss: 0.247
Accuracy of the network on the 14000 test images: 92.486 %
[2,     0] loss: 0.002
[2,   100] loss: 0.248
[2,   200] loss: 0.241
[2,   300] loss: 0.237
[2,   400] loss: 0.226
[2,   500] loss: 0.234
[2,   600] loss: 0.232
[2,   700] loss: 0.237
[2,   800] loss: 0.256
Accuracy of the network on the 14000 test images: 93.407 %
[3,     0] loss: 0.002
[3,   100] loss: 0.222
[3,   200] loss: 0.203
[3,   300] loss: 0.211
[3,   400] loss: 0.236
[3,   500] loss: 0.219
[3,   600] loss: 0.232
[3,   700] loss: 0.238
[3,   800] loss: 0.229
Accuracy of the network on the 14000 test images: 93.514 %
[4,  

  torch.nn.init.kaiming_uniform(m.weight, nonlinearity = Activation_settings)


[0,   300] loss: 0.498
[0,   400] loss: 0.444
[0,   500] loss: 0.419
[0,   600] loss: 0.381
[0,   700] loss: 0.386
[0,   800] loss: 0.334
Accuracy of the network on the 14000 test images: 90.379 %
[1,     0] loss: 0.004
[1,   100] loss: 0.334
[1,   200] loss: 0.341
[1,   300] loss: 0.324
[1,   400] loss: 0.311
[1,   500] loss: 0.326
[1,   600] loss: 0.295
[1,   700] loss: 0.283
[1,   800] loss: 0.304
Accuracy of the network on the 14000 test images: 92.071 %
[2,     0] loss: 0.003
[2,   100] loss: 0.272
[2,   200] loss: 0.284
[2,   300] loss: 0.262
[2,   400] loss: 0.292
[2,   500] loss: 0.287
[2,   600] loss: 0.267
[2,   700] loss: 0.271
[2,   800] loss: 0.254
Accuracy of the network on the 14000 test images: 91.871 %
[3,     0] loss: 0.005
[3,   100] loss: 0.270
[3,   200] loss: 0.251
[3,   300] loss: 0.239
[3,   400] loss: 0.274
[3,   500] loss: 0.247
[3,   600] loss: 0.247
[3,   700] loss: 0.267
[3,   800] loss: 0.256
Accuracy of the network on the 14000 test images: 92.621 %
[4,  

* LG_ReLU_random_0.001_Adam_200_11_cosine.pt : 
  * Epoch: 514, Accuracy: 94.410 %
* LG_ReLU_Kaiming_0.001_Adam_200_11_none.pt : 
  * Epoch: 402, Accuracy: 94.410 %
* LG_ReLU_Xavier_0.001_Adam_200_11_cosine.pt : 94.429 %
  * Epoch: 115, Accuracy: 94.410 %

In [14]:
'''
Activation_settings=["tanh","ReLU"]
Initial_settings=["random","Xavier","Kaiming"]
L2_settings=[0.001,0.0001,"none"]
Optimizer_settings=["sgd","Momentum","Adam"]
epochs_settings=[100,200,300]
hidden_nodes=[5,8,11]
LR_scheduler_settings=["none","cosine"]
'''
train_LG(Activation_settings[1], Initial_settings[0], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])

Finished Training in epoch 514
Best Accuracy of the network on the 14000 test images: 94.410 %


In [15]:
train_LG(Activation_settings[1], Initial_settings[1], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[0])


  torch.nn.init.xavier_uniform(m.weight)


Finished Training in epoch 402
Best Accuracy of the network on the 14000 test images: 94.410 %


In [16]:
train_LG(Activation_settings[1], Initial_settings[2], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])


  torch.nn.init.kaiming_uniform(m.weight, nonlinearity = Activation_settings)


Finished Training in epoch 115
Best Accuracy of the network on the 14000 test images: 94.410 %


* EB_LG_ReLU_random_0.001_Adam_200_11_cosine.pt : 
  * Epoch: 200, Accuracy: 94.230 %
* EB_LG_ReLU_Kaiming_0.001_Adam_200_11_none.pt : 
  * Epoch: 200, Accuracy: 94.010 %
* EB_LG_ReLU_Xavier_0.001_Adam_200_11_cosine.pt : 94.429 %
  * Epoch: 200, Accuracy: 94.274 %

In [17]:
train_EB_LG(Activation_settings[1], Initial_settings[0], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])



Finished Training in epoch 200
Best Accuracy of the network on the 14000 test images: 94.230 %


In [18]:
train_EB_LG(Activation_settings[1], Initial_settings[1], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[0])

  torch.nn.init.xavier_uniform(m.weight)


Finished Training in epoch 200
Best Accuracy of the network on the 14000 test images: 94.010 %


In [19]:
train_EB_LG(Activation_settings[1], Initial_settings[2], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])

  torch.nn.init.kaiming_uniform(m.weight, nonlinearity = Activation_settings)


Finished Training in epoch 200
Best Accuracy of the network on the 14000 test images: 94.279 %


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
