In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import ConcatDataset
from sklearn.model_selection import train_test_split
Activation_settings=["tanh","ReLU"]
Initial_settings=["random","Xavier","Kaiming"]
L2_settings=[0.001,0.0001,"none"]
Optimizer_settings=["sgd","Momentum","Adam"]
epochs_settings=[100,200,300]
hidden_nodes=[5,8,11]
LR_scheduler_settings=["none","cosine"]
save={}
# 轉換訓練資料
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.1307,), (0.3081,))])

# 載入MNIST手寫數字資料集，並將資料轉換為Tensor型態
train_data = torchvision.datasets.MNIST(root='./data', train=True, 
                                        transform=transform, 
                                        download=True)
test_data = torchvision.datasets.MNIST(root='./data', train=False, 
                                       transform=transform, 
                                       download=True)
dataset = ConcatDataset([train_data, test_data])




train_data, test_data = train_test_split(dataset, random_state=777, train_size=0.8)


batch_size = 64
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 328394360.67it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 64813105.31it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 158110043.13it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3954853.39it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [4]:
class TwoLayerNet(torch.nn.Module):

    def __init__(self, Activation_settings, hidden_nodes):
        super(TwoLayerNet, self,).__init__()
        self.fc1 = torch.nn.Linear(28*28, hidden_nodes)
        self.fc2 = torch.nn.Linear(hidden_nodes, 10)
        self.activation = Activation_settings

    def forward(self, x):
      if self.activation =="ReLU":
        x = x.view(-1, 28*28)
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
      elif self.activation =="tanh":
        x = x.view(-1, 28*28)
        x = torch.nn.functional.tanh(self.fc1(x))
        x = self.fc2(x)
      return x

In [5]:
def init_weight(m, Initial_settings='Xavier', Activation_settings='none'):
  if isinstance(m,nn.Linear):
    if Initial_settings =='Xavier':
      torch.nn.init.xavier_uniform(m.weight)
    elif Initial_settings =='Kaiming':
      torch.nn.init.kaiming_uniform(m.weight, nonlinearity = Activation_settings)

In [29]:
def train_EB_LG_UA(Activation_settings, Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings):
  
  model = TwoLayerNet(Activation_settings, hidden_nodes)
  if Initial_settings =='Xavier':
      model.apply(lambda model: init_weight(model, 'Xavier'))
  elif Initial_settings =='Kaiming':
      model.apply(lambda model: init_weight(model, 'Kaiming', 'relu'))
  

  if Optimizer_settings =='sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=L2_settings)
  elif Optimizer_settings =='momentum':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=L2_settings)
  else:
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=L2_settings)
  
  if LR_scheduler_settings == 'cosine':
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 32, eta_min=0, last_epoch=-1)
    
  criterion = torch.nn.CrossEntropyLoss()
  epoch = 0
  LG = 0.9441
  total=0
  correct=0
  best_acc=0
  running_loss = 99999
  ε = 1e-4
  run = True

  while run:
    epoch = epoch + 1
    for i, data in enumerate(train_loader, 0):

      inputs,label = data #每次64個
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, label)
      loss.backward()
      optimizer.step()

    with torch.no_grad():
      for data in test_loader:
          images, labels = data
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          #print("label size", labels.size(0))
          correct += (predicted == labels).sum().item()
          #print("predicted",predicted)
          #print("labels",labels)
          #print("correct", (predicted == labels).sum().item())
      if (correct/total) > best_acc:
        best_acc=correct / total
        torch.save(model, '/content/drive/MyDrive/advanced algo./HW3/EB_LG_UA_{}_{}_{}_{}_{}_{}_{}.pt'.format(Activation_settings,Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings))
        if best_acc > LG:
          run =False
    #print('Accuracy of the network on the 14000 test images: %.3f %%' % (100 * correct / total))
    if loss.item() < running_loss:
        for param_group in optimizer.param_groups:
          param_group['lr'] = param_group['lr'] * 1.2
        running_loss = loss.item()
        #print(running_loss)
        if epoch >= 200:
          print("unacceptable SLFN due to epoch >= 200")
          break
    else:
      for param_group in optimizer.param_groups:
        if param_group['lr'] > ε:
          param_group['lr'] = param_group['lr'] * 0.7
        else:
          print("unacceptable SLFN due to lr < ε")
          run =False
    

  print('Finished Training in epoch {}'.format(epoch))
  print('Finished Training in lr {}'.format(param_group['lr']))
  print('Best Accuracy of the network on the 14000 test images: %.3f %%' % (
        100 * best_acc))
  


In [30]:
def train_LG_UA(Activation_settings, Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings):
  
  model = TwoLayerNet(Activation_settings, hidden_nodes)
  if Initial_settings =='Xavier':
      model.apply(lambda model: init_weight(model, 'Xavier'))
  elif Initial_settings =='Kaiming':
      model.apply(lambda model: init_weight(model, 'Kaiming', 'relu'))
  

  if Optimizer_settings =='sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=L2_settings)
  elif Optimizer_settings =='momentum':
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=L2_settings)
  else:
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=L2_settings)
  
  if LR_scheduler_settings == 'cosine':
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 32, eta_min=0, last_epoch=-1)
    
  criterion = torch.nn.CrossEntropyLoss()
  epoch = 0
  LG = 0.9441
  total=0
  correct=0
  best_acc=0
  ε = 1e-4
  running_loss=99999
  run = True
  

  while run:
   
    epoch = epoch + 1
    for i, data in enumerate(train_loader, 0):

      inputs,label = data #每次64個
      optimizer.zero_grad()
      outputs = model(inputs)
      loss = criterion(outputs, label)
      loss.backward()
      optimizer.step()
      
      
    with torch.no_grad():
      for data in test_loader:
          images, labels = data
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          #print("label size", labels.size(0))
          correct += (predicted == labels).sum().item()
          #print("predicted",predicted)
          #print("labels",labels)
          #print("correct", (predicted == labels).sum().item())
      if (correct/total) > best_acc:
        best_acc=correct / total
        torch.save(model, '/content/drive/MyDrive/advanced algo./HW3/LG_UA_{}_{}_{}_{}_{}_{}_{}.pt'.format(Activation_settings,Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings))
        if best_acc > LG:
          run =False
    #print('Accuracy of the network on the 14000 test images: %.3f %%' % (100 * correct / total))
    if loss.item() < running_loss:
        for param_group in optimizer.param_groups:
          param_group['lr'] = param_group['lr'] * 1.2
        running_loss = loss.item()
        #print(running_loss)
    else:
      for param_group in optimizer.param_groups:
        if param_group['lr'] > ε:
          param_group['lr'] = param_group['lr'] * 0.7
        else:
          print("unacceptable SLFN due to lr < ε")
          run =False



  print('Finished Training in epoch {}'.format(epoch))
  print('Finished Training in lr {}'.format(param_group['lr']))
  print('Best Accuracy of the network on the 14000 test images: %.3f %%' % (
        100 * best_acc))
  


### HW1最好的3個
* ReLU_random_0.001_Adam_200_11_cosine.pt
* ReLU_Kaiming_0.001_Adam_200_11_none.pt
* ReLU_Xavier_0.001_Adam_200_11_cosine.pt

### 先用EB的accuracy作為benchmark
* EB_ReLU_random_0.001_Adam_200_11_cosine.pt : 94.493 %
* EB_ReLU_Kaiming_0.001_Adam_200_11_none.pt : 94.307 %
* EB_ReLU_Xavier_0.001_Adam_200_11_cosine.pt : 94.429 %
* average benchmark: 94.410 %


* Activation_settings=["tanh","ReLU"]
* Initial_settings=["random","Xavier","Kaiming"]
* L2_settings=[0.001,0.0001,"none"]
* Optimizer_settings=["sgd","Momentum","Adam"]
* epochs_settings=[100,200,300]
* hidden_nodes=[5,8,11]
* LR_scheduler_settings=["none","cosine"]


In [31]:
train_LG_UA(Activation_settings[1], Initial_settings[0], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])
train_EB_LG_UA(Activation_settings[1], Initial_settings[0], L2_settings[0], Optimizer_settings[2], epochs_settings[1], hidden_nodes[2], LR_scheduler_settings[1])

unacceptable SLFN due to lr < ε
Finished Training in epoch 19
Finished Training in lr 8.203786289181494e-05
Best Accuracy of the network on the 14000 test images: 93.405 %
unacceptable SLFN due to lr < ε
Finished Training in epoch 25
Finished Training in lr 8.335571912130903e-05
Best Accuracy of the network on the 14000 test images: 93.407 %
