# **作業流程**

1. 搭建model
  * 使用MNIST數據集
  * 設定不同hyperparameters的condition，可以使用的組合：
    * Activation_settings=["tanh","ReLU"]
    * Initial_settings=["random","Xavier","Kaiming"]
    * L2_settings=[0.001,0.0001,"none"]
    * Optimizer_settings=["sgd","Momentum","Adam"]
    * epochs_settings=[100,200,300]
    * hidden_nodes=[5,8,11]
    * LR_scheduler_settings=["none","cosine"]

2. 訓練model和evaluate
  * 總共訓練了54種組合（3×3×3×2）：
    * Activation_settings="ReLU"
    * Initial_settings=["random","Xavier","Kaiming"]
    * L2_settings=0.001
    * Optimizer_settings="Adam"
    * epochs_settings=[100,200,300]
    * hidden_nodes=[5,8,11]
    * LR_scheduler_settings=["none","cosine"]

3. ensemble 3 top model:
  * save : 一個dictionary，key為model名稱（由各項hyperparameter設定組成），value為包含"model"與"accuracy"（準確度）的dictionary
  * 使用save挑出三個準確度最高的model，並且平均三者的predict，再計算出最終的accuracy

## 搭建model

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import ConcatDataset
from sklearn.model_selection import train_test_split
# Hyperparameter search space. train() is called with one value per list.
Activation_settings = ["tanh", "ReLU"]
Initial_settings = ["random", "Xavier", "Kaiming"]
L2_settings = [0.001, 0.0001, "none"]
Optimizer_settings = ["sgd", "Momentum", "Adam"]
epochs_settings = [100, 200, 300]
hidden_nodes = [5, 8, 11]
LR_scheduler_settings = ["none", "cosine"]

# Results registry filled in by train(): one entry per trained configuration.
save = {}

# Preprocessing: convert each image to a tensor, then normalise it with
# mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Download both official MNIST splits and merge them into a single pool so
# that a custom train/test partition can be drawn from it.
mnist_kwargs = dict(root='./data', transform=transform, download=True)
train_data = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_data = torchvision.datasets.MNIST(train=False, **mnist_kwargs)
dataset = ConcatDataset([train_data, test_data])

# Reproducible 80/20 re-split of the merged pool (fixed random_state).
train_data, test_data = train_test_split(
    dataset,
    random_state=777,
    train_size=0.8,
)

# Mini-batch loaders; both of them reshuffle on every pass.
batch_size = 64
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=batch_size, shuffle=True)
    for split in (train_data, test_data)
)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
class TwoLayerNet(torch.nn.Module):
    """Two-layer fully connected classifier for 28x28 inputs.

    The network flattens its input to 784 features, applies one hidden
    linear layer followed by the selected activation, and projects the
    result onto 10 output logits.

    Args:
        Activation_settings: Name of the hidden activation, either
            ``"ReLU"`` or ``"tanh"``.
        hidden_nodes: Number of units in the hidden layer.

    Raises:
        ValueError: If ``Activation_settings`` is not a supported name.
    """

    _SUPPORTED_ACTIVATIONS = ("ReLU", "tanh")

    def __init__(self, Activation_settings, hidden_nodes):
        super().__init__()
        # Fail fast: an unknown name used to make forward() return its
        # input untouched, which only surfaced later as a confusing error.
        if Activation_settings not in self._SUPPORTED_ACTIVATIONS:
            raise ValueError(
                "Unsupported activation {!r}; expected one of {}".format(
                    Activation_settings, self._SUPPORTED_ACTIVATIONS))
        self.fc1 = torch.nn.Linear(28*28, hidden_nodes)
        self.fc2 = torch.nn.Linear(hidden_nodes, 10)
        self.activation = Activation_settings

    def forward(self, x):
        """Return the 10 class logits for a batch of images.

        Args:
            x: Tensor whose elements can be reshaped to ``(-1, 784)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding unnormalised scores.

        Raises:
            ValueError: If ``self.activation`` was changed to an
                unsupported name after construction.
        """
        hidden = self.fc1(x.view(-1, 28*28))
        if self.activation == "ReLU":
            hidden = torch.relu(hidden)
        elif self.activation == "tanh":
            # torch.nn.functional.tanh is deprecated in favour of torch.tanh.
            hidden = torch.tanh(hidden)
        else:
            raise ValueError(
                "Unsupported activation {!r}".format(self.activation))
        return self.fc2(hidden)

In [None]:
def init_weight(m, Initial_settings='Xavier', Activation_settings='none'):
    """Re-initialise the weight of a linear layer in place.

    Intended to be used with ``Module.apply``; modules that are not
    ``nn.Linear`` are left untouched, and so is the bias of linear layers.

    Args:
        m: Module to initialise.
        Initial_settings: ``'Xavier'`` for Xavier/Glorot uniform,
            ``'Kaiming'`` for Kaiming/He uniform. Any other value keeps the
            weights the layer already has.
        Activation_settings: Nonlinearity used to pick the Kaiming gain.
            The name is matched case-insensitively (``"ReLU"`` works) and
            ``'none'`` is treated as ``'linear'`` (gain 1). Ignored for
            Xavier.

    Raises:
        ValueError: If the Kaiming nonlinearity is not known to
            ``torch.nn.init.calculate_gain``.
    """
    if not isinstance(m, nn.Linear):
        return

    if Initial_settings == 'Xavier':
        # The variants without the trailing underscore are deprecated.
        torch.nn.init.xavier_uniform_(m.weight)
    elif Initial_settings == 'Kaiming':
        nonlinearity = str(Activation_settings).lower()
        if nonlinearity == 'none':
            # 'none' is not a name calculate_gain accepts; "no activation"
            # corresponds to the linear gain.
            nonlinearity = 'linear'
        torch.nn.init.kaiming_uniform_(m.weight, nonlinearity=nonlinearity)

In [None]:
def train(Activation_settings, Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings):
    """Train one TwoLayerNet configuration and record its best checkpoint.

    The model is trained on the module-level ``train_loader`` and evaluated
    on the module-level ``test_loader`` after every epoch. The weights of
    the epoch with the highest accuracy are restored at the end, stored in
    the module-level ``save`` dictionary and written to
    ``<configuration name>.pt``.

    NOTE(review): the same loader is used both to pick the best epoch and
    to report accuracy, so the reported number is optimistic.

    Args:
        Activation_settings: Hidden activation name (``"ReLU"``/``"tanh"``).
        Initial_settings: ``"Xavier"`` or ``"Kaiming"``; anything else keeps
            PyTorch's default layer initialisation.
        L2_settings: Weight-decay coefficient, or ``"none"``/``None`` to
            disable L2 regularisation.
        Optimizer_settings: ``"sgd"``, ``"Momentum"`` or ``"Adam"``
            (case-insensitive). Unknown names fall back to Adam, as before.
        epochs_settings: Number of training epochs.
        hidden_nodes: Width of the hidden layer.
        LR_scheduler_settings: ``"cosine"`` enables cosine annealing,
            stepped once per epoch; any other value keeps a constant rate.

    Returns:
        The best accuracy (fraction in ``[0, 1]``) seen over all epochs.
    """
    import copy  # local import: only needed to snapshot the best weights

    run_name = "{}_{}_{}_{}_{}_{}_{}".format(Activation_settings,Initial_settings, L2_settings, Optimizer_settings, epochs_settings, hidden_nodes, LR_scheduler_settings)

    model = TwoLayerNet(Activation_settings, hidden_nodes)
    if Initial_settings in ('Xavier', 'Kaiming'):
        # Derive the Kaiming gain from the activation actually in use
        # instead of always assuming ReLU.
        nonlinearity = str(Activation_settings).lower()
        model.apply(lambda module: init_weight(module, Initial_settings, nonlinearity))

    # "none" is a label in the settings list, not a valid weight_decay.
    weight_decay = 0.0 if L2_settings is None or L2_settings == "none" else L2_settings

    # Compare case-insensitively: the settings list spells it "Momentum".
    optimizer_name = str(Optimizer_settings).lower()
    if optimizer_name == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=weight_decay)
    elif optimizer_name == 'momentum':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=weight_decay)

    # Keep a handle on the scheduler: it only has an effect when stepped.
    scheduler = None
    if LR_scheduler_settings == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 32, eta_min=0, last_epoch=-1)

    criterion = torch.nn.CrossEntropyLoss()

    # Tracked across ALL epochs; resetting these per epoch would make the
    # "best" result simply the last epoch.
    best_acc = 0
    best_state = copy.deepcopy(model.state_dict())

    for epoch in range(epochs_settings):
        model.train()
        for inputs, label in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()
        if scheduler is not None:
            scheduler.step()

        # Per-epoch evaluation counters.
        total = 0
        correct = 0
        model.eval()
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard against an empty evaluation loader.
        accuracy = correct / total if total else 0.0
        if accuracy > best_acc:
            best_acc = accuracy
            # Snapshot the weights; the live model keeps changing afterwards.
            best_state = copy.deepcopy(model.state_dict())

        if epoch % 10 == 0:
            print('Epoch:%d \n Accuracy of the network on the 14000 test images: %d %%' % (
                epoch, 100 * correct / total if total else 0))

    # Restore the best epoch so that both the registry entry and the file on
    # disk hold the weights that produced best_acc.
    model.load_state_dict(best_state)
    save[run_name] = {"model": model, "accuracy": best_acc}

    print('Finished Training')
    print('Best Accuracy of the network on the 14000 test images: %d %%' % (
        100 * best_acc))
    torch.save(model, '{}.pt'.format(run_name))
    print(save)
    return best_acc




## 訓練model和evaluate

In [None]:
# Grid search: ReLU activation, the first L2 coefficient and Adam are held
# fixed while initialisation, epoch budget, hidden width and LR schedule vary.
for init_name in Initial_settings:
    for n_epochs in epochs_settings:
        for width in hidden_nodes:
            for schedule in LR_scheduler_settings:
                train(
                    Activation_settings[1],
                    init_name,
                    L2_settings[0],
                    Optimizer_settings[2],
                    n_epochs,
                    width,
                    schedule,
                )

Epoch:0 
 Accuracy of the network on the 14000 test images: 79 %
Epoch:10 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:20 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:30 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:40 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:50 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:60 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:70 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:80 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:90 
 Accuracy of the network on the 14000 test images: 89 %
Finished Training
Best Accuracy of the network on the 14000 test images: 89 %
{'ReLU_random_0.001_Adam_100_5_none': {'model': TwoLayerNet(
  (fc1): Linear(in_features=784, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=10, bias=True)
), 'accuracy': 0.8978571428571429}}
Epoch:0 
 Accuracy of the network on the 14000 

  after removing the cwd from sys.path.


Epoch:0 
 Accuracy of the network on the 14000 test images: 84 %
Epoch:10 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:20 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:30 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:40 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:50 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:60 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:70 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:80 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:90 
 Accuracy of the network on the 14000 test images: 89 %
Finished Training
Best Accuracy of the network on the 14000 test images: 89 %
{'ReLU_random_0.001_Adam_100_5_none': {'model': TwoLayerNet(
  (fc1): Linear(in_features=784, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=10, bias=True)
), 'accuracy': 0.8978571428571429}, 'ReLU_random_0.001_Adam_100_5_cosine': {'model'

  


Epoch:0 
 Accuracy of the network on the 14000 test images: 80 %
Epoch:10 
 Accuracy of the network on the 14000 test images: 88 %
Epoch:20 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:30 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:40 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:50 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:60 
 Accuracy of the network on the 14000 test images: 89 %
Epoch:70 
 Accuracy of the network on the 14000 test images: 90 %
Epoch:80 
 Accuracy of the network on the 14000 test images: 90 %
Epoch:90 
 Accuracy of the network on the 14000 test images: 89 %
Finished Training
Best Accuracy of the network on the 14000 test images: 89 %
{'ReLU_random_0.001_Adam_100_5_none': {'model': TwoLayerNet(
  (fc1): Linear(in_features=784, out_features=5, bias=True)
  (fc2): Linear(in_features=5, out_features=10, bias=True)
), 'accuracy': 0.8978571428571429}, 'ReLU_random_0.001_Adam_100_5_cosine': {'model'

## Ensemble 3 top model

In [None]:
import pickle

# Persist the results dictionary to save.pkl.
with open('save.pkl', 'wb') as pkl_file:
    pickle.dump(save, pkl_file)
    print('dictionary saved successfully to file')

dictionary saved successfully to file


In [None]:
# Rank every trained configuration by its recorded accuracy, best first.
keys = list(save)
accuracy_list = [save[name]['accuracy'] for name in keys]
index_order = sorted(
    range(len(accuracy_list)),
    key=accuracy_list.__getitem__,
    reverse=True,
)

# Show the names of the three best configurations ...
for rank in range(3):
    print(keys[index_order[rank]])

# ... and load their saved models back from disk.
model1, model2, model3 = [
    torch.load('{}.pt'.format(keys[index_order[rank]]))
    for rank in range(3)
]



ReLU_Kaiming_0.001_Adam_200_11_none
ReLU_Xavier_0.001_Adam_200_11_cosine
ReLU_random_0.001_Adam_200_11_cosine


In [None]:
# Soft-voting ensemble: average the softmax probabilities of the three best
# models and score the averaged prediction on the test loader.
# Every model is used exactly once; the previous `if i == 1 / elif i == 1`
# chain never ran model2 and counted model3 twice.
ensemble_models = (model1, model2, model3)
for member in ensemble_models:
    member.eval()

correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        probabilities = [
            nn.Softmax(dim=1)(member(images)) for member in ensemble_models
        ]
        outputs = sum(probabilities) / len(ensemble_models)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the 3 top network on the 14000 test images: %d %%' % (
        100 * correct / total))

Accuracy of the 3 top network on the 14000 test images: 95 %
