In [1]:
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch as t
from early_stopping import EarlyStopping

# ReLU + Sigmoid + Cross Entropy + L2 + early stopping

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.fc1 = nn.Linear(28*28, 300)
        self.fc2 = nn.Linear(300, 10)
        
    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.logsigmoid(self.fc2(x))
        return x

In [3]:
from mnist_loader import load_data_shared, vectorized_result
training_data1, validation_data1, _ = load_data_shared(filename="../mnist.pkl.gz",
                                                                     seed=666,
                                                                     train_size=400,
                                                                     vali_size=100,
                                                                     test_size=0)
training_data2, validation_data2, _ = load_data_shared(filename="../mnist.pkl.gz",
                                                                     seed=666,
                                                                     train_size=400,
                                                                     vali_size=100,
                                                                     test_size=0)
training_data3, validation_data3, _ = load_data_shared(filename="../mnist.pkl.gz",
                                                                     seed=666,
                                                                     train_size=400,
                                                                     vali_size=100,
                                                                     test_size=0)
_, _, test_data = load_data_shared(filename="../mnist.pkl.gz",
                                                                     seed=666,
                                                                     train_size=0,
                                                                     vali_size=0,
                                                                     test_size=100)

In [4]:
def predict(data, net, criterion):
    with t.no_grad():
        #for index in range(test_data[0].shape[0]):
            # get the inputs
        inputs, labels = t.Tensor(data[0]), t.Tensor(data[1])

        # forward + backward + optimize
        outputs = net(inputs)
        _, predicted = t.max(outputs, 1)
        loss = criterion(outputs, labels.long())

        correct = (predicted == labels).sum().item()
        accuracy = correct / data[0].shape[0]
        return loss, accuracy

In [5]:
def fit(net, train_data, vali_data, criterion, optimizer, is_early_stopping):
    loss_scores = []
    validate_loss = []
    
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=3, verbose=False)
    
    for epoch in range(1000):  # loop over the dataset multiple times

        # get the inputs
        inputs, labels = t.Tensor(train_data[0]), t.Tensor(train_data[1])
        vector_labels = t.Tensor([vectorized_result(y) for y in train_data[1]])
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()

        # print statistics
        loss_scores.append(loss.item())
        #train_scores.append(predict(training_data, net))
        one_vali_loss, _ = predict(vali_data, net, criterion)
        validate_loss.append(one_vali_loss)
        
        if is_early_stopping:
            early_stopping(one_vali_loss, net)

            if early_stopping.early_stop:
                print("Early stopping at ", epoch)
                break
                
    print('Finished Training')
    return net

#  子网络

In [6]:
import torch.optim as optim
net1 = Net()
criterion1 = nn.CrossEntropyLoss()
optimizer1 = optim.SGD(net1.parameters(), lr = 1e-1, weight_decay=1e-2)
net1 = fit(net1, training_data1, validation_data1, criterion1, optimizer1, True)
predict(test_data, net1, criterion1)

EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 2 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping 

(tensor(0.4662), 0.84)

In [7]:
net2 = Net()
criterion2 = nn.CrossEntropyLoss()
optimizer2 = optim.SGD(net2.parameters(), lr = 1e-1, weight_decay=1e-2)
net2 = fit(net2, training_data2, validation_data2, criterion2, optimizer2, True)
predict(test_data, net2, criterion2)

EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 2 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 2 out of 3
EarlyStopping 

(tensor(0.4531), 0.85)

In [8]:
net3 = Net()
criterion3 = nn.CrossEntropyLoss()
optimizer3 = optim.SGD(net3.parameters(), lr = 1e-1, weight_decay=1e-2)
net3 = fit(net3, training_data3, validation_data3, criterion3, optimizer3, True)
predict(test_data, net3, criterion3)

EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping counter: 1 out of 3
EarlyStopping 

(tensor(0.4567), 0.84)

# Bagging

In [9]:
import numpy as np
def predict2(data, nets):
    with t.no_grad():
        inputs, labels = t.Tensor(data[0]), t.Tensor(data[1])
        output = t.FloatTensor(data[0].shape[0], 10).zero_()
        for net in nets:
            output += net(inputs)
        _, predicted = t.max(output, 1)

        correct = (predicted == labels).sum().item()
        accuracy = correct / data[0].shape[0]
        return accuracy

In [10]:
predict2(test_data, [net1, net2, net3])

0.84

总的准确率比单个准确率低，这是为什么？  
难道是相关性太大？