# 膨胀网络

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler
from PIL import Image
import torchvision.datasets as dset
import torchvision.transforms as T
import numpy as np
import torch.nn.functional as F
from tensorboardX import SummaryWriter

# dataset

In [2]:
# Identity pipeline: keep each digit at its native size and only convert it to a tensor.
original = T.Compose([T.ToTensor()])

# Zoom-out pipeline: resize to 14 px, then pad 7 px on every side, so a 28x28
# digit comes back as a 28x28 image with a half-size digit in the middle.
zoom_out = T.Compose([T.Resize(14), T.Pad(7), T.ToTensor()])

def get_mnist_dataloader(transforms, batch_size, shuffle=False, if_print=False,
                         root='../MNIST', download=False):
    """Build MNIST train/test ``DataLoader``s that share one transform pipeline.

    Args:
        transforms: torchvision transform applied to every image of both splits.
        batch_size: Batch size used by both loaders.
        shuffle: Forwarded to both loaders; ``False`` keeps dataset order.
        if_print: When true, print the tensor sizes of both splits.
        root: Directory that holds the MNIST files.
        download: Forwarded to ``dset.MNIST``; ``False`` means the files must
            already exist under ``root``.

    Returns:
        ``(loader_train, loader_test)``.
    """
    def _split_tensor(dataset, current, legacy):
        # torchvision renamed the per-split attributes (``train_data``,
        # ``test_labels``, ...) to ``data`` / ``targets``; the old names are
        # deprecated. Prefer the new name, fall back for old installs.
        if hasattr(dataset, current):
            return getattr(dataset, current)
        return getattr(dataset, legacy)

    set_train = dset.MNIST(root, train=True, transform=transforms, download=download)
    loader_train = DataLoader(set_train, batch_size=batch_size, shuffle=shuffle)
    set_test = dset.MNIST(root, train=False, transform=transforms, download=download)
    loader_test = DataLoader(set_test, batch_size=batch_size, shuffle=shuffle)
    if if_print:
        print("训练集大小：", _split_tensor(set_train, 'data', 'train_data').size())
        print("训练集标签：", _split_tensor(set_train, 'targets', 'train_labels').size())
        print("测试集大小：", _split_tensor(set_test, 'data', 'test_data').size())
        print("测试集标签：", _split_tensor(set_test, 'targets', 'test_labels').size())
    return loader_train, loader_test

# show imgs on tensorboard

In [3]:
# Build train/test loaders (batch size 64) for both views of MNIST; only the
# first call prints the dataset sizes.
tr_original, te_original = get_mnist_dataloader(original, 64, if_print=True)
tr_zoom_out, te_zoom_out = get_mnist_dataloader(zoom_out, 64)

# First batch yielded by the original-size test loader; element 0 is the image
# tensor whose shape is printed, element 1 is presumably the label tensor.
test_imgs = next(iter(te_original))
print(test_imgs[0].shape)
# Log the image batch to TensorBoard under the tag 'Image' at global step 1.
# NOTE(review): a whole 4-D batch is handed to add_image — confirm the installed
# tensorboardX version accepts batches here.
writer = SummaryWriter()
writer.add_image('Image', test_imgs[0], 1)
writer.close()

训练集大小： torch.Size([60000, 28, 28])
训练集标签： torch.Size([60000])
测试集大小： torch.Size([10000, 28, 28])
测试集标签： torch.Size([10000])
torch.Size([64, 1, 28, 28])


# train

In [4]:
def reset(m):
    """Re-initialise ``m`` in place when it offers ``reset_parameters``.

    Objects without that attribute are left untouched, which makes the function
    suitable as the callback of ``Module.apply``.
    """
    if not hasattr(m, 'reset_parameters'):
        return
    m.reset_parameters()
    
def train(model, train_data, test_data, num_epochs = 1, print_every = 200, eval_sets = None):
    """Re-initialise ``model`` and train it with Adam and cross-entropy loss.

    Any weights already in ``model`` are discarded first (``reset`` is applied
    to every submodule). Accuracy on ``test_data`` is printed before each epoch,
    and a final accuracy report is printed once training ends.

    Args:
        model: Network to train; batches are moved to the device of its parameters.
        train_data: Iterable of ``(inputs, labels)`` training batches.
        test_data: Iterable of ``(inputs, labels)`` batches evaluated before every epoch.
        num_epochs: Number of passes over ``train_data``.
        print_every: Print the current loss every this many batches.
        eval_sets: Optional iterable of ``(title, loader)`` pairs for the final
            report. ``None`` keeps the historical behaviour of reporting on the
            notebook globals ``te_original`` and ``te_zoom_out``.
    """
    model.apply(reset)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # Follow the model's device rather than assuming CUDA.
    device = next(model.parameters()).device
    loss_fn = nn.CrossEntropyLoss().to(device)
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        check_accuracy(model, test_data)
        # check_accuracy leaves the model in eval mode; switch back before training.
        model.train()
        for t, (x, y) in enumerate(train_data):
            x = x.to(device)
            y = y.to(device).long()
            scores = model(x)
            loss = loss_fn(scores, y)
            if (t + 1) % print_every == 0:
                # .item() extracts the scalar; indexing a 0-dim loss with [0]
                # raises on current PyTorch releases.
                print('t = %d, loss = %.4f' % (t + 1, loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    if eval_sets is None:
        # Default report: the two notebook-level test loaders.
        eval_sets = (("\n原始样本精确度", te_original), ("缩小样本精确度", te_zoom_out))
    for title, loader in eval_sets:
        print(title)
        check_accuracy(model, loader)
            
def check_accuracy(model, test_data):
    """Print and return the classification accuracy of ``model`` on ``test_data``.

    Args:
        model: Module mapping an input batch to per-class scores; the predicted
            class is the arg-max over dimension 1.
        test_data: Iterable of ``(inputs, labels)`` batches.

    Returns:
        Fraction of correctly classified samples as a float in ``[0, 1]``;
        ``0.0`` when ``test_data`` yields no samples.

    Side effects:
        Puts ``model`` into eval mode and leaves it there.
    """
    num_correct = 0
    num_samples = 0
    model.eval()
    # Evaluate on whatever device the model lives on; a parameter-free model
    # falls back to CUDA when available (the previous hard-coded target).
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # no_grad replaces the removed ``volatile=True`` flag: no autograd graph is
    # recorded during evaluation.
    with torch.no_grad():
        for x, y in test_data:
            scores = model(x.to(device))
            _, preds = scores.cpu().max(1)
            # int(...) keeps the running total a plain Python number.
            num_correct += int((preds == y.cpu()).sum())
            num_samples += preds.size(0)
    # Guard against an empty loader instead of dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    return acc

### normal net

# naive_net

In [5]:
class NaiveNet(nn.Module):
    """Plain CNN: three conv -> ReLU -> 2x2 max-pool stages and a linear head.

    The stages use a 5x5 kernel (1 -> 64 channels) followed by two 3x3 kernels
    (64 -> 64 channels). The flattened feature map feeds ``Linear(64, 10)``, so
    the spatial size must have collapsed to 1x1 (true for 1x28x28 inputs).
    """

    def __init__(self):
        super().__init__()
        stages = []
        # (input channels, kernel size) of each convolution, in network order.
        for in_channels, kernel in ((1, 5), (64, 3), (64, 3)):
            stages += [
                nn.Conv2d(in_channels, 64, kernel),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.feature = nn.Sequential(*stages)
        self.classifier = nn.Sequential(nn.Linear(64, 10))

    def forward(self, x):
        """Return class scores of shape ``(batch, 10)`` for the image batch ``x``."""
        feats = self.feature(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)

# Instantiate the network and move it to the GPU.
naive_net = NaiveNet().cuda()

# Smoke test: push a random batch of five 1x28x28 inputs through the network
# and print the shape of the resulting scores.
# NOTE(review): the name `input` shadows the Python builtin.
input = Variable(torch.randn(5, 1, 28, 28)).cuda()
out = naive_net(input)
print(out.shape)

torch.Size([5, 10])


# run

In [23]:
# Train naive_net for 5 epochs on the zoomed-out training loader; the zoomed-out
# test loader is the one evaluated at the start of every epoch.
train(naive_net, 
      tr_zoom_out,
      te_zoom_out, 
      num_epochs=5)

Starting epoch 1 / 5
Got 974 / 10000 correct (9.74)
t = 200, loss = 0.4944
t = 400, loss = 0.2003
t = 600, loss = 0.1968
t = 800, loss = 0.2129
Starting epoch 2 / 5
Got 9597 / 10000 correct (95.97)
t = 200, loss = 0.0931
t = 400, loss = 0.1760
t = 600, loss = 0.1170
t = 800, loss = 0.1408
Starting epoch 3 / 5
Got 9717 / 10000 correct (97.17)
t = 200, loss = 0.0721
t = 400, loss = 0.1253
t = 600, loss = 0.0923
t = 800, loss = 0.1152
Starting epoch 4 / 5
Got 9813 / 10000 correct (98.13)
t = 200, loss = 0.0519
t = 400, loss = 0.1149
t = 600, loss = 0.0900
t = 800, loss = 0.0669
Starting epoch 5 / 5
Got 9851 / 10000 correct (98.51)
t = 200, loss = 0.0193
t = 400, loss = 0.1125
t = 600, loss = 0.0771
t = 800, loss = 0.0363

 原始样本精确度
Got 4820 / 10000 correct (48.20)
缩小样本精确度
Got 9869 / 10000 correct (98.69)


In [26]:
# Persist only the parameters (state_dict) of naive_net to the relative path "naive_net".
torch.save(naive_net.state_dict(),"naive_net")

In [6]:
# Restore naive_net's parameters from the "naive_net" checkpoint file
# (presumably so that a fresh session can skip the training cell — confirm).
naive_net.load_state_dict(torch.load("naive_net"))

In [7]:
# Shape check: a plain 5x5 convolution without padding shrinks each spatial
# side by 4, so 28x28 inputs become 24x24.
x = Variable(torch.randn(5, 1, 28, 28)).cuda()
m = nn.Conv2d(1, 64, 5).cuda()
# Feed the tensor created in this cell rather than a global left over from another cell.
y = m(x)
y.shape

torch.Size([5, 64, 24, 24])

In [8]:
# Shape check: with dilation=2 the 5x5 kernel spans 9x9 pixels; padding=2 on
# each side compensates, so the output is 24x24 just like the plain 5x5 conv
# (28 + 2*2 - 9 + 1 = 24).
x = Variable(torch.randn(5, 1, 28, 28)).cuda()
m = nn.Conv2d(1,64,5,dilation=2,padding=2).cuda()
# Feed the tensor created in this cell rather than a global left over from another cell.
y = m(x)
y.shape

torch.Size([5, 64, 24, 24])

### DilateNet

In [9]:
class DilateNet(nn.Module):
    """Dilated twin of the plain CNN: same layers, every convolution uses dilation 2.

    Padding (2 for the 5x5 stage, 1 for the 3x3 stages) is set per stage. Layer
    names and parameter shapes do not depend on dilation or padding, and the
    flattened feature map feeds ``Linear(64, 10)``.
    """

    def __init__(self):
        super().__init__()
        stages = []
        # (input channels, kernel size, padding) of each dilated convolution.
        for in_channels, kernel, pad in ((1, 5, 2), (64, 3, 1), (64, 3, 1)):
            stages += [
                nn.Conv2d(in_channels, 64, kernel, dilation=2, padding=pad),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.feature = nn.Sequential(*stages)
        self.classifier = nn.Sequential(nn.Linear(64, 10))

    def forward(self, x):
        """Return class scores of shape ``(batch, 10)`` for the image batch ``x``."""
        feats = self.feature(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)
    
# Instantiate the dilated network and move it to the GPU.
dilate_net = DilateNet().cuda()

# Smoke test: push a random batch of five 1x28x28 inputs through the network
# and print the shape of the resulting scores.
# NOTE(review): the name `input` shadows the Python builtin.
input = Variable(torch.randn(5, 1, 28, 28)).cuda()
out = dilate_net(input)
print(out.shape)

torch.Size([5, 10])


In [10]:
# Reload the checkpoint, then report naive_net's accuracy on both test loaders.
naive_net.load_state_dict(torch.load("naive_net"))
print("\n原始样本精确度")  # "accuracy on original-size samples"
check_accuracy(naive_net, te_original)
print("缩小样本精确度")  # "accuracy on zoomed-out samples"
check_accuracy(naive_net, te_zoom_out)


原始样本精确度
Got 4820 / 10000 correct (48.20)
缩小样本精确度
Got 9869 / 10000 correct (98.69)


In [11]:
# Load the naive_net checkpoint into dilate_net. Both classes declare the same
# layers under the same names and dilation/padding add no parameters, so the
# state_dict fits; the trained kernels are then applied with dilation 2.
dilate_net.load_state_dict(torch.load("naive_net"))
print("\n原始样本精确度")  # "accuracy on original-size samples"
check_accuracy(dilate_net, te_original)
print("缩小样本精确度")  # "accuracy on zoomed-out samples"
check_accuracy(dilate_net, te_zoom_out)


原始样本精确度
Got 9151 / 10000 correct (91.51)
缩小样本精确度
Got 3764 / 10000 correct (37.64)


### 卷积盒子

In [197]:
class ConvBox1(nn.Module):
    """Conv box for the first stage: one kernel applied densely and with dilation 2.

    ``conv1`` is a plain 5x5 convolution (1 -> 64 channels); ``conv2`` uses the
    same kernel size with ``dilation=2, padding=2``, which yields the same output
    size. Both layers are given the very same ``w`` and ``b`` tensors (no copy is
    made), and ``forward`` merges the two responses element-wise.

    Args:
        w: Kernel tensor assigned to both convolutions.
        b: Bias tensor assigned to both convolutions.
        combine: How the two responses are merged: ``'max'`` (element-wise
            maximum, the default) or ``'sum'`` (element-wise addition).

    Raises:
        ValueError: If ``combine`` is neither ``'max'`` nor ``'sum'``.

    NOTE(review): the merge step used to be commented out, so ``forward``
    returned an unassigned name. Both candidates are now selectable; confirm
    which one the recorded accuracies were produced with.
    """

    def __init__(self, w, b, combine='max'):
        super(ConvBox1, self).__init__()
        if combine not in ('max', 'sum'):
            raise ValueError("combine must be 'max' or 'sum', got %r" % (combine,))
        self.combine = combine
        self.conv1 = nn.Conv2d(1, 64, 5)
        self.conv2 = nn.Conv2d(1, 64, 5, dilation=2, padding=2)
        self.conv1.weight.data = w
        self.conv1.bias.data = b
        self.conv2.weight.data = w
        self.conv2.bias.data = b

    def forward(self, x):
        """Apply both convolutions to ``x`` and merge their outputs."""
        x1 = self.conv1(x)
        x2 = self.conv2(x)
        if self.combine == 'sum':
            return x1 + x2
        return torch.max(x1, x2)
    
class ConvBox2(ConvBox1):
    """Conv box for the 64 -> 64 stages: a 3x3 kernel, dense and with dilation 2.

    The dilated branch uses ``padding=1``. Both branches receive the same ``w``
    and ``b`` tensors; the forward pass is inherited from ``ConvBox1``.
    """

    def __init__(self, w, b):
        # The parent constructor runs first and builds its own 1 -> 64 layers;
        # they are replaced by the 64 -> 64 layers created below.
        super(ConvBox2, self).__init__(w, b)
        dense = nn.Conv2d(64, 64, 3)
        dilated = nn.Conv2d(64, 64, 3, dilation=2, padding=1)
        for layer in (dense, dilated):
            layer.weight.data = w
            layer.bias.data = b
        self.conv1 = dense
        self.conv2 = dilated

In [198]:
# Take the kernel and bias of naive_net.feature[3], the second convolution (64 -> 64, 3x3).
w = naive_net.feature[3].weight.data
b = naive_net.feature[3].bias.data

# Random 64-channel 12x12 batch used only to check the output shape.
# NOTE(review): the name `input` shadows the Python builtin.
input = Variable(torch.randn(5, 64, 12, 12)).cuda()

# Build a ConvBox2 from those tensors and display the shape of its output.
s = ConvBox2(w,b).cuda()
s(input).shape

torch.Size([5, 64, 10, 10])

In [199]:
class DilateNet2(nn.Module):
    """CNN whose convolutions are conv boxes fed with ``naive_net``'s weights.

    The layout mirrors the plain network (three box -> ReLU -> 2x2 max-pool
    stages and a ``Linear(64, 10)`` head). All kernels, biases and the head's
    parameters are read from the notebook-global ``naive_net`` at construction
    time; the tensors are assigned directly, without copying.
    """

    def __init__(self):
        super().__init__()
        donor = naive_net.feature
        stages = []
        # (box class, index of the donor convolution inside naive_net.feature)
        for box_cls, idx in ((ConvBox1, 0), (ConvBox2, 3), (ConvBox2, 6)):
            stages += [
                box_cls(donor[idx].weight.data, donor[idx].bias.data),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.feature = nn.Sequential(*stages)

        head = naive_net.classifier[0]
        self.fc1 = nn.Linear(64, 10)
        self.fc1.weight.data = head.weight.data
        self.fc1.bias.data = head.bias.data

    def forward(self, x):
        """Return class scores of shape ``(batch, 10)`` for the image batch ``x``."""
        feats = self.feature(x)
        flat = feats.view(feats.size(0), -1)
        return self.fc1(flat)


# Instantiate the conv-box network (it reads naive_net's weights) and move it to the GPU.
dilate_net2 = DilateNet2().cuda()

# Smoke test: push a random batch of five 1x28x28 inputs through the network
# and print the shape of the resulting scores.
# NOTE(review): the name `input` shadows the Python builtin.
input = Variable(torch.randn(5, 1, 28, 28)).cuda()
out = dilate_net2(input)
print(out.shape)

torch.Size([5, 10])


In [200]:
# Report dilate_net2's accuracy on both test loaders.
print("\n原始样本精确度")  # "accuracy on original-size samples"
check_accuracy(dilate_net2, te_original)
print("缩小样本精确度")  # "accuracy on zoomed-out samples"
check_accuracy(dilate_net2, te_zoom_out)


原始样本精确度
Got 9151 / 10000 correct (91.51)
缩小样本精确度
Got 4031 / 10000 correct (40.31)


In [None]:
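# Conclusion: mixing the dense and the dilated version of each kernel improves
# accuracy on digits whose size differs from the training data (original-size
# test set: 48.20% -> 91.51%), but it lowers accuracy on digits of the training
# size (zoomed-out test set: 98.69% -> 40.31%). The idea might work better on 1-D data.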