In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import math
import numpy as np
import matplotlib.pyplot as plt
from math import log, e
import torch.optim as optim
import pickle
import random

# Seed the three random number generators imported above (PyTorch, NumPy and
# Python's `random`) with the same fixed value so that random draws start from
# a known state on every run.
torch.manual_seed(11)
np.random.seed(11)
random.seed(11)

# Set the default matplotlib font size to 13 for all figures.
plt.rcParams.update({'font.size': 13})

In [2]:
class ConvNet(nn.Module):
    """Single-layer 1-D convolutional classifier producing two logits.

    A ``Conv1d`` whose kernel size equals its stride cuts the input into
    non-overlapping patches and applies ``2 * out_channel`` filters to each
    patch.  After an element-wise activation the responses are summed over the
    patch axis; the first ``out_channel`` filters are then summed into logit 0
    and the remaining ``out_channel`` filters into logit 1.

    Args:
        input_dim: length of the input signal.
        out_channel: number of filters feeding each of the two logits.
        patch_num: number of patches; the patch length (kernel size and
            stride) is ``int(input_dim / patch_num)``.  When ``input_dim`` is a
            multiple of ``patch_num`` an input of length ``input_dim`` yields
            exactly ``patch_num`` patches.
        small: if True, scale the initial conv weight and bias by 0.001.
        activation: one of ``'linear'`` (identity), ``'cubic'``, ``'relu'``,
            ``'celu'``, ``'gelu'`` or ``'tanh'``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names, or if
            the resulting patch length is smaller than 1.
    """

    # Names accepted for ``activation``; anything else is rejected instead of
    # silently behaving like 'linear'.
    _ACTIVATIONS = ('linear', 'cubic', 'relu', 'celu', 'gelu', 'tanh')

    def __init__(self, input_dim, out_channel, patch_num, small=True, activation='linear'):
        super().__init__()
        if activation not in self._ACTIVATIONS:
            raise ValueError(
                "unknown activation %r; expected one of %s"
                % (activation, ', '.join(self._ACTIVATIONS)))

        patch_len = int(input_dim / patch_num)
        if patch_len < 1:
            raise ValueError(
                "input_dim=%r with patch_num=%r gives a patch length of %d; "
                "it must be at least 1" % (input_dim, patch_num, patch_len))

        # kernel_size == stride -> each patch is seen by the filters exactly once.
        self.conv1 = nn.Conv1d(1, out_channel * 2, patch_len, patch_len)

        # small initialization: shrink the default init in place so the
        # parameters stay the same leaf tensors registered on the layer.
        if small:
            with torch.no_grad():
                self.conv1.weight.mul_(0.001)
                self.conv1.bias.mul_(0.001)

        self.out_channel = out_channel
        self.activation = activation

    def forward(self, x):
        """Compute the two class logits.

        Args:
            x: tensor of shape ``(batch, 1, length)`` (``conv1`` has a single
                input channel).

        Returns:
            Tensor of shape ``(batch, 2)``.

        Raises:
            ValueError: if ``self.activation`` was changed to an unsupported
                name after construction.
        """
        x = self.conv1(x)  # (batch, 2 * out_channel, patches)

        if self.activation == 'cubic':
            x = x**3
        elif self.activation == 'relu':
            x = F.relu(x)
        elif self.activation == 'celu':
            x = F.celu(x)
        elif self.activation == 'gelu':
            x = F.gelu(x)
        elif self.activation == 'tanh':
            x = torch.tanh(x)
        elif self.activation != 'linear':
            raise ValueError("unknown activation %r" % (self.activation,))

        x = torch.sum(x, 2)  # sum over patches -> (batch, 2 * out_channel)

        # First half of the filters votes for class 0, second half for class 1.
        output = torch.stack(
            [torch.sum(x[:, :self.out_channel], 1),
             torch.sum(x[:, self.out_channel:], 1)],
            dim=1)
        return output

### Training

In [3]:
def train_single(model, criterion, data, labels, optimizers, epochs,
                 min_epochs=500, tolerance=0.02, log_every=100):
    """Train ``model`` with full-batch updates and a simple early-stopping rule.

    Every epoch performs one forward/backward pass over the whole of ``data``
    and one ``step()`` of each optimizer.  Training stops early once the epoch
    index exceeds ``min_epochs`` and the current loss is more than
    ``tolerance`` above the smallest loss seen so far; the epoch that triggers
    the stop does not update the parameters.

    Args:
        model: callable mapping ``data`` to the outputs passed to ``criterion``.
        criterion: callable ``(outputs, labels) -> scalar loss tensor``.
        data: input batch passed to ``model`` unchanged on every epoch.
        labels: targets passed to ``criterion``.
        optimizers: iterable of optimizers; each is zeroed and stepped once
            per epoch.
        epochs: maximum number of epochs.
        min_epochs: early stopping is only considered when the zero-based
            epoch index is greater than this value (default 500).
        tolerance: allowed increase over the best loss before stopping
            (default 0.02).
        log_every: print the loss every ``log_every`` epochs; a falsy value
            disables the per-epoch log (default 100).

    Returns:
        None.  Progress is reported through ``print``.
    """
    min_loss = float('inf')

    for epoch in range(epochs):
        for optimizer in optimizers:
            optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)

        # Read the scalar once so the bookkeeping, the early-stop test and the
        # log line all use the same Python float.
        loss_value = loss.item()

        if loss_value <= min_loss:
            min_loss = loss_value
        elif epoch > min_epochs and loss_value > min_loss + tolerance:
            # Loss has drifted away from its best value: stop before stepping.
            break

        loss.backward()

        for optimizer in optimizers:
            optimizer.step()

        if log_every and epoch % log_every == 0:
            print('Epoch %d --- loss: %.3f' %
                    (epoch + 1, loss_value))
    print('Finished Training')

    
def test_single(model, criterion, data, labels):
    correct = 0
    
    with torch.no_grad():
        outputs = model(data) # ,_
        predicted = torch.max(outputs.data, 1).indices
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the %d test images: %.4f %%' % (data.shape[0],
        100 * correct / data.shape[0]))
    
    return 100 * correct / data.shape[0]

### Data Generation

In [4]:
# Experiment-wide constants.
# NOTE(review): the dataset itself is loaded from disk in the next cell, so
# these presumably mirror the settings used when it was generated — confirm.
DATA_NUM = 16000  # matches the 16000 samples reported by the evaluation output below
CLUSTER_NUM = 4  # not referenced by any cell visible here
EXPERT_NUM = 8  # not referenced by any cell visible here
PATCH_NUM = 4  # passed to ConvNet as `patch_num`
PATCH_LEN = 50  # PATCH_NUM * PATCH_LEN = 200, the input_dim given to ConvNet below

In [5]:
# Load the train/test inputs and labels from the local `synthetic_data_verify/`
# directory (paths are relative to the notebook's working directory).
# NOTE(review): torch.load unpickles file contents — only load files from a
# trusted source.
# NOTE(review): the models below are moved to CUDA while these objects are used
# exactly as loaded; presumably they were saved as CUDA tensors — confirm.
training_data = torch.load('synthetic_data_verify/train_data.pt')
training_labels = torch.load('synthetic_data_verify/train_labels.pt')

test_data = torch.load('synthetic_data_verify/test_data.pt')
test_labels = torch.load('synthetic_data_verify/test_labels.pt')

### Experiment

In [15]:
# Baseline: ConvNet with the default 'linear' activation (no non-linearity),
# 64 filters per class, default-scale initialization (small=False), trained on
# the GPU with Adam. Note the learning rate here is 0.001, whereas the
# activation cells below use 0.01.
# NOTE(review): this cell's execution count (In [15]) is out of order with its
# neighbours, so the printed output may not come from a fresh top-to-bottom run.
num_epochs = 801

linear_single = ConvNet(200, 64, PATCH_NUM, small=False).cuda() 
criterion = torch.nn.CrossEntropyLoss()
optimizer =  torch.optim.Adam(linear_single.parameters(), lr=0.001, weight_decay=5e-4) 
train_single(linear_single, criterion, training_data, training_labels, [optimizer], num_epochs)

# Accuracy on the training split, then on the test split (both lines print
# "test images" because the message is fixed inside test_single).
test_single(linear_single, criterion, training_data, training_labels)
test_single(linear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 10.429
Epoch 101 --- loss: 0.382
Epoch 201 --- loss: 0.382
Epoch 301 --- loss: 0.381
Epoch 401 --- loss: 0.383
Epoch 501 --- loss: 0.382
Epoch 601 --- loss: 0.383
Epoch 701 --- loss: 0.382
Epoch 801 --- loss: 0.385
Finished Training
Accuracy of the network on the 16000 test images: 75.1688 %
Accuracy of the network on the 16000 test images: 74.5438 %


74.54375

In [7]:
# ConvNet with the cubic (x**3) activation; Adam with lr=0.01, weight_decay=5e-4.
# NOTE(review): `nonlinear_single` is reassigned by every activation cell, so
# only the most recently run model remains available in the kernel.
num_epochs = 801

nonlinear_single = ConvNet(200, 64, PATCH_NUM, small=False, activation='cubic').cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer =  torch.optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4) 
train_single(nonlinear_single, criterion, training_data, training_labels, 
                                                           [optimizer], num_epochs)

# Accuracy on the training split, then on the test split.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 44.965
Epoch 101 --- loss: 1.177
Epoch 201 --- loss: 0.522
Epoch 301 --- loss: 0.343
Epoch 401 --- loss: 0.244
Epoch 501 --- loss: 0.217
Finished Training
Accuracy of the network on the 16000 test images: 90.1250 %
Accuracy of the network on the 16000 test images: 72.6875 %


72.6875

In [13]:
# ConvNet with the ReLU activation; Adam with lr=0.01, weight_decay=5e-4.
# NOTE(review): `nonlinear_single` is reassigned by every activation cell, so
# only the most recently run model remains available in the kernel.
num_epochs = 801

nonlinear_single = ConvNet(200, 64, PATCH_NUM, small=False, activation='relu').cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer =  torch.optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4) 
train_single(nonlinear_single, criterion, training_data, training_labels, 
                                                           [optimizer], num_epochs)

# Accuracy on the training split, then on the test split.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 7.271
Epoch 101 --- loss: 0.481
Epoch 201 --- loss: 0.359
Epoch 301 --- loss: 0.320
Epoch 401 --- loss: 0.294
Epoch 501 --- loss: 0.274
Finished Training
Accuracy of the network on the 16000 test images: 85.4562 %
Accuracy of the network on the 16000 test images: 74.4938 %


74.49375

In [9]:
# ConvNet with the CELU activation; Adam with lr=0.01, weight_decay=5e-4.
# NOTE(review): `nonlinear_single` is reassigned by every activation cell, so
# only the most recently run model remains available in the kernel.
num_epochs = 801

nonlinear_single = ConvNet(200, 64, PATCH_NUM, small=False, activation='celu').cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer =  torch.optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4) 
train_single(nonlinear_single, criterion, training_data, training_labels, 
                                                           [optimizer], num_epochs)

# Accuracy on the training split, then on the test split.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 5.692
Epoch 101 --- loss: 0.536
Epoch 201 --- loss: 0.603
Epoch 301 --- loss: 0.673
Epoch 401 --- loss: 0.610
Epoch 501 --- loss: 0.716
Finished Training
Accuracy of the network on the 16000 test images: 81.0000 %
Accuracy of the network on the 16000 test images: 76.9125 %


76.9125

In [10]:
# ConvNet with the GELU activation; Adam with lr=0.01, weight_decay=5e-4.
# NOTE(review): `nonlinear_single` is reassigned by every activation cell, so
# only the most recently run model remains available in the kernel.
num_epochs = 801

nonlinear_single = ConvNet(200, 64, PATCH_NUM, small=False, activation='gelu').cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer =  torch.optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4) 
train_single(nonlinear_single, criterion, training_data, training_labels, 
                                                           [optimizer], num_epochs)

# Accuracy on the training split, then on the test split.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 6.210
Epoch 101 --- loss: 0.472
Epoch 201 --- loss: 0.419
Epoch 301 --- loss: 0.393
Epoch 401 --- loss: 0.369
Epoch 501 --- loss: 0.326
Finished Training
Accuracy of the network on the 16000 test images: 80.2000 %
Accuracy of the network on the 16000 test images: 74.0062 %


74.00625

In [11]:
# ConvNet with the tanh activation; Adam with lr=0.01, weight_decay=5e-4.
# NOTE(review): `nonlinear_single` is reassigned by every activation cell, so
# only the most recently run model remains available in the kernel.
num_epochs = 801

nonlinear_single = ConvNet(200, 64, PATCH_NUM, small=False, activation='tanh').cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer =  torch.optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4) 
train_single(nonlinear_single, criterion, training_data, training_labels, 
                                                           [optimizer], num_epochs)

# Accuracy on the training split, then on the test split.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 5.375
Epoch 101 --- loss: 0.348
Epoch 201 --- loss: 0.254
Epoch 301 --- loss: 0.192
Epoch 401 --- loss: 0.161
Epoch 501 --- loss: 0.134
Finished Training
Accuracy of the network on the 16000 test images: 93.5125 %
Accuracy of the network on the 16000 test images: 74.7562 %


74.75625