In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import math
import numpy as np
import matplotlib.pyplot as plt
from math import log, e
import torch.optim as optim
import pickle
import random

# Seed the three random number generators imported above (Python's `random`,
# NumPy and PyTorch) with the same fixed value so reruns start from the same
# random state.
random.seed(11)
np.random.seed(11)
torch.manual_seed(11)

# Default font size for any matplotlib figure drawn later in the notebook.
plt.rcParams['font.size'] = 13

In [2]:
class ConvNet(nn.Module):
    """Single non-overlapping 1-D convolution followed by sum pooling into two logits.

    The input is cut into `patch_num` patches by a Conv1d whose kernel size and
    stride are both `int(input_dim / patch_num)`. The layer has
    `2 * out_channel` filters: the first `out_channel` filters are summed into
    logit 0 and the remaining ones into logit 1.

    Args:
        input_dim: length of the input signal along its last axis.
        out_channel: number of filters contributing to each of the two logits.
        patch_num: number of patches the input is divided into.
        small: when true, the freshly initialised weight and bias are scaled
            by 0.001.
        activation: one of 'cubic', 'relu', 'celu', 'gelu', 'tanh'. Any other
            value (including the default 'linear') applies no nonlinearity.
    """

    def __init__(self, input_dim, out_channel, patch_num, small=True, activation='linear'):
        super().__init__()
        # Kernel size == stride, so consecutive patches do not overlap.
        patch_len = int(input_dim / patch_num)
        self.conv1 = nn.Conv1d(
            in_channels=1,
            out_channels=out_channel * 2,
            kernel_size=patch_len,
            stride=patch_len,
        )
        if small:
            # Shrink the default initialisation by re-registering scaled
            # copies of both convolution parameters.
            for param_name in ('weight', 'bias'):
                scaled = getattr(self.conv1, param_name) * 0.001
                setattr(self.conv1, param_name, torch.nn.Parameter(scaled))
        self.out_channel = out_channel
        self.activation = activation

    def forward(self, x):
        """Map a (batch, 1, length) tensor to a (batch, 2) tensor of logits."""
        nonlinearities = {
            'cubic': lambda t: t ** 3,
            'relu': F.relu,
            'celu': F.celu,
            'gelu': F.gelu,
            'tanh': torch.tanh,
        }
        features = self.conv1(x)
        activate = nonlinearities.get(self.activation)
        if activate is not None:
            features = activate(features)

        # Sum over patches, then over each half of the filters.
        per_filter = features.sum(dim=2)
        logit_first = per_filter[:, :self.out_channel].sum(dim=1)
        logit_second = per_filter[:, self.out_channel:].sum(dim=1)
        # Stack as (2, batch) and transpose to (batch, 2).
        return torch.stack((logit_first, logit_second)).t()

### Training and Evaluation

In [3]:
def train_single(model, criterion, data, labels, optimizers, epochs):
    """Train `model` with one full-batch update per epoch.

    Each epoch clears the gradients of every optimizer, runs a single forward
    pass over all of `data`, and steps every optimizer. The lowest loss seen
    so far is tracked; once the epoch index exceeds 500, training stops as
    soon as the current loss is more than 0.02 above that minimum. The check
    runs before the backward pass, so the epoch that triggers the stop does
    not update the weights.

    Args:
        model: callable mapping `data` to outputs accepted by `criterion`.
        criterion: callable `(outputs, labels) -> scalar loss tensor`.
        data: inputs passed to the model in one batch.
        labels: targets passed to `criterion`.
        optimizers: iterable of optimizers to zero and step every epoch.
        epochs: maximum number of epochs.

    Returns:
        None. Progress is printed every 100 epochs (1-based epoch number),
        followed by a final 'Finished Training' line.
    """
    best_loss = float('inf')

    for epoch in range(epochs):
        for opt in optimizers:
            opt.zero_grad()

        loss = criterion(model(data), labels)
        loss_value = loss.item()

        if loss_value <= best_loss:
            best_loss = loss_value
        elif epoch > 500 and loss > best_loss + 0.02:
            # Loss drifted away from the best value late in training: stop.
            break

        loss.backward()
        for opt in optimizers:
            opt.step()

        if not epoch % 100:
            print('Epoch %d --- loss: %.3f' %
                    (epoch + 1, loss_value))

    print('Finished Training')

    
def test_single(model, criterion, data, labels):
    correct = 0
    
    with torch.no_grad():
        outputs = model(data) # ,_
        predicted = torch.max(outputs.data, 1).indices
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the %d test images: %.4f %%' % (data.shape[0],
        100 * correct / data.shape[0]))
    
    return 100 * correct / data.shape[0]

### Data Generation Parameters and Loading

In [4]:
# Settings for the synthetic dataset. Only PATCH_NUM is referenced by the
# cells visible in this notebook; the others are kept for reference.
DATA_NUM = 16_000  # presumably samples per split (the logged accuracies report 16000) -- confirm
CLUSTER_NUM = 4    # not referenced in the visible cells
EXPERT_NUM = 8     # not referenced in the visible cells
PATCH_NUM = 4      # passed to ConvNet as `patch_num`
PATCH_LEN = 50     # PATCH_NUM * PATCH_LEN == 200, the input_dim given to ConvNet below

In [5]:
# Load the pre-generated train/test tensors from disk.
# torch.load unpickles the file contents, so only point it at trusted files.
# NOTE(review): the models below are moved to CUDA but these tensors are never
# moved explicitly -- presumably they were saved as CUDA tensors; confirm.
training_data, training_labels = (
    torch.load(path)
    for path in ('synthetic_data_s1/train_data.pt', 'synthetic_data_s1/train_labels.pt')
)

test_data, test_labels = (
    torch.load(path)
    for path in ('synthetic_data_s1/test_data.pt', 'synthetic_data_s1/test_labels.pt')
)

### Experiment

In [6]:
# Baseline: ConvNet with its default 'linear' activation (no nonlinearity),
# trained with Adam at lr=0.001.
num_epochs = 801

# input_dim=200 equals PATCH_NUM * PATCH_LEN -- presumably the flattened
# sample length; confirm against the data files.
linear_single = ConvNet(input_dim=200, out_channel=64, patch_num=PATCH_NUM, small=False)
linear_single = linear_single.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(linear_single.parameters(), lr=0.001, weight_decay=5e-4)
train_single(
    linear_single, criterion, training_data, training_labels,
    optimizers=[optimizer], epochs=num_epochs,
)

# Accuracy on the training split, then on the test split; the value returned
# by the last call is what the cell displays.
test_single(linear_single, criterion, training_data, training_labels)
test_single(linear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 7.830
Epoch 101 --- loss: 0.528
Epoch 201 --- loss: 0.528
Epoch 301 --- loss: 0.528
Epoch 401 --- loss: 0.537
Epoch 501 --- loss: 0.532
Epoch 601 --- loss: 0.535
Epoch 701 --- loss: 0.531
Epoch 801 --- loss: 0.530
Finished Training
Accuracy of the network on the 16000 test images: 66.2313 %
Accuracy of the network on the 16000 test images: 65.8688 %


65.86875

In [7]:
# ConvNet with the 'cubic' activation (x ** 3), trained with Adam at lr=0.01.
num_epochs = 801

# input_dim=200 equals PATCH_NUM * PATCH_LEN -- presumably the flattened
# sample length; confirm against the data files.
nonlinear_single = ConvNet(
    input_dim=200, out_channel=64, patch_num=PATCH_NUM,
    small=False, activation='cubic',
)
nonlinear_single = nonlinear_single.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4)
train_single(
    nonlinear_single, criterion, training_data, training_labels,
    optimizers=[optimizer], epochs=num_epochs,
)

# Accuracy on the training split, then on the test split; the value returned
# by the last call is what the cell displays.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 95.751
Epoch 101 --- loss: 2.389
Epoch 201 --- loss: 0.747
Epoch 301 --- loss: 0.398
Epoch 401 --- loss: 0.317
Epoch 501 --- loss: 0.420
Finished Training
Accuracy of the network on the 16000 test images: 83.7062 %
Accuracy of the network on the 16000 test images: 75.5250 %


75.525

In [8]:
# ConvNet with the 'relu' activation, trained with Adam at lr=0.01.
num_epochs = 801

# input_dim=200 equals PATCH_NUM * PATCH_LEN -- presumably the flattened
# sample length; confirm against the data files.
nonlinear_single = ConvNet(
    input_dim=200, out_channel=64, patch_num=PATCH_NUM,
    small=False, activation='relu',
)
nonlinear_single = nonlinear_single.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4)
train_single(
    nonlinear_single, criterion, training_data, training_labels,
    optimizers=[optimizer], epochs=num_epochs,
)

# Accuracy on the training split, then on the test split; the value returned
# by the last call is what the cell displays.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 8.672
Epoch 101 --- loss: 0.593
Epoch 201 --- loss: 1.270
Epoch 301 --- loss: 0.207
Epoch 401 --- loss: 0.202
Epoch 501 --- loss: 0.452
Finished Training
Accuracy of the network on the 16000 test images: 76.4625 %
Accuracy of the network on the 16000 test images: 72.2812 %


72.28125

In [9]:
# ConvNet with the 'celu' activation, trained with Adam at lr=0.01.
num_epochs = 801

# input_dim=200 equals PATCH_NUM * PATCH_LEN -- presumably the flattened
# sample length; confirm against the data files.
nonlinear_single = ConvNet(
    input_dim=200, out_channel=64, patch_num=PATCH_NUM,
    small=False, activation='celu',
)
nonlinear_single = nonlinear_single.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4)
train_single(
    nonlinear_single, criterion, training_data, training_labels,
    optimizers=[optimizer], epochs=num_epochs,
)

# Accuracy on the training split, then on the test split; the value returned
# by the last call is what the cell displays.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 5.317
Epoch 101 --- loss: 0.556
Epoch 201 --- loss: 0.317
Epoch 301 --- loss: 0.317
Epoch 401 --- loss: 0.277
Epoch 501 --- loss: 0.309
Finished Training
Accuracy of the network on the 16000 test images: 85.6375 %
Accuracy of the network on the 16000 test images: 81.7500 %


81.75

In [10]:
# ConvNet with the 'gelu' activation, trained with Adam at lr=0.01.
num_epochs = 801

# input_dim=200 equals PATCH_NUM * PATCH_LEN -- presumably the flattened
# sample length; confirm against the data files.
nonlinear_single = ConvNet(
    input_dim=200, out_channel=64, patch_num=PATCH_NUM,
    small=False, activation='gelu',
)
nonlinear_single = nonlinear_single.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4)
train_single(
    nonlinear_single, criterion, training_data, training_labels,
    optimizers=[optimizer], epochs=num_epochs,
)

# Accuracy on the training split, then on the test split; the value returned
# by the last call is what the cell displays.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 3.824
Epoch 101 --- loss: 0.782
Epoch 201 --- loss: 0.377
Epoch 301 --- loss: 0.251
Epoch 401 --- loss: 0.600
Epoch 501 --- loss: 0.300
Finished Training
Accuracy of the network on the 16000 test images: 82.7188 %
Accuracy of the network on the 16000 test images: 79.0375 %


79.0375

In [11]:
# ConvNet with the 'tanh' activation, trained with Adam at lr=0.01.
num_epochs = 801

# input_dim=200 equals PATCH_NUM * PATCH_LEN -- presumably the flattened
# sample length; confirm against the data files.
nonlinear_single = ConvNet(
    input_dim=200, out_channel=64, patch_num=PATCH_NUM,
    small=False, activation='tanh',
)
nonlinear_single = nonlinear_single.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(nonlinear_single.parameters(), lr=0.01, weight_decay=5e-4)
train_single(
    nonlinear_single, criterion, training_data, training_labels,
    optimizers=[optimizer], epochs=num_epochs,
)

# Accuracy on the training split, then on the test split; the value returned
# by the last call is what the cell displays.
test_single(nonlinear_single, criterion, training_data, training_labels)
test_single(nonlinear_single, criterion, test_data, test_labels)

Epoch 1 --- loss: 9.262
Epoch 101 --- loss: 0.322
Epoch 201 --- loss: 0.199
Epoch 301 --- loss: 0.142
Epoch 401 --- loss: 0.106
Epoch 501 --- loss: 0.084
Finished Training
Accuracy of the network on the 16000 test images: 95.9938 %
Accuracy of the network on the 16000 test images: 81.7188 %


81.71875