Aim here is to train a supervised CNN to recognise duplications and deletions
1) Generate some event data to classify
2) Define the network
3) Do some training 
4) Test the prediction


In [1]:
import numpy as np
import numpy.random as rng
import torch
from torch import nn
import torch.nn.functional as F

rng.seed(1001)
torch.manual_seed(1001)

<torch._C.Generator at 0x7f645c0b9050>

In [2]:
# 1) Generate some data

def data_generator(ndat,nL,sd):
    data = np.zeros((ndat,1,nL)) 
    labs = np.zeros((ndat,)) 
    for i in range(0,ndat):
        # choose dup or del
        evnt = rng.choice([-1,1])
        #labs[i,] = int( (evnt+1)/2 )
        if evnt == -1:
            labs[i,] = 0
        else:
            labs[i,] = 1
    
        # choose start
        start = rng.randint(0,nL-1)
        #print(start,"\t",type)
        #start = 4
    
        # create event and add noise
        data[i,0, start:(start+2)] = evnt
        data[i,0,] += rng.normal(0,sd,(nL))
    
        #print(data[i,])

    return [data, labs]
        
ndat = 10000
nL = 10
sd = 0.5
data, labs = data_generator(ndat, nL, sd)
x_train = torch.from_numpy(data).float()
y_train = torch.from_numpy(labs).long()
#print(labs)
#print(data[0:5,])
print(data.shape)

ntest = 1000
tdata, tlabs = data_generator(ntest, nL, sd)
x_test = torch.from_numpy(tdata).float()


(10000, 1, 10)


In [3]:
# 2) Define a model. We will add some convolution
class Model1(nn.Module):
    def __init__(self, in_dim, n_class):
        super(Model1, self).__init__()
        
        self.conv1 = nn.Conv1d(1, 1, 3)
        self.linear = nn.Linear(6, n_class)
        self.pool = nn.MaxPool1d(3, stride=1)
        
    def forward(self, x):
        print(x.shape)
        out = F.relu(self.conv1(x))
        print(out.shape)
        out = self.pool( out )
        print(out.shape)
        out = self.linear(out)
        
        return out[:,0,:]
    
class Model2(nn.Module):
    def __init__(self, in_dim, n_class):
        super(Model2, self).__init__()
        self.conv1 = nn.Conv1d(1, 2, 3)
        self.conv2 = nn.Conv1d(2, 4, 3)
        self.pool = nn.MaxPool1d(3, stride=1)
        
        self.layer1 = nn.Linear(16, 16)
        self.layer2 = nn.Linear(16, 8)
        self.final = nn.Linear(8, n_class)
           
        self.debug = 0
    
        
    def forward(self, x):
        if self.debug: print(x.shape)
        x = F.relu(self.conv1(x))
        if self.debug: print(x.shape)
        x = self.pool( x )
        if self.debug: print(x.shape)
        x = F.relu(self.conv2(x))
        if self.debug: print(x.shape)
        
        # flatten into vector
        x = x.view(-1, self.num_flat_features(x))
        if self.debug: print(x.shape)
        
        x = F.relu(self.layer1(x))
        if self.debug: print(x.shape)
        x = F.relu(self.layer2(x))
        if self.debug: print(x.shape)
        x = self.final(x)
        
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
    
model = Model2(nL, 2)
#print(model)
print(model(x_train[0:10,]))
#print(model(x_train[0:10,]).shape)

tensor([[0.1645, 0.3557],
        [0.1634, 0.3559],
        [0.1617, 0.3567],
        [0.1604, 0.3579],
        [0.1635, 0.3562],
        [0.1617, 0.3572],
        [0.1627, 0.3568],
        [0.1626, 0.3563],
        [0.1655, 0.3548],
        [0.1566, 0.3597]], grad_fn=<AddmmBackward>)


In [4]:
# 3) Do some training
batch_size = 100
learning_rate = 1e-3
num_epochs = int(ndat/batch_size)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

i = 0
for epoch in range(num_epochs):
    #inds = rng.choice(range(0,ndat),size=batch_size, replace=False)
    inds = range(i,i+batch_size)
    inputs = x_train[inds,]
    target = y_train[inds,]
    
    i += batch_size
    
    # forward
    out = model(inputs)
    #print(target.shape)
    #print(out.shape)
    loss = criterion(out, target)
    
    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch[{epoch+1}/{num_epochs}], loss: {loss.item():.6f}')

print('Finished Training')

Epoch[10/100], loss: 0.705171
Epoch[20/100], loss: 0.701657
Epoch[30/100], loss: 0.699704
Epoch[40/100], loss: 0.716571
Epoch[50/100], loss: 0.701381
Epoch[60/100], loss: 0.699444
Epoch[70/100], loss: 0.701022
Epoch[80/100], loss: 0.701324
Epoch[90/100], loss: 0.701276
Epoch[100/100], loss: 0.691762
Finished Training


In [5]:
correct = 0
total = 0
with torch.no_grad():
    outputs = model(x_test)
    _, predicted = torch.max(outputs.data, 1)
    #print( abs(predicted-labs) )
    incorrect = (abs(predicted-tlabs)).sum()

print('Accuracy of the network on the test images: %d %%' % (100 * (ntest-incorrect) / float(ntest)) )

Accuracy of the network on the test images: 48 %


In [6]:
# examine the parameters of the fitted model
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)

conv1.weight tensor([[[-0.1207, -0.3933,  0.4114]],

        [[-0.3890, -0.3564, -0.2458]]])
conv1.bias tensor([-0.3308,  0.1718])
conv2.weight tensor([[[ 0.3405, -0.2432, -0.3101],
         [-0.0230, -0.1485,  0.1079]],

        [[-0.2858, -0.0297, -0.3580],
         [ 0.2312,  0.0792, -0.3772]],

        [[ 0.0109, -0.2679,  0.0240],
         [-0.0918,  0.3859, -0.2942]],

        [[ 0.1890,  0.1190,  0.3511],
         [-0.1699, -0.2201,  0.2542]]])
conv2.bias tensor([-0.1205, -0.4043,  0.0718, -0.1497])
layer1.weight tensor([[-0.0086, -0.0028, -0.1496,  0.0049,  0.0522,  0.2327, -0.1648,  0.1904,
         -0.0986, -0.2262, -0.1955,  0.2381,  0.0747, -0.2116,  0.2221, -0.0166],
        [-0.0950,  0.2329, -0.0409,  0.0801,  0.2103,  0.0507,  0.0474,  0.0977,
         -0.2380,  0.2072,  0.0870, -0.1018,  0.0195,  0.0654,  0.2215,  0.1142],
        [ 0.1180,  0.1390, -0.1215, -0.1335,  0.1848, -0.0397, -0.0632, -0.1476,
          0.2065, -0.1633,  0.2112,  0.0899,  0.0642,  0.0933, -0.1