In [3]:
# USE FOR GETTING INPUT TO THE ELM 
import numpy as np
from sklearn.svm import SVC # support vector classifier
import matplotlib.pyplot as plt
from sklearn.multiclass import OneVsRestClassifier
from sklearn.datasets.samples_generator import make_blobs
import torch 
import torch.nn as nn 


class CNN_2D_features(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=2)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=7, stride=2, padding=2)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=2)
        self.conv4 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=2)

        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(512 * 2 * 4, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        #self.fc3 = nn.Linear(4096, 7)

        self.drop = nn.Dropout(p=0.5)

    def forward(self, x):

        # print(x.shape)
        x = F.elu(self.conv1(x))
        # print(x.shape)
        x = self.max_pool(x)
        # print(x.shape)
        x = F.elu(self.conv2(x))
        x = self.avg_pool(x)
        # print(x.shape)
        x = F.elu(self.conv3(x))
        x = self.avg_pool(x)
        # print(x.shape)
        x = F.elu(self.conv4(x))
        # print(x.shape)
        x = x.view(-1, 512 * 2 * 4)
        # print(x.shape)
        x = self.drop(x)
        x = F.elu(self.fc1(x))
        x = F.elu(self.fc2(x))

        #x = F.elu(self.fc3(x))

        return x


class CNN_3D_features(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1a = nn.Conv3d(3, 64, kernel_size=3, stride=1)

        self.conv2a = nn.Conv2d(64, 128, kernel_size=3, stride=1)

        self.conv3a = nn.Conv2d(128, 256, kernel_size=3, stride=1)
        self.conv3b = nn.Conv2d(256, 256, kernel_size=3, stride=1)

        self.conv4a = nn.Conv2d(256, 512, kernel_size=3, stride=1)
        self.conv4b = nn.Conv2d(512, 512, kernel_size=3, stride=1)

        self.conv5a = nn.Conv2d(512, 512, kernel_size=3, stride=1)
        self.conv5b = nn.Conv2d(512, 512, kernel_size=3, stride=1)

        self.max_pool = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=2)
        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(100, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        #self.fc3 = nn.Linear(4096, 7)

        self.drop = nn.Dropout(p=0.5)

    def forward(self, x):

        x = F.elu(self.conv1(x))
        x = self.max_pool(x)

        x = F.elu(self.conv2(x))
        x = self.avg_pool(x)

        x = F.elu(self.conv3(x))
        x = self.avg_pool(x)

        x = F.elu(self.conv4(x))
        x = x.view(-1, 100)

        x = self.drop(x)
        
        x = F.elu(self.fc1(x))
        x = F.elu(self.fc2(x))

        #x = F.elu(self.fc3(x))

        return x

In [4]:
class CNN_2D(CNN_2D_features):

    def __init__(self):
        super().__init__()
        self.fc3 = nn.Linear(4096, 7)

    def forward(self, x):
        x = super().forward(x)
        x = F.elu(self.fc3(x))

        return x


class CNN_3D(CNN_3D_features):

    def __init__(self):
        super().__init__()
        
        self.fc3 = nn.Linear(4096, 7)

    def forward(self, x):
        x = super().forward(x)
        x = F.elu(self.fc3(x))

        return x

In [42]:
# Input to first ELM:
# input size will be the concatenated length of the output of the two CNNs
class ELM_features(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes, device=None):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = num_classes # 2 for first ELM, 5 for second
    self.device = device

    # just two layers; declare parameters: (maybe use xavier_uniform inits?) 
    self.alpha = nn.Linear(input_size, hidden_size)
    
    #self.alpha = nn.init.uniform_(torch.empty(self.input_size, self.hidden_size, device=self.device), a=-1., b=1.)
    #self.beta = nn.init.uniform_(torch.empty(self.hidden_size, self.output_size, device=self.device), a=-1., b=1.)

    #self.bias = torch.zeros(self.hidden_size, device=self.device)
    # bias included in alpha

    #self.activation = torch.nn.functional.gelu # other activations? # they said 'gaussian kernel'

  def forward(self, x):
    h = torch.nn.functional.gelu(self.alpha(x)) # forward used for training
    #return x.mm(beta)
    return h

  #def forwardToHidden(self, x):  # the output of this is what we feed to the next ELM AFTER THIS ONE IS TRAINED
  #  return self.activation(torch.add(x.mm(self.alpha), self.bias))

# Input to first ELM:
# input size will be the concatenated length of the output of the two CNNs
# NOT USING THIS FUNCTION CURRENTLY
class ELM(ELM_features):
  def __init__(self, input_size, hidden_size, num_classes, device=None):
    super().__init__(input_size, hidden_size, num_classes, device)

    self.beta = nn.Linear(hidden_size, num_classes, bias=False)
    #self.beta = nn.init.uniform_(torch.empty(self.hidden_size, self.output_size, device=self.device), a=-1., b=1.)

  def forward(self, x):
    x = super().forward(x)
    #h = self.activation(torch.add(x.mm(self.alpha), self.bias)) # forward used for training
    return self.beta(x)
    #return h

  def forwardToHidden(self, x):  # the output of this is what we feed to the next ELM AFTER THIS ONE IS TRAINED
    return super().forward(x)

In [7]:
def load_features(model, params):
    """ Load params into all layers of 'model'
        that are compatible, then freeze them"""
    model_dict = model.state_dict()

    imp_params = {k: v for k, v in params.items() if k in model_dict}

    # Load layers
    model_dict.update(imp_params)
    model.load_state_dict(imp_params)

    # Freeze layers
    for name, param in model.named_parameters():
        param.requires_grad = False


In [5]:
# create instance of CNN_2D_features, CNN_3D_features
CNN_2D_feat = CNN_2D_features()
CNN_3D_feat = CNN_3D_features()

#################
# DEFINE PARAMS #
# params_2D = 
# params_3D = 
#################

# Load weights from trained CNN_2D, CNN_3D into newly initialized CNN_2D_feat, CNN_3D_feat
load_features(CNN_2D_feat, params_2D)
load_features(CNN_3D_feat, params_3D)

In [8]:
# Run data through CNNs, concatenate outputs (flatten) (1x4096 + 1x4096 = 1x8192), this is the input to the ELM 
# input will be Bx8192 (B = batch size)
## TRAIN FIRST ELM ##
cuda_avail = torch.cuda.is_available()
# Input to first ELM:
# input size will be the concatenated length of the output of the two CNNs
class CNN_ELM():
  def __init__(self, input_size, hidden_size, num_classes, CNN_2D, CNN_3D, ELM, device=None):
    
    self.CNN_2D = CNN_2D
    self.CNN_3D = CNN_3D
    
    self.ELM_feat = ELM_features(input_size, hidden_size, num_classes, device)
    #self.ELM = ELM(input_size, hidden_size, num_classes, device)
    
    self.beta = nn.init.uniform_(torch.empty(self.hidden_size, self.output_size, device=self.device), a=-1., b=1.)

  def forward(self, audio, video):
    x_aud = self.CNN_2D(audio)
    x_vid = self.CNN_3D(video)
    
    x = torch.cat((x_aud, x_vid), dim=1)
    
    x = self.ELM_feat(x)
    
    return x.mm(beta)
    #return h

  def forwardToHidden(self, x):  # the output of this is what we feed to the next ELM AFTER THIS ONE IS TRAINED
    x_aud = self.CNN_2D(audio)
    x_vid = self.CNN_3D(video)
    
    x = torch.cat((x_aud, x_vid), dim=1)
    
    x = self.ELM_feat(x)
    
    return x

In [58]:
class pseudoInverse(object):
    def __init__(self,params,C=1e-2,forgettingfactor=1,L =100):
        self.params=list(params)
        self.is_cuda=False #self.params[len(self.params)-1].is_cuda
        self.C=C
        self.L=L
        self.w=self.params[len(self.params)-1]
        self.w.data.fill_(0) #initialize output weight as zeros
        # For sequential learning in OS-ELM
        self.dimInput=self.params[len(self.params)-1].data.size()[1]
        self.forgettingfactor=forgettingfactor
        self.M=Variable(torch.inverse(self.C*torch.eye(self.dimInput)),requires_grad=False, volatile=True)

        if self.is_cuda:
            self.M=self.M.cuda()

    def initialize(self):
        self.M = Variable(torch.inverse(self.C * torch.eye(self.dimInput)),requires_grad=False, volatile=True)

        if self.is_cuda:
            self.M = self.M.cuda()
        self.w = self.params[len(self.params) - 1]
        self.w.data.fill_(0.0)

    def pseudoBig(self,inputs,oneHotTarget):
        #print(inputs.shape)
        xtx = torch.mm(inputs.t(), inputs) # [ n_features * n_features ]
        dimInput=inputs.size()[1]
        I = Variable(torch.eye(dimInput),requires_grad=False, volatile=True)
        if self.is_cuda:
            I = I.cuda()
        if self.L > 0.0:
            mu = torch.mean(inputs, dim=0, keepdim=True)  # [ 1 * n_features ]
            S = inputs - mu
            S = torch.mm(S.t(), S)
            self.M = Variable(torch.inverse(xtx.data + self.C * (I.data+self.L*S.data)),requires_grad=False, volatile=True)
        else:
            self.M = Variable(torch.inverse(xtx.data + self.C *I.data), requires_grad=False, volatile=True)

        w = torch.mm(self.M, inputs.t())
        w = torch.mm(w, oneHotTarget)
        self.w.data = w.t().data

    def pseudoSmall(self,inputs,oneHotTarget):
        xxt = torch.mm(inputs, inputs.t())
        numSamples=inputs.size()[0]
        I = Variable(torch.eye(numSamples),requires_grad=False, volatile=True)
        if self.is_cuda:
            I = I.cuda()
        self.M = Variable(torch.inverse(xxt.data + self.C * I.data),requires_grad=False, volatile=True)
        w = torch.mm(inputs.t(), self.M)
        w = torch.mm(w, oneHotTarget)

        self.w.data = w.t().data

    def train(self,inputs,targets, oneHotVectorize=True):
        #targets = targets.view(targets.size(0),-1)
        #print("targets:", targets)
        if oneHotVectorize:
            targets=self.oneHotVectorize(targets=targets)
        numSamples=inputs.size()[0]
        dimInput=inputs.size()[1]
        dimTarget=targets.size()[1]

        if numSamples>dimInput:
            self.pseudoBig(inputs,targets)
        else:
            self.pseudoSmall(inputs,targets)



    def train_sequential(self,inputs,targets):
        oneHotTarget = self.oneHotVectorize(targets=targets)
        numSamples = inputs.size()[0]
        dimInput = inputs.size()[1]
        dimTarget = oneHotTarget.size()[1]

        if numSamples<dimInput:
            I1 = Variable(torch.eye(dimInput))
            if self.is_cuda:
                I1 = I1.cuda()
            xtx=torch.mm(inputs.t(),inputs)
            self.M=Variable(torch.inverse(xtx.data+self.C*I1.data),requires_grad=False, volatile=True)

        I = Variable(torch.eye(numSamples))
        if self.is_cuda:
            I = I.cuda()

        self.M = (1/self.forgettingfactor) * self.M - torch.mm((1/self.forgettingfactor) * self.M,
                                             torch.mm(inputs.t(), torch.mm(Variable(torch.inverse(I.data + torch.mm(inputs, torch.mm((1/self.forgettingfactor)* self.M, inputs.t())).data),requires_grad=False, volatile=True),
                                             torch.mm(inputs, (1/self.forgettingfactor)* self.M))))


        self.w.data += torch.mm(self.M,torch.mm(inputs.t(),oneHotTarget - torch.mm(inputs,self.w.t()))).t().data


    def oneHotVectorize(self,targets):
        oneHotTarget=torch.zeros(targets.size()[0],targets.max()+1)

        for i in range(targets.size()[0]):
            oneHotTarget[i][targets[i]]=1

        if self.is_cuda:
            oneHotTarget=oneHotTarget.cuda()
        oneHotTarget=Variable(oneHotTarget,requires_grad=False, volatile=True)

        return oneHotTarget

In [77]:
# TRAIN AND TEST FIRST MODEL 
from torch.autograd import Variable
#############################
# LOAD AUDIO AND VIDEO DATA #
#############################
# random data
import torch
from torch.utils.data import DataLoader

n = 10000
bs = 32
X = torch.rand(n, 8192)
Y = torch.randint(0, 2, (n,))
samples = []
for x, y in zip(X, Y):
    samples.append((x, y.item()))

train_loader = DataLoader(samples, batch_size=bs, shuffle=True)


X_t = torch.rand(n, 8192)
Y_t = torch.randint(0, 2, (n,))
t_samples = []
for x, y in zip(X_t, Y_t):
    t_samples.append((x, y.item()))

test_loader = DataLoader(t_samples, batch_size=bs, shuffle=True)

# for sample in train_loader:
#     print(sample)

# Steps:
'''
1) Train first model
2) Get outputs of first model
3) train second model on hidden layer output of first model
4) pass hidden layer output of second model to SVM
'''
### TRAINING FIRST MODEL ####
num_classes = 2
input_size = 8192
hidden_size = 100
#model = CNN_ELM(input_size, hidden_size, num_classes, CNN_2D_feat, CNN_3D_feat, ELM, device=None)

# JUST TEST ELM #
model = ELM(input_size, hidden_size, num_classes, device=None)
cuda_avail = False
if cuda_avail:
  model.cuda()
#print(list(model.parameters()))
optimizer = pseudoInverse(params=model.parameters(), C=0.001, L=0)
def train_ELM(model, optimizer, train_loader):
  model.train()
  correct=0
  for batch_idx, (data, target) in enumerate(train_loader):
    if cuda_avail:
      data, target = data.cuda(), taget.cuda() # .device() 
    data, target = Variable(data, requires_grad = False, volatile=True),  Variable(target, requires_grad = False, volatile=True)
    hiddenOut = model.forwardToHidden(data)
    optimizer.train(inputs=hiddenOut, targets=target)
    output=model.forward(data)
    pred=output.data.max(1)[1]
    correct+=pred.eq(target.data).cpu().sum()
  print('Accuracy:{}/{} ({:.2f}%)\n'.format(
        correct, len(train_loader.dataset),
        100. * correct / len(train_loader.dataset)))

def test(model,test_loader):
    model.train()
    correct = 0
    for data, target in test_loader:
        if cuda_avail:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data,requires_grad=False, volatile=True), Variable(target,requires_grad=False, volatile=True)
        output = model.forward(data)
        pred=output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()
    print('\nTest set accuracy: {}/{} ({:.2f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

train_ELM(model,optimizer,train_loader)
test(model, test_loader)



  if sys.path[0] == '':


Accuracy:10000/10000 (100.00%)






Test set accuracy: 4984/10000 (49.84%)



In [61]:
'''def train_ELM(model, optimizer, train_loader):
  model.train()
  correct=0
  for batch_idx, (data, target) in enumerate(train_loader):
    if cuda_avail:
      data, target = data.cuda(), taget.cuda() # .device() 
    data, target = Variable(data, requires_grad = False, volatile=True),  Variable(target, requires_grad = False, volatile=True)
    hiddenOut = model.forwardToHidden(data)
    optimizer.train(inputs=hiddenOut, targets=target)
    output=model.forward(data)
    pred=output.data.max(1)[1]
    correct+=pred.eq(target.data).cpu().sum()
  print('Accuracy:{}/{} ({:.2f}%)\n'.format(
        correct, len(train_loader.dataset),
        100. * correct / len(train_loader.dataset)))
  
def test(model,test_loader):
    model.train()
    correct = 0
    for data, target in test_loader:
        if cuda_avail:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data,requires_grad=False, volatile=True), Variable(target,requires_grad=False, volatile=True)
        output = model.forward(data)
        pred=output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()
    print('\nTest set accuracy: {}/{} ({:.2f}%)\n'.format(
        correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
train(args,model,optimizer,train_loader)
test(args,model,test_loader)'''

"def train_ELM(model, optimizer, train_loader):\n  model.train()\n  correct=0\n  for batch_idx, (data, target) in enumerate(train_loader):\n    if cuda_avail:\n      data, target = data.cuda(), taget.cuda() # .device() \n    data, target = Variable(data, requires_grad = False, volatile=True),  Variable(target, requires_grad = False, volatile=True)\n    hiddenOut = model.forwardToHidden(data)\n    optimizer.train(inputs=hiddenOut, targets=target)\n    output=model.forward(data)\n    pred=output.data.max(1)[1]\n    correct+=pred.eq(target.data).cpu().sum()\n  print('Accuracy:{}/{} ({:.2f}%)\n'.format(\n        correct, len(train_loader.dataset),\n        100. * correct / len(train_loader.dataset)))\n  \ndef test(model,test_loader):\n    model.train()\n    correct = 0\n    for data, target in test_loader:\n        if cuda_avail:\n            data, target = data.cuda(), target.cuda()\n        data, target = Variable(data,requires_grad=False, volatile=True), Variable(target,requires_grad=Fa