In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from joblib import dump, load
import matplotlib.pyplot as plt
import csv
import torch
import torch.nn as nn
import torch.optim as optim
from data_loader import CustomSignalData, CustomSignalData1
from torch.autograd import Variable
from encoder import Encoder as E
from helpers import set_cmd_cb, rms_formuula, get_data, get_all_data, get_shift_data, get_operators, plot_cfs_mat, roll_data



DEFINF ESSENTIAL CLASSES, MODULES AND CPU

In [2]:
DEVICE = torch.device("cpu")
def getFeatureMatrix(rawDataMatrix, windowLength, windowOverlap):
    rms = lambda sig: np.sqrt(np.mean(sig**2))
    nChannels,nSamples = rawDataMatrix.shape    
    I = int(np.floor(nSamples/(windowLength-windowOverlap)))
    featMatrix = np.zeros([nChannels, I])
    for channel in range(nChannels):
        for i in range (I):
            wdwStrtIdx=i*(windowLength-windowOverlap)
            sigWin = rawDataMatrix[channel][wdwStrtIdx:(wdwStrtIdx+windowLength-1)] 
            featMatrix[channel, i] = rms(sigWin)
    featMatrixData = np.array(featMatrix)
    return featMatrixData

class FFNN(torch.nn.Module):
    def __init__(self, inputSize, outputSize):
        super(FFNN, self).__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Linear(inputSize, 9, bias=False),
            torch.nn.Sigmoid()
        )
        self.classifer = torch.nn.Sequential(
            torch.nn.Linear(9, outputSize, bias=False),
            # torch.nn.Softmax(dim=1)
        )

    def forward(self, x, encoder=None):
        if not encoder:
            encoder = self.encoder
        z = encoder(x)
        class_z = self.classifer(z)

        return class_z

class Operator(nn.Module):
    def __init__(self, in_features, n_rotations):
        super(Operator, self).__init__()
        """
        Args:
          in_features (int): Number of input features which should be equal to xsize.
          out_features (out): Number of output features which should be equal to ysize.
        """
        self.in_features = in_features
        self.core = torch.nn.Parameter(torch.zeros(3*self.in_features**2)- 0*torch.diag(torch.rand(3*self.in_features**2)/10))
        self.core.requires_grad = True
        self.n_rotations = n_rotations
        
    def rotate_batch(self, x, d, out_features):
      rotated = torch.empty(x.shape[0], 3*out_features*out_features, device=DEVICE)
      phies = [torch.linalg.matrix_power(self.core,i).to(DEVICE) for i in range (0,self.n_rotations+0)]
      for i in range (x.shape[0]):
        rotated[i] = phies[(d[i]+0)%4].matmul(x[i]) 
      return rotated

    def forward(self, x, d):
        """
        Args:
          x of shape (batch_size, 3, xsize, xsize): Inputs.
        
        Returns:
          y of shape (batch_size, 3*xsize^2): Outputs.
        """
        z = self.rotate_batch(x, d, self.in_features)
        return z
def get_tensor(arr):
    return torch.tensor(arr, device=DEVICE,dtype=torch.float )

def rotate_batch(x, d, out_features):
    M = torch.diag(torch.ones(8)).roll(-1,1)
    used_bases = [torch.linalg.matrix_power(M,i).to(DEVICE) for i in range (8)]
    rotated = torch.empty(x.shape, device=DEVICE)
    for i in range (x.shape[0]):
        rotated[i] = used_bases[d[i]].matmul(x[i]) 
    return rotated

def clf_acc(model, loader, masks = None, encoder = None):
    model.eval()
    correct = 0
    iter = 0
    with torch.no_grad():
        for inputs, labels,_,_ in loader:
            inputs = inputs.to(DEVICE)
            if masks is not None:
                inputs = inputs * masks[:inputs.size()[0]]
            labels = labels.to(DEVICE)
            labels = labels.flatten()
            if encoder:
                pred = model(inputs, encoder)
            else:
                pred = model(inputs)
            correct += (1-torch.abs(torch.sign(torch.argmax(pred,dim = 1)- labels))).mean().item()
            iter += 1
    return correct/iter

def compute_accuracy(a, b, loader):
    a.eval()
    b.eval()
    
    correct = 0
    iter = 0
    
    with torch.no_grad():
        for inputs1, inputs2, shift1, shift2, labels, _ in loader:
            inputs1 = inputs1.to(DEVICE)
            inputs2 = inputs2.to(DEVICE)
            shift1 = -shift1.int().flatten().to(DEVICE)
            shift2 = -shift2.int().flatten().to(DEVICE)
            labels = labels.flatten().to(DEVICE)
            # zero the parameter gradients
            optimizer.zero_grad()
            
            # forward + backward + optimize
            y1 = a(inputs1)
            y_tr_est1 = rotate_batch(y1,shift1,6)
            y_tr1 = b(y_tr_est1)

            y2 = a(inputs2)
            y_tr_est2 = rotate_batch(y2,shift1,6)
            y_tr2 = b(y_tr_est2)

            correct += (1-torch.abs(torch.sign(torch.argmax(y_tr1,dim = 1)- labels))).mean().item() + \
                    (1-torch.abs(torch.sign(torch.argmax(y_tr2,dim = 1)- labels))).mean().item()
            iter += 1
    return correct * 0.5 / iter

LOAD DATA AND SEPERATE THE TRAIN AND TEST DATASET

In [6]:
subject = '26'

Fs = 1000
windowLength = int(np.floor(0.1*Fs))  #160ms
windowOverlap =  int(np.floor(50/100 * windowLength))

X_train = [np.zeros([0, 8])] * 3
y_train=[np.zeros([0])] * 3
X_test = [np.zeros([0, 8])] * 3
y_test=[np.zeros([0])] * 3
# X_train = np.zeros([0,8])
# y_train = np.zeros([0])
for k_fold in range(0,3):
    for shift in range(0,9): 
        for files in sorted(os.listdir(f'Subject_{subject}/Shift_{shift}/')):
            _, class_,_, rep_ = files.split('_')
            if int(class_) in [1,2,3,4,5,6,7,8,9]:
                df = pd.read_csv(f'Subject_{subject}/Shift_{shift}/{files}',skiprows=0,sep=' ',header=None)
                data_arr = np.stack([np.array(df.T[i::8]).T.flatten().astype('float32') for i in range (8)])
                data_arr -= 121
                data_arr /= 255.0
                feaData = getFeatureMatrix(data_arr, windowLength, windowOverlap)
                
                if not class_.startswith('9'):
                    rms_feature = feaData.sum(0)
                    baseline = 2*rms_feature[-50:].mean()
                    start_ = np.argmax(rms_feature[::1]>baseline)
                    end_  = -np.argmax(rms_feature[::-1]>baseline)
                    feaData = feaData.T[start_:end_]
                else:
                    feaData = feaData.T

                if rep_.startswith(str(k_fold+1)):
                    X_test[k_fold] = np.concatenate([X_test[k_fold],feaData])
                    y_test[k_fold] = np.concatenate([y_test[k_fold],np.ones_like(feaData)[:,0]*int(class_)-1])
                else:
                    X_train[k_fold] = np.concatenate([X_train[k_fold],feaData])
                    y_train[k_fold] = np.concatenate([y_train[k_fold],np.ones_like(feaData)[:,0]*int(class_)-1])


In [7]:
# 3-fold validation for 3 repetitions: 
# fold 1: train: rep 2, rep 3; test: rep 1
# fold 2: train: rep 1, rep 3; test: rep 2
# fold 3: train: rep 1, rep 1; test: rep 3
for i in range(3):
    print(X_train[i])
    print(X_train[i].shape)
    print(y_train[i])
    print(y_train[i].shape)
print("#############")
for i in range(3):
    print(X_test[i])
    print(X_test[i].shape)
    print(y_test[i])
    print(y_test[i].shape)

[[0.00800002 0.0031776  0.00327391 ... 0.02204677 0.00692822 0.00452824]
 [0.00784314 0.0031034  0.00315306 ... 0.02686469 0.00762216 0.00687193]
 [0.00729944 0.00267314 0.00292296 ... 0.02537185 0.00787279 0.00674646]
 ...
 [0.00804841 0.00336747 0.00332102 ... 0.00312833 0.00305294 0.00341329]
 [0.00771333 0.00315306 0.00320195 ... 0.0031034  0.00302739 0.0034585 ]
 [0.00760345 0.00357989 0.00311173 ... 0.00311173 0.00282385 0.00353989]]
(42004, 8)
[0. 0. 0. ... 8. 8. 8.]
(42004,)
[[0.00899624 0.00365503 0.00305294 ... 0.02290042 0.00879543 0.00738407]
 [0.01201943 0.00442413 0.00363372 ... 0.03947626 0.01278351 0.01073604]
 [0.01242612 0.00417111 0.00367622 ... 0.03907481 0.01230047 0.01338891]
 ...
 [0.00804841 0.00336747 0.00332102 ... 0.00312833 0.00305294 0.00341329]
 [0.00771333 0.00315306 0.00320195 ... 0.0031034  0.00302739 0.0034585 ]
 [0.00760345 0.00357989 0.00311173 ... 0.00311173 0.00282385 0.00353989]]
(41977, 8)
[0. 0. 0. ... 8. 8. 8.]
(41977,)
[[0.00899624 0.00365503 

SIMULATED SHIFT THE DATA

In [9]:
trainloader_list = []
alltrainloader_list = []
triplettrainloader_list = []

for k_fold in range(3):
    all_X_train, all_y_train, all_shift_train = get_all_data(X_train[k_fold], y_train[k_fold])
    all_X1_train, all_X2_train, all_shift_1_train, all_shift_2_train, all_y_shift_train = get_shift_data(all_X_train, all_shift_train, all_y_train)
    
    traindataset = CustomSignalData(get_tensor(X_train[k_fold]), get_tensor(y_train[k_fold]))
    trainloader = torch.utils.data.DataLoader(traindataset, batch_size = 1, shuffle=True)
    trainloader_list.append(trainloader)

    all_train_dataset = CustomSignalData(get_tensor(all_X_train), get_tensor(all_y_train))
    alltrainloader = torch.utils.data.DataLoader(all_train_dataset, batch_size = 102, shuffle=True)
    alltrainloader_list.append(alltrainloader)

    triplet_train_dataset = CustomSignalData1(get_tensor(all_X1_train), get_tensor(all_X2_train), get_tensor(all_shift_1_train), get_tensor(all_shift_2_train), get_tensor(all_y_shift_train))
    triplettrainloader = torch.utils.data.DataLoader(triplet_train_dataset, batch_size = 102, shuffle=True)
    triplettrainloader_list.append(triplettrainloader)



TRAIN LOGISTIC REGRESSION LEARNING MODEL

In [10]:
for k_fold in range(3):
    reg = LogisticRegression(penalty='l2', C=100).fit(X_train[k_fold], y_train[k_fold])
    dump(reg, f'LogisticRegression_fold_{k_fold+1}.joblib')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

TEST ACCURACY OF LOGISTIC REGRESSION LEARNIGN MODEL

In [11]:
for k_fold in range(3):
    logRegres  = load(f'LogisticRegression_fold_{k_fold+1}.joblib')
    accuracies_LosReg_shift = []
    for i in range (-4, 4):
        X_test_shift = roll_data(X_test[k_fold], i)
        accuracies_LosReg_shift.append(logRegres.score(X_test_shift,y_test[k_fold]))
    print(f'Accuracy of {k_fold+1}:')
    print(accuracies_LosReg_shift)

Accuracy of 1:
[0.3940665839931317, 0.37579891252504055, 0.387150624821139, 0.3919202518363064, 0.41190498902985784, 0.35824668510922447, 0.38295335304779166, 0.35672040446437087]
Accuracy of 2:
[0.4008479016815129, 0.3988948697184776, 0.37393416853236794, 0.37107607297670653, 0.4197589672748059, 0.3798885342733292, 0.3887486304958796, 0.3550231029390749]
Accuracy of 3:
[0.4164009328447004, 0.3976012564846985, 0.3930798153348246, 0.3895102565322926, 0.4212079386987768, 0.3919375565180144, 0.3742325448574556, 0.38479843891295035]


TRAIN FEED FORAWRD NEURAL NETWORK LEARNING MODEL

In [12]:
inputDim = 8     # takes variable 'x' 
outputDim = 9      # takes variable 'y'
learningRate = 0.005

for k_fold in range(3):
    model = FFNN(inputDim, outputDim)
    model = model.to(DEVICE)

    crit = torch.nn.CrossEntropyLoss()
    acc_record = []
    params_clf = list(model.parameters())# + list(encoder.parameters())
    optim = torch.optim.Adam(params_clf, lr=learningRate)

    epochs = 200
    #encoder = encoder.to(device)
    for epoch in range(epochs):
        model.train()

        # Converting inputs and labels to Variable
        for inputs, labels, _, _ in alltrainloader_list[k_fold]:
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)
            labels = labels.long()
            labels = labels.flatten()
            outputs = model(inputs, None)
            optim.zero_grad()
            # get loss for the predicted output
            losss = crit(outputs, labels) #+ 0.001 * model.l1_regula()
            # get gradients w.r.t to parameters
            losss.backward()
            # update parameters
            optim.step()

    torch.save(model.state_dict(), f"modelwoOperator_fold_{k_fold+1}.pt")



TypeError: 'DataLoader' object is not subscriptable