In [1]:
from __future__ import print_function
import argparse
import random
import time
import os
import logging
from timeit import default_timer as timer
from sklearn.preprocessing import StandardScaler
import joblib
import numpy as np

## Torch
import torch
import torch.nn as nn
from torch.utils import data
import torch.nn.functional as F
import torch.optim as optim

from src.logger_v1 import setup_logs

run_name = "cdc" + time.strftime("-%Y-%m-%d_%H_%M_%S")
print(run_name)

cdc-2021-03-31_11_54_13


In [2]:
def rolling_window(a, window):
    shape =  (a.shape[0] - window + 1, window) + a.shape[1:]
    strides = (a.strides[0],) + a.strides
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides, writeable = False)

In [3]:
class ScheduledOptim(object):
    """A simple wrapper class for learning rate scheduling"""

    def __init__(self, optimizer, n_warmup_steps):
        self.optimizer = optimizer
        self.d_model = 128 
        self.n_warmup_steps = n_warmup_steps
        self.n_current_steps = 0 
        self.delta = 1

    def state_dict(self):
        self.optimizer.state_dict()

    def step(self):
        """Step by the inner optimizer"""
        self.optimizer.step()

    def zero_grad(self):
        """Zero out the gradients by the inner optimizer"""
        self.optimizer.zero_grad()

    def increase_delta(self):
        self.delta *= 2

    def update_learning_rate(self):
        """Learning rate scheduling per step"""

        self.n_current_steps += self.delta
        new_lr = np.power(self.d_model, -0.5) * np.min([
            np.power(self.n_current_steps, -0.5),
            np.power(self.n_warmup_steps, -1.5) * self.n_current_steps])

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = new_lr
        return new_lr

In [4]:
class TETrainDataset(data.Dataset):
    def __init__(self, fault=list(range(1,21)), window=20):
        """ fault: [1,2,3,4,5,6]
            window: 20
        """
        self.window = window 
        temp = torch.from_numpy(rolling_window(np.loadtxt('data/d00.dat').T, window))
        self.sample = [temp]
        self.label = [0 for _ in temp]

        for label in fault:
            if label < 10:
                num = '0' + str(label)
            else:
                num = str(label)
            temp = rolling_window(np.loadtxt('data/d' + num + '.dat'), window)
            self.sample.append(temp)
            self.label.extend([label for _ in temp])
        
        self.sample = np.concatenate(self.sample,0)
        
        if os.path.exists('./scalar'):
            std = joblib.load('./scalar')
            sh = self.sample.shape
            self.sample = std.transform(self.sample.reshape(-1,sh[-1])).reshape(sh)
        else:
            std = StandardScaler()
            sh = self.sample.shape
            std.fit(self.sample.reshape(-1,sh[-1]))
            self.sample = std.transform(self.sample.reshape(-1,sh[-1])).reshape(sh)
            joblib.dump(std, './scalar')
        
        self.sample = torch.from_numpy(self.sample).float()
        self.label = torch.tensor(self.label).float()
        assert self.sample.shape[0] == self.label.shape[0]
        
    def __len__(self):
        return len(self.sample)

    def __getitem__(self, index):
        return self.sample[index], self.label[index]

class TETestDataset(data.Dataset):
    def __init__(self, fault=list(range(1,21)), window=20):
        """ fault: [1,2,3,4,5,6]
            window: 20
        """
        self.window = window 
        temp = torch.from_numpy(rolling_window(np.loadtxt('data/d00_te.dat'), window))
        self.sample = [temp]
        self.label = [0 for _ in temp]

        for label in fault:
            if label < 10:
                num = '0' + str(label)
            else:
                num = str(label)
            temp = rolling_window(np.loadtxt('data/d' + num + '_te.dat'), window)
            self.sample.append(temp)
            if window <= 160:
                self.label.extend([0 for _ in range(160-window+1)])
                self.label.extend([label for _ in range(800)])
            else:
                self.label.extend([label for _ in range(960-window+1)])
        
        self.sample = np.concatenate(self.sample,0)
        
        if os.path.exists('./scalar'):
            std = joblib.load('./scalar')
            sh = self.sample.shape
            self.sample = std.transform(self.sample.reshape(-1,sh[-1])).reshape(sh)
        else:
            std = StandardScaler()
            sh = self.sample.shape
            std.fit(self.sample.reshape(-1,sh[-1]))
            self.sample = std.transform(self.sample.reshape(-1,sh[-1])).reshape(sh)
            joblib.dump(std, './scalar')
        
        self.sample = torch.from_numpy(self.sample).float()
        self.label = torch.tensor(self.label).float()
        assert self.sample.shape[0] == self.label.shape[0]
        
    def __len__(self):
        return len(self.sample)

    def __getitem__(self, index):
        return self.sample[index], self.label[index]

In [17]:
class CPC_GRU(nn.Module):
    def __init__(self, input_size, low_size, high_size, num_layers, timestep, batch_size, seq_len):

        super(CPC_GRU, self).__init__()

        self.batch_size = batch_size
        self.seq_len = seq_len
        self.timestep = timestep
        self.input_size = input_size
        self.low_size = low_size
        self.high_size = high_size
        self.num_layers = num_layers
        self.encoder = nn.Sequential( # downsampling factor = 160
            nn.Linear(self.input_size,self.low_size, bias=False),
            nn.BatchNorm1d(self.seq_len+self.timestep),
            nn.ReLU(inplace=True),
            nn.Linear(self.low_size, self.low_size, bias=False),
            nn.BatchNorm1d(self.seq_len+self.timestep),
            nn.ReLU(inplace=True),
        )
        self.gru = nn.GRU(self.low_size, self.high_size, num_layers=self.num_layers, bidirectional=False, batch_first=True)
        self.Wk  = nn.ModuleList([nn.Linear(self.high_size, self.low_size) for i in range(timestep)])
        self.softmax  = nn.Softmax()
        self.lsoftmax = nn.LogSoftmax()

        def _weights_init(m):
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # initialize gru
        for layer_p in self.gru._all_weights:
            for p in layer_p:
                if 'weight' in p:
                    nn.init.kaiming_normal_(self.gru.__getattr__(p), mode='fan_out', nonlinearity='relu')

        self.apply(_weights_init)

    def init_hidden(self, batch_size, use_gpu=True):
        if use_gpu: return torch.zeros(self.num_layers, batch_size, self.high_size).cuda()
        else: return torch.zeros(self.num_layers, batch_size, self.high_size)

    def forward(self, x, hidden):
        batch = x.size()[0]
        z = self.encoder(x)

        encode_samples = z[:,-1*self.timestep:,:].transpose(0,1)
       
        nce = 0 # average over timestep and batch
        forward_seq = z[:,:self.seq_len,:] 
        output, hidden = self.gru(forward_seq, hidden) 
        c_t = output[:,-1,:].view(batch, -1) 
        pred = torch.empty((self.timestep,batch,32)).float().to(x.device) 
        for i in np.arange(0, self.timestep):
            pred[i] = self.Wk[i](c_t) 
        for i in np.arange(0, self.timestep):
            total = torch.mm(encode_samples[i], torch.transpose(pred[i],0,1)) # e.g. size 8*8
            correct = torch.sum(torch.eq(torch.argmax(self.softmax(total), dim=0).cpu(), torch.arange(0, batch))) # correct is a tensor
            nce += torch.sum(torch.diag(self.lsoftmax(total))) # nce is a tensor
        nce /= -1.*batch*self.timestep
        accuracy = 1.*correct.item()/batch

        return accuracy, nce

    def predict(self, x, hidden):
        batch = x.size()[0]
       
        z = self.encoder(x)
        
        # z = z.transpose(1,2)
        output, _ = self.gru(z, hidden) 

        return output[:,-1,:].view(batch, -1) 
        

In [18]:
def train(args, model, device, train_loader, optimizer, epoch, batch_size):
    model.train()
    for batch_idx, data in enumerate(train_loader):
        seq, label = data
        seq, label = seq.to(device), label.to(device)
        optimizer.zero_grad()
        hidden = model.init_hidden(len(seq), use_gpu=True)
        acc, loss = model(seq, hidden)

        loss.backward()
        optimizer.step()
        lr = optimizer.update_learning_rate()
        if batch_idx % args.log_interval == 0:
            logger.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tlr:{:.5f}\tAccuracy: {:.4f}\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), lr, acc, loss.item()))

def validation(args, model, device, data_loader, batch_size):
    logger.info("Starting Validation")
    model.eval()
    total_loss = 0
    total_acc  = 0 

    with torch.no_grad():
        for data in data_loader:
            seq, label = data
            seq, label = seq.to(device), label.to(device)
            hidden = model.init_hidden(len(seq), use_gpu=True)
            acc, loss = model(seq, hidden)
            total_loss += len(seq) * loss 
            total_acc  += len(seq) * acc

    total_loss /= len(data_loader.dataset) # average loss
    total_acc  /= len(data_loader.dataset) # average acc

    logger.info('===> Validation set: Average loss: {:.4f}\tAccuracy: {:.4f}\n'.format(
                total_loss, total_acc))

    return total_acc, total_loss

def predict(args, model, device, data_loader, batch_size):
    logger.info("Starting predict")
    model.eval()
    features, labels = [], []

    with torch.no_grad():
        for data in data_loader:
            seq, label = data
            seq, label = seq.to(device), label.to(device)
            hidden = model.init_hidden(len(seq), use_gpu=True)
            feature = model.predict(seq, hidden)
            features.append(feature)
            labels.append(label)

    return torch.cat(features, axis=0), torch.cat(labels, axis=0)

def snapshot(dir_path, run_name, state):
    snapshot_file = os.path.join(dir_path,
                    run_name + '-model_best.pth')
    
    torch.save(state, snapshot_file)
    logger.info("Snapshot saved to {}\n".format(snapshot_file))

In [7]:
# def main():
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--logging-dir', default='snapshot/cdc/',
                    help='model save directory')
parser.add_argument('--epochs', type=int, default=60, metavar='N',
                    help='number of epochs to train')
parser.add_argument('--n-warmup-steps', type=int, default=50)
parser.add_argument('--batch-size', type=int, default=10, 
                    help='batch size')
parser.add_argument('--window', type=int, default=20, 
                    help='window length to sample from each utterance')
parser.add_argument('--timestep', type=int, default=10) 
parser.add_argument('--masked-frames', type=int, default=20)
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
#     args = parser.parse_args()
args, unknown = parser.parse_known_args()
use_cuda = not args.no_cuda and torch.cuda.is_available()
print('use_cuda is', use_cuda)
global_timer = timer() # global timer
logger = setup_logs(args.logging_dir, run_name) # setup logs

use_cuda is True


In [8]:
args.timestep = 5
args.window = 20
args.batch_size = 10

In [19]:
device = torch.device("cuda" if use_cuda else "cpu")
model = CPC_GRU(input_size=52, low_size=32, high_size=16, num_layers=2, timestep=args.timestep, batch_size=args.batch_size, seq_len=args.window).to(device)

params = {'num_workers': 0,
            'pin_memory': False} if use_cuda else {}

logger.info('===> loading train, validation and eval dataset')
training_set   = TETrainDataset(window=args.window+args.timestep)
#training_set   = ReverseRawDataset(args.train_raw, args.train_list, args.audio_window)
#training_set   = RawXXreverseDataset(args.train_raw, args.train_list, args.audio_window)
test_set = TETestDataset(window=args.window+args.timestep)
#validation_set = ReverseRawDataset(args.validation_raw, args.validation_list, args.audio_window)
#validation_set = RawXXreverseDataset(args.validation_raw, args.validation_list, args.audio_window)
train_loader = data.DataLoader(training_set, batch_size=args.batch_size, shuffle=True, **params) # set shuffle to True
test_loader = data.DataLoader(test_set, batch_size=args.batch_size, shuffle=False, **params) # set shuffle to False
# nanxin optimizer  
optimizer = ScheduledOptim(
    optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()), 
        betas=(0.9, 0.98), eps=1e-09, weight_decay=1e-4, amsgrad=True),
    args.n_warmup_steps)

model_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
logger.info('### Model summary below###\n {}\n'.format(str(model)))
logger.info('===> Model total parameter: {}\n'.format(model_params))

===> loading train, validation and eval dataset
### Model summary below###
 CPC_GRU(
  (encoder): Sequential(
    (0): Linear(in_features=52, out_features=32, bias=False)
    (1): BatchNorm1d(25, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Linear(in_features=32, out_features=32, bias=False)
    (4): BatchNorm1d(25, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
  )
  (gru): GRU(32, 16, num_layers=2, batch_first=True)
  (Wk): ModuleList(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): Linear(in_features=16, out_features=32, bias=True)
    (2): Linear(in_features=16, out_features=32, bias=True)
    (3): Linear(in_features=16, out_features=32, bias=True)
    (4): Linear(in_features=16, out_features=32, bias=True)
  )
  (softmax): Softmax(dim=None)
  (lsoftmax): LogSoftmax(dim=None)
)

===> Model total parameter: 9540



In [16]:
## Start training
best_acc = 0
best_loss = np.inf
best_epoch = -1 
features, labels = None, None
for epoch in range(1, args.epochs + 1):
    epoch_timer = timer()

    # Train and validate
    #trainXXreverse(args, model, device, train_loader, optimizer, epoch, args.batch_size)
    #val_acc, val_loss = validationXXreverse(args, model, device, validation_loader, args.batch_size)
    train(args, model, device, train_loader, optimizer, epoch, args.batch_size)
    val_acc, val_loss = validation(args, model, device, test_loader, args.batch_size)
    
    # Save
    if val_acc > best_acc: 
        best_acc = max(val_acc, best_acc)
        snapshot(args.logging_dir, run_name, {
            'epoch': epoch + 1,
            'validation_acc': val_acc, 
            'state_dict': model.state_dict(),
            'validation_loss': val_loss,
            'optimizer': optimizer.state_dict(),
        })
        best_epoch = epoch + 1
        features, labels = predict(args, model, device, test_loader, args.batch_size)
    elif epoch - best_epoch > 2:
        optimizer.increase_delta()
        best_epoch = epoch + 1
    
    end_epoch_timer = timer()
    logger.info("#### End epoch {}/{}, elapsed time: {}".format(epoch, args.epochs, end_epoch_timer - epoch_timer))

## end 
end_global_timer = timer()
logger.info("################## Success #########################")
logger.info("Total elapsed time: %s" % (end_global_timer - global_timer))

# if __name__ == '__main__':
    # main()

  correct = torch.sum(torch.eq(torch.argmax(self.softmax(total), dim=0).cpu(), torch.arange(0, batch))) # correct is a tensor
  nce += torch.sum(torch.diag(self.lsoftmax(total))) # nce is a tensor
Starting Validation
===> Validation set: Average loss: 2.1212	Accuracy: 0.1949

Snapshot saved to snapshot/cdc/cdc-2021-03-31_11_54_13-model_best.pth

Starting predict


TypeError: expected Tensor as element 0 in argument 0, but got tuple

In [10]:
run_name

'cdc-2021-03-27_21_17_22'