In [1]:
import time
import pandas as pd
from tqdm import tqdm
import os,sys
import random

import numpy as np


import torch
from torch import nn
import torch.nn.functional as F
from torch.utils import data
from torch.utils.data import DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
if(torch.cuda.is_available()):
    print(f"GPU:{torch.cuda.current_device()}: {torch.cuda.get_device_name(0)}")

Using device: cuda
GPU:0: GeForce GTX 1070


In [2]:
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav



In [3]:
def get_label(fn):
        '''
        Files containing meows are in the dataset.zip archive. They are PCM streams (.wav).
        Naming conventions follow the pattern C_NNNNN_BB_SS_OOOOO_RXX, which has to be exploded as follows:

            C = emission context (values: B = brushing; F = waiting for food; I: isolation in an unfamiliar environment);
            NNNNN = cat’s unique ID;
            BB = breed (values: MC = Maine Coon; EU: European Shorthair);
            SS = sex (values: FI = female, intact; FN: female, neutered; MI: male, intact; MN: male, neutered);
            OOOOO = cat owner’s unique ID;
            R = recording session (values: 1, 2 or 3)
            XX = vocalization counter (values: 01..99)
        example: B_BRI01_MC_FI_SIM01_202
            
        '''
        em={}
        em['B']=0
        em['F']=1
        em['I']=2
        fn_explode = fn.split("_")
        em_letter = fn_explode[0]
        return em[em_letter]

d={}
rootdir=r'data\meow'
dirs=os.listdir(rootdir)
for filename in dirs:
    label = get_label(filename)
    if(label not in d):
        d[label]=0
    d[label]+=1
print(d)
print(min(d.values()))    
#need to balance
min_val=min(d.values())
random.seed(2334)
random.shuffle(dirs)
d={}
dirs_dataset=[]
for filename in dirs:
    label = get_label(filename)
    if(label not in d):
        d[label]=0
    if(d[label]<min_val):
    #if(True):
        d[label]+=1
        dirs_dataset.append(filename)
print(len(dirs_dataset))
print(d)

{0: 127, 1: 92, 2: 221}
92
276
{2: 92, 1: 92, 0: 92}


In [4]:
n_features=102400

In [5]:


#load data
def chunk_train_generator(filenames, chunksize=n_features,rootdir=r'data\meow',maxbytes=n_features):
    """
    cutoff files at 100KB
    """
    def expand_filename(x,depth=4):
        return os.path.join(*tuple([rootdir] + [x[i:i+2] for i in range(0,2*depth,2)] + [x]))
    
    def expand_sample(x):
        N = len(x)
        x = np.asarray(x)
        if N <= chunksize:
            rem = np.zeros(chunksize-N,dtype=np.uint16)
            x = np.concatenate((x,rem))
        return x
    
    #dirs=os.listdir(rootdir)
    
    while(True):
        
        for filename in filenames:

            label = get_label(filename)
            fullpath=os.path.join(rootdir,filename)
            (rate,sig) = wav.read(fullpath)
            mfcc_feat = mfcc(sig,rate)
            fbank_feat = logfbank(sig,rate)

            #print(fbank_feat[1:3,:])
            bin_content =  np.fromfile(
                                fullpath,
                                dtype=np.uint8,
                                count=int(maxbytes),
                                ).astype(dtype=np.uint16)
            #print(bin_content.shape)
            yield expand_sample(fbank_feat.reshape(-1)), np.asarray([label])
        
for i,item in enumerate(chunk_train_generator(dirs_dataset)):
    print(item[0].shape)
    #break
    if(i==3):
        break

(102400,)
(102400,)
(102400,)
(102400,)




In [6]:
class Meow_CNN(nn.Module):
    def __init__(self,use_dropout=False, use_bn=False):
        super().__init__()
        self.use_bn=use_bn
        self.use_dropout=use_dropout
        self.embed = torch.nn.Embedding(128,10)
        
        self.activation = torch.nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)
        
        self.conv1 = torch.nn.Conv1d(10,out_channels=96, kernel_size=11, stride=1)
        self.max1 = torch.nn.MaxPool1d(2, stride=2)
        self.bn1 = nn.BatchNorm1d(96)
        
        self.conv2 = torch.nn.Conv1d(96,out_channels=96, kernel_size=5, stride=1)
        self.max2 = torch.nn.MaxPool1d(2, stride=2)
        self.bn2 = nn.BatchNorm1d(96)
        
        self.conv3 = torch.nn.Conv1d(96,out_channels=96, kernel_size=5, stride=1)
        self.max3 = torch.nn.MaxPool1d(2, stride=2)
        self.bn3 = nn.BatchNorm1d(96)
        
        self.conv4 = torch.nn.Conv1d(96,out_channels=256, kernel_size=5, stride=1)
        self.max4 = torch.nn.MaxPool1d(2, stride=2)
        self.bn4 = nn.BatchNorm1d(256)
        
        self.conv5 = torch.nn.Conv1d(256,out_channels=512, kernel_size=5, stride=1)
        self.max5 = torch.nn.MaxPool1d(2, stride=2)
        self.bn5 = nn.BatchNorm1d(512)
        
        self.conv6 = torch.nn.Conv1d(256,out_channels=256, kernel_size=5, stride=1)
        self.max6 = torch.nn.MaxPool1d(2, stride=2)
        self.bn6 = nn.BatchNorm1d(256)
        
        self.conv7 = torch.nn.Conv1d(256,out_channels=96, kernel_size=5, stride=1)
        self.max7 = torch.nn.MaxPool1d(2, stride=2)
        self.bn7 = nn.BatchNorm1d(96)
        
        self.conv8 = torch.nn.Conv1d(96,out_channels=512, kernel_size=5, stride=1)
        self.max8 = torch.nn.MaxPool1d(2, stride=2)
        self.bn8 = nn.BatchNorm1d(512)
        
        self.gmax1 = torch.nn.AdaptiveMaxPool1d(1)
        self.gavg1 = torch.nn.AdaptiveAvgPool1d(1)
        
        self.catstate_output = nn.Sequential(nn.Linear(512,512,bias=True),
                                             nn.ReLU(),
                                             nn.Dropout(p=0.1, inplace=False),
                                             nn.Linear(512,3,bias=True)
                                             ).to(device)

        
    def forward(self,inp):
        
        x = self.embed(inp)
        x = torch.transpose(x,1,2)
        print(x.shape)
        x = self.conv1(x)
        x = self.max1(x)
        x = self.activation(x)
        if self.use_bn:
            x = self.bn1(x)
        if self.use_dropout:
            x = self.dropout(x)

        x = self.conv2(x)
        x = self.max2(x)
        x = self.activation(x)
        if self.use_bn:
            x = self.bn2(x)
        if self.use_dropout:
            x = self.dropout(x)

        x = self.conv3(x)
        x = self.max3(x)
        x = self.activation(x)
        if self.use_bn:
            x = self.bn3(x)
        if self.use_dropout:
            x = self.dropout(x)

        x = self.conv4(x)
        x = self.max4(x)
        x = self.activation(x)
        if self.use_bn:
            x = self.bn4(x)
        if self.use_dropout:
            x = self.dropout(x)

        x = self.conv5(x)
        x = self.activation(x)

        #xmax = self.gmax1(x)
        xavg = self.gavg1(x)

        
        #merge = torch.cat((xmax, xavg), 1)
        x = xavg
        x = torch.flatten(x, start_dim=1)
        x = self.activation(x)
        out = self.catstate_output(x)
        return out
    

In [7]:
class TrainDataset(data.Dataset):

    def __init__(self,filenames, chunksize=n_features,maxbytes=n_features,rootdir=r'data\meow'):

        self.gen = chunk_train_generator(filenames=filenames,
                                        chunksize=chunksize,
                                        rootdir=rootdir,maxbytes=maxbytes)
        
        self.length = len(filenames)
    def __len__(self):
        return self.length

    def __getitem__(self,index):
        sample,label = next(self.gen)
        return sample.astype(np.int),label

In [8]:
#loss functions

# def poisson_loss(ypred,ytrue,eps=1e-6):
#     return torch.mean(ypred-ytrue*torch.log(ypred+eps))
# def cross_entropy(ypred,ytrue):
#     #print(ypred)
#     #print(ytrue)
#     return F.binary_cross_entropy(ypred,ytrue)

def compute_loss(predictions,labels):
    #print(type(labels[0]))
    
    #labels=labels.type(torch.LongTensor).cuda().squeeze()

    #criterion = nn.CrossEntropyLoss() 
    loss = F.cross_entropy(predictions.squeeze(),labels.type(torch.LongTensor).cuda().squeeze())
    return loss


def get_accuracy(logit, target, batch_size):
    ''' Obtain accuracy for training round '''
    # torch.max[0] stores the data, torch.max[1] stores the index (of the max)
    preds=torch.max(logit, 1)[1].view(target.size())
    
    print(preds.squeeze().data.tolist())
    print(target.squeeze().data.tolist())
    #print(target.data)
    corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
    accuracy = 100.0 * corrects/batch_size
    
    return accuracy.item()


In [9]:
def freeze_model(model):
    for param in model.parameters():
        param.requires_grad = False
    return

In [10]:
torch.cuda.empty_cache()

def train(n_epochs=6,batch_size=10,output_dir=r'models'):
    
    print("[INFO] Running pytorch training loop")
    dataset = TrainDataset(filenames=dirs_dataset)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=None,
                       batch_sampler=None, collate_fn=None,
                       pin_memory=False, drop_last=False, timeout=0,
                       worker_init_fn=None)
    net = Meow_CNN(use_dropout=False,use_bn=False).cuda()
    start_epoch = 0
    #maybe other optimizers
    opt = torch.optim.SGD(net.parameters(),lr=0.01,momentum=0.9)
    #opt = torch.optim.Adam(net.parameters())
    loss_avg_queue=[]
    acc_avg_queue=[]
    for epoch in range(n_epochs):
        
        for i,sample_batch in enumerate(dataloader):
            samples,labels = sample_batch
            samples = samples.cuda()
            net=net.train()
            if i == 0:
                print(f"samples shape: {samples.shape}")
                print(f"input Cuda: {samples.is_cuda}")
            
            labels = labels.cuda()
            
            out = net(samples)
            #print(labels)
            loss = compute_loss(out,labels)
            opt.zero_grad()
            loss.backward()
            opt.step()
            acc = get_accuracy(out,labels,batch_size)
            
            loss_avg_queue.append(loss.item())
            acc_avg_queue.append(acc)
            if len(loss_avg_queue) > 10:
                loss_avg_queue = loss_avg_queue[1:]
                acc_avg_queue = acc_avg_queue[1:]
            loss_avg = np.mean(loss_avg_queue)
            acc_avg = np.mean(acc_avg_queue)
            print(f'\r Epoch: {epoch}/{n_epochs} Iter: {i+1}/{len(dataloader)} Loss: {round(loss_avg,5)} Acc: {round(acc_avg,5)}')
            #print(f"acc={acc}, labels={labels}")
            
            net.eval()
        #print("sum:")
        #print(out)
        #print(labels)
        print(f'...writing out model for epoch {epoch}')
        torch.save(net.state_dict(), os.path.join(output_dir,f'epoch_{epoch}.pt'))
        break
train()

torch.cuda.empty_cache()

[INFO] Running pytorch training loop




samples shape: torch.Size([10, 102400])
input Cuda: True
torch.Size([10, 10, 102400])


RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR