In [None]:
from datetime import datetime
import logging
import os

import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

from dset import GoogleVoiceDataset
from model import RNN
from eval import get_set_accuracy
from preprocess import mfcc_sequence

In [None]:
# Root directory of the speech dataset handed to GoogleVoiceDataset.
# Override per machine by exporting SPEECH_DATA_ROOT; the original path stays the default.
ROOT = os.environ.get('SPEECH_DATA_ROOT', '/home/wilsonyan/data/speech')

In [None]:
# Mini-batch size passed as batch_size to the DataLoaders built below.
BATCH_SIZE = 128

In [None]:
def _as_scalar(value):
    """Convert a one-element tensor/Variable into a plain Python number.

    Uses ``.item()`` when the object provides it and falls back to the legacy
    ``.data[0]`` indexing otherwise, so the training loop runs on both old and
    current PyTorch releases.
    """
    if hasattr(value, 'item'):
        return value.item()
    return value.data[0]


def train_model(model, optimizer, criterion, loader_train, loader_val, num_epochs=100):
    """Train ``model`` for ``num_epochs`` epochs, reporting metrics after each one.

    Each epoch iterates once over ``loader_train``, taking one optimizer step
    per batch, then computes validation accuracy with ``get_set_accuracy``.
    The mean batch loss, training accuracy and validation accuracy are printed
    and written to a timestamped file under ``logs/``.

    Args:
        model: network called as ``model(x)``; its output is treated as
            per-class scores (argmax over dim 1 is the prediction).
        optimizer: optimizer whose ``zero_grad()``/``step()`` are called per batch.
        criterion: loss called as ``criterion(score, y)``.
        loader_train: iterable yielding ``(x, y)`` training batches.
        loader_val: loader forwarded to ``get_set_accuracy`` for validation.
        num_epochs: number of passes over ``loader_train``.

    Returns:
        The same ``model`` object, trained in place.
    """
    log_file = 'logs/{}.log'.format(datetime.now().strftime('%Y%m%d-%H%M%S'))
    # Make sure the log directory exists; otherwise opening the log file fails.
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    # NOTE: basicConfig is a no-op if the root logger already has handlers
    # (e.g. when this cell is re-run in the same kernel).
    logging.basicConfig(filename=log_file, level=logging.DEBUG)

    for epoch in range(num_epochs):
        print('Epoch %s' % epoch)
        print('=' * 40)

        logging.info('Epoch %s', epoch)
        logging.info('=' * 40)

        # get_set_accuracy may leave the model in eval mode (not visible from
        # here); explicitly re-enter train mode at the start of every epoch.
        model.train()

        losses = []
        correct_train, total = 0, 0
        # Pass the loader itself (not iter(loader)) so tqdm can show the total.
        for x, y in tqdm(loader_train):
            x, y = Variable(x), Variable(y)
            score = model(x)
            _, y_pred = torch.max(score, 1)
            correct_train += _as_scalar(y_pred.eq(y).sum())
            total += x.data.size(0)

            loss = criterion(score, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(_as_scalar(loss))

        # Guard against an empty training loader instead of dividing by zero.
        mean_loss = np.mean(losses) if losses else float('nan')
        train_acc = correct_train / total if total else float('nan')
        val_acc = get_set_accuracy(model, loader_val)

        summary = 'Loss: %s, train_acc: %s, val_acc: %s' % (mean_loss, train_acc, val_acc)
        print(summary)
        # The log file now records val_acc too, matching the printed line.
        logging.info(summary)
    return model

In [None]:
# Build the train/val datasets (MFCC-sequence preprocessing) and wrap each one
# in a shuffling DataLoader that yields BATCH_SIZE-sized batches.
dset_train, dset_val = (
    GoogleVoiceDataset(ROOT, mfcc_sequence, mode=split)
    for split in ('train', 'val')
)
loader_train, loader_val = (
    DataLoader(split_dset, batch_size=BATCH_SIZE, shuffle=True)
    for split_dset in (dset_train, dset_val)
)

In [None]:
# Instantiate the network, the optimizer and the loss used by train_model.
# NOTE(review): 13 and 256 are presumably the MFCC feature size and the RNN
# hidden size -- confirm against model.RNN's constructor signature.
model = RNN(13, 256, dset_train.n_classes)
# Adam over all model parameters, with the library's default hyperparameters.
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

In [None]:
# Run training with train_model's default num_epochs (100) and keep the returned model.
model = train_model(model, optimizer, criterion, loader_train, loader_val)

In [None]:
# Evaluate the trained model on the test split.
dset_test = GoogleVoiceDataset(ROOT, mfcc_sequence, mode='test')
# Build the loader from dset_test: the validation set was passed here before,
# so the reported "test_acc" was actually validation accuracy.
loader_test = DataLoader(dset_test, shuffle=True, batch_size=BATCH_SIZE)
print('test_acc: %s' % (get_set_accuracy(model, loader_test)))