## Homework 2-1 Phoneme Classification

## Download Data

In [2]:
# Optional one-time setup, kept disabled: uncomment to install gdown, download
# the archive to data.zip, unpack it and list the working directory.
# (Presumably this archive provides the ./timit_11/ folder read below -- confirm.)
# !pip install --upgrade --no-cache-dir gdown
# !gdown --id '1iDmtJ8vg-SF8dC0r0AoOg4n5UdVOj9To' --output data.zip
# !unzip data.zip
# !ls 

## Import Packages

In [3]:
# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# For data preprocess
import numpy as np
import csv
import os

# Utility
import gc

# Reproducibility: seed every random number generator used by the notebook
# and force cuDNN onto deterministic kernels (no auto-tuning).
my_seed = 0

np.random.seed(my_seed)
torch.manual_seed(my_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(my_seed)

torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Load Data

In [4]:
# Read the training features, training labels and test features from data_root.
print("Loading data ...")

data_root = "./timit_11/"
train_data, train_label, test_data = (
    np.load(data_root + file_name)
    for file_name in ("train_11.npy", "train_label_11.npy", "test_11.npy")
)

for split_name, split_array in (("training", train_data), ("testing", test_data)):
    print("Size of {} data: {}".format(split_name, split_array.shape))

Loading data ...
Size of training data: (922449, 429)
Size of testing data: (307483, 429)


## Data Analysis

In [5]:
# Count how many training samples belong to each of the 39 classes.
print ("Total number {:d}".format(train_label.shape[0]))

# Labels are compared against str(i): train_label stores the class ids as strings.
train_cnt = np.zeros((39), dtype=int)
for i in range(39):
    train_cnt[i] = np.sum(train_label == str(i))

# Deliberately not named `sum`: a notebook-level `sum` would shadow the
# builtin for every cell executed afterwards.
total_cnt = np.sum(train_cnt)
print ("\n   class   count    rate")
for i in range(39):
    print ("{:8d}".format(i), end='')
    print ("{:8d}".format(train_cnt[i]), end='')
    print ("  {:.4f}".format(train_cnt[i] / total_cnt))

Total number 922449

   class   count    rate
       0   47070  0.0510
       1   62434  0.0677
       2   26807  0.0291
       3   44575  0.0483
       4   29669  0.0322
       5   20282  0.0220
       6    2952  0.0032
       7   55166  0.0598
       8   21464  0.0233
       9   25429  0.0276
      10    8029  0.0087
      11    8789  0.0095
      12   20028  0.0217
      13   32881  0.0356
      14   29662  0.0322
      15    8600  0.0093
      16   15788  0.0171
      17   38796  0.0421
      18   18675  0.0202
      19   35149  0.0381
      20    6427  0.0070
      21    5415  0.0059
      22    5404  0.0059
      23    7953  0.0086
      24    2811  0.0030
      25    6099  0.0066
      26    5689  0.0062
      27    4523  0.0049
      28    8531  0.0092
      29   15552  0.0169
      30   18653  0.0202
      31   23591  0.0256
      32    9047  0.0098
      33   16663  0.0181
      34    5098  0.0055
      35   63362  0.0687
      36   20142  0.0218
      37   10604  0.0115
    

## Dataset

In [6]:
class TIMITDataset(Dataset):
    """Tensor-backed dataset of feature rows with optional integer labels.

    Args:
        x: numpy array of features; stored as a float32 tensor.
        y: optional numpy array of labels convertible with ``astype(int)``;
            stored as a ``LongTensor``. When omitted the dataset yields
            features only (used for inference).
    """

    def __init__(self, x, y=None):
        self.x = torch.from_numpy(x).float()
        self.y = None if y is None else torch.LongTensor(y.astype(int))

    def __getitem__(self, index):
        features = self.x[index]
        if self.y is None:
            # Unlabelled data: return the feature row on its own.
            return features
        return features, self.y[index]

    def __len__(self):
        return len(self.x)

## Setup Hyper-parameters

In [7]:
# Hyper-parameters

# -- data --
valid_rate = 0.2        # fraction of the training set held out for validation
batch_size = 2048       # originally 64: slower to run, and too small to represent the distribution
number = 10_000         # every class is topped up to at least this many samples

# -- optimisation --
num_epoch = 200
learning_rate = 1e-4
weight_decay_l1 = 0.0
weight_decay_l2 = 1e-3

# -- output --
model_path = "./model.ckpt"

## Shuffle Training and Validation Data

In [8]:
# Shuffle the row indices and randomly split them into training and validation subsets.
all_indices = list(range(train_data.shape[0]))
train_indices, valid_indices = train_test_split(all_indices, test_size=valid_rate, random_state=1)

train_x, train_y = train_data[train_indices, :], train_label[train_indices]
valid_x, valid_y = train_data[valid_indices, :], train_label[valid_indices]

## Data Process

In [9]:
# Record, for each class, the row indices of its training samples so that
# the oversampling step can draw from them directly.
# (Named `sample_ids` rather than `id` to avoid shadowing the builtin.)
sample_ids = np.arange(train_x.shape[0])
train_class = [sample_ids[train_y == str(i)] for i in range(39)]

# The un-split arrays are not referenced again after the split above.
del train_data, train_label
gc.collect()

33

## Deep Neural Network

In [10]:
class Classifier(nn.Module):
    """Fully connected network mapping 429 input features to 39 class scores.

    Six hidden stages (429 -> 2048 -> 2048 -> 2048 -> 1024 -> 512 -> 128),
    each applying Linear -> ReLU -> BatchNorm1d -> Dropout(0.5), followed by
    a final Linear(128, 39). Sub-modules are registered as ``layer1..layer6``,
    ``bn1..bn6`` and ``out`` so checkpoint keys stay stable.
    """

    # Input width followed by the output width of every hidden stage.
    _WIDTHS = (429, 2048, 2048, 2048, 1024, 512, 128)

    def __init__(self):
        super().__init__()

        # Register all linear layers first, then all batch-norm layers, so the
        # registration order matches the layer1..6 / bn1..6 naming scheme.
        for idx, (fan_in, fan_out) in enumerate(zip(self._WIDTHS, self._WIDTHS[1:]), start=1):
            setattr(self, f"layer{idx}", nn.Linear(fan_in, fan_out))
        for idx, width in enumerate(self._WIDTHS[1:], start=1):
            setattr(self, f"bn{idx}", nn.BatchNorm1d(width))

        self.out = nn.Linear(128, 39)

        self.drop = nn.Dropout(0.5)
        self.act_fn = nn.ReLU()

    def forward(self, x):
        """Return the unnormalised class scores for a batch ``x``."""
        for idx in range(1, len(self._WIDTHS)):
            linear = getattr(self, f"layer{idx}")
            norm = getattr(self, f"bn{idx}")
            x = self.drop(norm(self.act_fn(linear(x))))
        return self.out(x)
     

## Utility


In [11]:
def get_device():
    """Return ``"cuda"`` when a CUDA device is available, otherwise ``"cpu"``."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

## Training

In [12]:
def cal_regularization(model, weight_decay_l1, weight_decay_l2):
    """Return the weighted L1 + L2 penalty over every parameter of ``model``.

    Args:
        model: module whose ``parameters()`` are penalised (all of them,
            biases included).
        weight_decay_l1: coefficient applied to the sum of absolute values.
        weight_decay_l2: coefficient applied to the sum of squares.

    Returns:
        ``weight_decay_l1 * sum(|p|) + weight_decay_l2 * sum(p ** 2)``.
    """
    l1_total, l2_total = 0, 0
    for param in model.parameters():
        l1_total = l1_total + param.abs().sum()
        l2_total = l2_total + (param ** 2).sum()
    return weight_decay_l1 * l1_total + weight_decay_l2 * l2_total

def train_model(num_epoch, learning_rate, weight_decay_l1, weight_decay_l2,
                train_dataset, train_dataloader,
                valid_dataset, valid_dataloader):
    """Train a new ``Classifier`` and save its weights to ``model_path``.

    Relies on the notebook-level globals ``device`` and ``model_path``.
    Adam is used from the first epoch; at epoch index 35 a fresh
    SGD-with-momentum optimizer takes over. Both use ``learning_rate``.

    When ``valid_dataset`` is non-empty, a checkpoint is written each time the
    validation accuracy improves. Otherwise the weights after the final epoch
    are saved.

    Args:
        num_epoch: number of passes over ``train_dataloader``.
        learning_rate: learning rate handed to both optimizers.
        weight_decay_l1: coefficient of the L1 penalty added before backward.
        weight_decay_l2: coefficient of the L2 penalty added before backward.
        train_dataset: training dataset; only ``len()`` is used, as the
            denominator of the reported training accuracy.
        train_dataloader: iterable of ``(inputs, labels)`` training batches.
        valid_dataset: validation dataset; only ``len()`` is used.
        valid_dataloader: iterable of ``(inputs, labels)`` validation batches.
    """
    model = Classifier().to(device)
    criterion = nn.CrossEntropyLoss()
    sgd_start_epoch = 35  # epoch index at which Adam hands over to SGD with momentum

    best_acc = 0.0
    for epoch in range(num_epoch):
        # Adam early on for fast convergence, SGD with momentum afterwards
        # for a more stable finish.
        if epoch == 0:
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        elif epoch == sgd_start_epoch:
            optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0

        # training
        model.train()  # set the model to training mode
        for inputs, labels in train_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)

            batch_loss = criterion(outputs, labels)
            _, train_pred = torch.max(outputs, 1)  # index of the highest-scoring class
            # The penalty only enters the backward pass; the logged loss is
            # the plain cross-entropy.
            (batch_loss + cal_regularization(model, weight_decay_l1, weight_decay_l2)).backward()

            optimizer.step()

            # Compare on the current device; .item() already moves the count to the host.
            train_acc += (train_pred == labels).sum().item()
            train_loss += batch_loss.item()

        # validation
        if len(valid_dataset) > 0:
            model.eval()  # set the model to evaluation mode
            with torch.no_grad():
                for inputs, labels in valid_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    batch_loss = criterion(outputs, labels)
                    _, val_pred = torch.max(outputs, 1)  # index of the highest-scoring class

                    val_acc += (val_pred == labels).sum().item()
                    val_loss += batch_loss.item()

            print ("[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f} | Val Acc: {:3.6f} loss: {:3.6f}".format(
                epoch + 1, num_epoch, train_acc / len(train_dataset), train_loss / len(train_dataloader), val_acc / len(valid_dataset), val_loss / len(valid_dataloader)
            ))

            # if the model improves, save a checkpoint at this epoch
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model.state_dict(), model_path)
                print ("saving model with acc {:.3f}".format(best_acc / len(valid_dataset)))
        else:
            # Fixed: this branch used the undefined name `train_dataloaders`.
            print("[{:03d}/{:03d}] Train Acc: {:3.6f} Loss: {:3.6f}".format(
                epoch + 1, num_epoch, train_acc / len(train_dataset), train_loss / len(train_dataloader)
            ))

    # if not validating, save the last epoch
    if len(valid_dataset) == 0:
        torch.save(model.state_dict(), model_path)
        print("saving model at last epoch")

In [13]:
# Resolve the compute device once. train_model and the inference cells read
# this notebook-level `device` variable.
device = get_device()
print(f"DEVICE: {device}")

DEVICE: cuda


In [14]:
# Wrap the validation split. It is only evaluated, so batch order is left fixed.
valid_dataset = TIMITDataset(valid_x, valid_y)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

# Drop the notebook-level references to the raw arrays; the dataset keeps the
# tensors it built from them.
del valid_x, valid_y
gc.collect()

0

In [15]:
# Oversample minority classes: every class with fewer than `number` training
# samples is topped up to `number` by drawing (with replacement) from its own rows.
print ("Sample data:")
print ("\n   class   count")

# Collect the extra rows and labels and concatenate once at the end, instead
# of re-copying the whole training set on every loop iteration.
x_chunks = [train_x]
y_chunks = [train_y]
for i in range(len(train_class)):
    shortfall = number - train_class[i].shape[0]
    if shortfall > 0:
        print ("{:8d}".format(i), end='')
        print ("{:8d}".format(shortfall))

        picked = np.random.choice(train_class[i], size=shortfall)
        x_chunks.append(train_x[picked])
        # Labels are stored as strings, so build the new ones as strings too.
        y_chunks.append(np.full(shortfall, str(i)))

train_x = np.concatenate(x_chunks, axis=0)
train_y = np.concatenate(y_chunks)
del x_chunks, y_chunks

print ("\n", train_x.shape, train_y.shape)
train_dataset = TIMITDataset(train_x, train_y)
# drop_last=True: an incomplete final batch is skipped each epoch.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

Sample data:

   class   count
       6    7644
      10    3637
      11    3006
      15    3096
      20    4866
      21    5650
      22    5665
      23    3603
      24    7764
      25    5084
      26    5473
      27    6380
      28    3128
      32    2773
      34    5926
      37    1556

 (813210, 429) (813210,)


In [16]:
# Run the full training loop; train_model writes the checkpoint to model_path.
train_model(num_epoch, learning_rate, weight_decay_l1, weight_decay_l2, train_dataset, train_dataloader, valid_dataset, valid_dataloader)

# The inference cells below do not use the training/validation objects, so
# release them before the test set is wrapped.
del train_x, train_y, train_dataset, train_dataloader, valid_dataset, valid_dataloader
gc.collect()

[001/200] Train Acc: 0.315937 Loss: 2.556726 | Val Acc: 0.498260 loss: 1.699729
saving model with acc 0.498
[002/200] Train Acc: 0.462841 Loss: 1.865155 | Val Acc: 0.582996 loss: 1.412998
saving model with acc 0.583
[003/200] Train Acc: 0.525238 Loss: 1.631198 | Val Acc: 0.624327 loss: 1.261590
saving model with acc 0.624
[004/200] Train Acc: 0.562797 Loss: 1.492708 | Val Acc: 0.650279 loss: 1.162712
saving model with acc 0.650
[005/200] Train Acc: 0.589197 Loss: 1.397958 | Val Acc: 0.668806 loss: 1.096518
saving model with acc 0.669
[006/200] Train Acc: 0.607929 Loss: 1.330902 | Val Acc: 0.680541 loss: 1.051722
saving model with acc 0.681
[007/200] Train Acc: 0.622641 Loss: 1.276725 | Val Acc: 0.691159 loss: 1.009632
saving model with acc 0.691
[008/200] Train Acc: 0.634000 Loss: 1.235490 | Val Acc: 0.699241 loss: 0.981304
saving model with acc 0.699
[009/200] Train Acc: 0.644177 Loss: 1.199961 | Val Acc: 0.705653 loss: 0.959631
saving model with acc 0.706
[010/200] Train Acc: 0.65180

0

## Save Output

In [17]:
# create testing dataset (no labels, so the dataset yields features only)
test_dataset = TIMITDataset(test_data, None)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# create model and load weights from checkpoint
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine.
model = Classifier().to(device)
model.load_state_dict(torch.load(model_path, map_location=device))

<All keys matched successfully>

In [18]:
# Run the model over the test set batch by batch and gather the predicted class ids.
predict = []
model.eval()

with torch.no_grad():
    for inputs in test_dataloader:
        outputs = model(inputs.to(device))
        test_pred = torch.max(outputs, 1)[1]  # index of the highest-scoring class
        predict.extend(test_pred.cpu().numpy())

In [19]:
# Write one "row index,predicted class" line per test sample under an "Id,Class" header.
with open("prediction.csv", 'w') as f:
    f.write("Id,Class\n")
    f.writelines("{},{}\n".format(row, label) for row, label in enumerate(predict))