In [None]:
# Fetch the ctcdecode beam-search decoder together with its git submodules and build it from source.
!git clone --recursive https://github.com/parlance/ctcdecode.git
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install wget
%cd ctcdecode
%pip install .
%cd ..

Cloning into 'ctcdecode'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 1063 (delta 6), reused 7 (delta 2), pack-reused 1047[K
Receiving objects: 100% (1063/1063), 763.61 KiB | 16.25 MiB/s, done.
Resolving deltas: 100% (509/509), done.
Submodule 'third_party/ThreadPool' (https://github.com/progschj/ThreadPool.git) registered for path 'third_party/ThreadPool'
Submodule 'third_party/kenlm' (https://github.com/kpu/kenlm.git) registered for path 'third_party/kenlm'
Cloning into '/content/ctcdecode/third_party/ThreadPool'...
remote: Enumerating objects: 82, done.        
remote: Total 82 (delta 0), reused 0 (delta 0), pack-reused 82        
Cloning into '/content/ctcdecode/third_party/kenlm'...
remote: Enumerating objects: 90, done.        
remote: Counting objects: 100% (90/90), done.        
remote: Compressing objects: 100% (64/64), done.        
remote: Total 13672 (delta 41

In [None]:
# Levenshtein edit distance is the validation metric; %pip targets the running kernel's environment.
%pip install python-Levenshtein

Collecting python-Levenshtein
[?25l  Downloading https://files.pythonhosted.org/packages/42/a9/d1785c85ebf9b7dfacd08938dd028209c34a0ea3b1bcdb895208bd40a67d/python-Levenshtein-0.12.0.tar.gz (48kB)
[K     |██████▊                         | 10kB 28.7MB/s eta 0:00:01[K     |█████████████▌                  | 20kB 4.0MB/s eta 0:00:01[K     |████████████████████▏           | 30kB 5.1MB/s eta 0:00:01[K     |███████████████████████████     | 40kB 5.9MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 3.2MB/s 
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25l[?25hdone
  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.0-cp36-cp36m-linux_x86_64.whl size=144794 sha256=863a4c8fa485932728fb3abc085ba2ccf51886262ad8e002936b0f1e5695c06a
  Stored in directory: /root/.cache/pip/wheels/de/c2/93/660fd5f7559049268ad2dc6d81c4e39e9e36518766eaf7e342
Successfully built python-Levenshtein
Installin

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.tensor as tensor
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import *
import time
import Levenshtein as ls

In [None]:
# Mini-batch size used by the training and validation DataLoaders.
batch_size = 64
cuda = torch.cuda.is_available()

# DataLoader worker processes: 4 when a GPU is available, otherwise 0.
num_of_workers = 4 if cuda else 0
# Adam hyperparameters handed to the optimizer (learning rate and weight decay).
lr = 1e-3
weight_decay = 5e-6

In [None]:
# PHONEME_MAP is indexed by class id when predictions are turned back into strings.
from phoneme_list import N_PHONEMES, PHONEME_LIST, PHONEME_MAP
# Sanity check on the size of the label set.
print(len(PHONEME_MAP))

42


In [None]:
from ctcdecode import CTCBeamDecoder
import os
# Beam-search CTC decoder over the PHONEME_MAP labels, with one worker process per CPU
# reported by os.cpu_count(). log_probs_input=True because MyModel.forward ends in log_softmax.
decoder = CTCBeamDecoder(PHONEME_MAP, beam_width=25, num_processes=os.cpu_count(), log_probs_input=True)

In [None]:
# Load the training and validation utterances together with their label sequences.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load .npy files from a trusted source.
# presumably each entry is a (frames, 13) feature array / 1-D label array — TODO confirm against the data files
train_features = np.load('train.npy', allow_pickle=True, encoding='latin1')
train_labels = np.load('train_labels.npy', allow_pickle=True, encoding='latin1')
val_features = np.load('dev.npy', allow_pickle=True, encoding='latin1')
val_labels = np.load('dev_labels.npy', allow_pickle=True, encoding='latin1')

class MyDataset(Dataset):
  """Pairs every utterance's feature array with its label sequence.

  Indexing yields ``(features, n_frames, labels, n_labels)``: both arrays are
  returned as float tensors and each raw label ``k`` is returned as ``k + 1``.
  """

  def __init__(self, X, Y):
    self.X, self.Y = X, Y

  def __len__(self):
    # The number of samples is defined by the label collection.
    return len(self.Y)

  def __getitem__(self, index):
    feats = torch.from_numpy(self.X[index]).float()
    # Adding 1 builds a new array, so the stored labels are not modified.
    shifted = torch.from_numpy(self.Y[index] + 1).float()
    return feats, feats.shape[0], shifted, shifted.shape[0]


def collate_fn(batch):
  """Merge a list of ``(X, X_len, Y, Y_len)`` samples into one padded batch.

  Returns:
    ``(X_pad, X_lens, Y_pad, Y_lens)`` where the features are padded
    time-first (``pad_sequence`` default), the labels are padded batch-first,
    and both length vectors are ``LongTensor``s.
  """
  feats, feat_lens, labels, label_lens = zip(*batch)
  return (
      pad_sequence(feats),
      torch.LongTensor(feat_lens),
      pad_sequence(labels, batch_first=True),
      torch.LongTensor(label_lens),
  )

# Wrap the loaded arrays in Datasets; collate_fn pads every batch to a common length.
train_dataset = MyDataset(train_features, train_labels)
val_dataset = MyDataset(val_features, val_labels)
# Training batches are shuffled and use pinned memory; validation batches keep the stored order.
train_loader_args = dict(shuffle=True, batch_size=batch_size, num_workers=num_of_workers, pin_memory=True, collate_fn=collate_fn)
train_loader = DataLoader(train_dataset, **train_loader_args)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size, num_workers=num_of_workers, collate_fn=collate_fn)


In [None]:
def xavier_init(params):
  """Apply Xavier-normal initialisation to every ``nn.Linear`` in ``params``.

  Args:
    params: iterable of modules (e.g. ``model.modules()``). Entries that are
      not ``nn.Linear`` are skipped and left untouched.

  Linear weights are drawn with ``xavier_normal_``; biases, when present, are
  set to zero.
  """
  for m in params:
    if isinstance(m, nn.Linear):
      nn.init.xavier_normal_(m.weight)
      # A Linear built with bias=False has bias None, which zeros_ cannot handle.
      if m.bias is not None:
        nn.init.zeros_(m.bias)

class MyModel(nn.Module):
  """Bidirectional LSTM model that emits per-frame log-probabilities.

  Args:
    in_utter: number of features per input frame.
    out_phone: number of output classes per frame.
    hidden_size: LSTM hidden size per direction; also the width of ``fc``.
  """

  def __init__(self, in_utter, out_phone, hidden_size):
    super().__init__()
    self.lstm1 = nn.LSTM(in_utter, hidden_size, bidirectional=True, num_layers=4, dropout=0.5)
    # The bidirectional LSTM concatenates both directions, hence hidden_size * 2 inputs.
    self.fc = nn.Linear(hidden_size * 2, hidden_size)
    self.output = nn.Linear(hidden_size, out_phone)

  def init_weights(self):
    """Re-initialise the linear layers of this model via ``xavier_init``."""
    with torch.no_grad():
      xavier_init(self.modules())

  def forward(self, X, lengths):
    """Run a padded batch through the network.

    Args:
      X: padded batch laid out time-first, ``(max_len, batch, in_utter)``.
      lengths: length of every sequence in the batch; the batch does not
        need to be sorted by length.

    Returns:
      ``(log_probs, out_lens)`` where ``log_probs`` is
      ``(max_len, batch, out_phone)`` after ``log_softmax`` over the class
      dimension and ``out_lens`` are the lengths reported by
      ``pad_packed_sequence``.
    """
    packed_X = pack_padded_sequence(X, lengths, enforce_sorted=False)
    packed_out, _ = self.lstm1(packed_X)
    out, out_lens = pad_packed_sequence(packed_out)
    out = self.output(self.fc(out)).log_softmax(2)
    return out, out_lens

  def save(self, ckpt_path):
    """Write the model's state dict to ``ckpt_path`` under the ``'params'`` key."""
    torch.save({'params': self.state_dict()}, ckpt_path)

  def load(self, ckpt_path):
    """Restore weights from a checkpoint file.

    Accepts checkpoints written by ``save`` (key ``'params'``) as well as
    checkpoints that store the weights under ``'model_state_dict'``.

    Raises:
      KeyError: if the checkpoint contains neither key.
    """
    # map_location='cpu' lets a checkpoint written on a GPU machine be read on a
    # CPU-only one; load_state_dict then copies the values into this model's
    # parameters on whatever device they live on.
    ckpt = torch.load(ckpt_path, map_location='cpu')
    key = 'params' if 'params' in ckpt else 'model_state_dict'
    self.load_state_dict(ckpt[key], strict=True)

In [None]:
# Use the GPU when one is available; the model and every batch are moved to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# 13 input features per frame, 42 output classes (the same count as len(PHONEME_MAP) printed earlier),
# 512 hidden units per LSTM direction.
model = MyModel(13, 42, 512)
model.init_weights()
model.to(device)
print(model)
# CTC loss with its default settings; it is fed the model's log_softmax outputs.
criterion = nn.CTCLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.85)
# mode='min': the training loop steps this scheduler with the validation edit distance,
# so the learning rate is cut by 10x when that distance stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.1,verbose=True, patience=2, threshold=5e-2)

def output_2_string(out):
  """Map a sequence of class indices to the concatenation of their PHONEME_MAP entries.

  Each element is converted with ``int()`` before the lookup, so tensor
  elements and float-valued indices are accepted. An empty sequence gives "".
  """
  return "".join(PHONEME_MAP[int(out[pos])] for pos in range(len(out)))

cuda
MyModel(
  (lstm1): LSTM(13, 512, num_layers=4, dropout=0.5, bidirectional=True)
  (fc): Linear(in_features=1024, out_features=512, bias=True)
  (output): Linear(in_features=512, out_features=42, bias=True)
)


In [None]:
# Resume from a saved checkpoint: model weights, optimizer state and scheduler state.
# map_location=device lets a checkpoint written on a GPU be restored on a CPU-only runtime.
temp = torch.load("/content/drive/My Drive/HW3P2 Model_19", map_location=device)
model.load_state_dict(temp['model_state_dict'])

optimizer.load_state_dict(temp['optimizer_state_dict'])
scheduler.load_state_dict(temp['scheduler_state_dict'])
# Print the restored scheduler state (best metric so far, last learning rate, ...) as a sanity check.
print(scheduler.state_dict())

{'factor': 0.1, 'min_lrs': [0], 'patience': 2, 'verbose': True, 'cooldown': 0, 'cooldown_counter': 0, 'mode': 'min', 'threshold': 0.05, 'threshold_mode': 'rel', 'best': 7.831046312178388, 'num_bad_epochs': 1, 'mode_worse': inf, 'eps': 1e-08, 'last_epoch': 16, '_last_lr': [1e-07]}


In [None]:
def validate(model, data_loader):
  """Evaluate ``model`` on ``data_loader`` without tracking gradients.

  Args:
    model: network called as ``model(X_pad, X_lens)`` and returning
      ``(log_probs, out_lens)``.
    data_loader: iterable of ``(X_pad, X_lens, Y_pad, Y_lens)`` batches.

  Returns:
    ``(mean_loss, mean_distance)``: the CTC loss averaged with every batch
    weighted by its size, and the Levenshtein distance between decoded and
    reference strings averaged per utterance.

  The model is switched to eval mode for the duration of the call and put
  back into whichever mode it was in before.
  """
  was_training = model.training
  model.eval()
  test_loss = []
  dist = 0
  total = 0
  # Evaluation needs no gradients; without no_grad every forward pass would
  # build an autograd graph and hold its activations in memory.
  with torch.no_grad():
    for X_pad, X_lens, Y_pad, Y_lens in data_loader:
      X_pad, Y_pad = X_pad.to(device), Y_pad.to(device)
      out, out_lens = model(X_pad, X_lens)
      # The model output is time-first; the decoder is given the batch-first view.
      output, _, _, out_seq_len = decoder.decode(out.transpose(0,1), out_lens)

      for i in range(len(output)):
        # Beam index 0 is compared with the reference
        # (presumably the top-scoring beam — confirm against the ctcdecode docs).
        string1 = output_2_string(output[i,0,:out_seq_len[i,0]])
        string2 = output_2_string(Y_pad[i,:Y_lens[i]])
        dist += ls.distance(string1, string2)
      loss = criterion(out, Y_pad, out_lens, Y_lens)
      total += len(Y_pad)
      # Repeat the batch loss once per sample so np.mean weights batches by size.
      test_loss.extend([loss.item()]*Y_pad.size()[0])

      # Drop the batch tensors first so that empty_cache can actually release them.
      del X_pad, Y_pad, out, loss
      torch.cuda.empty_cache()

  model.train(was_training)
  return np.mean(test_loss), dist/total


In [None]:
# Training driver: 20 epochs of CTC training, with a checkpoint and a validation pass after every epoch.
model.train()
torch.cuda.empty_cache()
for epoch in range(20):
  start_time = time.time()
  avg_loss = 0.0
  for batch_num, data in enumerate(train_loader):
    X_pad, X_lens, Y_pad, Y_lens = data
    # Only the padded tensors go to the device; the length vectors stay where the loader put them.
    X_pad, Y_pad = X_pad.to(device), Y_pad.to(device)

    optimizer.zero_grad()
    
    out, out_lens = model(X_pad, X_lens)
    loss = criterion(out, Y_pad, out_lens, Y_lens)
    loss.backward()
    optimizer.step()
    
    avg_loss += loss.item()

    # Every 50 batches: report the running mean loss and the wall time, then reset both.
    if batch_num % 50 == 49:
        print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/50))
        end_time = time.time()
        print(f"50 batches took {end_time - start_time} seconds")
        avg_loss = 0.0    
        start_time = time.time()
    
    # NOTE(review): empty_cache runs before the dels below, so this batch's tensors are
    # still referenced when the cache is released.
    torch.cuda.empty_cache()
    del loss
    del X_pad
    del Y_pad
    del X_lens
    del Y_lens
  # NOTE(review): the checkpoint is written before validate()/scheduler.step(), so the stored
  # scheduler state does not include this epoch's validation result.
  # NOTE(review): the file index is epoch + 15; the offset presumably continues the numbering
  # of an earlier run — confirm before resuming from a different checkpoint.
  torch.save({'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict' : scheduler.state_dict(),
  }, "/content/drive/My Drive/"+"HW3P2 Model_"+str(epoch+15))

  val_loss, val_dist = validate(model, val_loader)
  print('Val Loss: {:.4f}\tVal Distance: {:.4f}'.
        format(val_loss, val_dist))
  # The plateau scheduler tracks the validation edit distance, not the loss.
  scheduler.step(val_dist)




Epoch: 1	Batch: 50	Avg-Loss: 0.2400
50 batches took 248.69465255737305 seconds
Epoch: 1	Batch: 100	Avg-Loss: 0.2400
50 batches took 252.19888019561768 seconds
Epoch: 1	Batch: 150	Avg-Loss: 0.2353
50 batches took 251.2945535182953 seconds
Epoch: 1	Batch: 200	Avg-Loss: 0.2404
50 batches took 250.70153617858887 seconds
Epoch: 1	Batch: 250	Avg-Loss: 0.2356
50 batches took 248.22054052352905 seconds
Epoch: 1	Batch: 300	Avg-Loss: 0.2338
50 batches took 250.92295384407043 seconds
Val Loss: 0.3974	Val Distance: 7.7166
Epoch: 2	Batch: 50	Avg-Loss: 0.2361
50 batches took 250.09404921531677 seconds
Epoch: 2	Batch: 100	Avg-Loss: 0.2409
50 batches took 251.4939103126526 seconds
Epoch: 2	Batch: 150	Avg-Loss: 0.2348
50 batches took 250.7012550830841 seconds
Epoch: 2	Batch: 200	Avg-Loss: 0.2370
50 batches took 251.26714944839478 seconds
Epoch: 2	Batch: 250	Avg-Loss: 0.2396
50 batches took 250.14151692390442 seconds
Epoch: 2	Batch: 300	Avg-Loss: 0.2368
50 batches took 250.66490077972412 seconds
Val Los

KeyboardInterrupt: ignored

In [None]:
# Load the unlabelled test utterances.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load .npy files from a trusted source.
test_features = np.load('test.npy', allow_pickle=True, encoding='latin1')
print("Test length is ", len(test_features))
class TestDataset(Dataset):
  """Unlabelled utterances; each item is ``(float feature tensor, frame count)``."""

  def __init__(self, X):
    self.X = X

  def __len__(self):
    return len(self.X)

  def __getitem__(self, index):
    utterance = self.X[index]
    n_frames = utterance.shape[0]
    return torch.from_numpy(utterance).float(), n_frames

def test_collate_fn(batch):
  X, X_len = zip(*batch)
  X_lens = torch.LongTensor(X_len)
  X_pad = pad_sequence(X)
  return X_pad, X_lens

# shuffle=False keeps predictions in the same order as the rows of test.npy.
test_dataset = TestDataset(test_features)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=num_of_workers, collate_fn=test_collate_fn)

Test length is  2251


In [None]:
# Inference over the test set: decode every batch with the beam decoder and collect
# one index sequence per utterance in `pred`, in loader order.
torch.cuda.empty_cache()
model.eval()
pred = []
with torch.no_grad():
  for i, data in enumerate(test_loader):
    X, X_lens = data
    X = X.to(device)
    out, out_lens = model(X, X_lens)
    # The model output is time-first; the decoder is given the batch-first view.
    output, _, _, out_seq_len = decoder.decode(out.transpose(0,1), out_lens)
    for j in range(len(output)):
      # Keep beam index 0, trimmed to the length the decoder reports for it
      # (presumably the top-scoring beam — confirm against the ctcdecode docs).
      pred.append(output[j,0,:out_seq_len[j,0]])
    torch.cuda.empty_cache()
    del X


In [None]:
# Turn every decoded index sequence into its phoneme string, reusing the helper
# that validation uses for the same conversion.
out = [output_2_string(seq) for seq in pred]

In [None]:
# Sanity check: the count should equal the number of test utterances.
print(len(out))
# Write the submission: the row index becomes the "id" column, the decoded string the "label" column.
df = pd.DataFrame(out, columns=['label'])
df.to_csv("/content/submission.csv", index_label="id")

2251
