In [1]:
!nvidia-smi

Fri Apr  9 05:30:22 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.67       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   58C    P8    12W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!pip install sentencepiece



In [2]:
import sentencepiece as spm

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [4]:
ls gdrive/MyDrive/idl_proj/data

dev.npy                      test.npy        train_std2.npy
dev.npy.zip                  test.npy.zip    train_std.npy
dev_transcripts_cleaned.txt  train_mean.npy  train_transcripts_cleaned_mini.txt
dev_transcripts.npy          train_mini.npy  train_transcripts_cleaned.txt
sample.csv                   train.npy       train_transcripts.npy
[0m[01;34msample_data[0m/                 train.npy.zip   train_transcripts.npy.zip


In [5]:
cd /content/gdrive/MyDrive/idl_proj/

/content/gdrive/MyDrive/idl_proj


In [6]:
ls data

dev.npy                      test.npy        train_std2.npy
dev.npy.zip                  test.npy.zip    train_std.npy
dev_transcripts_cleaned.txt  train_mean.npy  train_transcripts_cleaned_mini.txt
dev_transcripts.npy          train_mini.npy  train_transcripts_cleaned.txt
sample.csv                   train.npy       train_transcripts.npy
[0m[01;34msample_data[0m/                 train.npy.zip   train_transcripts.npy.zip


In [7]:
import os

In [8]:
BASE_PATH = "/content/gdrive/MyDrive/idl_proj/"
DATA_PATH = os.path.join(BASE_PATH, "data")
MODEL_PATH = os.path.join(BASE_PATH, "models")
PREDICTION_PATH = os.path.join(BASE_PATH, "predictions")

In [17]:
# # Installing CTC Decoder
!git clone --recursive https://github.com/parlance/ctcdecode.git
!cd ctcdecode && pip install .

fatal: destination path 'ctcdecode' already exists and is not an empty directory.
Processing /content/gdrive/MyDrive/idl_proj/ctcdecode
Building wheels for collected packages: ctcdecode
  Building wheel for ctcdecode (setup.py) ... [?25l[?25hdone
  Created wheel for ctcdecode: filename=ctcdecode-1.0.2-cp37-cp37m-linux_x86_64.whl size=12877755 sha256=fed33c3c05ccdcdf5193ebee6c04e9e50309ade343dd723f2e1213f6790bb00e
  Stored in directory: /tmp/pip-ephem-wheel-cache-rot7ht0j/wheels/2b/33/31/5d0b9670a4ad51535fe162448223c80b2bf1cbb17ee4793b3c
Successfully built ctcdecode
Installing collected packages: ctcdecode
Successfully installed ctcdecode-1.0.2


In [9]:
import time

In [10]:
# Import necessary libraries

import timeit
import os
import sys
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from torch.nn.utils.rnn import *
from torch.autograd import Variable

from ctcdecode import CTCBeamDecoder
from datetime import datetime as dt

In [11]:
# Check if cuda is available

cuda = torch.cuda.is_available()
num_workers = 4 if cuda else 0
print("Cuda = "+str(cuda)+" with num_workers = "+str(num_workers))

def get_device():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    return device
device = get_device()

Cuda = True with num_workers = 4


In [12]:
# Files from drive
train_filename = os.path.join(DATA_PATH, 'train_mini.npy')
train_transcripts_filename = os.path.join(DATA_PATH, 'train_transcripts_cleaned_mini.txt')

dev_filename = os.path.join(DATA_PATH, 'dev.npy')
dev_transcripts_filename = os.path.join(DATA_PATH, 'dev_transcripts_cleaned.txt')

test_filename = os.path.join(DATA_PATH, "test.npy")

In [13]:
# makes segmenter instance and loads the model file (m.model)
sp = spm.SentencePieceProcessor()
sp.load('train_model_cleaned.model')
label_map = [sp.id_to_piece(id) for id in range(sp.get_piece_size())]

In [14]:
class Wav2LetterDataset(Dataset):
    def __init__(self, x_path, y_path, sp_model, mean=None, std=None):
        stime = time.time()
        self.mean = mean.reshape(1, 40)
        self.std = std.reshape(1, 40)
        self.sp_model = sp_model
        with open(y_path, 'r') as file:
          y_ = file.read().splitlines()
        self.y = [self.sp_model.encode_as_ids(x) for x in y_]
        self.X = np.load(x_path, allow_pickle=True)
        assert len(self.X)==len(self.y); "Lengths match!"
        etime = time.time()
        print(f"Loaded the dataset of {len(self.X)} instances in {etime-stime:3.3f} Secs")
        self.length = self.X.shape[0]
    def normalize(self, x):
        if self.mean is not None and self.std is not None:
          return (x-self.mean)/self.std
        return x

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        x = torch.Tensor(self.normalize(self.X[index]))
        y = torch.Tensor(self.y[index])
        return x, y

def pad_collate_(batch):

    data = [torch.LongTensor(item[0]) for item in batch]
    data_lengths = torch.LongTensor([len(seq) for seq in data])
    data = pad_sequence(data)
    
    max_seqlength = data.shape[0]
    input_len_ratio = torch.FloatTensor([item[0].shape[0]/float(max_seqlength) for item in batch])

    target = [torch.LongTensor(item[1]) for item in batch]
    target_lengths = torch.LongTensor([len(seq) for seq in target])
    target = pad_sequence(target, batch_first=True)
  
    return data, target, data_lengths, target_lengths, input_len_ratio

def pad_collate(batch):
    inputs = []
    inputs_lengths = []
    targets = []
    targets_lengths = []
    for i, (x, y) in enumerate(batch):
      inputs.append(x)
      inputs_lengths.append(x.shape[0]//2)
      targets.append(1 + y) # this is to accomodate for the blank symbol
      targets_lengths.append(len(y))
    inputs = nn.utils.rnn.pad_sequence(inputs, batch_first=True).transpose(1, 2)
    targets = nn.utils.rnn.pad_sequence(targets, batch_first=True)
    inputs_lengths = torch.Tensor(inputs_lengths).long()
    targets_lengths = torch.Tensor(targets_lengths).long()
    return inputs, targets, inputs_lengths, targets_lengths

In [15]:
hyperparameters = {
    "batch_size": 8,
    "epochs": 50,
    "learning_rate": 5e-3,
    "weight_decay": 1e-5,
}

In [16]:
train_mean = np.load(os.path.join(DATA_PATH, 'train_mean.npy'))
train_std = np.load(os.path.join(DATA_PATH, 'train_std.npy'))

In [17]:
train_data = Wav2LetterDataset(train_filename, train_transcripts_filename, sp_model=sp, mean=train_mean, std=train_std)
train_args = dict(shuffle=True, batch_size=hyperparameters["batch_size"], num_workers=num_workers, drop_last=True, collate_fn=pad_collate)
train_loader = DataLoader(train_data, **train_args)

val_data = Wav2LetterDataset(dev_filename, dev_transcripts_filename, sp_model=sp, mean=train_mean, std=train_std)
val_args = dict(shuffle=True, batch_size=hyperparameters["batch_size"], num_workers=num_workers, drop_last=True, collate_fn=pad_collate)
val_loader = DataLoader(val_data, **val_args)

Loaded the dataset of 5000 instances in 1.795 Secs


  cpuset_checked))


Loaded the dataset of 2703 instances in 0.584 Secs


In [18]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, final=False):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv1d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
        if not final:
          self.relu = nn.ReLU(inplace=True)
        else:
          self.relu = None

    def forward(self, x):
        out = self.conv(x)
        if self.relu:
          out = self.relu(out)

        return out

In [19]:
class Wav2Letter(nn.Module):

    def __init__(self, num_classes = 42, num_features = 40):
        super(Wav2Letter, self).__init__()

        model = nn.Sequential(
            ConvBlock(in_channels=num_features, out_channels=250, kernel_size=48, stride=2, padding=23),

            ConvBlock(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3),
            ConvBlock(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3),
            ConvBlock(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3),
            ConvBlock(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3),
            ConvBlock(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3),
            ConvBlock(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3),
            ConvBlock(in_channels=250, out_channels=250, kernel_size=7, stride=1, padding=3),

            ConvBlock(in_channels=250, out_channels=2000, kernel_size=32, stride=1, padding=16),
            ConvBlock(in_channels=2000, out_channels=2000, kernel_size=1, stride=1, padding=0),
            ConvBlock(in_channels=2000, out_channels=num_classes, kernel_size=1, stride=1, padding=0, final=True)
        )
        
        self.model = model
        self.log_softmax = nn.LogSoftmax(dim=1)
    
    def forward(self, x):
        # Input - (batch_size, num_features, input_length)
        out = self.model(x)
        out = self.log_softmax(out)
        
        return out.transpose(0,1)

In [20]:
sp.get_piece_size()

31

In [21]:
model = Wav2Letter(num_classes=sp.get_piece_size()+1)
model.to(device)

Wav2Letter(
  (model): Sequential(
    (0): ConvBlock(
      (conv): Conv1d(40, 250, kernel_size=(48,), stride=(2,), padding=(23,))
      (relu): ReLU(inplace=True)
    )
    (1): ConvBlock(
      (conv): Conv1d(250, 250, kernel_size=(7,), stride=(1,), padding=(3,))
      (relu): ReLU(inplace=True)
    )
    (2): ConvBlock(
      (conv): Conv1d(250, 250, kernel_size=(7,), stride=(1,), padding=(3,))
      (relu): ReLU(inplace=True)
    )
    (3): ConvBlock(
      (conv): Conv1d(250, 250, kernel_size=(7,), stride=(1,), padding=(3,))
      (relu): ReLU(inplace=True)
    )
    (4): ConvBlock(
      (conv): Conv1d(250, 250, kernel_size=(7,), stride=(1,), padding=(3,))
      (relu): ReLU(inplace=True)
    )
    (5): ConvBlock(
      (conv): Conv1d(250, 250, kernel_size=(7,), stride=(1,), padding=(3,))
      (relu): ReLU(inplace=True)
    )
    (6): ConvBlock(
      (conv): Conv1d(250, 250, kernel_size=(7,), stride=(1,), padding=(3,))
      (relu): ReLU(inplace=True)
    )
    (7): ConvBlock(

In [22]:
criterion = nn.CTCLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameters["learning_rate"], weight_decay=hyperparameters["weight_decay"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=5) # OR can use some other scheduler

In [23]:
torch.__version__

'1.8.1+cu101'

In [24]:
# Train the model - Change based on the model

def train_model(train_loader, model):
    training_loss = 0
    
    # Set model in 'Training mode'
    model.train()
    
    # enumerate mini batches
    for i, (inputs, targets, out_lengths, target_lengths) in enumerate(train_loader):

        inputs = inputs.to(device)#.transpose(0,1).transpose(1,2).reshape((hyperparameters["batch_size"], 40, -1))
        targets = targets.to(device)
        
        # clear the gradients
        optimizer.zero_grad()
        
        # compute the model output
        out = model(inputs)
        
        # calculate loss
        loss = criterion(out.permute(2, 1, 0), targets, out_lengths, target_lengths)
        
        # Backward pass
        loss.backward()
        
        # Update model weights
        optimizer.step()

        training_loss += loss.item()
        if i%100==0:
          print(f"\tIteration {i}/{len(train_loader)} and loss: {loss.item()}")
    training_loss /= len(train_loader)
    return training_loss

In [25]:
# Evaluate the model - Change based on the model

def evaluate_model(val_loader, model):
    
    model.eval()
    
    # enumerate mini batches
    total_loss = 0
    for i, (inputs, targets, out_lengths, target_lengths ) in enumerate(val_loader):

        inputs = inputs.to(device) #.transpose(0,1).transpose(1,2).reshape((hyperparameters["batch_size"], 40, -1)).to(device)        
        targets = targets.to(device)
        
        # compute the model output
        out = model(inputs)
        
        # calculate loss
        loss = criterion(out.permute(2, 1, 0), targets, out_lengths, target_lengths)

        # decoder = CTCBeamDecoder(label_map, beam_width=2, log_probs_input=True)
        # out, _, _, out_lengths = decoder.decode(out.transpose(0, 1), out_lengths)
        total_loss += loss.item()

    return total_loss/len(val_loader)

In [26]:
experiment_name = 'wav2letter_001'

In [28]:
best_loss = 2**32
os.makedirs(MODEL_PATH, exist_ok=True)
for epoch in range(hyperparameters["epochs"]):
    print("Epoch: ", epoch)

    # Train
    starttime = timeit.default_timer()
    training_loss = train_model(train_loader, model)
    endtime = timeit.default_timer()
    print("Training time: ", (endtime - starttime)/60)

    # Validation
    starttime = timeit.default_timer()
    val_loss = evaluate_model(val_loader, model)
    endtime = timeit.default_timer()
    print("Validation time: ", (endtime - starttime)/60, "and validation loss:", val_loss)
    if val_loss<best_loss:
      print("Best model is updated......")
      best_loss = val_loss
      torch.save(model, os.path.join(MODEL_PATH, f"{experiment_name}_base_model.pth"))
    scheduler.step(val_loss)

    # Print log of accuracy and loss
    print("Epoch: "+str(epoch)+", Training loss: "+str(training_loss)+", Validation loss: "+str(val_loss)+
          ", LR: "+str(scheduler.get_last_lr())+"\n")

Epoch:  0


  cpuset_checked))


	Iteration 0/625 and loss: 2.825741767883301
	Iteration 100/625 and loss: 2.867755651473999
	Iteration 200/625 and loss: 2.852854013442993
	Iteration 300/625 and loss: 2.8600077629089355
	Iteration 400/625 and loss: 2.8683714866638184
	Iteration 500/625 and loss: 2.869204044342041
	Iteration 600/625 and loss: 2.822143077850342
Training time:  1.9128743624166722
Validation time:  0.29826686001667135 and validation loss: 2.870196056648956
Best model is updated......


NameError: ignored

In [30]:
for i, (inputs, targets, out_lengths, target_lengths) in enumerate(train_loader):
  print(i)
  inputs = inputs.to(device)        
  targets = targets.to(device)
  
  # # clear the gradients
  # optimizer.zero_grad()
  
  # compute the model output
  out = model(inputs.float())
  
  # calculate loss
  loss = criterion(out.permute(2, 1, 0), targets, out_lengths, target_lengths)
  break

  cpuset_checked))


0


In [None]:
decoder = CTCBeamDecoder(label_map, beam_width=2, log_probs_input=True)
out, _, _, out_lengths = decoder.decode(out.transpose(0, 1), out_lengths)

In [71]:
inputs.shape

torch.Size([8, 40, 1336])

In [73]:
out.shape

torch.Size([32, 8, 669])

In [74]:
targets.shape

torch.Size([8, 237])

In [75]:
target_lengths

tensor([205, 217,  94, 142, 237, 196, 194, 166])

In [78]:
out_lengths

[542, 668, 309, 495, 602, 562, 571, 547]

In [58]:
loss

tensor(inf, device='cuda:0', grad_fn=<MeanBackward0>)

In [52]:
out.shape

torch.Size([32, 8, 630])

In [66]:
out

tensor([[[-2.0789, -2.0788, -2.0790,  ..., -2.0794, -2.0794, -2.0795],
         [-2.0790, -2.0791, -2.0791,  ..., -2.0794, -2.0794, -2.0795],
         [-2.0789, -2.0790, -2.0791,  ..., -2.0794, -2.0794, -2.0795],
         ...,
         [-2.0808, -2.0813, -2.0805,  ..., -2.0794, -2.0794, -2.0795],
         [-2.0798, -2.0797, -2.0798,  ..., -2.0794, -2.0794, -2.0795],
         [-2.0793, -2.0793, -2.0794,  ..., -2.0794, -2.0794, -2.0795]],

        [[-2.0792, -2.0793, -2.0792,  ..., -2.0794, -2.0794, -2.0794],
         [-2.0795, -2.0795, -2.0792,  ..., -2.0794, -2.0794, -2.0794],
         [-2.0790, -2.0792, -2.0791,  ..., -2.0794, -2.0794, -2.0794],
         ...,
         [-2.0802, -2.0795, -2.0803,  ..., -2.0794, -2.0794, -2.0794],
         [-2.0791, -2.0793, -2.0791,  ..., -2.0794, -2.0794, -2.0794],
         [-2.0799, -2.0798, -2.0797,  ..., -2.0794, -2.0794, -2.0794]],

        [[-2.0800, -2.0800, -2.0801,  ..., -2.0794, -2.0794, -2.0794],
         [-2.0797, -2.0799, -2.0795,  ..., -2

In [72]:
out1.shape

torch.Size([8, 31, 671])

In [73]:
out1

tensor([[[ 0.0026,  0.0026,  0.0024,  ...,  0.0020,  0.0024,  0.0023],
         [-0.0086, -0.0087, -0.0086,  ..., -0.0088, -0.0089, -0.0087],
         [ 0.0113,  0.0115,  0.0115,  ...,  0.0089,  0.0088,  0.0087],
         ...,
         [ 0.0057,  0.0054,  0.0055,  ...,  0.0050,  0.0047,  0.0048],
         [-0.0061, -0.0064, -0.0064,  ..., -0.0058, -0.0060, -0.0061],
         [-0.0313, -0.0308, -0.0304,  ..., -0.0310, -0.0309, -0.0313]],

        [[ 0.0027,  0.0024,  0.0024,  ...,  0.0020,  0.0023,  0.0022],
         [-0.0085, -0.0086, -0.0087,  ..., -0.0091, -0.0090, -0.0088],
         [ 0.0112,  0.0112,  0.0112,  ...,  0.0089,  0.0088,  0.0086],
         ...,
         [ 0.0055,  0.0054,  0.0053,  ...,  0.0050,  0.0048,  0.0048],
         [-0.0061, -0.0062, -0.0064,  ..., -0.0060, -0.0062, -0.0063],
         [-0.0311, -0.0310, -0.0305,  ..., -0.0308, -0.0310, -0.0314]],

        [[ 0.0030,  0.0028,  0.0025,  ...,  0.0020,  0.0023,  0.0022],
         [-0.0082, -0.0085, -0.0088,  ..., -0

In [81]:
lgsftmx = nn.LogSoftmax(dim=1)
sftmx = nn.Softmax(dim=1)

In [87]:
lgsftmx(out1)

tensor([[[-3.4290, -3.4290, -3.4292,  ..., -3.4296, -3.4292, -3.4293],
         [-3.4402, -3.4403, -3.4402,  ..., -3.4404, -3.4405, -3.4404],
         [-3.4203, -3.4201, -3.4201,  ..., -3.4227, -3.4228, -3.4229],
         ...,
         [-3.4259, -3.4263, -3.4261,  ..., -3.4267, -3.4269, -3.4268],
         [-3.4378, -3.4380, -3.4380,  ..., -3.4374, -3.4376, -3.4377],
         [-3.4629, -3.4624, -3.4620,  ..., -3.4626, -3.4625, -3.4629]],

        [[-3.4289, -3.4291, -3.4292,  ..., -3.4296, -3.4293, -3.4294],
         [-3.4401, -3.4401, -3.4403,  ..., -3.4407, -3.4406, -3.4404],
         [-3.4204, -3.4204, -3.4204,  ..., -3.4226, -3.4227, -3.4229],
         ...,
         [-3.4261, -3.4262, -3.4262,  ..., -3.4266, -3.4268, -3.4267],
         [-3.4377, -3.4378, -3.4380,  ..., -3.4376, -3.4378, -3.4378],
         [-3.4627, -3.4626, -3.4621,  ..., -3.4624, -3.4626, -3.4629]],

        [[-3.4286, -3.4288, -3.4291,  ..., -3.4296, -3.4293, -3.4294],
         [-3.4399, -3.4401, -3.4404,  ..., -3

In [86]:
sftmx(out1)

tensor([[[0.0324, 0.0324, 0.0324,  ..., 0.0324, 0.0324, 0.0324],
         [0.0321, 0.0321, 0.0321,  ..., 0.0321, 0.0320, 0.0321],
         [0.0327, 0.0327, 0.0327,  ..., 0.0326, 0.0326, 0.0326],
         ...,
         [0.0325, 0.0325, 0.0325,  ..., 0.0325, 0.0325, 0.0325],
         [0.0321, 0.0321, 0.0321,  ..., 0.0321, 0.0321, 0.0321],
         [0.0313, 0.0314, 0.0314,  ..., 0.0313, 0.0314, 0.0313]],

        [[0.0324, 0.0324, 0.0324,  ..., 0.0324, 0.0324, 0.0324],
         [0.0321, 0.0321, 0.0321,  ..., 0.0320, 0.0320, 0.0321],
         [0.0327, 0.0327, 0.0327,  ..., 0.0326, 0.0326, 0.0326],
         ...,
         [0.0325, 0.0325, 0.0325,  ..., 0.0325, 0.0325, 0.0325],
         [0.0321, 0.0321, 0.0321,  ..., 0.0321, 0.0321, 0.0321],
         [0.0313, 0.0313, 0.0314,  ..., 0.0314, 0.0313, 0.0313]],

        [[0.0324, 0.0324, 0.0324,  ..., 0.0324, 0.0324, 0.0324],
         [0.0321, 0.0321, 0.0321,  ..., 0.0320, 0.0320, 0.0321],
         [0.0327, 0.0327, 0.0327,  ..., 0.0326, 0.0326, 0.

In [85]:
out1

tensor([[[ 0.0026,  0.0026,  0.0024,  ...,  0.0020,  0.0024,  0.0023],
         [-0.0086, -0.0087, -0.0086,  ..., -0.0088, -0.0089, -0.0087],
         [ 0.0113,  0.0115,  0.0115,  ...,  0.0089,  0.0088,  0.0087],
         ...,
         [ 0.0057,  0.0054,  0.0055,  ...,  0.0050,  0.0047,  0.0048],
         [-0.0061, -0.0064, -0.0064,  ..., -0.0058, -0.0060, -0.0061],
         [-0.0313, -0.0308, -0.0304,  ..., -0.0310, -0.0309, -0.0313]],

        [[ 0.0027,  0.0024,  0.0024,  ...,  0.0020,  0.0023,  0.0022],
         [-0.0085, -0.0086, -0.0087,  ..., -0.0091, -0.0090, -0.0088],
         [ 0.0112,  0.0112,  0.0112,  ...,  0.0089,  0.0088,  0.0086],
         ...,
         [ 0.0055,  0.0054,  0.0053,  ...,  0.0050,  0.0048,  0.0048],
         [-0.0061, -0.0062, -0.0064,  ..., -0.0060, -0.0062, -0.0063],
         [-0.0311, -0.0310, -0.0305,  ..., -0.0308, -0.0310, -0.0314]],

        [[ 0.0030,  0.0028,  0.0025,  ...,  0.0020,  0.0023,  0.0022],
         [-0.0082, -0.0085, -0.0088,  ..., -0

In [126]:
inputs[0].max(dim=1)

torch.return_types.max(values=tensor([ 10,  37,  99, 152,  48,  93, 149, 267, 184,  69,  60,  17,  25,  22,
         30,  17,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,  47,  35,  27,  14,  13,  43,  28,  12,   8,  14],
       device='cuda:0'), ...)

In [134]:
global_feats = np.zeros(40)
gloabl_std = np.zeros(40)
for i, x in enumerate(train_data.X):
  global_mean += np.mean(x, 1)


(40,)

In [None]:
tmp_ = np.concatenate(train_data.X)

In [None]:
tmp_.shape

In [56]:
criterion = nn.CTCLoss().to(device)

In [65]:
loss = criterion(out, targets, out_lengths, target_lengths)

In [66]:
loss.item()

nan

In [67]:
out.shape

torch.Size([32, 8, 630])

In [69]:
targets

tensor([[ 3,  6,  8,  ...,  0,  0,  0],
        [ 3, 18, 10,  ..., 19,  5, 10],
        [ 3, 21,  7,  ...,  0,  0,  0],
        ...,
        [ 3, 20,  4,  ...,  0,  0,  0],
        [ 3, 17,  6,  ...,  0,  0,  0],
        [ 3,  9,  5,  ...,  0,  0,  0]], device='cuda:0')

In [70]:
out_lengths

tensor([32, 29, 13, 17, 25, 29, 27, 17], dtype=torch.int32)

In [74]:
inputs.dtype

torch.int64