In [1]:
%env CUDA_VISIBLE_DEVICES=3

env: CUDA_VISIBLE_DEVICES=3


In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from data_objects.kaldi_interface import KaldiInterface

In [3]:
class InterpLnr(nn.Module):
    
    def __init__(self, max_len_seq=1, max_len_pad=192, min_len_seg=19, max_len_seg=32):
        super().__init__()
        self.max_len_seq = max_len_seq
        self.max_len_pad = max_len_pad
        
        self.min_len_seg = min_len_seg
        self.max_len_seg = max_len_seg
        
        self.max_num_seg = self.max_len_seq // self.min_len_seg + 1
        #self.training = config.train
        
        
    def pad_sequences(self, sequences, max_ilen):
        channel_dim = sequences[0].size()[-1]
        #out_dims = (len(sequences), self.max_len_pad, channel_dim)
        out_dims = (len(sequences), max_ilen, channel_dim)
        out_tensor = sequences[0].data.new(*out_dims).fill_(0)
        
        for i, tensor in enumerate(sequences):
            #print(f'in rr before pad seq: {tensor.shape}')
            length = tensor.size(0)
            #out_tensor[i, :length, :] = tensor[:self.max_len_pad]
            out_tensor[i, :length, :] = tensor[:max_ilen]
            
        return out_tensor 
    

    def forward(self, x, len_seq):  
        
        # if not self.training:
        #     #print("Not Training! (from RR)")
        #     return x
        
        #print(f'Training: {self.training}')
        
        device = x.device
        batch_size = x.size(0)
        max_ilen = max(len_seq)

        # indices of each sub segment
        indices = torch.arange(self.max_len_seg*2, device=device)\
                  .unsqueeze(0).expand(batch_size*self.max_num_seg, -1)
        # scales of each sub segment
        scales = torch.rand(batch_size*self.max_num_seg, 
                            device=device) + 0.5
        
        idx_scaled = indices / scales.unsqueeze(-1)
        idx_scaled_fl = torch.floor(idx_scaled)
        lambda_ = idx_scaled - idx_scaled_fl
        
        len_seg = torch.randint(low=self.min_len_seg, 
                                high=self.max_len_seg, 
                                size=(batch_size*self.max_num_seg,1),
                                device=device)
        
        # end point of each segment
        idx_mask = idx_scaled_fl < (len_seg - 1)
       
        offset = len_seg.view(batch_size, -1).cumsum(dim=-1)
        # offset starts from the 2nd segment
        offset = F.pad(offset[:, :-1], (1,0), value=0).view(-1, 1)
        
        idx_scaled_org = idx_scaled_fl + offset
        
        len_seq_rp = torch.repeat_interleave(len_seq, self.max_num_seg)
        idx_mask_org = idx_scaled_org < (len_seq_rp - 1).unsqueeze(-1)
        
        idx_mask_final = idx_mask & idx_mask_org
        
        counts = idx_mask_final.sum(dim=-1).view(batch_size, -1).sum(dim=-1)
        
        index_1 = torch.repeat_interleave(torch.arange(batch_size, 
                                            device=device), counts)
        
        index_2_fl = idx_scaled_org[idx_mask_final].long()
        index_2_cl = index_2_fl + 1
        
        y_fl = x[index_1, index_2_fl, :]
        y_cl = x[index_1, index_2_cl, :]
        lambda_f = lambda_[idx_mask_final].unsqueeze(-1)
        
        y = (1-lambda_f)*y_fl + lambda_f*y_cl
        
        sequences = torch.split(y, counts.tolist(), dim=0)
       
        seq_padded = self.pad_sequences(sequences, max_ilen)
        
        return seq_padded 

In [3]:
def get_bnfs(spk_id, utterance_id, kaldi_dir):
    ki = KaldiInterface(wav_scp=str(os.path.join(kaldi_dir, 'wav.scp')),
                        bnf_scp=str(os.path.join(kaldi_dir, 'bnf/feats.scp')))
    bnf = ki.get_feature('_'.join([spk_id, utterance_id]), 'bnf')
    return bnf

In [4]:
device = 'cuda'

utterance_ids = ['arctic_b0534', 'arctic_b0537', 'arctic_b0538', 'arctic_b0539']
basepath = '/mnt/data1/waris/datasets/data/arctic_dataset/test_speakers_16k'

src_speaker_fpath = os.path.join(basepath, 'BDL')
src_speaker = os.path.basename(src_speaker_fpath)
src_speaker_kaldi_dir = os.path.join(src_speaker_fpath, 'kaldi')

ppg = get_bnfs(src_speaker, utterance_ids[0], src_speaker_kaldi_dir)
ppg = torch.from_numpy(ppg).unsqueeze(0).to(device)

In [5]:
ppg.shape

torch.Size([1, 294, 256])

In [6]:
max_len_seq=300
max_len_pad=300
min_len_seg=19
max_len_seg=32
        
max_num_seg = max_len_seq // min_len_seg + 1

max_num_seg

16

In [7]:
device = ppg.device
batch_size = ppg.size(0)
#max_len_seq = x.size(1)

#len_seq = torch.tensor([ppg.size(1)])
len_seq = torch.tensor(max_len_pad).expand(ppg.size(0)).to(ppg.device)

print(f'len seq: {len_seq}')

max_ilen = max(len_seq)

# indices of each sub segment
indices = torch.arange(max_len_seg*2, device=device)\
            .unsqueeze(0).expand(batch_size*max_num_seg, -1)

indices.shape

len seq: tensor([300], device='cuda:0')


torch.Size([16, 64])

In [8]:
indices

tensor([[ 0,  1,  2,  ..., 61, 62, 63],
        [ 0,  1,  2,  ..., 61, 62, 63],
        [ 0,  1,  2,  ..., 61, 62, 63],
        ...,
        [ 0,  1,  2,  ..., 61, 62, 63],
        [ 0,  1,  2,  ..., 61, 62, 63],
        [ 0,  1,  2,  ..., 61, 62, 63]], device='cuda:0')

In [9]:
# scales of each sub segment
scales = torch.rand(batch_size*max_num_seg, 
                    device=device) + 0.5
scales

tensor([1.4114, 1.0726, 0.7010, 1.1393, 1.0577, 1.2919, 0.5459, 0.7948, 1.1990,
        0.9711, 1.2421, 1.0016, 1.4341, 0.8457, 0.5923, 1.3101],
       device='cuda:0')

In [10]:
idx_scaled = indices / scales.unsqueeze(-1)
idx_scaled

tensor([[  0.0000,   0.7085,   1.4170,  ...,  43.2193,  43.9278,  44.6363],
        [  0.0000,   0.9323,   1.8647,  ...,  56.8728,  57.8051,  58.7375],
        [  0.0000,   1.4266,   2.8532,  ...,  87.0212,  88.4477,  89.8743],
        ...,
        [  0.0000,   1.1825,   2.3650,  ...,  72.1312,  73.3137,  74.4961],
        [  0.0000,   1.6882,   3.3764,  ..., 102.9814, 104.6696, 106.3579],
        [  0.0000,   0.7633,   1.5266,  ...,  46.5607,  47.3239,  48.0872]],
       device='cuda:0')

In [11]:
idx_scaled_fl = torch.floor(idx_scaled)
idx_scaled_fl

tensor([[  0.,   0.,   1.,  ...,  43.,  43.,  44.],
        [  0.,   0.,   1.,  ...,  56.,  57.,  58.],
        [  0.,   1.,   2.,  ...,  87.,  88.,  89.],
        ...,
        [  0.,   1.,   2.,  ...,  72.,  73.,  74.],
        [  0.,   1.,   3.,  ..., 102., 104., 106.],
        [  0.,   0.,   1.,  ...,  46.,  47.,  48.]], device='cuda:0')

In [12]:
lambda_ = idx_scaled - idx_scaled_fl
lambda_

tensor([[0.0000, 0.7085, 0.4170,  ..., 0.2193, 0.9278, 0.6363],
        [0.0000, 0.9323, 0.8647,  ..., 0.8728, 0.8051, 0.7375],
        [0.0000, 0.4266, 0.8532,  ..., 0.0212, 0.4477, 0.8743],
        ...,
        [0.0000, 0.1825, 0.3650,  ..., 0.1312, 0.3137, 0.4961],
        [0.0000, 0.6882, 0.3764,  ..., 0.9814, 0.6696, 0.3579],
        [0.0000, 0.7633, 0.5266,  ..., 0.5607, 0.3239, 0.0872]],
       device='cuda:0')

In [13]:
len_seg = torch.randint(low=min_len_seg, 
                        high=max_len_seg, 
                        size=(batch_size*max_num_seg,1),
                        device=device)
len_seg

tensor([[26],
        [26],
        [23],
        [24],
        [20],
        [20],
        [21],
        [20],
        [25],
        [29],
        [24],
        [29],
        [24],
        [29],
        [30],
        [25]], device='cuda:0')

In [14]:
# end point of each segment
idx_mask = idx_scaled_fl < (len_seg - 1)
idx_mask

tensor([[ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False],
        ...,
        [ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False]], device='cuda:0')

In [15]:
offset = len_seg.view(batch_size, -1).cumsum(dim=-1)
offset

tensor([[ 26,  52,  75,  99, 119, 139, 160, 180, 205, 234, 258, 287, 311, 340,
         370, 395]], device='cuda:0')

In [16]:
# offset starts from the 2nd segment
offset = F.pad(offset[:, :-1], (1,0), value=0).view(-1, 1)

offset

tensor([[  0],
        [ 26],
        [ 52],
        [ 75],
        [ 99],
        [119],
        [139],
        [160],
        [180],
        [205],
        [234],
        [258],
        [287],
        [311],
        [340],
        [370]], device='cuda:0')

In [17]:
idx_scaled_org = idx_scaled_fl + offset
idx_scaled_org

tensor([[  0.,   0.,   1.,  ...,  43.,  43.,  44.],
        [ 26.,  26.,  27.,  ...,  82.,  83.,  84.],
        [ 52.,  53.,  54.,  ..., 139., 140., 141.],
        ...,
        [311., 312., 313.,  ..., 383., 384., 385.],
        [340., 341., 343.,  ..., 442., 444., 446.],
        [370., 370., 371.,  ..., 416., 417., 418.]], device='cuda:0')

In [18]:
len_seq_rp = torch.repeat_interleave(len_seq, max_num_seg)
len_seq_rp

tensor([300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300, 300,
        300, 300], device='cuda:0')

In [19]:
idx_mask_org = idx_scaled_org < (len_seq_rp - 1).unsqueeze(-1)
idx_mask_org

tensor([[ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]], device='cuda:0')

In [20]:
idx_mask_final = idx_mask & idx_mask_org
idx_mask_final

tensor([[ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False],
        [ True,  True,  True,  ..., False, False, False],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]], device='cuda:0')

In [21]:
counts = idx_mask_final.sum(dim=-1).view(batch_size, -1).sum(dim=-1)
counts

tensor([312], device='cuda:0')

In [22]:
index_1 = torch.repeat_interleave(torch.arange(batch_size, 
                                    device=device), counts)
index_1

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [23]:
index_2_fl = idx_scaled_org[idx_mask_final].long()
index_2_fl

tensor([  0,   0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,
          9,  10,  11,  12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,
         19,  20,  21,  21,  22,  23,  24,  24,  26,  26,  27,  28,  29,  30,
         31,  32,  33,  34,  35,  36,  37,  38,  39,  39,  40,  41,  42,  43,
         44,  45,  46,  47,  48,  49,  50,  52,  53,  54,  56,  57,  59,  60,
         61,  63,  64,  66,  67,  69,  70,  71,  73,  75,  75,  76,  77,  78,
         79,  80,  81,  82,  82,  83,  84,  85,  86,  87,  88,  89,  89,  90,
         91,  92,  93,  94,  95,  96,  96,  97,  99,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 116,
        117, 119, 119, 120, 121, 122, 122, 123, 124, 125, 125, 126, 127, 128,
        129, 129, 130, 131, 132, 132, 133, 134, 135, 136, 136, 137, 139, 140,
        142, 144, 146, 148, 149, 151, 153, 155, 157, 160, 161, 162, 163, 165,
        166, 167, 168, 170, 171, 172, 173, 175, 176, 177, 178, 1

In [24]:
index_2_cl = index_2_fl + 1
index_2_cl

tensor([  1,   1,   2,   3,   3,   4,   5,   5,   6,   7,   8,   8,   9,  10,
         10,  11,  12,  13,  13,  14,  15,  15,  16,  17,  18,  18,  19,  20,
         20,  21,  22,  22,  23,  24,  25,  25,  27,  27,  28,  29,  30,  31,
         32,  33,  34,  35,  36,  37,  38,  39,  40,  40,  41,  42,  43,  44,
         45,  46,  47,  48,  49,  50,  51,  53,  54,  55,  57,  58,  60,  61,
         62,  64,  65,  67,  68,  70,  71,  72,  74,  76,  76,  77,  78,  79,
         80,  81,  82,  83,  83,  84,  85,  86,  87,  88,  89,  90,  90,  91,
         92,  93,  94,  95,  96,  97,  97,  98, 100, 100, 101, 102, 103, 104,
        105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 117,
        118, 120, 120, 121, 122, 123, 123, 124, 125, 126, 126, 127, 128, 129,
        130, 130, 131, 132, 133, 133, 134, 135, 136, 137, 137, 138, 140, 141,
        143, 145, 147, 149, 150, 152, 154, 156, 158, 161, 162, 163, 164, 166,
        167, 168, 169, 171, 172, 173, 174, 176, 177, 178, 179, 1

In [25]:
y_fl = ppg[index_1, index_2_fl, :]
y_cl = ppg[index_1, index_2_cl, :]


/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [153,0,0], thread: [96,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [153,0,0], thread: [97,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [153,0,0], thread: [98,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [153,0,0], thread: [99,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [153,0,0], thread: [100,0,0] Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"` failed.
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:93: operator(): block: [

In [26]:
lambda_f = lambda_[idx_mask_final].unsqueeze(-1)
lambda_f

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:








y = (1-lambda_f)*y_fl + lambda_f*y_cl

sequences = torch.split(y, counts.tolist(), dim=0)