In [1]:
import sys
sys.path.insert(0, '../')
import os
import shutil
import math
from itertools import accumulate
from collections import Counter
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchaudio
from AudioDataset import AudioDataset

## Import AudioDataset and creat train_iter, valid_iter, test_iter

In [2]:
train_dir = '../data/train'
valid_dir = '../data/valid'
test_dir = '../data/test'
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
sample_rate = 44100
num_sample = 441000
mel_sepectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=sample_rate,
    n_fft=1024,
    hop_length=512,
    n_mels=64
)

In [3]:
train_df = pd.read_csv('../data/train_labels.csv', sep=',')
valid_df = pd.read_csv('../data/valid_labels.csv', sep=',')
test_df = pd.read_csv('../data/test_labels.csv', sep=',')

In [5]:
train_dataset = AudioDataset(train_df, train_dir, mel_sepectrogram, sample_rate, num_sample, device, is_train_or_valid=True)
valid_dataset = AudioDataset(valid_df, valid_dir, mel_sepectrogram, sample_rate, num_sample, device, is_train_or_valid=True)
test_dataset = AudioDataset(test_df, test_dir, mel_sepectrogram, sample_rate, num_sample, device, is_train_or_valid=False)

In [6]:
batch_size = 128
train_iter = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
valid_iter = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_iter = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

## test

In [7]:
len(test_dataset), len(valid_dataset), len(test_dataset)

(8620, 3138, 8620)

In [8]:
train_dataset[1], valid_dataset[2], test_dataset[3]

((tensor([[[1.1633e+02, 3.8026e+02, 3.4064e+02,  ..., 5.7142e+02,
            3.7510e+02, 1.3724e+03],
           [2.3848e+01, 4.9045e+01, 4.3070e+01,  ..., 8.2234e+01,
            5.8340e+01, 1.7947e+02],
           [3.0780e+00, 2.0690e-01, 3.3941e-02,  ..., 2.4016e+00,
            3.0143e+00, 1.5435e+00],
           ...,
           [8.9861e-04, 1.3113e-03, 1.0714e-03,  ..., 2.9557e-04,
            3.0149e-04, 3.4686e-04],
           [8.8186e-04, 1.5311e-03, 1.3008e-03,  ..., 3.3841e-04,
            3.5703e-04, 4.5909e-04],
           [7.4307e-04, 9.5932e-04, 8.2296e-04,  ..., 2.3123e-04,
            3.4594e-04, 1.9274e-04]]], device='cuda:0'),
  1),
 (tensor([[[ 0.6874,  0.4845,  0.1313,  ...,  1.6760,  1.8902,  1.4898],
           [ 4.1843,  8.1157,  2.0886,  ..., 42.5024, 24.5852, 16.4842],
           [ 9.9030, 10.6194,  3.0169,  ..., 29.5416, 59.8446, 68.7926],
           ...,
           [ 0.1746,  0.3060,  0.1547,  ...,  0.1943,  0.3051,  0.3026],
           [ 0.2404,  0.2430,  0

In [9]:
train_dataset[100][0].shape, valid_dataset[200][0].shape, test_dataset[300].shape

(torch.Size([1, 64, 862]), torch.Size([1, 64, 862]), torch.Size([1, 64, 862]))

In [10]:
next(iter(train_iter))

[tensor([[[[8.2308e-08, 2.6577e-08, 3.7945e-07,  ..., 1.2835e-05,
            4.8284e-05, 2.8868e-03],
           [9.7987e-08, 5.7030e-08, 4.2895e-07,  ..., 2.8272e-04,
            6.5776e-04, 3.0101e-03],
           [1.2398e-07, 3.0621e-08, 9.5070e-08,  ..., 2.6137e-03,
            2.2891e-03, 1.2269e-02],
           ...,
           [2.4960e-06, 2.5149e-06, 2.4092e-06,  ..., 3.0111e-05,
            2.4996e-05, 7.8534e-05],
           [2.8452e-06, 4.0500e-06, 2.8573e-06,  ..., 2.1592e-05,
            2.1787e-05, 3.7100e-05],
           [2.4764e-06, 3.2499e-06, 2.0939e-06,  ..., 8.0805e-06,
            6.2304e-06, 5.6010e-06]]],
 
 
         [[[6.5579e-01, 7.6493e-02, 2.9243e-02,  ..., 6.7875e-01,
            2.4297e-01, 4.4928e-02],
           [2.9925e-01, 6.2422e-01, 3.1942e-02,  ..., 1.3887e+00,
            1.5327e-01, 2.2501e-01],
           [4.2503e-02, 8.1509e-01, 9.4089e-02,  ..., 9.2791e-01,
            6.9442e-02, 7.3675e-01],
           ...,
           [2.1925e-04, 1.4650e-04,