In [None]:
'''
Create the dataset with traditional method.

Input:
    iterable of file paths, one np.load-able file per song

Return:
    (data, label) as np.array
'''
import numpy as np
def get_dataset(filePath):
    """Load every song file and stack the features and labels along axis 0.

    Input:
        filePath: iterable of paths; each file is opened with ``np.load`` and
            must expose the entries ``'cqt'`` and ``'label'``.
            NOTE(review): assumes ``.npz`` archives (the loaded object is used
            as a context manager) -- confirm against the data on disk.

    Return:
        (allCQT, allLabel) as np.array, concatenated in iteration order, or
        (None, None) when ``filePath`` yields no files.
    """
    cqt_chunks = []
    label_chunks = []
    for song in filePath:
        # Close each archive as soon as its arrays are read so that file
        # handles do not pile up across a large collection of songs.
        with np.load(song) as data:
            cqt_chunks.append(data['cqt'])
            label_chunks.append(data['label'])

    if not cqt_chunks:
        return None, None

    # A single concatenation copies each chunk once; appending inside the loop
    # would re-copy the whole accumulated array for every file.
    allCQT = np.concatenate(cqt_chunks, axis=0)
    allLabel = np.concatenate(label_chunks, axis=0)
    return allCQT, allLabel

In [None]:
'''
Partition the dataset of np.array.

Input:
    data and label as np.array

Return:
    (x_train, x_val, x_test, y_train, y_val, y_test) as np.array
'''
import numpy as np
def partitionDataset(data, label, val_ratio=0.1, test_ratio=0.1, seed=0):
    """Randomly split data and label into train, validation and test parts.

    Input:
        data: array indexed along its first axis, one sample per entry.
        label: array indexed with the same sample indices as ``data``.
        val_ratio: fraction of the samples assigned to the validation part.
        test_ratio: fraction of the samples assigned to the test part.
        seed: seed of the shuffle; the same seed gives the same partition.

    Return:
        x_train, x_val, x_test, y_train, y_val, y_test
    """
    size = data.shape[0]

    # A private RandomState produces the same permutation as seeding the
    # global generator would, but leaves the caller's np.random state alone.
    indices = np.random.RandomState(seed).permutation(size)

    # Boundaries inside the shuffled index array: [0, train_end) is train,
    # [train_end, val_end) is validation, the remainder is test.
    train_end = int((1 - val_ratio - test_ratio) * size)
    val_end = int((1 - test_ratio) * size)
    training_idx = indices[:train_end]
    val_idx = indices[train_end:val_end]
    test_idx = indices[val_end:]

    # Indexing the first axis only also supports 1-D data.
    x_train, x_val, x_test = data[training_idx], data[val_idx], data[test_idx]
    y_train, y_val, y_test = label[training_idx], label[val_idx], label[test_idx]

    return x_train, x_val, x_test, y_train, y_val, y_test

In [None]:
'''
PyTorch wrapper of beatles dataset.

Initialization:
    file path to the dataset

Return:
    PyTorch dataset
'''
from torch.utils.data import Dataset
class BeatlesDataset(Dataset):
    """Map-style PyTorch dataset over the arrays returned by get_dataset.

    Indexing yields a ``[x, y]`` list: ``x`` is the selected entry of the CQT
    array as a tensor, ``y`` is a zero vector of length ``output_size`` with a
    1 written at the position given by the label.
    """

    def __init__(self, filePath, output_size=25):
        # Samples are placed on the GPU when CUDA is available, else on the CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.allCQT, self.allLabel = get_dataset(filePath)
        self.output_size = output_size

    def __len__(self):
        # Number of entries along the first axis of the CQT array.
        return len(self.allCQT)

    def __getitem__(self, idx):
        # Tensor indices are converted to plain Python values before being
        # used to index the numpy arrays.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        features = torch.from_numpy(self.allCQT[index, :]).to(self.device)
        target = torch.zeros(self.output_size).to(self.device)
        target[self.allLabel[index]] = 1
        return [features, target]

In [None]:
'''
Split the dataset and create respective data_loader.

Input:
    Whole dataset

Return:
    3 dataloaders (train, validation, test)
'''
import torch
from torch.utils.data import DataLoader
def datasetSplit(dataset, val_ratio=0.1, test_ratio=0.1, batch_size=512, shuffle=True, drop_last=True):
    """Randomly split a dataset and wrap each part in a DataLoader.

    Input:
        dataset: dataset supporting ``len()``; split with ``random_split``.
        val_ratio: fraction of the samples assigned to the validation part.
        test_ratio: fraction of the samples assigned to the test part.
        batch_size: batch size used by all three loaders.
        shuffle: passed to every DataLoader.
        drop_last: passed to every DataLoader; when True, a part smaller than
            ``batch_size`` produces no batches at all.

    Return:
        train_loader, val_loader, test_loader
    """
    # Validation and test sizes are truncated; train takes the remainder so
    # that the three sizes always add up to len(dataset).
    total = len(dataset)
    val_size = int(val_ratio * total)
    test_size = int(test_ratio * total)
    train_size = total - val_size - test_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size]
    )

    # Honour the caller's shuffle/drop_last choices instead of forcing True.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)

    return train_loader, val_loader, test_loader

In [None]:
# Example use case
# beatles_dataset = BeatlesDataset(npzs_8th)
# train_loader, val_loader, test_loader = datasetSplit(beatles_dataset, batch_size=BATCH_SIZE)