In [2]:
'''
data_manager.py
A file that loads saved features and converts them into PyTorch DataLoaders.
'''
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader

# Class based on PyTorch Dataset
class GTZANDataset(Dataset):
	"""Map-style dataset that pairs each feature entry with its label.

	Args:
		x: indexable feature container exposing ``.shape``; its first axis
			enumerates the samples.
		y: indexable label container aligned with ``x``.
	"""

	def __init__(self, x, y):
		self.x, self.y = x, y

	def __len__(self):
		# The number of samples is the size of the leading feature axis.
		sample_count = self.x.shape[0]
		return sample_count

	def __getitem__(self, index):
		features = self.x[index]
		label = self.y[index]
		return features, label

# Function to get genre index for the given file
def get_label(file_name, hparams):
	"""Return the position in ``hparams.genres`` of the genre named by a file.

	The genre is the part of ``file_name`` that precedes its first ``.``.

	Raises:
		ValueError: if that prefix is not an entry of ``hparams.genres``.
	"""
	genre, _, _ = file_name.partition('.')
	return hparams.genres.index(genre)

# Function for loading entire data from given dataset and return numpy array
def load_dataset(set_name, hparams):
	"""Load every saved ``.npy`` feature file of one split into stacked arrays.

	The directory ``<hparams.feature_path>/<set_name>`` is walked recursively.
	Each ``.npy`` file contributes one feature array and one label, the label
	being derived from the file name by ``get_label``.

	Args:
		set_name: sub-directory of ``hparams.feature_path`` to read
			(the callers in this file use 'train', 'valid' and 'test').
		hparams: settings object providing ``feature_path`` and ``genres``.

	Returns:
		Tuple ``(x, y)``: ``x`` stacks the loaded arrays along a new leading
		axis, ``y`` is a 1-D array of genre indices in the same order.

	Raises:
		ValueError: if no ``.npy`` file is found under the split directory, if
			the loaded arrays do not share one shape, or if a file name does not
			start with a known genre.
	"""
	dataset_path = os.path.join(hparams.feature_path, set_name)

	features = []
	labels = []
	for root, dirs, files in os.walk(dataset_path):
		# os.walk yields entries in arbitrary file-system order. Sorting the
		# directory list in place and the file list makes the sample order
		# reproducible across machines and runs.
		dirs.sort()
		for file_name in sorted(files):
			# Ignore stray non-feature files (e.g. .DS_Store) instead of
			# letting np.load fail on them.
			if not file_name.endswith('.npy'):
				continue
			features.append(np.load(os.path.join(root, file_name)))
			labels.append(get_label(file_name, hparams))

	if not features:
		# np.stack([]) would raise the same exception type with an opaque message.
		raise ValueError("No .npy feature files found under '%s'" % dataset_path)

	return np.stack(features), np.array(labels)

# Function that loads the numpy data, normalizes it, and returns dataloaders for train, valid, and test
def get_dataloader(hparams):
	"""Build the train/valid/test DataLoaders from the saved feature arrays.

	Each split is read with ``load_dataset``. All three splits are standardized
	with one scalar mean and one scalar standard deviation, both computed over
	every value of the training features only.

	Args:
		hparams: settings object; ``batch_size`` is read here and the object is
			passed through to ``load_dataset``.

	Returns:
		Tuple ``(train_loader, valid_loader, test_loader)``. Only the training
		loader shuffles; no loader drops the final partial batch.
	"""
	split_names = ('train', 'valid', 'test')
	arrays = {name: load_dataset(name, hparams) for name in split_names}

	# Statistics come from the training split alone and are reused everywhere.
	train_features = arrays['train'][0]
	mean = np.mean(train_features)
	std = np.std(train_features)

	loaders = []
	for name in split_names:
		features, labels = arrays[name]
		standardized = (features - mean)/std
		loaders.append(DataLoader(
			GTZANDataset(standardized, labels),
			batch_size=hparams.batch_size,
			shuffle=(name == 'train'),
			drop_last=False,
		))

	return tuple(loaders)


In [3]:
'''
data_manager.py
A file that loads saved features and converts them into PyTorch DataLoaders.
'''
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader

# Class based on PyTorch Dataset
class GTZANDataset(Dataset):
	"""Map-style dataset that pairs each feature entry with its label.

	Args:
		x: indexable feature container exposing ``.shape``; its first axis
			enumerates the samples.
		y: indexable label container aligned with ``x``.
	"""

	def __init__(self, x, y):
		self.x, self.y = x, y

	def __len__(self):
		# The number of samples is the size of the leading feature axis.
		sample_count = self.x.shape[0]
		return sample_count

	def __getitem__(self, index):
		features = self.x[index]
		label = self.y[index]
		return features, label

# Function to get genre index for the given file
def get_label(file_name, hparams):
	"""Return the position in ``hparams.genres`` of the genre named by a file.

	The genre is the part of ``file_name`` that precedes its first ``.``.

	Raises:
		ValueError: if that prefix is not an entry of ``hparams.genres``.
	"""
	genre, _, _ = file_name.partition('.')
	return hparams.genres.index(genre)

# Function for loading entire data from given dataset and return numpy array
def load_dataset(set_name, hparams):
	"""Load every saved ``.npy`` feature file of one split into stacked arrays.

	The directory ``<hparams.feature_path>/<set_name>`` is walked recursively.
	Each ``.npy`` file contributes one feature array and one label, the label
	being derived from the file name by ``get_label``.

	Args:
		set_name: sub-directory of ``hparams.feature_path`` to read
			(the callers in this file use 'train', 'valid' and 'test').
		hparams: settings object providing ``feature_path`` and ``genres``.

	Returns:
		Tuple ``(x, y)``: ``x`` stacks the loaded arrays along a new leading
		axis, ``y`` is a 1-D array of genre indices in the same order.

	Raises:
		ValueError: if no ``.npy`` file is found under the split directory, if
			the loaded arrays do not share one shape, or if a file name does not
			start with a known genre.
	"""
	dataset_path = os.path.join(hparams.feature_path, set_name)

	features = []
	labels = []
	for root, dirs, files in os.walk(dataset_path):
		# os.walk yields entries in arbitrary file-system order. Sorting the
		# directory list in place and the file list makes the sample order
		# reproducible across machines and runs.
		dirs.sort()
		for file_name in sorted(files):
			# Ignore stray non-feature files (e.g. .DS_Store) instead of
			# letting np.load fail on them.
			if not file_name.endswith('.npy'):
				continue
			features.append(np.load(os.path.join(root, file_name)))
			labels.append(get_label(file_name, hparams))

	if not features:
		# np.stack([]) would raise the same exception type with an opaque message.
		raise ValueError("No .npy feature files found under '%s'" % dataset_path)

	return np.stack(features), np.array(labels)

# Function that loads the numpy data, normalizes it, and returns dataloaders for train, valid, and test
def get_dataloader(hparams):
	"""Build the train/valid/test DataLoaders from the saved feature arrays.

	Each split is read with ``load_dataset``. All three splits are standardized
	with one scalar mean and one scalar standard deviation, both computed over
	every value of the training features only.

	Args:
		hparams: settings object; ``batch_size`` is read here and the object is
			passed through to ``load_dataset``.

	Returns:
		Tuple ``(train_loader, valid_loader, test_loader)``. Only the training
		loader shuffles; no loader drops the final partial batch.
	"""
	split_names = ('train', 'valid', 'test')
	arrays = {name: load_dataset(name, hparams) for name in split_names}

	# Statistics come from the training split alone and are reused everywhere.
	train_features = arrays['train'][0]
	mean = np.mean(train_features)
	std = np.std(train_features)

	loaders = []
	for name in split_names:
		features, labels = arrays[name]
		standardized = (features - mean)/std
		loaders.append(DataLoader(
			GTZANDataset(standardized, labels),
			batch_size=hparams.batch_size,
			shuffle=(name == 'train'),
			drop_last=False,
		))

	return tuple(loaders)

In [17]:
# import argparse

class HParams(object):
	"""Container for dataset, feature-extraction and training settings.

	Every setting is a plain instance attribute initialised to the default
	below. Keyword arguments override individual defaults, e.g.
	``HParams(batch_size=32, device=0)``.

	Raises:
		TypeError: if a keyword argument does not name an existing setting.
	"""

	def __init__(self, **overrides):
		# Dataset Settings
		self.dataset_path = './gtzan'
		self.feature_path = './feature'
		self.genres = ['classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae']

		# Feature Parameters
		self.sample_rate = 22050
		self.fft_size = 1024
		self.win_size = 1024
		self.hop_size = 512
		self.num_mels = 128
		self.feature_length = 1024  # audio length = feature_length*hop_size/sample_rate (s)

		# Training Parameters
		self.device = 1  # 0: CPU, 1: GPU0, 2: GPU1, ...
		self.batch_size = 10
		self.num_epochs = 200
		self.learning_rate = 1e-2
		self.stopping_rate = 1e-10
		self.weight_decay = 1e-6
		self.momentum = 0.9
		self.factor = 0.2
		# Number of epochs without improvement; ReduceLROnPlateau documents this as an int.
		self.patience = 3

		# Apply caller overrides last, rejecting misspelled names so a typo
		# cannot silently leave a default in place.
		for name, value in overrides.items():
			if not hasattr(self, name):
				raise TypeError("Unknown hyper-parameter: %r" % name)
			setattr(self, name, value)

	# Function for parsing argument and set hyper parameters
# 	def parse_argument(self, print_argument=True):
# 		parser = argparse.ArgumentParser()
# 		for var in vars(self):
# 			value = getattr(hparams, var)
# 			argument = '--' + var
# 			parser.add_argument(argument, type=type(value), default=value)

# 		args = parser.parse_args()
# 		for var in vars(self):
# 			setattr(hparams, var, getattr(args, var))

# 		if print_argument:
# 			print('-------------------------')
# 			print('Hyper Parameter Settings')
# 			print('-------------------------')
# 			for var in vars(self):
# 				value = getattr(hparams, var)
# 				print(var + ': ' + str(value))
# 			print('-------------------------')

# Notebook-wide settings instance. main() reads this global directly, while the
# data-loading helpers receive the settings object as an argument.
hparams = HParams()
# hparams.parse_argument()

In [18]:
'''
model_archive.py
A file that contains neural network models.
You can also implement your own model here.
'''
import torch.nn as nn

class Baseline(nn.Module):
    """1-D convolutional classifier over the time axis of the input features.

    Five Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d stages are followed by two
    linear layers applied back to back (there is no activation between them).

    Args:
        hparams: settings object; ``num_mels`` sets the input channel count and
            ``len(hparams.genres)`` sets the number of output scores.
    """

    def __init__(self, hparams):
        super(Baseline, self).__init__()

        # Stages are created in the order they run, under the original
        # attribute names, so parameter names and init order are unchanged.
        self.conv0 = self._make_stage(hparams.num_mels, 64, kernel_size=8, pool_size=4)
        self.conv1 = self._make_stage(64, 128, kernel_size=4, pool_size=3)
        self.conv2 = self._make_stage(128, 256, kernel_size=8, pool_size=4)
        self.conv3 = self._make_stage(256, 512, kernel_size=4, pool_size=4)
        self.conv4 = self._make_stage(512, 64, kernel_size=4, pool_size=4)

        # linear0 takes 64 inputs, i.e. the 64 channels of conv4 times the
        # remaining length; the stages must therefore reduce the length to 1.
        self.linear0 = nn.Linear(64, 96)
        self.linear3 = nn.Linear(96, len(hparams.genres))

    @staticmethod
    def _make_stage(in_channels, out_channels, kernel_size, pool_size):
        """Return one stride-1, padding-1 convolution stage with non-overlapping max pooling."""
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size, stride=1, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
            nn.MaxPool1d(pool_size, stride=pool_size),
        )

    def forward(self, x):
        """Return one score per genre for each item of the batch.

        Axes 1 and 2 of ``x`` are swapped before the first convolution, so the
        last axis of ``x`` must hold the ``num_mels`` channels.
        """
        features = x.transpose(1, 2)
        for stage in (self.conv0, self.conv1, self.conv2, self.conv3, self.conv4):
            features = stage(features)

        # Merge the channel and length axes into a single feature vector.
        flat = features.view(features.size(0), features.size(1)*features.size(2))
        return self.linear3(self.linear0(flat))

In [19]:
# Scratch layer stack built at notebook scope. It differs from Baseline.conv0
# (32 output channels, no padding, pool size 8), and no other visible cell
# refers to this module-level `conv0`.
# NOTE(review): looks like a leftover experiment -- confirm before removing.
conv0 = nn.Sequential(
        nn.Conv1d(hparams.num_mels, 32, kernel_size=8, stride=1, padding=0),
        nn.BatchNorm1d(32),
        nn.ReLU(),
        nn.MaxPool1d(8, stride=8)
        )
        

In [20]:
import torch
# Only the last expression of a cell is displayed, so the torch.__version__
# line produces no output; the cell shows the cuDNN version only.
torch.__version__
torch.backends.cudnn.version()

7401

In [21]:
'''
train_test.py
A file for training model for genre classification.
Please check the device in hparams.py before you run this code.
'''
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau

# import data_manager
# import models
# from hparams import hparams

# Wrapper class to run PyTorch model
class Runner(object):
    """Bundle a Baseline model with its loss, optimizer and LR scheduler.

    Args:
        hparams: settings object providing ``learning_rate``, ``factor``,
            ``patience``, ``stopping_rate`` and ``device`` (0 selects the CPU,
            ``n > 0`` selects CUDA device ``n - 1``), plus whatever
            ``Baseline`` reads from it.
    """

    def __init__(self, hparams):
        self.model = Baseline(hparams)
        self.criterion = nn.CrossEntropyLoss()
        self.device = torch.device("cpu")

        # Move the model to its device before the optimizer is built, as the
        # torch.optim documentation recommends.
        if hparams.device > 0:
            gpu_index = hparams.device - 1
            torch.cuda.set_device(gpu_index)
            self.model.cuda(gpu_index)
            self.criterion.cuda(gpu_index)
            self.device = torch.device("cuda:" + str(gpu_index))

        # Use the Adam optimizer.
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=hparams.learning_rate)
        #self.optimizer = torch.optim.SGD(self.model.parameters(), lr=hparams.learning_rate, momentum=hparams.momentum)
        self.scheduler = ReduceLROnPlateau(self.optimizer, mode='min', factor=hparams.factor, patience=hparams.patience, verbose=True)
        self.learning_rate = hparams.learning_rate
        self.stopping_rate = hparams.stopping_rate

    # Accuracy function works like loss function in PyTorch
    def accuracy(self, source, target):
        """Return the fraction of rows of ``source`` whose arg-max equals ``target``.

        Args:
            source: 2-D tensor of per-class scores, one row per sample.
            target: 1-D tensor of class indices.
        """
        source = source.max(1)[1].long().cpu()
        target = target.cpu()
        correct = (source == target).sum().item()

        return correct/float(source.size(0))

    # Running model for train, test and validation. mode: 'train' for training, 'eval' for validation and test
    def run(self, dataloader, mode='train'):
        """Run one pass over ``dataloader`` and return ``(mean_loss, mean_accuracy)``.

        Args:
            dataloader: iterable of ``(x, y)`` batches that also exposes
                ``.dataset`` (its length is the averaging denominator).
            mode: ``'train'`` updates the weights; any other value evaluates only.

        Returns:
            Per-sample mean loss and mean accuracy over the whole pass.
        """
        # Compare by value: `mode is 'train'` only worked through string interning.
        training = (mode == 'train')
        self.model.train(training)

        epoch_loss = 0.0
        epoch_acc = 0.0

        # Evaluation needs no autograd graph; disabling it saves memory and time.
        with torch.set_grad_enabled(training):
            for x, y in dataloader:
                x = x.to(self.device)
                y = y.to(self.device).type(torch.long)

                prediction = self.model(x)
                loss = self.criterion(prediction, y)
                acc = self.accuracy(prediction, y)

                if training:
                    # Clear gradients first so stale values can never leak into this step.
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                # Weight by batch size so a short final batch is averaged correctly.
                epoch_loss += prediction.size(0)*loss.item()
                epoch_acc += prediction.size(0)*acc

        epoch_loss = epoch_loss/len(dataloader.dataset)
        epoch_acc = epoch_acc/len(dataloader.dataset)

        return epoch_loss, epoch_acc

    # Early stopping function for given validation loss
    def early_stop(self, loss, epoch):
        """Step the LR scheduler and report whether training should stop.

        Returns:
            True once the learning rate of the first parameter group has
            dropped below ``self.stopping_rate``.
        """
        self.scheduler.step(loss, epoch)
        self.learning_rate = self.optimizer.param_groups[0]['lr']
        stop = self.learning_rate < self.stopping_rate

        return stop

def device_name(device):
    """Return a printable name for a device index.

    Args:
        device: integer index; values of 0 or below mean the CPU (the same
            rule Runner applies with ``hparams.device > 0``), and ``n > 0``
            means CUDA device ``n - 1``.

    Returns:
        ``'CPU'`` or ``'GPU:<n - 1>'``.
    """
    if device <= 0:
        return 'CPU'
    return 'GPU:' + str(device - 1)

def main():
    """Train with per-epoch validation, stop early if requested, then print test accuracy.

    Reads the notebook-level ``hparams`` object. After each epoch the
    validation loss is handed to ``Runner.early_stop``, and the loop ends as
    soon as that call returns True.
    """
    loaders = get_dataloader(hparams)
    train_loader, valid_loader, test_loader = loaders
    runner = Runner(hparams)

    print('Training on ' + device_name(hparams.device))

    total_epochs = hparams.num_epochs
    for epoch_number in range(1, total_epochs + 1):
        train_loss, train_acc = runner.run(train_loader, 'train')
        valid_loss, valid_acc = runner.run(valid_loader, 'eval')

        report = (epoch_number, total_epochs, train_loss, train_acc, valid_loss, valid_acc)
        print("[Epoch %d/%d] [Train Loss: %.4f] [Train Acc: %.4f] [Valid Loss: %.4f] [Valid Acc: %.4f]" % report)

        should_stop = runner.early_stop(valid_loss, epoch_number)
        if should_stop:
            break

    # Only the accuracy of the final test pass is reported.
    _, test_acc = runner.run(test_loader, 'eval')
    print("Training Finished")
    print("Test Accuracy: %.2f%%" % (100*test_acc))

In [22]:
# Run the full train / validate / test loop defined in main().
main()

Training on GPU:0
[Epoch 1/200] [Train Loss: 1.9742] [Train Acc: 0.2096] [Valid Loss: 2.2881] [Valid Acc: 0.3067]
[Epoch 2/200] [Train Loss: 1.7268] [Train Acc: 0.3201] [Valid Loss: 1.9844] [Valid Acc: 0.1933]
[Epoch 3/200] [Train Loss: 1.6361] [Train Acc: 0.3428] [Valid Loss: 1.5047] [Valid Acc: 0.3933]
[Epoch 4/200] [Train Loss: 1.4850] [Train Acc: 0.4873] [Valid Loss: 2.1161] [Valid Acc: 0.3867]
[Epoch 5/200] [Train Loss: 1.5019] [Train Acc: 0.4306] [Valid Loss: 1.4650] [Valid Acc: 0.5200]
[Epoch 6/200] [Train Loss: 1.3466] [Train Acc: 0.5127] [Valid Loss: 1.6637] [Valid Acc: 0.4200]
[Epoch 7/200] [Train Loss: 1.3034] [Train Acc: 0.5297] [Valid Loss: 2.0092] [Valid Acc: 0.4267]
[Epoch 8/200] [Train Loss: 1.2625] [Train Acc: 0.5269] [Valid Loss: 1.5617] [Valid Acc: 0.4333]
[Epoch 9/200] [Train Loss: 1.1928] [Train Acc: 0.5892] [Valid Loss: 2.0687] [Valid Acc: 0.4067]
Epoch     9: reducing learning rate of group 0 to 2.0000e-03.
[Epoch 10/200] [Train Loss: 1.0686] [Train Acc: 0.6346] 

In [8]:
# Check whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True