## Import Libraries

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import math
import seaborn as sns
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import csv
import time
import random
import matplotlib.image as img
# import warnings
import warnings
# filter warnings
warnings.filterwarnings('ignore')

import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import RandomSampler
from torch.utils.data import TensorDataset
from torchvision.utils import make_grid
import torchvision.models as models
import time
from PIL import Image
from tqdm import tqdm
import gc
from torch.utils.data import DataLoader

## Import Dataset

In [2]:
class Musicdata(torch.utils.data.Dataset):
    """Dataset backed by an ``.npz`` archive holding features and labels.

    The archive must contain ``arr_0`` (one feature array per sample, each
    reshaped here to ``(1, 640, 128)``) and ``arr_1`` (one label row per
    sample; the class index is the position of the first entry equal to 1).

    Args:
        npz: Path (or file object) accepted by ``np.load``.
        mode: ``'test'`` makes ``__getitem__`` return only the features;
            any other value returns ``(features, class_index)``.
    """

    def __init__(self, npz, mode='train'):
        self.mode = mode
        # np.load keeps the archive open for lazy reads; the context manager
        # closes it once both arrays have been materialised in memory.
        with np.load(npz) as npzfile:
            self.x = [one.reshape(1, 640, 128) for one in npzfile['arr_0']]
            self.y = npzfile['arr_1']

    def __getitem__(self, index):
        data = torch.tensor(self.x[index], dtype=torch.float32)
        if self.mode == 'test':
            return data
        # Decode only the requested row. Decoding every label on each access
        # made a single pass over the dataset quadratic in its size.
        genre = int(np.where(self.y[index] == 1)[0][0])
        return data, torch.tensor(genre)

    def __len__(self):
        return len(self.x)

In [3]:
# One dataset per split; only the 'test' split is served without labels.
dataset_train = Musicdata(npz='../input/fma-small-npz/shuffled_train.npz', mode='train')
dataset_val = Musicdata(npz='../input/fma-small-npz/shuffled_valid.npz', mode='val')
dataset_test = Musicdata(npz='../input/fma-small-npz/test_arr.npz', mode='test')

# All loaders use batches of 64 and iterate in stored order.
# NOTE(review): the training loader is not reshuffled between epochs; the file
# names suggest the data was shuffled once on disk -- confirm this is intended.
train_loader = DataLoader(dataset=dataset_train, batch_size=64, shuffle=False)
val_loader = DataLoader(dataset=dataset_val, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=dataset_test, batch_size=64, shuffle=False)

## Building Model Architecture

In [4]:
# def configuration(number_of_block):
#   cfg_list = []
#   forward_block = number_of_block
#   if number_of_block > 5:
#     forward_block = number_of_block - number_of_block//2
#   for i in range(number_of_block):
#     if i <= forward_block:
#       c_out = 2**(4+i)
#       cfg_list += [c_out, c_out, "M"]
#     if i > forward_block:
#       c_out = c_out/2
#       cfg_list += [c_out, c_out, "M"]

#   return cfg_list

# for i in range(8):
#   print("number_of_block =", i)
#   print(configuration(i))

In [5]:
def configuration(number_of_block, initial_size=1, th_1=3, th_2=5):
    """Build the layer spec consumed by ``make_layers``.

    Every block contributes exactly one ``"M"`` (max-pool) entry, so the
    number of pools always equals ``number_of_block``. Each block index ``i``
    falls into exactly one stage:

    * ``i < th_1``: one conv layer with ``initial_size * 2**i`` channels.
    * ``th_1 <= i < th_2``: the channel count doubles, two conv layers.
    * otherwise: the channel count halves (never below 1), one conv layer.

    Args:
        number_of_block: Number of conv/pool blocks to generate.
        initial_size: Channel count of the first block.
        th_1: First block index that uses the doubled, two-conv stage.
        th_2: First block index that uses the shrinking stage.

    Returns:
        ``(cfg_list, c_out)`` where ``cfg_list`` mixes integer channel counts
        with ``"M"`` markers and ``c_out`` is the channel count of the last
        conv layer (``int(initial_size)`` when no block is generated).
    """
    cfg_list = []
    c_out = initial_size
    for i in range(number_of_block):
        if i < th_1:
            # Growing stage: a single convolution per block.
            c_out = initial_size * 2**i
            cfg_list += [int(c_out), "M"]
        elif i < th_2:
            # Wide stage: double the channels, two convolutions per block.
            c_out *= 2
            cfg_list += [int(c_out), int(c_out), "M"]
        else:
            # Shrinking stage: a single convolution with half the channels.
            # The stages are mutually exclusive (elif/else), so a block can no
            # longer emit two pools when th_2 <= i < th_1. The floor of 1
            # prevents a zero-channel convolution after repeated halving.
            c_out = max(1, int(c_out / 2))
            cfg_list += [c_out, "M"]
    return cfg_list, int(c_out)

def make_layers(cfg, batch_norm=False):
    """Turn a layer spec into an ``nn.Sequential`` feature extractor.

    Args:
        cfg: Iterable whose entries are either ``'M'`` (a 2x2 max-pool with
            stride 2) or an output channel count for a 3x3, padding-1
            convolution followed by an in-place ReLU.
        batch_norm: When true, a ``BatchNorm2d`` is inserted between each
            convolution and its ReLU.

    Returns:
        The assembled ``nn.Sequential``; the first convolution expects a
        single input channel.
    """
    modules = []
    prev_channels = 1
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(prev_channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        prev_channels = spec
    return nn.Sequential(*modules)

In [6]:
class VGG(nn.Module):
    """Convolutional feature extractor followed by a 3-layer MLP with 8 outputs.

    Args:
        features: Module applied to the input first (e.g. from ``make_layers``).
        number_of_block: Number of 2x2 poolings assumed inside ``features``;
            used to size the first linear layer for a 640x128 input.
        c_out: Channel count produced by ``features``.

    An LSTM stage was prototyped here at some point but is not part of the
    model. ``avgpool`` is registered but not used by ``forward``.
    """

    def __init__(self, features, number_of_block, c_out):
        super().__init__()
        self.features = features
        self.c_out = c_out
        self.number_of_block = number_of_block

        # Each block is assumed to divide the 640x128 area by 4.
        flat_features = int(c_out * 640 * 128 / 4 ** number_of_block)
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(flat_features, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, 8),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Re-initialise every convolution: zero bias, normal weights with
        # std = sqrt(2 / (kernel_h * kernel_w * out_channels)).
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            kernel_h, kernel_w = conv.kernel_size
            fan_out = kernel_h * kernel_w * conv.out_channels
            conv.weight.data.normal_(0, math.sqrt(2. / fan_out))
            conv.bias.data.zero_()

    def forward(self, x):
        """Return the 8 class scores for a batch ``x``."""
        feature_maps = self.features(x)
        # Collapse everything except the batch dimension.
        flattened = feature_maps.reshape(feature_maps.size(0), -1)
        return self.classifier(flattened)

In [7]:
# class Net(nn.Module):
#     def __init__(self):
#         super().__init__()
        
#         self.features = nn.Sequential(
#             nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
#             nn.BatchNorm2d(64),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(kernel_size=3, stride=2),
#             nn.Conv2d(64, 192, kernel_size=5, padding=2),
#             nn.BatchNorm2d(192),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(kernel_size=3, stride=2),
#             nn.Conv2d(192, 384, kernel_size=3, padding=1),
#             nn.BatchNorm2d(384),
#             nn.ReLU(inplace=True),
#             nn.Conv2d(384, 256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.ReLU(inplace=True),
#             nn.Conv2d(256, 256, kernel_size=3, padding=1),
#             nn.BatchNorm2d(256),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(kernel_size=3, stride=2),
#         )
#         self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
#         self.LSTM = nn.LSTM(36, 20, 2, batch_first = True)
#         self.classifier = nn.Sequential(
#             nn.Dropout(),
#             nn.Linear(5120, 64),
#             nn.ReLU(inplace=True),
#             nn.Linear(64, 8),
#         )

#     def forward(self, x: torch.Tensor) -> torch.Tensor:
#         x = self.features(x)
#         x = self.avgpool(x)
#         x = x.reshape(-1, 256, 6*6)
#         x = self.LSTM(x)[0]
#         x = torch.flatten(x, 1)
#         x = self.classifier(x)
#         return x



# model = Net()
# model = model.cuda()

In [8]:
# Cross-entropy loss shared by training and validation, moved to CUDA.
criterion = nn.CrossEntropyLoss().cuda()

In [9]:
def train(input_data, model, criterion, optimizer, output_disable = False):
    """Run one training epoch and report accuracy and mean batch loss.

    Args:
        input_data: Iterable of batches; element 0 is the input tensor and
            element 1 the integer class targets.
        model: Network to optimise; it is switched to training mode.
        criterion: Loss called as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepped once per batch.
        output_disable: Passed to ``tqdm`` as ``disable`` to hide the bar.

    Returns:
        ``(acc, loss)``: the fraction of correctly classified samples as a
        Python ``float`` and the mean of the per-batch losses.

    Raises:
        ValueError: If ``input_data`` yields no samples.
    """
    model.train()
    # Send batches to wherever the model lives; fall back to CUDA (the
    # previous hard-coded target) for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    loss_list = []
    total_count = 0
    acc_count = 0
    pbar = tqdm(input_data, disable = output_disable)
    pbar.set_description("Training")
    for data in pbar:
        music, genre = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(music)
        loss = criterion(outputs, genre)
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(outputs.detach(), 1)
        total_count += genre.size(0)
        # Plain Python ints keep the running count valid even for zero batches.
        acc_count += (predicted == genre).sum().item()
        loss_list.append(loss.item())

    if total_count == 0:
        raise ValueError("input_data yielded no samples; cannot compute accuracy and loss")

    acc = acc_count / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss

In [10]:
def val(input_data, model, criterion, output_disable = False):
    """Evaluate the model on one pass over ``input_data`` without gradients.

    Args:
        input_data: Iterable of batches; element 0 is the input tensor and
            element 1 the integer class targets.
        model: Network to evaluate; it is switched to evaluation mode.
        criterion: Loss called as ``criterion(outputs, targets)``.
        output_disable: Passed to ``tqdm`` as ``disable`` to hide the bar.

    Returns:
        ``(acc, loss)``: the fraction of correctly classified samples as a
        Python ``float`` and the mean of the per-batch losses.

    Raises:
        ValueError: If ``input_data`` yields no samples.
    """
    model.eval()
    # Send batches to wherever the model lives; fall back to CUDA (the
    # previous hard-coded target) for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    loss_list = []
    total_count = 0
    acc_count = 0
    pbar = tqdm(input_data, disable = output_disable)
    pbar.set_description("Validation")
    with torch.no_grad():
        for data in pbar:
            music, genre = data[0].to(device), data[1].to(device)
            outputs = model(music)
            loss = criterion(outputs, genre)
            _, predicted = torch.max(outputs, dim=1)
            total_count += genre.size(0)
            # Plain Python ints keep the running count valid even for zero batches.
            acc_count += (predicted == genre).sum().item()
            loss_list.append(loss.item())

    if total_count == 0:
        raise ValueError("input_data yielded no samples; cannot compute accuracy and loss")

    acc = acc_count / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss

In [11]:
def RNN(parameters, max_epochs=3):
    """Train a freshly built VGG for a few epochs and return its accuracy.

    Despite the name, the model assembled here is the convolutional ``VGG``
    defined in this notebook.

    Args:
        parameters: Sequence ``[learning_rate, initial_size, th_1, th_2,
            number_of_block]``; all but the first are truncated with ``int``.
        max_epochs: Number of train/validate rounds to run.

    Returns:
        The validation accuracy measured after the last epoch.
    """
    learning_rate = parameters[0]
    initial_size, th_1, th_2, number_of_block = [int(value) for value in parameters[1:]]
    print("Learning rate =", learning_rate, "Number of block =", number_of_block)

    # Per-epoch history, kept in the order the epochs were run.
    history = {"train_acc": [], "train_loss": [], "val_acc": [], "val_loss": []}

    design, c_out = configuration(number_of_block, initial_size, th_1, th_2)
    backbone = make_layers(design, batch_norm=True)
    model = VGG(backbone, number_of_block, c_out)
    print(design)
    model = model.cuda()
    optimizer = torch.optim.Adam(
        params=model.parameters(),
        lr=learning_rate,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    )

    for epoch_index in range(max_epochs):
        banner = '=' * 20
        print(banner, 'Epoch', epoch_index + 1, banner)
        train_acc, train_loss = train(train_loader, model, criterion, optimizer, True)
        val_acc, val_loss = val(val_loader, model, criterion, True)

        history["train_acc"].append(train_acc)
        history["train_loss"].append(train_loss)
        history["val_acc"].append(val_acc)
        history["val_loss"].append(val_loss)

        print('Train Acc: {:.6f} Train Loss: {:.6f}'.format(train_acc, train_loss))
        print('  Val Acc: {:.6f}   Val Loss: {:.6f}'.format(val_acc, val_loss))
    return val_acc

In [12]:
def comparison(parameter, lower_bound, upper_bound, integer = False, mutation_rate = 0.02):
    """Pull a value back inside its bounds, or occasionally mutate it.

    * Above ``upper_bound``: resampled uniformly from the upper half of the
      range.
    * Below ``lower_bound``: resampled uniformly from the lower half.
    * Otherwise: with probability ``mutation_rate`` the value is replaced by a
      uniform draw over the whole range (and the change is printed); else it
      is returned unchanged.

    Args:
        parameter: Candidate value.
        lower_bound: Lowest admissible value.
        upper_bound: Highest admissible value.
        integer: Truncate the result with ``int`` when true.
        mutation_rate: Chance of mutating an in-range value.

    Returns:
        The accepted, resampled or mutated value.
    """
    midpoint = (lower_bound + upper_bound) / 2
    if parameter > upper_bound:
        candidate = np.random.uniform(low = midpoint, high = upper_bound)
    elif parameter < lower_bound:
        candidate = np.random.uniform(low = lower_bound, high = midpoint)
    elif random.uniform(0,1) < mutation_rate:
        candidate = np.random.uniform(low = lower_bound, high = upper_bound)
        print("Mutation occur:", parameter, "-->", candidate)
    else:
        candidate = parameter
    return int(candidate) if integer else candidate

In [13]:
p_num = 4  # number of particles in the swarm
max_iteration = 10  # number of PSO iterations
record = np.zeros((1, max_iteration), dtype='float')
# Search bounds, one [low, high] pair per hyper-parameter in the order the
# RNN() call unpacks them: learning rate, initial_size, th_1, th_2,
# number_of_block.
boundary = [[1e-5, 5e-3], [1, 64], [1,5], [3,7], [1, 7]]
# Starting swarm, one row per particle: the five hyper-parameters followed by
# a score in the last column (presumably validation accuracy from an earlier
# run -- the main loop overwrites it with the value returned by RNN()).
x = np.array([[0.001, 32, 3, 5, 7, 0.436250], 
              [0.0003112003852983763, 44, 2, 5, 4, 0.523750],
              [0.0007612228704013985, 26, 2, 6, 4,  0.513750],
              [0.0006310624186537511, 33, 2, 6, 3, 0.475000]])
# After the transpose each column is a particle and row 5 holds its score.
x = x.transpose()
w = 0.5  # inertia weight applied to the previous velocity
c1 = 2   # weight of the pull towards a particle's own best position
c2 = 2   # weight of the pull towards the swarm's best position
V = np.zeros((5, p_num))
# Personal and global bests must be independent snapshots. Aliasing the live
# position array would keep (Pb - x) at zero forever and let the "global best"
# drift along with whichever particle it was taken from.
Pb = x.copy()
Gb = Pb[:, np.argmax(Pb[5])].copy()

In [14]:
# Personal and global bests must be snapshots, not views of the live position
# array; otherwise the (Pb - x) term is always zero and Gb drifts with a
# moving particle. Detach them here if they still share memory with x.
if np.shares_memory(Pb, x):
    Pb = x.copy()
if np.shares_memory(Gb, x):
    Gb = Gb.copy()

for i in range(max_iteration):
    print('-' * 20, 'Iteration', i+1, '-' * 20)
    for j in range(p_num):
        print('.' * 20, 'Particle', j+1, '.' * 20)
        for k in range(5):
            # Velocity: inertia + pull towards own best + pull towards swarm best.
            V[k,j] = w * V[k,j] + c1 * random.uniform(0,1) * (Pb[k,j] - x[k,j]) + c2 * random.uniform(0,1) * (Gb[k] - x[k,j])
            x[k,j] += V[k,j]  # move the particle
            # Keep the value inside its bounds; every row except the learning
            # rate (k == 0) is truncated to an integer.
            x[k,j] = comparison(x[k,j], boundary[k][0], boundary[k][1], (k != 0))
        # Release memory left over from the previous model before building a new one.
        gc.collect()
        torch.cuda.empty_cache()
        time.sleep(10)
        # Score the particle: train with its hyper-parameters, store the result in row 5.
        x[5,j] = RNN(x[:5,j].tolist())
    for j in range(p_num):  # update each particle's personal best
        if Pb[5,j] < x[5,j]:
            Pb[:,j] = x[:,j]
    if Gb[5] < max(Pb[5]): 
        # Copy so later in-place writes to Pb cannot alter the stored global best.
        Gb = Pb[:, np.argmax(Pb[5])].copy()
    print(Gb)

-------------------- Iteration 1 --------------------
.................... Particle 1 ....................
Learning rate = 0.00074155116568344 Number of block = 6
[54, 'M', 108, 'M', 216, 216, 'M', 432, 432, 'M', 864, 864, 'M', 432, 'M']
Train Acc: 0.253832 Train Loss: 1.940217
  Val Acc: 0.207500   Val Loss: 2.375772
Train Acc: 0.352205 Train Loss: 1.718199
  Val Acc: 0.263750   Val Loss: 1.896855
Train Acc: 0.396935 Train Loss: 1.642892
  Val Acc: 0.298750   Val Loss: 2.144374
.................... Particle 2 ....................
Learning rate = 0.0003112003852983763 Number of block = 4
[44, 'M', 88, 'M', 176, 176, 'M', 352, 352, 'M']
Train Acc: 0.213951 Train Loss: 2.490623
  Val Acc: 0.325000   Val Loss: 1.750831
Train Acc: 0.315608 Train Loss: 1.807083
  Val Acc: 0.386250   Val Loss: 1.684115
Train Acc: 0.337348 Train Loss: 1.742408
  Val Acc: 0.402500   Val Loss: 1.620841
.................... Particle 3 ....................
Learning rate = 0.0006764838275202769 Number of block = 4