In [1]:
!pip3 install gdown

Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
Successfully installed gdown-4.7.1
[0m

In [2]:
!pip3 install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
[0m

In [3]:
import gdown
gdown.download_folder('https://drive.google.com/drive/folders/16xOkParUb5eftMIiI0bku74eDfSZKfRH', quiet=True)

['/kaggle/working/gtzannpy/x_gtzan.npy',
 '/kaggle/working/gtzannpy/y_gtzan.npy']

In [4]:
import torch 
from torch import nn 
import pandas as pd 
import numpy as np 
import sklearn 

In [5]:
import librosa

In [6]:
from torchsummary import summary

In [7]:
device= torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
X,Y= np.load('/kaggle/working/gtzannpy/x_gtzan.npy'), np.load('/kaggle/working/gtzannpy/y_gtzan.npy')

In [9]:
X= X[:, np.newaxis, : ,:]

In [10]:
num_classes= len(set(Y))

In [11]:
def conv(ni, nf, ks=3, stride=1, bias=False):
    return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=bias)

def conv_layer(ni, nf, ks=3, stride=1,act=True):
    bn = nn.BatchNorm2d(nf)
    layers = [conv(ni, nf, ks, stride=stride), bn]
    act_fn = nn.ReLU(inplace=True)
    if act: layers.append(act_fn)
    return nn.Sequential(*layers)

class ResBlock(nn.Module):
    def __init__(self, nf):
        super().__init__()
        self.conv1 = conv_layer(nf, nf)
        self.conv2 = conv_layer(nf, nf)
    def forward(self, x):
        return x + self.conv2(self.conv1(x))

def conv_layer_averpl(ni, nf):
    aver_pl = nn.AvgPool2d(kernel_size=2, stride=2)
    return nn.Sequential(conv_layer(ni, nf), aver_pl)

model = nn.Sequential(
    conv_layer_averpl(1, 64),
    ResBlock(64),
    conv_layer_averpl(64, 64),
    ResBlock(64),
    conv_layer_averpl(64, 128),
    ResBlock(128),
    conv_layer_averpl(128, 256),
    ResBlock(256),
    conv_layer_averpl(256, 512),
    ResBlock(512),
    nn.AdaptiveAvgPool2d((2,2)),
    nn.Flatten(),
    nn.Linear(2048, 1024),
    nn.Dropout(0.5),
    nn.Linear(1024, 10)
)

In [12]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [13]:
X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=42)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.3, random_state=4)

In [15]:
class AudioData(Dataset): 
  def __init__(self, x, y):
    self.x= x
    self.y = y 
  
  def __len__(self):
    return len(self.x)
  
  def __getitem__(self, idx):
    return self.x[idx], self.y[idx]

def collate_fn(data): 
  x, y = zip(*data)
  
  return torch.from_numpy(np.array(x)).to(device, non_blocking=True), torch.from_numpy(np.array(y)).to(device, non_blocking=True)


In [16]:
data_train = AudioData(X_train, y_train)
dataloader_train= DataLoader(data_train, batch_size= 32, collate_fn= collate_fn, shuffle= True)

data_val = AudioData(X_val, y_val)
dataloader_val= DataLoader(data_val, batch_size= 16, collate_fn= collate_fn, shuffle= False )

In [17]:
epochs= 15

In [18]:
optimizer = torch.optim.Adam(model.parameters(),lr=1e-3)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=1e-3,
                                            steps_per_epoch=int(len(dataloader_train)),
                                            epochs= epochs,
                                            anneal_strategy='linear')
criterion= nn.CrossEntropyLoss(label_smoothing= 0.1)

In [19]:
total_step= len(dataloader_train)
total_steps= total_step * epochs

In [20]:
def train_(): # định nghĩa hàm train 
  model.train() 
  total_acc, total_count = 0, 0
  for idx, (data, label) in enumerate(dataloader_train): 
    optimizer.zero_grad(set_to_none=True) 
    output= model.to(device)(data)
    # print(output.shape, label.shape)
    loss= criterion(output, label.long()) # tính loss  
    loss.backward() # thực hiện đạo hàm 
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
    optimizer.step() # thực hiện optimizer 
    scheduler.step()

    acc_step = (output.argmax(-1) == label).sum().item() / len(label)
    total_acc += acc_step
    total_count += 1

    print(f'Epoch: [{epoch}/{epochs}], Step: [{idx+1}/{total_step}], Loss: {loss.item()}, Acc: {acc_step}')

  
  return total_acc/ total_count, loss / total_count

def evaluate():  # định nghĩa hàm evalutate 
  model.eval() 
  total_acc, total_count= 0, 0

  with torch.no_grad(): 
    for idx, (data, label) in enumerate(dataloader_val): 
      output= model.to(device)(data)
      loss= criterion(output, label.long())

      total_acc += (output.argmax(-1) == label).sum().item() / len(label)
      total_count += 1

  
  return total_acc/ total_count , loss/ total_count

In [21]:
for epoch in range(1, epochs + 1): 
  train_acc, train_loss= train_()
  val_acc, val_loss = evaluate()
  print('-' * 59)
  print(f'End of epoch {epoch} - loss: {train_loss} - acc: {train_acc} - val_loss: {val_loss} - val_acc: {val_acc}')
  print('-' * 59)

  checkpoint = {
      'epoch': epoch,
      'model_state_dict': model.state_dict(),
      'optimizer_state_dict': optimizer.state_dict(),
      'loss': train_loss,
      'val_loss': val_loss, 
      'acc': train_acc, 
      'val_acc': val_acc

  }
  torch.save(checkpoint, f'epoch_{epoch}_{train_loss}_{val_loss}_{train_acc}_{val_acc}.pth')

Epoch: [1/15], Step: [1/682], Loss: 2.3383893966674805, Acc: 0.15625
Epoch: [1/15], Step: [2/682], Loss: 2.33817195892334, Acc: 0.09375
Epoch: [1/15], Step: [3/682], Loss: 2.415822982788086, Acc: 0.125
Epoch: [1/15], Step: [4/682], Loss: 2.125410318374634, Acc: 0.15625
Epoch: [1/15], Step: [5/682], Loss: 2.1481025218963623, Acc: 0.28125
Epoch: [1/15], Step: [6/682], Loss: 2.2999472618103027, Acc: 0.1875
Epoch: [1/15], Step: [7/682], Loss: 2.416947841644287, Acc: 0.15625
Epoch: [1/15], Step: [8/682], Loss: 2.3219103813171387, Acc: 0.1875
Epoch: [1/15], Step: [9/682], Loss: 2.288822889328003, Acc: 0.15625
Epoch: [1/15], Step: [10/682], Loss: 2.0110042095184326, Acc: 0.3125
Epoch: [1/15], Step: [11/682], Loss: 2.2489984035491943, Acc: 0.21875
Epoch: [1/15], Step: [12/682], Loss: 2.201982259750366, Acc: 0.25
Epoch: [1/15], Step: [13/682], Loss: 2.153287172317505, Acc: 0.25
Epoch: [1/15], Step: [14/682], Loss: 1.952568531036377, Acc: 0.25
Epoch: [1/15], Step: [15/682], Loss: 2.0836000442504

In [29]:
dir_pretrained= '/kaggle/working/epoch_14_0.0008289712714031339_0.0010464002843946218_0.998625366568915_0.9750256147540983.pth'
checkpoint = torch.load(dir_pretrained)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [30]:
data_test = AudioData(X_test, y_test)
dataloader_test= DataLoader(data_test, batch_size= 32, collate_fn= collate_fn, shuffle= False )

In [31]:
def evaluate_test(model, dataloader):  # định nghĩa hàm evalutate 
  model.eval() 
  total_acc, total_count= 0, 0

  with torch.no_grad(): 
    for idx, (data, label) in enumerate(dataloader): 
      output= model.to(device)(data)
      loss= criterion(output, label.long())

      total_acc += (output.argmax(-1) == label).sum().item() 
      total_count += len(label)
    

  
  return total_acc/ total_count , loss/ total_count

In [32]:
evaluate_test(model, dataloader_test)

(0.9799016463545007, tensor(5.3767e-05, device='cuda:0'))