# Preprocessing: convert .npy audio arrays to spectrogram images

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import librosa
import librosa.display 
import os
# Choose which split to convert: 'train' or 'val'.
path = os.path.join('DSP_HW','val')
# Walk every sub-folder of the split and render each .npy file found there
# as a mel-spectrogram JPEG saved next to it.
for filename in os.listdir(path):
    train_voice = os.path.join(path,filename)
    # Stray files directly under the split folder cannot be listed; skip them.
    if not os.path.isdir(train_voice):
        continue
    for each_npy_file in os.listdir(train_voice):
        # Only .npy inputs are converted. This also skips the .jpg files
        # written by a previous run, which np.load cannot parse.
        if not each_npy_file.endswith('.npy'):
            continue
        npy_file = os.path.join(train_voice,each_npy_file)
        # NOTE(review): allow_pickle=True can execute arbitrary code when the
        # file is untrusted; keep it only for datasets you trust.
        y = np.load(npy_file,allow_pickle=True)
        # Pass the signal by keyword: recent librosa releases reject a
        # positional audio argument.
        S = librosa.feature.melspectrogram(y=y)
        fig = plt.figure(figsize=(10, 4))
        # Convert power to decibels relative to the loudest bin.
        S_dB = librosa.power_to_db(S, ref=np.max)
        librosa.display.specshow(S_dB, x_axis='time',
                                 y_axis='mel',
                                 fmax=8000)
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel-frequency spectrogram')
        plt.tight_layout()
        # Output name = input name up to its first dot, with a .jpg extension.
        name = each_npy_file.split(".", 1)[0]
        plt.savefig(os.path.join(train_voice,name+'.jpg'))
        # Release the figure; otherwise one figure per file stays in memory.
        plt.close(fig)

# Classification
1. load dataset
2. use pre-processing
3. train the model
4. save and load your well-trained model
5. test performance
6. obtain predictions from a few images

In [None]:
# Import some libraries you may use
import torchvision # a useful library to help with I/O (highly recommended). To install it, run "pip install torchvision"
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# Data loading
In order to train the model with training data, the first step is to read the data from your folder, database, etc.

In [None]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose, ToTensor, Grayscale, Resize, Normalize
from torch.utils.data import DataLoader
import os

# Define path to your dataset
dataset = "DSP_HW" # the root folder
trainpath = os.path.join(dataset,"train") # train set
valpath = os.path.join(dataset,"val") # validation set


# Define some operations to preprocess your inputs.
mytransforms = Compose([Grayscale(num_output_channels=1),Resize((32,32)),ToTensor()])
# The above line will work in this flow:
# (PIL_RGB_INPUT) => (PIL_GrayScale_INPUT) => (32x32_PIL_GrayScale_INPUT) => (32x32_Tensor_GrayScale_INPUT)

# The same preprocessing is applied to both splits.
traindata = ImageFolder(root=trainpath, transform=mytransforms)
valdata = ImageFolder(root=valpath,transform=mytransforms)

# Create the loaders.
# Training batches are reshuffled every epoch. Validation only measures
# accuracy, so it keeps a fixed order for reproducible evaluation.
trainloader = DataLoader(traindata,batch_size=64,shuffle=True)
valloader = DataLoader(valdata,batch_size=64,shuffle=False)

In [None]:
# Invert the dataset's class-name -> index table so that a predicted index
# can later be mapped back to its class name.
idx_to_class = dict(
    (class_index, class_name)
    for class_name, class_index in traindata.class_to_idx.items()
)
print(idx_to_class)

# Build a network

In [None]:
class Net(nn.Module):
    """Small CNN: two conv+pool stages followed by three linear layers.

    ``fc1`` expects 16*5*5 input features, which is what the two 5x5
    convolutions and two 2x2 poolings produce for a 1x32x32 input.

    Args:
        num_classes: number of output logits produced by the last layer.
    """

    def __init__(self,num_classes):
        super().__init__()
        # Feature extractor: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Classifier head.
        self.fc1   = nn.Linear(16*5*5, 120)
        self.fc2   = nn.Linear(120, 84)
        self.fc3   = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax) for a batch ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse everything except the batch dimension.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [None]:
# Build the classifier with one output per class found in the training set.
net = Net(num_classes=len(traindata.classes))
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("use",device,"now!")
net.to(device)
# Loss function and optimizer used by the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)

# Train the model with the data

In [None]:
num_epoch = 10
# Put the network in training mode before optimizing.
net.train()
for epoch_no in range(1, num_epoch + 1):
    for step, (data, target) in enumerate(trainloader, start=1):
        # Move the batch to the same device as the network.
        data, target = data.to(device), target.to(device)
        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        output = net(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Report the loss on the first batch and then on every fifth one.
        if (step - 1) % 5 == 0:
            print('epoch %d, iter %d loss: %.3f' %(epoch_no, step, loss.item()))

# Save model

In [None]:
def save_model(model,filename):
    """Write the parameters of ``model`` to ``filename`` as CPU tensors.

    Each entry of the state dict is replaced by a cloned CPU copy before
    saving; the model's own parameters are not moved.
    """
    state = model.state_dict()
    cpu_copies = {name: tensor.clone().cpu() for name, tensor in state.items()}
    # Update in place so the mapping object returned by state_dict() is kept.
    state.update(cpu_copies)
    torch.save(state, filename)
# Persist the current weights of `net` to "weight.pth".
save_model(net,"weight.pth")

# Load model

In [None]:
def load_model(model,filename):
    """Load a saved state dict from ``filename`` into ``model``.

    Args:
        model: module whose parameter names match the saved state dict.
        filename: path of a file written with ``torch.save``.

    Returns:
        The same ``model`` object, with its parameters overwritten.
    """
    # Map every tensor to the CPU while reading, so a checkpoint that holds
    # CUDA tensors can still be loaded on a machine without a GPU.
    # load_state_dict then copies the values onto the model's own device.
    state = torch.load(filename, map_location='cpu')
    model.load_state_dict(state)
    return model
# Rebuild an untrained network of the same shape and fill it with the
# weights stored in "weight.pth".
net = load_model(Net(num_classes=len(traindata.classes)), "weight.pth")
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("use",device,"now!")
net.to(device)

# Evaluate on validation data

In [None]:
# Switch to inference mode for evaluation.
net.eval()
# Running count of correct predictions. It is kept as a plain Python int so
# the final division works even if the loader yields no batches.
correct = 0
# Gradients are not needed for evaluation.
with torch.no_grad():
    for batch_idx, (data, target) in enumerate(valloader):
        data = data.to(device)
        target = target.to(device)
        output = net(data)
        # Predicted class = index of the largest score in each row.
        pred = output.argmax(dim=1)
        correct += (pred == target).sum().item()
num_samples = len(valloader.dataset)
# Guard against an empty validation set instead of dividing by zero.
acc = correct / num_samples if num_samples else 0.0
print("Validation Classification Accuracy: %f"%(acc))

# Obtain predictions from a few images

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import librosa
import librosa.display 
import os

# Render every clip stored in DSP_HW/test/test.npy as a mel-spectrogram
# JPEG named "<position in the array>.jpg" in the same folder.
path = os.path.join('DSP_HW','test')
# NOTE(review): allow_pickle=True can execute arbitrary code when the file
# is untrusted; keep it only for datasets you trust.
y = np.load(os.path.join(path,'test.npy'),allow_pickle=True)
for index, item in enumerate(y):
    # Pass the signal by keyword: recent librosa releases reject a
    # positional audio argument.
    S = librosa.feature.melspectrogram(y=item)
    fig = plt.figure(figsize=(10, 4))
    # Convert power to decibels relative to the loudest bin.
    S_dB = librosa.power_to_db(S, ref=np.max)
    librosa.display.specshow(S_dB, x_axis='time',
                             y_axis='mel',
                             fmax=8000)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel-frequency spectrogram')
    plt.tight_layout()
    output_path = os.path.join(path,str(index)+'.jpg')
    plt.savefig(output_path)
    # Release the figure; otherwise one figure per clip stays in memory.
    plt.close(fig)

In [None]:
# fetch filepaths of the testing images
testpath = os.path.join(dataset,"test") # test set
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# list. Raw .npy files (e.g. test.npy) are left out so that every entry of
# testlist is an image and its position matches the image's position in
# any tensor built from this list.
testlist = [os.path.join(testpath,imgname)
            for imgname in sorted(os.listdir(testpath))
            if not imgname.lower().endswith('.npy')]

In [None]:
# ImagePath => PIL_Image => Tensor
from PIL import Image
# Drop raw .npy entries (e.g. test.npy) from testlist itself rather than
# skipping them inside the loop. The check is path-separator independent,
# and testlist[i] stays aligned with the i-th tensor produced below.
testlist = [imgpath for imgpath in testlist if not imgpath.lower().endswith('.npy')]
testdata = []
for imgpath in testlist:
    # In torchvision, we assume input images are all PIL types.
    # The context manager closes the underlying file once it is converted.
    with Image.open(imgpath) as img:
        # By default, torchvision reads images in RGB fashion.
        transimg = mytransforms(img.convert("RGB"))
    testdata.append(transimg)
testdata = torch.stack(testdata)# list of tensors to tensor
testdata = torch.utils.data.TensorDataset(testdata)
# =========================================================================
# Don't shuffle the image list and set the batch_size = 1
# It's just a trick. You can still figure out another way to achieve the same thing.
testloader = torch.utils.data.DataLoader(testdata,batch_size=1,shuffle=False)

In [None]:
# Class names listed in label order: the position of a name in this list is
# the integer label assigned to that class.
_CLASS_NAMES_IN_LABEL_ORDER = [
    "Tettigonioidea1",
    "Tettigonioidea2",
    "drums_Snare",
    "Grylloidea1",
    "drums_MidTom",
    "drums_HiHat",
    "drums_Kick",
    "drums_SmallTom",
    "guitar_chord2",
    "Frog1",
    "Frog2",
    "drums_FloorTom",
    "guitar_7th_fret",
    "drums_Rim",
    "Grylloidea2",
    "guitar_3rd_fret",
    "drums_Ride",
    "guitar_chord1",
    "guitar_9th_fret",
    "Frog3",
]
# Map each class name to its integer label.
classes2label = {
    class_name: label
    for label, class_name in enumerate(_CLASS_NAMES_IN_LABEL_ORDER)
}

In [None]:
# Testing
# Switch the network to inference mode before predicting.
net.eval()
# Filled by the prediction loop: integer sample id -> predicted integer label.
result = {}
import collections
def sortedDictValues(adict):
    """Return the values of ``adict`` ordered by ascending key.

    Args:
        adict: mapping whose keys can be compared with each other.

    Returns:
        A list with one value per key, in sorted-key order.
    """
    # sorted() is used because dict views have no .sort() method in Python 3.
    # The values are read from the argument, not from the builtin `dict`.
    return [adict[key] for key in sorted(adict)]
       
# Predict one label per test image and store it under the image's numeric
# file name. Gradients are not needed for inference.
with torch.no_grad():
    for idx, (data,) in enumerate(testloader):
        data = data.to(device)
        # Test batches carry no targets, so only the input is moved.
        output = net(data)
        # Index of the largest score for the first sample of the batch.
        pred_idx = output.argmax(dim=1)[0].item()
        pred_class = idx_to_class[pred_idx]
        # The file stem (name without extension) is the numeric sample id.
        index = os.path.splitext(os.path.basename(testlist[idx]))[0]
        result[int(index)] = classes2label[pred_class]
# Order the results by sample id once, after all predictions are collected.
result = dict(sorted(result.items()))


# Write the result to csv

In [None]:
import csv
# Dump the predictions as "id,category" rows under a header line.
with open('2019_12_07.csv', 'w') as csvfile:
    fieldnames = ['id', 'category']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, lineterminator = '\n')
    writer.writeheader()
    # One row per prediction, in the dict's current iteration order.
    writer.writerows(
        {'id': sample_id, 'category': label}
        for sample_id, label in result.items()
    )