In [1]:
# Install libraries for data_loader
!pip install pandas
!pip install sklearn
!pip install torchvision
!pip install tqdm



In [2]:
# Import DataLoader and corresponding libraries
import pandas
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import StratifiedKFold
from torchvision import transforms, utils

In [3]:
# Import libraries for tensors
import numpy as np
import torch

In [4]:
# Import tqdm for progress bar construction
import tqdm

In [5]:
# Datatypes and Devices (from Assignment 2)
# Default tensor dtypes: floating point for inputs, long for class labels.
dtype, ltype = torch.float, torch.long

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print('using device:', device)

using device: cuda:0


In [6]:
# Hyper-parameters for K-Fold Cross Validation
# N: number of folds.
#    NOTE(review): keep in sync with the n_splits given to StratifiedKFold.
# seed: random_state used for the shuffled StratifiedKFold split.
N = 5
seed = 42

# Directories for Data
# FF1010_Path: root holding 'metadata.csv' plus the 'nocall/' and 'bird/'
#     sub-folders of .npy clips.
# AudioImage_Path: not referenced by the active code visible in this notebook;
#     presumably the root of the mel-spectrogram images — TODO confirm.
FF1010_Path = './data/'
AudioImage_Path = './image/'

In [7]:
# Call StratifiedKFold object
# The number of folds comes from the N hyper-parameter (instead of a
# hard-coded 5) so that changing N actually changes the split. Shuffling with
# a fixed random_state keeps the fold assignment reproducible.
skf = StratifiedKFold(
    n_splits=N, shuffle=True, random_state=seed
)

In [8]:
# Modify dataframe for K-Fold Cross Validation
ff1010_csv = pandas.read_csv(FF1010_Path + 'metadata.csv')

# Build each clip's .npy path from its label: hasbird == 0 clips live under
# 'nocall/', hasbird == 1 clips under 'bird/'. The column is created up front
# (as missing values) so that the masked assignments below write into an
# existing column; a single pass is sufficient because each assignment is
# idempotent.
ff1010_csv['filepath'] = None
for label, subdir in ((0, 'nocall/'), (1, 'bird/')):
    has_label = ff1010_csv['hasbird'] == label
    ff1010_csv.loc[has_label, 'filepath'] = (
        FF1010_Path + subdir + ff1010_csv.loc[has_label, 'filename'] + '.npy'
    )

# Drop rows with any missing value (including rows whose label is neither
# 0 nor 1 and therefore never received a filepath), then renumber the rows
# 0..len-1 so that positional indices from the splitter are valid labels.
ff1010_csv = ff1010_csv.dropna()
ff1010_csv = ff1010_csv.reset_index(drop=True)

# Add 'fold' attribute for dataset classification
# The column is initialised as an integer so fold ids stay ints instead of
# being upcast to float by column enlargement; every row is assigned exactly
# one fold because the held-out groups of a K-fold split partition the data.
ff1010_dataframe = ff1010_csv.copy()
ff1010_dataframe['fold'] = -1
for n, (_, nth_groups) in enumerate(
    skf.split(ff1010_dataframe, ff1010_dataframe['hasbird'])):
    ff1010_dataframe.loc[nth_groups, 'fold'] = n

In [9]:
# Hyper-parameters for training
# ff1010_batch: batch_size handed to both the train and validation DataLoader.
ff1010_batch = 32

In [10]:
# class for FF1010
class FF1010(Dataset):
    """Dataset over the FF1010 clips listed in a dataframe.

    Each item is loaded from the ``.npy`` file named in the row's 'filepath'
    column and paired with the row's 'hasbird' value as its label.

    Items are returned as CPU tensors: creating CUDA tensors inside
    ``__getitem__`` breaks DataLoader worker processes and ``pin_memory``.
    Move the collated batch to the target device in the training loop.

    Args:
        dataframe: frame providing 'filepath' and 'hasbird' columns.
        process: 'train' (resize, random flips, normalize) or
            'valid' (resize, normalize).

    Raises:
        ValueError: if ``process`` is neither 'train' nor 'valid'.
    """

    def __init__(self, dataframe, process='train'):
        if process not in ('train', 'valid'):
            raise ValueError(
                "process must be 'train' or 'valid', got {!r}".format(process)
            )

        self.dataframe = dataframe
        self.filepaths = dataframe['filepath'].values
        self.labels = dataframe['hasbird'].values
        # Remembered so __getitem__ can pick the matching transform.
        self.process = process

        # Transforms for each train and validation.
        # ToTensor must come first: it turns the H x W x C ndarray into a
        # C x H x W tensor, which is what Normalize requires and what Resize
        # and the flips accept (tensor inputs need torchvision >= 0.8).
        self.train_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize([128, 281]),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])
        self.valid_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize([128, 281]),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

    def __getitem__(self, idx):
        """Return ``(source, label)`` for row ``idx`` as CPU tensors."""
        source = np.load(self.filepaths[idx])
        # Move the first axis last, drop singleton axes and replicate the
        # result three times along a new last axis (3-channel input).
        # Assumes the stored array is 3-D with a singleton first axis,
        # i.e. (1, H, W) — TODO confirm against the preprocessing step.
        # NOTE(review): ToTensor keeps the ndarray's dtype unless it is uint8;
        # confirm the stored dtype matches what the model expects.
        source = source.transpose(1, 2, 0)
        source = np.stack((np.squeeze(source), ) * 3, -1)

        # Apply transform
        if self.process == 'train':
            source = self.train_transform(source)
        else:
            source = self.valid_transform(source)

        label = torch.tensor(self.labels[idx], dtype=ltype)
        return source, label

    def __len__(self):
        """Number of rows (clips) in the backing dataframe."""
        return len(self.dataframe)

In [11]:
# Train loop for nocall detector
def nocall_train(train_dataframe, val_dataframe):
    """Build the train/validation data loaders for the nocall detector.

    Args:
        train_dataframe: rows used for training (wrapped in FF1010).
        val_dataframe: rows used for validation (wrapped in FF1010).

    Returns:
        Tuple ``(val_losses, train_losses)``. Both are ``None`` placeholders
        until the training loop is implemented.
    """
    train_data = FF1010(train_dataframe, process='train')
    val_data = FF1010(val_dataframe, process='valid')

    # Construct data loader for train and validation.
    # Training batches are reshuffled every epoch so batch composition (and
    # the tail samples discarded by drop_last) varies between epochs;
    # validation keeps a fixed order and every sample.
    train_loader = DataLoader(train_data, batch_size=ff1010_batch,
                              shuffle=True, num_workers=4,
                              pin_memory=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=ff1010_batch,
                            shuffle=False, num_workers=4,
                            pin_memory=True, drop_last=False)

    # TODO: iterate over train_loader / val_loader (e.g. wrapped in
    # tqdm.tqdm for a progress bar) and record the per-epoch losses.
    val_losses = None
    train_losses = None
    return val_losses, train_losses

In [12]:
# Train, Validate and Test for nocall detector
def nocall(dataframe, val_index, test_index):
    """Split ``dataframe`` by its 'fold' column and run nocall training.

    Rows whose fold is neither ``val_index`` nor ``test_index`` form the
    training set; rows whose fold equals ``val_index`` form the validation
    set. The test fold is only held out here; it is not evaluated yet.

    Args:
        dataframe: frame with a 'fold' column.
        val_index: fold id used for validation.
        test_index: fold id reserved for testing.

    Raises:
        AssertionError: if ``val_index`` equals ``test_index``.
    """
    assert val_index != test_index, \
        'Validation and test should be done on different fold.'

    # Both subsets get a fresh 0..len-1 index and discard the old one, so the
    # validation frame has the same columns as the training frame.
    train_dataframe = dataframe.query(
        'fold != @val_index and fold != @test_index'
    ).reset_index(drop=True)
    val_dataframe = dataframe.query(
        'fold == @val_index'
    ).reset_index(drop=True)

    val_losses, train_losses = nocall_train(train_dataframe, val_dataframe)
    # TODO: evaluate on the test fold and report val_losses / train_losses.
    return

In [13]:
# Run the nocall pipeline with fold 0 held out for validation and fold 1
# reserved for testing.
nocall(ff1010_dataframe, 0, 1)

In [None]:
# # class for Mel-spectrograms
# class AudioImage(Dataset):
#     def __init__(self, dataframe, process='train'):
#         self.dataframe = dataframe
#         self.filepaths = dataframe['filepath'].values
#         self.labels = dataframe['hasbird'].values
        
#         # Transforms for each train and validation
#         self.train_transform = transforms.Compose([
#             transforms.Resize(128, 281),
#             transforms.RandomHorizontalFlip(p=0.5),
#             transforms.RandomVerticalFlip(p=0.5),
#             transforms.Normalize(
#                 mean=[0.485, 0.456, 0.406],
#                 std=[0.229, 0.224, 0.225],
#             ),
#             transforms.ToTensor(),
#         ])
#         self.valid_transform = transforms.Compose([
#             transforms.Resize(128, 281),
#             transforms.Normalize(
#                 mean=[0.485, 0.456, 0.406],
#                 std=[0.229, 0.224, 0.225],
#             ),
#             transforms.ToTensor(),
#         ])
    
#     def __getitem__(self, index):
#         source = np.load(self.filepaths[index]).to(device)
#         source = source.transpose(1, 2, 0)
#         source = np.stack((np.squeeze(source), ) * 3, -1)
        
#         # Apply transform
#         if process == 'train':
#             source = self.train_transform(source)
#         elif process == 'valid':
#             source = self.valid_transform(source)
        
#         return source, torch.tensor(self.labels[index], dtype=ltype).to(device)
    
#     def __len__(self):
#         return len(self.dataframe)