In [1]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import os

import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Project-level settings object. Later cells read CONFIG.SEED,
# CONFIG.ROOT_FOLDER and CONFIG.N_EPOCHS from it, and pass the whole object
# to get_dataloader() and train().
from conf.configure import Config
    
CONFIG = Config()

In [3]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU plus every CUDA device), exports ``PYTHONHASHSEED`` and
    configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only interpreters started after this point (e.g. worker subprocesses)
    # pick this up; the running process fixed its hash seed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all GPUs, not just the current one; silently ignored without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, which
    # defeats the deterministic flag above, so it has to be switched off.
    torch.backends.cudnn.benchmark = False

seed_everything(CONFIG.SEED) # Fix the random seed

In [4]:
# Read the train and test metadata tables stored directly under
# CONFIG.ROOT_FOLDER (train first, then test).
train_df, test_df = (
    pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

In [5]:
# Display the training metadata table (the recorded output shows the
# columns id, path and label).
train_df

Unnamed: 0,id,path,label
0,RUNQPNJF,./train/RUNQPNJF.ogg,real
1,JFAWUOGJ,./train/JFAWUOGJ.ogg,fake
2,RDKEKEVX,./train/RDKEKEVX.ogg,real
3,QYHJDOFK,./train/QYHJDOFK.ogg,real
4,RSPQNHAO,./train/RSPQNHAO.ogg,real
...,...,...,...
55433,NJGPMAJL,./train/NJGPMAJL.ogg,real
55434,SZXIACUZ,./train/SZXIACUZ.ogg,fake
55435,PXLBTGRH,./train/PXLBTGRH.ogg,fake
55436,CGGQGPOQ,./train/CGGQGPOQ.ogg,fake


In [6]:
# Settings dictionary for the AASIST model. In this notebook only the
# "model_config" entry is consumed (it is passed to the Model constructor);
# "loss" is not read by any visible cell.
conf_dic = dict(
    loss="CCE",
    model_config=dict(
        nb_samp=64600,
        first_conv=128,
        filts=[70, [1, 32], [32, 32], [32, 64], [64, 64]],
        gat_dims=[64, 32],
        pool_ratios=[0.5, 0.7, 0.5, 0.5],
        temperatures=[2.0, 2.0, 100.0, 100.0],
    ),
)

In [7]:
# Instantiate the project's AASIST model from the "model_config" section of
# conf_dic.
from models.AASIST import Model
model = Model(conf_dic["model_config"])

In [8]:
# Build the train / validation / test loaders from the two metadata tables.
# NOTE(review): the recorded output shows a preprocessor_config.json download,
# so this call presumably fetches a pretrained preprocessor on first use --
# confirm in dataset/wav2vec_dataset.py.
from dataset.wav2vec_dataset import get_dataloader
train_dataloader, val_dataloader, test_dataloader = get_dataloader(CONFIG, train_df, test_df)

preprocessor_config.json:   0%|          | 0.00/257 [00:00<?, ?B/s]

In [9]:
from train import train
from transformers.optimization import get_cosine_schedule_with_warmup

# Adam over all model parameters with a base learning rate of 1e-5.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# t_total is the number of training batches summed over all epochs; the first
# 10% of those steps are used as warm-up for the cosine schedule.
warmup_ratio = 0.1
t_total = CONFIG.N_EPOCHS * len(train_dataloader)
warmup_step = int(warmup_ratio * t_total)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_step,
    num_training_steps=t_total,
)

# Hand everything to the project's training loop.
train(model, optimizer, scheduler, train_dataloader, val_dataloader, CONFIG)

100%|██████████| 462/462 [01:54<00:00,  4.04it/s]
100%|██████████| 116/116 [00:26<00:00,  4.34it/s]

Epoch [1], Train Loss : [0.86507] Val Loss : [0.71608] Val AUC : [0.46045]



100%|██████████| 462/462 [01:04<00:00,  7.12it/s]
100%|██████████| 116/116 [00:17<00:00,  6.47it/s]


Epoch [2], Train Loss : [0.82482] Val Loss : [0.69665] Val AUC : [0.50792]


 75%|███████▌  | 348/462 [00:49<00:16,  7.02it/s]


KeyboardInterrupt: 

In [None]:
def inference(model, test_loader, device):
    """Run the model over every batch of ``test_loader`` and collect its outputs.

    Args:
        model: Module whose forward pass returns a ``(last_hidden, probs)`` pair.
        test_loader: Iterable yielding one feature tensor per batch.
        device: Device the model and every batch are moved to.

    Returns:
        list: The second model output for every sample, converted to plain
        Python values and concatenated in loader order.
    """
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        # Hand tqdm the loader itself rather than iter(loader): an iterator
        # hides the length, leaving the progress bar without a total or ETA.
        for features in tqdm(test_loader):
            features = features.float().to(device)

            # Only the second output is needed; the first one is discarded.
            _, probs = model(features)

            predictions.extend(probs.detach().cpu().tolist())
    return predictions

In [None]:
# Load the saved model object and run it over the test loader.
# map_location remaps the checkpoint's tensors onto the device selected for
# this session, so a checkpoint saved from a GPU still loads on a CPU-only
# machine.
# NOTE: torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints you produced yourself.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole pickled modules; pass weights_only=False there if the
# checkpoint is trusted -- confirm against the installed version.
infer_model = torch.load('./best.pt', map_location=device)
preds = inference(infer_model, test_dataloader, device)

In [None]:
# Load the submission template and overwrite every column after the first
# with the predictions, then preview the first rows.
submit = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER,'sample_submission.csv'))
submit.iloc[:, 1:] = preds
submit.head()

In [None]:
# Write the filled-in submission to disk without the DataFrame index column.
submit.to_csv('./submit.csv', index=False)