In [1]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import os

import warnings
warnings.filterwarnings('ignore')

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
class Config:
    """Tunable constants shared by the cells of this notebook."""

    # Audio
    SR = 32_000           # sample rate requested from librosa.load
    RESAMPLE_SR = 16_000  # rate the waveform is resampled to before feature extraction
    MAX_LEN = 2048        # NOTE(review): not referenced by the visible cells, which pass max_len explicitly

    # Dataset
    ROOT_FOLDER = './data'

    # Training
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 5
    # LR = 3e-4  (disabled; the optimizer cell sets its own learning rate)

    # Others
    SEED = 42


CONFIG = Config()

In [3]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run, which
    # defeats the deterministic setting above, so it has to be switched off.
    torch.backends.cudnn.benchmark = False

seed_everything(CONFIG.SEED) # Fix the seed

In [4]:
# Read the train/test metadata tables that live directly under the data root.
train_df, test_df = (
    pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

In [5]:
# Bare expression: the notebook renders the training metadata frame as the cell output.
train_df

Unnamed: 0,id,path,label
0,RUNQPNJF,./train/RUNQPNJF.ogg,real
1,JFAWUOGJ,./train/JFAWUOGJ.ogg,fake
2,RDKEKEVX,./train/RDKEKEVX.ogg,real
3,QYHJDOFK,./train/QYHJDOFK.ogg,real
4,RSPQNHAO,./train/RSPQNHAO.ogg,real
...,...,...,...
55433,NJGPMAJL,./train/NJGPMAJL.ogg,real
55434,SZXIACUZ,./train/SZXIACUZ.ogg,fake
55435,PXLBTGRH,./train/PXLBTGRH.ogg,fake
55436,CGGQGPOQ,./train/CGGQGPOQ.ogg,fake


In [6]:
# Bare expression: the notebook renders the test metadata frame as the cell output.
test_df

Unnamed: 0,id,path
0,TEST_00000,./test/TEST_00000.ogg
1,TEST_00001,./test/TEST_00001.ogg
2,TEST_00002,./test/TEST_00002.ogg
3,TEST_00003,./test/TEST_00003.ogg
4,TEST_00004,./test/TEST_00004.ogg
...,...,...
49995,TEST_49995,./test/TEST_49995.ogg
49996,TEST_49996,./test/TEST_49996.ogg
49997,TEST_49997,./test/TEST_49997.ogg
49998,TEST_49998,./test/TEST_49998.ogg


In [7]:
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model

class CustomDataset(Dataset):
    """Dataset serving audio clips prepared by a Wav2Vec2 feature extractor.

    Args:
        df: DataFrame with a ``path`` column and, when ``is_train`` is true,
            a ``label`` column.
        max_len: ``max_length`` handed to the feature extractor; clips are
            truncated or padded to it.
        is_train: When true, items are ``(input_values, one_hot_label)``
            pairs; otherwise only the extractor output is returned.
    """

    def __init__(self, df, max_len, is_train):
        self.df = df
        self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base")
        self.max_len = max_len
        self.is_train = is_train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        path = row.path
        if not self.is_train:
            # Returned exactly as load_data produces it (no squeeze, unlike
            # the training branch below).
            return self.load_data(path)

        # One-hot target: index 0 marks 'fake', index 1 any other label.
        is_not_fake = row.label != 'fake'
        one_hot = np.zeros(CONFIG.N_CLASSES, dtype=float)
        one_hot[int(is_not_fake)] = 1
        return self.load_data(path).squeeze(), one_hot

    def load_data(self, path):
        """Load one audio file and return the extractor's ``input_values``."""
        full_path = os.path.join(CONFIG.ROOT_FOLDER, path)
        waveform, loaded_sr = librosa.load(full_path, sr=CONFIG.SR)
        waveform = librosa.resample(waveform, orig_sr=loaded_sr, target_sr=CONFIG.RESAMPLE_SR)
        encoded = self.feature_extractor(
            waveform,
            sampling_rate=CONFIG.RESAMPLE_SR,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_tensors="pt",
        )
        return encoded.input_values

In [8]:
class CustomModel(nn.Module):
    """Fully connected classifier producing one sigmoid score per class.

    The defaults reproduce the original fixed architecture
    (1024 -> 512 -> 256 -> 128 -> 2 with dropout 0.3), so ``CustomModel()``
    builds the same network and the same ``state_dict`` keys as before.

    Args:
        input_dim: Size of the last dimension of the input tensor.
        hidden_len: Width of the last hidden layer feeding the head.
        n_classes: Number of output scores.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_dim=1024, hidden_len=128, n_classes=2, dropout=0.3):
        super().__init__()
        self.hidden_len = hidden_len
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, 512), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(512, 256), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(256, self.hidden_len), nn.ReLU(), nn.Dropout(dropout),
        )
        self.classification_head = nn.Linear(self.hidden_len, n_classes)

    def forward(self, x):
        """Return per-class scores in [0, 1] for ``x`` of shape ``(..., input_dim)``."""
        x = self.classifier(x)
        x = self.classification_head(x)
        # Element-wise sigmoid: each class gets an independent score, so the
        # outputs of one sample are not constrained to sum to 1.
        return torch.sigmoid(x)

In [9]:
# Hold out 20% of the labelled rows for validation.
# The frames are named *_split so they do not share a name with the `train()`
# function defined in a later cell (re-running this cell would otherwise
# replace that function with a DataFrame).
train_split, val_split = train_test_split(train_df, test_size=0.2, random_state=CONFIG.SEED)

# max_len = 1024 matches the 1024 input features expected by CustomModel's first layer.
train_dataset = CustomDataset(train_split, max_len = 1024, is_train = True)
val_dataset = CustomDataset(val_split, max_len = 1024, is_train = True)
test_dataset = CustomDataset(test_df, max_len = 1024, is_train = False)

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=CONFIG.BATCH_SIZE, num_workers = 4)
# Evaluation does not benefit from shuffling; a fixed order keeps the
# per-batch validation loss repeatable between epochs and runs.
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=CONFIG.BATCH_SIZE, num_workers = 4)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=CONFIG.BATCH_SIZE, num_workers = 4)

In [10]:
from sklearn.metrics import roc_auc_score

def train(model, optimizer, train_loader, val_loader, device, save_path='./best.pt'):
    """Train ``model`` for ``CONFIG.N_EPOCHS`` epochs and checkpoint the best epoch.

    After every epoch the model is evaluated with ``validation``; whenever the
    validation AUC exceeds the best value seen so far, the whole model object
    is written with ``torch.save`` to ``save_path``.

    Args:
        model: Module whose outputs are probabilities (they are fed straight
            into ``nn.BCELoss``).
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(features, labels)`` training batches.
        val_loader: Iterable of ``(features, labels)`` batches for ``validation``.
        device: Device the model and every batch are moved to.
        save_path: Destination of the best checkpoint; defaults to the
            previously hard-coded ``'./best.pt'``.

    Returns:
        None. Progress is printed once per epoch.
    """
    model.to(device)
    criterion = nn.BCELoss().to(device)

    best_val_score = 0

    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(train_loader):
            features = features.float().to(device)
            labels = labels.float().to(device)

            optimizer.zero_grad()

            output = model(features)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')

        # Keep only the checkpoint with the highest validation AUC so far.
        if best_val_score < _val_score:
            best_val_score = _val_score
            torch.save(model, save_path)
    

def multiLabel_AUC(y_true, y_scores):
    """Return the mean of the per-column ROC AUC scores.

    Args:
        y_true: 2-D array of ground-truth indicators, one column per label.
        y_scores: 2-D array of predicted scores, indexed like ``y_true``.

    Returns:
        The average of ``roc_auc_score`` computed column by column.
    """
    n_columns = y_true.shape[1]
    per_column_auc = [
        roc_auc_score(y_true[:, col], y_scores[:, col])
        for col in range(n_columns)
    ]
    return np.mean(per_column_auc)
    
def validation(model, criterion, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        criterion: Loss applied to ``(model output, labels)`` for every batch.
        val_loader: Iterable of ``(features, labels)`` batches.
        device: Device every batch is moved to.

    Returns:
        Tuple ``(mean batch loss, mean per-column ROC AUC)``.
    """
    model.eval()
    batch_losses = []
    label_chunks = []
    prob_chunks = []

    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)

            probs = model(features)
            batch_losses.append(criterion(probs, labels).item())

            # Collect everything on the CPU so the AUC is computed over the
            # whole validation set rather than per batch.
            label_chunks.append(labels.cpu().numpy())
            prob_chunks.append(probs.cpu().numpy())

    mean_loss = np.mean(batch_losses)
    stacked_labels = np.concatenate(label_chunks, axis=0)
    stacked_probs = np.concatenate(prob_chunks, axis=0)

    return mean_loss, multiLabel_AUC(stacked_labels, stacked_probs)

In [11]:
from transformers.optimization import get_cosine_schedule_with_warmup

# Build the classifier and optimise all of its parameters with Adam.
model = CustomModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

train(
    model=model,
    optimizer=optimizer,
    train_loader=train_dataloader,
    val_loader=val_dataloader,
    device=device,
)

100%|██████████| 462/462 [01:04<00:00,  7.13it/s]
100%|██████████| 116/116 [00:17<00:00,  6.66it/s]

Epoch [1], Train Loss : [0.69274] Val Loss : [0.69026] Val AUC : [0.61360]



100%|██████████| 462/462 [01:05<00:00,  7.03it/s]
100%|██████████| 116/116 [00:17<00:00,  6.80it/s]

Epoch [2], Train Loss : [0.68884] Val Loss : [0.68498] Val AUC : [0.67586]



100%|██████████| 462/462 [01:05<00:00,  7.04it/s]
100%|██████████| 116/116 [00:17<00:00,  6.74it/s]

Epoch [3], Train Loss : [0.68136] Val Loss : [0.67208] Val AUC : [0.69966]



100%|██████████| 462/462 [01:05<00:00,  7.00it/s]
100%|██████████| 116/116 [00:17<00:00,  6.69it/s]

Epoch [4], Train Loss : [0.66262] Val Loss : [0.64490] Val AUC : [0.71989]



100%|██████████| 462/462 [01:06<00:00,  6.94it/s]
100%|██████████| 116/116 [00:17<00:00,  6.61it/s]

Epoch [5], Train Loss : [0.63034] Val Loss : [0.61065] Val AUC : [0.74345]





In [12]:
def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and collect its outputs.

    Args:
        model: Module to evaluate; moved to ``device`` and put in eval mode.
        test_loader: Iterable yielding feature batches (no labels).
        device: Device every batch is moved to.

    Returns:
        A list with one entry per sample, each the model output for that
        sample converted to nested Python lists.
    """
    model.to(device)
    model.eval()

    collected = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            batch = batch.float().to(device)
            batch_probs = model(batch).cpu().detach().numpy()
            collected.extend(batch_probs.tolist())
    return collected

In [13]:
# best.pt is the fully pickled model object written by train().
# map_location lets a checkpoint saved on a GPU be restored on whatever
# device this session runs on (including CPU-only machines).
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects fully pickled modules; pass
# weights_only=False there, and only for checkpoints you trust.
infer_model = torch.load('./best.pt', map_location=device)
preds = inference(infer_model, test_dataloader, device)

100%|██████████| 521/521 [01:42<00:00,  5.07it/s]


In [14]:
# Each prediction arrives wrapped in an extra singleton list ([[fake, real]])
# because the test dataset does not squeeze its inputs. Unwrap that level once;
# the isinstance guard makes the cell safe to re-run, since already-flat
# [fake, real] rows are left untouched.
preds = [p[0] if isinstance(p[0], list) else p for p in preds]

In [15]:
# Bare expression: render the predictions as a table to eyeball them (column 0 / column 1 per sample).
pd.DataFrame(preds)

Unnamed: 0,0,1
0,0.496971,0.507154
1,0.532187,0.467610
2,0.567913,0.406331
3,0.249984,0.775443
4,0.655321,0.330115
...,...,...
49995,0.584868,0.402774
49996,0.432719,0.583828
49997,0.585091,0.396392
49998,0.428784,0.593593


In [16]:
# Load the submission template from the data root; the bare `submit` on the
# last line makes the notebook display it.
submit = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER,'sample_submission.csv'))
submit

Unnamed: 0,id,fake,real
0,TEST_00000,0,0
1,TEST_00001,0,0
2,TEST_00002,0,0
3,TEST_00003,0,0
4,TEST_00004,0,0
...,...,...,...
49995,TEST_49995,0,0
49996,TEST_49996,0,0
49997,TEST_49997,0,0
49998,TEST_49998,0,0


In [17]:
# Index 0 of every prediction is the 'fake' score and index 1 the 'real' score
# (the order used for the one-hot training labels), so assign by column name
# rather than by position. Replacing the columns outright also avoids writing
# floats into the template's integer columns in place, which pandas deprecates.
submit[['fake', 'real']] = np.asarray(preds)
submit.head()

Unnamed: 0,id,fake,real
0,TEST_00000,0.496971,0.507154
1,TEST_00001,0.532187,0.46761
2,TEST_00002,0.567913,0.406331
3,TEST_00003,0.249984,0.775443
4,TEST_00004,0.655321,0.330115


In [18]:
# Write the filled-in submission to disk, leaving out the DataFrame index column.
submit.to_csv('./baseline_submit.csv', index=False)