In [1]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset

In [2]:
class MFCCDataset(Dataset):
    """Map-style dataset over pre-computed feature arrays stored as ``.npy`` files.

    ``data_dir`` is expected to contain two sub-directories, ``real`` and
    ``fake``. Every ``.npy`` file found directly inside them becomes one
    sample, labelled 0 (``real``) or 1 (``fake``). Only file paths are
    collected at construction time; the arrays themselves are read from disk
    lazily in ``__getitem__``.
    """

    def __init__(self, data_dir):
        """Index the ``.npy`` files under ``data_dir/real`` and ``data_dir/fake``.

        Args:
            data_dir: Directory that holds the ``real`` and ``fake`` folders.

        Raises:
            OSError: Propagated from ``os.listdir`` (e.g. ``FileNotFoundError``)
                when one of the class folders does not exist.
        """
        self.data = []      # paths of the .npy files, one per sample
        self.labels = []    # integer class id, parallel to self.data
        self.label_map = {'real': 0, 'fake': 1}

        # Dict insertion order keeps the original 'real' -> 'fake' traversal.
        for label_name, label_id in self.label_map.items():
            label_dir = os.path.join(data_dir, label_name)
            # os.listdir() yields entries in arbitrary, filesystem-dependent
            # order; sorting makes sample indices stable across runs and
            # machines, which any index-based train/val split relies on.
            for file_name in sorted(os.listdir(label_dir)):
                if file_name.endswith('.npy'):
                    self.data.append(os.path.join(label_dir, file_name))
                    self.labels.append(label_id)

    def __len__(self):
        """Return the number of indexed ``.npy`` files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk.

        Returns:
            A ``(features, label)`` pair: ``features`` is a float32 tensor with
            a new leading dimension of size 1, ``label`` is a 0-dim int64
            tensor holding the class id.
        """
        mfcc = np.load(self.data[idx])
        mfcc = torch.tensor(mfcc, dtype=torch.float32)
        # Add a leading channel dimension so a 2-D array becomes [1, H, W].
        # NOTE(review): which of H/W is time vs. coefficient depends on how the
        # .npy files were produced -- confirm against the preprocessing step.
        mfcc = mfcc.unsqueeze(0)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return mfcc, label


In [3]:
from torch.utils.data import DataLoader, random_split

# Tunable settings for the split and the loaders.
TRAIN_FRACTION = 0.8   # share of samples used for training; the rest is validation
BATCH_SIZE = 32
SPLIT_SEED = 42        # fixes the train/val partition across kernel restarts

# Load dataset
full_dataset = MFCCDataset("/kaggle/input/audio-dataset/processed_mfcc")

# Split into train and val. int() truncates, so any remainder sample goes to
# the validation split and the two sizes always sum to len(full_dataset).
train_size = int(TRAIN_FRACTION * len(full_dataset))
val_size = len(full_dataset) - train_size

# Without an explicit generator random_split draws from the global RNG, so the
# partition (and therefore the reported validation accuracy) would change on
# every run. A dedicated seeded generator makes the split reproducible.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Create loaders: shuffle only the training data; validation order is irrelevant.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)


In [4]:
import torch.nn as nn
import torch.nn.functional as F

class AudioCNN(nn.Module):
    """Small two-stage convolutional classifier that emits 2 logits per sample.

    Layer sequence:
        Conv2d(1 -> 16, 3x3) -> ReLU -> MaxPool2d(2)
        Conv2d(16 -> 32, 3x3) -> ReLU -> MaxPool2d(2)
        flatten -> Linear(fc_in_features -> 64) -> ReLU -> Linear(64 -> 2)

    The convolutions use no padding, so each one shrinks both spatial
    dimensions by 2 before the pooling layer halves them (rounding down).
    """

    def __init__(self, fc_in_features=1184):
        """Build the layers.

        Args:
            fc_in_features: Number of values per sample after the second
                pooling stage, i.e. ``32 * H_out * W_out``. The default of
                1184 keeps the size this model was originally hard-coded for
                (for example a ``[1, 13, 156]`` input gives 32 * 1 * 37);
                pass a different value for other input shapes.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(fc_in_features, 64)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        """Map a batch ``[B, 1, H, W]`` to raw class scores ``[B, 2]``.

        The output is un-normalised logits (no softmax applied).
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        # Collapse channel and spatial dims into one feature vector per sample.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


In [5]:
from tqdm import tqdm

# Training setup: use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AudioCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0    # sum of per-sample losses over the epoch
    samples_seen = 0
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

    for X, y in loop:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(X)
        loss = loss_fn(out, y)
        loss.backward()
        optimizer.step()

        # .item() forces a device->host sync, so read the scalar only once.
        batch_loss = loss.item()
        batch_count = y.size(0)
        # CrossEntropyLoss averages within the batch by default; weighting by
        # the batch size gives a true per-sample epoch mean even when the last
        # batch is smaller than the others.
        total_loss += batch_loss * batch_count
        samples_seen += batch_count

        loop.set_postfix(loss=batch_loss)

    # Guard against an empty loader instead of raising ZeroDivisionError.
    avg_loss = total_loss / samples_seen if samples_seen else float('nan')
    print(f"Epoch {epoch+1} completed. Avg Loss: {avg_loss:.4f}")


                                                                            

Epoch 1 completed. Avg Loss: 0.2580


                                                                           

Epoch 2 completed. Avg Loss: 0.1322


                                                                           

Epoch 3 completed. Avg Loss: 0.1076


                                                                            

Epoch 4 completed. Avg Loss: 0.0921


                                                                            

Epoch 5 completed. Avg Loss: 0.0816


                                                                            

Epoch 6 completed. Avg Loss: 0.0689


                                                                           

Epoch 7 completed. Avg Loss: 0.0634


                                                                             

Epoch 8 completed. Avg Loss: 0.0592


                                                                            

Epoch 9 completed. Avg Loss: 0.0500


                                                                            

Epoch 10 completed. Avg Loss: 0.0487




In [6]:
# Score the trained model on the held-out split. eval() switches the module to
# inference mode and no_grad() skips autograd bookkeeping for the forward passes.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for X, y in val_loader:
        X = X.to(device)
        y = y.to(device)
        logits = model(X)
        # The predicted class is the index of the largest logit in each row.
        preds = logits.argmax(dim=1)
        hits = preds == y
        correct += hits.sum().item()
        total += y.size(0)

print(f"Validation Accuracy: {correct / total:.2%}")


Validation Accuracy: 97.67%
