In [1]:
%pip install librosa torch torchaudio scikit-learn pandas tqdm

Note: you may need to restart the kernel to use updated packages.


In [None]:

import os
import torch
import torch.nn as nn
import librosa
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
import warnings

# NOTE(review): this silences every warning raised after this point, not just
# noisy ones; narrow or remove the filter when debugging unexpected results.
warnings.filterwarnings("ignore")

# Reproducibility: seed NumPy and PyTorch before any weights are initialised
# or any DataLoader shuffles, so a fresh Restart & Run All is comparable.
# (GPU kernels may still introduce small run-to-run differences.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# CUDA Settings: use the GPU when one is visible, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Folder paths (relative to the notebook's working directory)
TRAIN_AUDIO_DIR = "train_audio_folder"
TEST_AUDIO_DIR = "test_audio_folder"

# Load Data: metadata tables that list the audio files (and labels for train)
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# Parameters
SAMPLE_RATE = 16000  # target sampling rate handed to librosa.load
N_MFCC = 40          # number of MFCC coefficients == model input width

# Audio to MFCC Features
def extract_features(file_path):
    """Turn one audio file into a fixed-length MFCC summary vector.

    The file is read with ``librosa.load`` at ``SAMPLE_RATE``, ``N_MFCC``
    coefficients are computed, and the coefficient matrix is averaged along
    axis 0 of its transpose, so the result has one value per coefficient.

    Errors raised by librosa are not handled here and propagate to the caller.
    """
    signal, rate = librosa.load(file_path, sr=SAMPLE_RATE)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=N_MFCC)
    # Transpose first so the reduction runs over the leading axis, giving
    # one averaged value per coefficient row.
    per_frame = coefficients.T
    return np.mean(per_frame, axis=0)

# Custom Dataset
class GrammarDataset(Dataset):
    """Serve ``(features, target)`` pairs for the audio files listed in ``df``.

    Args:
        df: frame with one row per audio file. The file name is read from the
            ``filename`` column when present, otherwise from ``file``.
            Unless ``is_test`` is set, a ``label`` column is also required.
        folder: directory that the file names are joined onto.
        is_test: when true, items are ``(features, file_name)`` instead of
            ``(features, label)``.

    Features come from ``extract_features`` and are cached per index after
    the first access, so each file is decoded once instead of once per epoch.
    The cache assumes ``df`` and the audio files do not change afterwards.
    If extraction fails, the error is printed once and a zero vector of
    length ``N_MFCC`` is used (and cached) for that item.
    """

    def __init__(self, df, folder, is_test=False):
        self.df = df
        self.folder = folder
        self.is_test = is_test
        # idx -> numpy feature vector, filled lazily by _load_features
        self._feature_cache = {}

    def __len__(self):
        return len(self.df)

    def _load_features(self, idx, file):
        """Return the cached feature vector for ``idx``, computing it on first use."""
        cached = self._feature_cache.get(idx)
        if cached is None:
            file_path = os.path.join(self.folder, file)
            try:
                cached = extract_features(file_path)
            except Exception as e:
                # Best-effort: keep the item, but make the failure visible.
                print(f"Error processing {file}: {e}")
                cached = np.zeros(N_MFCC)
            self._feature_cache[idx] = cached
        return cached

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        file = row['filename'] if 'filename' in row else row['file']

        # torch.tensor copies the array, so callers never alias the cache.
        features = torch.tensor(self._load_features(idx, file), dtype=torch.float32)

        if self.is_test:
            return features, file
        label = torch.tensor(row['label'], dtype=torch.float32)
        return features, label

# Training data: wrap the labelled frame, then serve it in shuffled
# mini-batches of 32 samples.
train_dataset = GrammarDataset(df=train_df, folder=TRAIN_AUDIO_DIR)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)

# Model
class SimpleNN(nn.Module):
    """Fully connected regressor: ``input_dim`` -> 64 -> 32 -> 1.

    ReLU follows both hidden layers and dropout (p=0.3) follows the first.
    ``forward`` returns the raw output of the last linear layer, so its
    final dimension has size 1.
    """

    def __init__(self, input_dim):
        super().__init__()
        stack = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        # Attribute name and layer order are unchanged, so parameter names
        # remain "model.<index>.<param>".
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        prediction = self.model(x)
        return prediction

# Model, loss and optimiser for the regression task
model = SimpleNN(N_MFCC).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train
EPOCHS = 30
model.train()  # keep dropout active while optimising
for epoch in range(EPOCHS):
    total_loss = 0.0  # sum of per-sample squared errors for this epoch
    n_seen = 0        # number of samples contributing to total_loss
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        # squeeze(-1) removes only the size-1 output axis. A bare squeeze()
        # would also drop the batch axis of a 1-sample batch and make the
        # loss rely on broadcasting against y.
        output = model(x).squeeze(-1)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        # MSELoss returns the batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one.
        batch_n = y.size(0)
        total_loss += loss.item() * batch_n
        n_seen += batch_n

    epoch_loss = total_loss / n_seen if n_seen else float("nan")
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.4f}")

# Evaluate RMSE
# NOTE(review): this score is computed on the same data the model was fitted
# on, so it is an optimistic estimate; no held-out split exists in this cell.
model.eval()
true_vals = []
pred_vals = []
with torch.no_grad():
    for x, y in train_loader:
        x = x.to(device)
        # squeeze(-1) keeps the batch axis. With a bare squeeze(), a final
        # batch of one sample becomes a 0-d array and list.extend raises
        # "TypeError: iteration over a 0-d array".
        preds = model(x).squeeze(-1).cpu().numpy()
        true = y.numpy()
        pred_vals.extend(preds)
        true_vals.extend(true)

rmse = np.sqrt(mean_squared_error(true_vals, pred_vals))
print(f"Final RMSE on training data: {rmse:.4f}")

# Inference on the unlabelled test clips, one file per batch
test_dataset = GrammarDataset(df=test_df, folder=TEST_AUDIO_DIR, is_test=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1)

file_names, predictions = [], []

model.eval()  # inference mode for dropout
with torch.no_grad():
    for x, fname in tqdm(test_loader):
        x = x.to(device)
        # With batch_size=1 the model output holds exactly one value.
        score = model(x).squeeze().cpu().numpy()
        pred = score.item()
        file_names.append(fname[0])
        predictions.append(pred)

# Write the submission table: one row per test clip
submission = pd.DataFrame(dict(filename=file_names, label=predictions))
submission.to_csv("submission.csv", index=False)
print("Saved predictions to submission.csv")


Using device: cuda
Epoch 1/30, Loss: 12.4295
Epoch 2/30, Loss: 4.9971
Epoch 3/30, Loss: 3.5309
Epoch 4/30, Loss: 2.7806
Epoch 5/30, Loss: 2.2367
Epoch 6/30, Loss: 2.2640
Epoch 7/30, Loss: 2.1547
Epoch 8/30, Loss: 1.9114
Epoch 9/30, Loss: 1.9885
Epoch 10/30, Loss: 1.9259
Epoch 11/30, Loss: 1.6841
Epoch 12/30, Loss: 1.6927
Epoch 13/30, Loss: 1.5922
Epoch 14/30, Loss: 1.6310
Epoch 15/30, Loss: 1.6239
Epoch 16/30, Loss: 1.5304
Epoch 17/30, Loss: 1.5551
Epoch 18/30, Loss: 1.4373
Epoch 19/30, Loss: 1.6351
Epoch 20/30, Loss: 1.3518
Epoch 21/30, Loss: 1.4202
Epoch 22/30, Loss: 1.4126
Epoch 23/30, Loss: 1.4599
Epoch 24/30, Loss: 1.5291
Epoch 25/30, Loss: 1.4499
Epoch 26/30, Loss: 1.3767
Epoch 27/30, Loss: 1.3777
Epoch 28/30, Loss: 1.4345
Epoch 29/30, Loss: 1.3560
Epoch 30/30, Loss: 1.3079
✅ Final RMSE on training data: 1.2449


100%|██████████| 204/204 [00:11<00:00, 17.39it/s]

✅ Saved predictions to submission.csv





In [2]:
import os
import torch
import torch.nn as nn
import librosa
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import warnings

# NOTE(review): this silences every warning raised after this point, not just
# noisy ones; narrow or remove the filter when debugging unexpected results.
warnings.filterwarnings("ignore")

# Reproducibility: seed NumPy and PyTorch before any weights are initialised
# or any DataLoader shuffles, so a fresh Restart & Run All is comparable.
# (GPU kernels may still introduce small run-to-run differences.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# CUDA Settings: use the GPU when one is visible, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Folder paths (relative to the notebook's working directory)
TRAIN_AUDIO_DIR = "train_audio_folder"
TEST_AUDIO_DIR = "test_audio_folder"

# Load Data: metadata tables that list the audio files (and labels for train)
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# Parameters
SAMPLE_RATE = 16000  # target sampling rate handed to librosa.load
N_MFCC = 40          # number of MFCC coefficients == model input width

# Extract base MFCCs only
def extract_features(file_path):
    """Summarise one audio file as a vector of time-averaged MFCCs.

    Loads the file with ``librosa.load`` at ``SAMPLE_RATE``, computes
    ``N_MFCC`` coefficients, and averages the transposed coefficient matrix
    over its first axis. Exceptions from librosa propagate to the caller.
    """
    waveform, rate = librosa.load(file_path, sr=SAMPLE_RATE)
    coefficients = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=N_MFCC)
    # Averaging the transpose over axis 0 yields one value per coefficient.
    averaged = np.mean(coefficients.T, axis=0)
    return averaged

# Fit Standard Scaler on training data
print("Fitting scaler on training data...")

# Same column fallback as GrammarDataset. Resolving it outside the try block
# means a missing column fails loudly instead of turning every row into zeros.
file_col = 'filename' if 'filename' in train_df.columns else 'file'

train_features = []  # one vector per train_df row (zeros where extraction failed)
extracted_ok = []    # parallel flags: True when the vector is a real measurement
for file in train_df[file_col]:
    try:
        path = os.path.join(TRAIN_AUDIO_DIR, file)
        feats = extract_features(path)
        extracted_ok.append(True)
    except Exception as e:
        # Best-effort: keep going, but report the failure instead of hiding it.
        print(f"Error processing {file}: {e}")
        feats = np.zeros(N_MFCC)
        extracted_ok.append(False)
    train_features.append(feats)

n_failed = extracted_ok.count(False)
if n_failed:
    print(f"Feature extraction failed for {n_failed} of {len(train_features)} training files")

# Fit only on real measurements: zero placeholders would drag the per-feature
# mean towards 0 and distort the variance. If nothing could be extracted,
# fall back to the placeholders so the rest of the cell still runs.
valid_features = [f for f, ok in zip(train_features, extracted_ok) if ok]
scaler = StandardScaler()
scaler.fit(valid_features if valid_features else train_features)

# Custom Dataset
class GrammarDataset(Dataset):
    """Serve scaled ``(features, target)`` pairs for the audio files in ``df``.

    Args:
        df: frame with one row per audio file. The file name is read from the
            ``filename`` column when present, otherwise from ``file``.
            Unless ``is_test`` is set, a ``label`` column is also required.
        folder: directory that the file names are joined onto.
        scaler: object whose ``transform`` is applied to each feature vector
            (called with a one-row batch; the first row of the result is used).
        is_test: when true, items are ``(features, file_name)`` instead of
            ``(features, label)``.

    Raw features from ``extract_features`` are cached per index after the
    first access, so each file is decoded once instead of once per epoch.
    Scaling is applied on every access, so the current state of ``scaler`` is
    always used. The cache assumes ``df`` and the audio files do not change.
    If extraction fails, the error is printed once and a zero vector of
    length ``N_MFCC`` is used (and cached) before scaling.
    """

    def __init__(self, df, folder, scaler, is_test=False):
        self.df = df
        self.folder = folder
        self.scaler = scaler
        self.is_test = is_test
        # idx -> unscaled numpy feature vector, filled lazily by _load_raw
        self._raw_cache = {}

    def __len__(self):
        return len(self.df)

    def _load_raw(self, idx, file):
        """Return the cached unscaled features for ``idx``, computing them on first use."""
        cached = self._raw_cache.get(idx)
        if cached is None:
            file_path = os.path.join(self.folder, file)
            try:
                cached = extract_features(file_path)
            except Exception as e:
                # Best-effort: keep the item, but make the failure visible.
                print(f"Error processing {file}: {e}")
                cached = np.zeros(N_MFCC)
            self._raw_cache[idx] = cached
        return cached

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        file = row['filename'] if 'filename' in row else row['file']

        features = self.scaler.transform([self._load_raw(idx, file)])[0]
        features = torch.tensor(features, dtype=torch.float32)

        if self.is_test:
            return features, file
        label = torch.tensor(row['label'], dtype=torch.float32)
        return features, label

# Training data: wrap the labelled frame with the fitted scaler, then serve
# it in shuffled mini-batches of 32 samples.
train_dataset = GrammarDataset(df=train_df, folder=TRAIN_AUDIO_DIR, scaler=scaler)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)

# Model
class SimpleNN(nn.Module):
    """Fully connected regressor with batch norm: ``input_dim`` -> 64 -> 32 -> 1.

    Each hidden linear layer is followed by ``BatchNorm1d`` and ReLU; dropout
    (p=0.2) follows the first hidden block. ``forward`` returns the raw
    output of the last linear layer, so its final dimension has size 1.
    """

    def __init__(self, input_dim):
        super().__init__()
        first_block = [
            nn.Linear(input_dim, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
        ]
        second_block = [
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
        ]
        head = [nn.Linear(32, 1)]
        # Attribute name and layer order are unchanged, so parameter names
        # remain "model.<index>.<param>".
        self.model = nn.Sequential(*first_block, *second_block, *head)

    def forward(self, x):
        prediction = self.model(x)
        return prediction

# Model, loss and optimiser for the regression task
model = SimpleNN(N_MFCC).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train
EPOCHS = 30
model.train()  # batch norm uses batch statistics, dropout is active
for epoch in range(EPOCHS):
    total_loss = 0.0  # sum of per-sample squared errors for this epoch
    n_seen = 0        # number of samples contributing to total_loss
    for x, y in train_loader:
        if x.size(0) < 2:
            # BatchNorm1d cannot compute batch statistics from one sample in
            # training mode and raises ValueError. The loader reshuffles each
            # epoch, so the skipped sample changes from epoch to epoch.
            continue
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        # squeeze(-1) removes only the size-1 output axis and never the
        # batch axis, so output and y always have matching shapes.
        output = model(x).squeeze(-1)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        # MSELoss returns the batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one.
        batch_n = y.size(0)
        total_loss += loss.item() * batch_n
        n_seen += batch_n
    epoch_loss = total_loss / n_seen if n_seen else float("nan")
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.4f}")

# Evaluate RMSE
# NOTE(review): this score is computed on the same data the model was fitted
# on, so it is an optimistic estimate; no held-out split exists in this cell.
model.eval()
true_vals = []
pred_vals = []
with torch.no_grad():
    for x, y in train_loader:
        x = x.to(device)
        # squeeze(-1) keeps the batch axis. With a bare squeeze(), a final
        # batch of one sample becomes a 0-d array and list.extend raises
        # "TypeError: iteration over a 0-d array".
        preds = model(x).squeeze(-1).cpu().numpy()
        true = y.numpy()
        pred_vals.extend(preds)
        true_vals.extend(true)

rmse = np.sqrt(mean_squared_error(true_vals, pred_vals))
print(f"Final RMSE on training data: {rmse:.4f}")

# Inference on the unlabelled test clips, one file per batch
test_dataset = GrammarDataset(df=test_df, folder=TEST_AUDIO_DIR, scaler=scaler, is_test=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1)

file_names, predictions = [], []

model.eval()  # batch norm uses running statistics, dropout is off
with torch.no_grad():
    for x, fname in tqdm(test_loader):
        x = x.to(device)
        # With batch_size=1 the model output holds exactly one value.
        score = model(x).squeeze().cpu().numpy()
        pred = score.item()
        file_names.append(fname[0])
        predictions.append(pred)

# Write the submission table: one row per test clip
submission = pd.DataFrame(dict(filename=file_names, label=predictions))
submission.to_csv("submission1.csv", index=False)
print("Saved predictions to submission1.csv")


Using device: cuda
Fitting scaler on training data...
Epoch 1/30, Loss: 17.6696
Epoch 2/30, Loss: 15.6350
Epoch 3/30, Loss: 13.8993
Epoch 4/30, Loss: 12.3215
Epoch 5/30, Loss: 10.7804
Epoch 6/30, Loss: 9.4199
Epoch 7/30, Loss: 8.1004
Epoch 8/30, Loss: 7.0475
Epoch 9/30, Loss: 5.9998
Epoch 10/30, Loss: 5.0845
Epoch 11/30, Loss: 4.2407
Epoch 12/30, Loss: 3.5873
Epoch 13/30, Loss: 2.9400
Epoch 14/30, Loss: 2.4084
Epoch 15/30, Loss: 1.9742
Epoch 16/30, Loss: 1.6267
Epoch 17/30, Loss: 1.4330
Epoch 18/30, Loss: 1.2444
Epoch 19/30, Loss: 0.9645
Epoch 20/30, Loss: 0.8958
Epoch 21/30, Loss: 0.7673
Epoch 22/30, Loss: 0.6792
Epoch 23/30, Loss: 0.6490
Epoch 24/30, Loss: 0.5820
Epoch 25/30, Loss: 0.6100
Epoch 26/30, Loss: 0.5726
Epoch 27/30, Loss: 0.5873
Epoch 28/30, Loss: 0.5444
Epoch 29/30, Loss: 0.5320
Epoch 30/30, Loss: 0.4928
Final RMSE on training data: 0.6097


100%|██████████| 204/204 [00:11<00:00, 17.45it/s]

Saved predictions to submission1.csv



