In [25]:
import torch
import os, sys
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.metrics import classification_report, accuracy_score
import numpy as np

notebook_dir = %pwd
project_root = os.path.abspath(os.path.join(notebook_dir, ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.utils import create_train_test
import src.utils.misc as misc

In [40]:
f_path = os.path.join(notebook_dir, "..", "data", "raw", "features_3_sec.csv")

df = pd.read_csv(f_path)  # shape (9990, 60) for 3_sec

X_train, X_test, y_train, y_test = create_train_test(df)

IMPORTANT_FEATURE_INDICES = [15, 2, 3, 0, 23, 6, 14, 13, 8, 17, 18, 33, 9, 5, 27]

In [28]:
# debug cell
print(y_train)
print(misc.genres_to_digits(y_train))

['jazz' 'disco' 'rock' ... 'jazz' 'blues' 'pop']
<map object at 0x1307b83d0>


In [46]:
X_train = (X_train - np.mean(X_train, axis=0)) / np.std(X_train, axis=0)
X_test = (X_test - np.mean(X_test, axis=0)) / np.std(X_test, axis=0)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(list(misc.genres_to_digits(y_train)), dtype=torch.long)
y_test_tensor = torch.tensor(list(misc.genres_to_digits(y_test)), dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes, important_indices):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.drop1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 64)
        self.drop2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(64, num_classes)
        self.softmax = nn.Softmax(dim=1)
        # self.init_weights(important_indices)

    def init_weights(self, important_indices):
        with torch.no_grad():
            nn.init.zeros_(self.fc1.weight)
            nn.init.zeros_(self.fc1.bias)
            nn.init.zeros_(self.fc2.weight)
            nn.init.zeros_(self.fc2.bias)
            nn.init.zeros_(self.fc3.weight)
            nn.init.zeros_(self.fc3.bias)

            for idx in important_indices:
                nn.init.constant_(
                    self.fc1.weight[:, idx], 0.01
                )  # Positive value for important features

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.drop1(x)
        x = torch.relu(self.fc2(x))
        x = self.drop2(x)
        x = self.fc3(x)
        x = self.softmax(x)
        return x


input_size = X_train.shape[1]
num_classes = 10
model = NeuralNet(input_size, num_classes, IMPORTANT_FEATURE_INDICES)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 150
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.numpy())
        all_labels.extend(labels.numpy())

test_accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {test_accuracy}")

report = classification_report(all_labels, all_preds)
print("Classification Report:")
print(report)

Epoch [1/150], Loss: 2.1436
Epoch [2/150], Loss: 2.0046
Epoch [3/150], Loss: 1.9494
Epoch [4/150], Loss: 1.9211
Epoch [5/150], Loss: 1.8916
Epoch [6/150], Loss: 1.8742
Epoch [7/150], Loss: 1.8617
Epoch [8/150], Loss: 1.8435
Epoch [9/150], Loss: 1.8344
Epoch [10/150], Loss: 1.8253
Epoch [11/150], Loss: 1.8122
Epoch [12/150], Loss: 1.8133
Epoch [13/150], Loss: 1.8015
Epoch [14/150], Loss: 1.7934
Epoch [15/150], Loss: 1.7892
Epoch [16/150], Loss: 1.7891
Epoch [17/150], Loss: 1.7846
Epoch [18/150], Loss: 1.7774
Epoch [19/150], Loss: 1.7803
Epoch [20/150], Loss: 1.7634
Epoch [21/150], Loss: 1.7640
Epoch [22/150], Loss: 1.7655
Epoch [23/150], Loss: 1.7657
Epoch [24/150], Loss: 1.7578
Epoch [25/150], Loss: 1.7561
Epoch [26/150], Loss: 1.7573
Epoch [27/150], Loss: 1.7573
Epoch [28/150], Loss: 1.7548
Epoch [29/150], Loss: 1.7511
Epoch [30/150], Loss: 1.7482
Epoch [31/150], Loss: 1.7370
Epoch [32/150], Loss: 1.7403
Epoch [33/150], Loss: 1.7354
Epoch [34/150], Loss: 1.7372
Epoch [35/150], Loss: 1

In [37]:
# debug cell

print(X_train)
print(all_preds)
print(all_labels)
print(list(misc.genres_to_digits(y_train)))
print(list(misc.genres_to_digits(y_test)))

[[ 3.14394951e-01  8.01136345e-02  5.52052781e-02 ...  1.18548889e+02
   3.42707944e+00  6.04194107e+01]
 [ 3.71759415e-01  8.01260024e-02  1.09283902e-01 ...  3.22251663e+01
   3.17000628e+00  3.77350464e+01]
 [ 3.75269264e-01  7.15749040e-02  1.18705824e-01 ...  3.01879139e+01
  -1.48474777e+00  2.37936382e+01]
 ...
 [ 2.40018561e-01  9.37353447e-02  8.37583095e-02 ...  4.89456787e+01
   2.04962945e+00  9.57702942e+01]
 [ 3.65542769e-01  9.22607407e-02  1.08228065e-01 ...  6.64801788e+01
  -1.74777150e+00  2.79143181e+01]
 [ 4.24610168e-01  8.05154964e-02  1.73596039e-01 ...  5.32438011e+01
   1.32553148e+00  8.11580658e+01]]
[8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 

In [47]:
model_path = os.path.join(notebook_dir, "..", "models", "saved_models", "baselineNN")
torch.save(model.state_dict(), model_path)

In [52]:
# ---------------- VALIDATION ----------------
validation_path = os.path.join(
    notebook_dir, "..", "data", "external", "validation_set.csv"
)
df_val = pd.read_csv(validation_path)

X_val = df_val.iloc[:, 1:-1].to_numpy()
y_val = list(misc.genres_to_digits(df_val.iloc[:, -1]))

X_val = (X_val - np.mean(X_val, axis=0)) / np.std(X_val, axis=0)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

all_val_preds = []
all_val_labels = []
with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_val_preds.extend(preds.numpy())
        all_val_labels.extend(labels.numpy())

accuracy = accuracy_score(all_val_labels, all_val_preds)
print(f"Accuracy: {accuracy:.2f}")  # accuracy of 0.25 for baseline DNN

misclassified_indices = [
    i
    for i, (pred, true) in enumerate(zip(all_val_preds, all_val_labels))
    if pred != true
]
print(df_val.iloc[misclassified_indices[30:60], 0])

Accuracy: 0.25
30      blues1.wav.1.0
32      blues1.wav.3.0
33      blues1.wav.4.0
34      blues1.wav.5.0
35      blues1.wav.6.0
38      blues1.wav.9.0
39      blues2.wav.0.0
40      blues2.wav.1.0
42      blues2.wav.3.0
43      blues2.wav.4.0
48      blues2.wav.9.0
56      class1.wav.7.0
58      class1.wav.9.0
60      class2.wav.1.0
65      class2.wav.6.0
66      class2.wav.7.0
67      class2.wav.8.0
69    country1.wav.0.0
71    country1.wav.2.0
72    country1.wav.3.0
73    country1.wav.4.0
75    country1.wav.6.0
76    country1.wav.7.0
78    country1.wav.9.0
79    country2.wav.0.0
80    country2.wav.1.0
81    country2.wav.2.0
83    country2.wav.4.0
84    country2.wav.5.0
89      disco1.wav.0.0
Name: hawking_jazz.wav.0.0, dtype: object
