# Genre classification using spectrograms

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/enter-opy/genre-classification/blob/main/notebooks/spectrograms.ipynb)

In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

## Data preprocessing

In [12]:
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [13]:
train_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = train_datagen.flow_from_directory(
    "../Data/images_original/train",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="training"
)

validation_generator = train_datagen.flow_from_directory(
    "../Data/images_original/train",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="validation"
)

test_generator = train_datagen.flow_from_directory(
    "../Data/images_original/test",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

Found 640 images belonging to 10 classes.
Found 159 images belonging to 10 classes.
Found 199 images belonging to 10 classes.


## VGG16

In [14]:
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False 

I0000 00:00:1741041251.818676   28126 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2248 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5


In [15]:
x = Flatten()(base_model.output)
x = Dense(512, activation="relu")(x)
x = Dropout(0.2)(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.2)(x)
x = Dense(128, activation="relu")(x)
x = Dropout(0.2)(x)
x = Dense(10, activation="softmax")(x)

In [16]:
model = Model(inputs=base_model.input, outputs=x)

In [17]:
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [18]:
model.summary()

## Training

In [19]:
model.fit(train_generator, validation_data=validation_generator, epochs=50)

  self._warn_if_super_not_called()


Epoch 1/50


I0000 00:00:1741041302.567776   28473 service.cc:148] XLA service 0x7f6238010b30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1741041302.568222   28473 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
I0000 00:00:1741041302.912282   28473 cuda_dnn.cc:529] Loaded cuDNN version 90600
I0000 00:00:1741041315.053243   28473 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 877ms/step - accuracy: 0.1249 - loss: 3.0290 - val_accuracy: 0.1321 - val_loss: 2.2485
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 310ms/step - accuracy: 0.1522 - loss: 2.3276 - val_accuracy: 0.2830 - val_loss: 2.0471
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 317ms/step - accuracy: 0.2688 - loss: 2.0026 - val_accuracy: 0.3333 - val_loss: 1.8591
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 336ms/step - accuracy: 0.3459 - loss: 1.8159 - val_accuracy: 0.3019 - val_loss: 1.7600
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 357ms/step - accuracy: 0.4001 - loss: 1.6530 - val_accuracy: 0.4088 - val_loss: 1.5983
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 443ms/step - accuracy: 0.3811 - loss: 1.4866 - val_accuracy: 0.4465 - val_loss: 1.6837
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x7f6360698f10>

## Evaluation

In [21]:
loss, accuracy = model.evaluate(test_generator)

print(f"Test Accuracy: {(accuracy * 100):.2f}%")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.4736 - loss: 2.0536  
Test Accuracy: 49.75%
