In [None]:
# Import necessary libraries
import os
import tarfile
import warnings

import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

warnings.filterwarnings("ignore", category=DeprecationWarning)


# Define the file path of the gzip-compressed dataset archive
file_path = '/content/drive/MyDrive/genres.tar.gz'

# Extract the audio files into the current working directory.
# The `with` block closes the archive even if extraction fails part-way through,
# which the previous open()/close() pair did not guarantee.
with tarfile.open(file_path, "r:gz") as tar:
    tar.extractall()


# Define the genres; a genre's position in this list is used as its integer class label
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

# Define the number of features (MFCC coefficients) to extract from each audio file
n_features = 20

# Load the audio files and extract features
X = []  # one transposed MFCC matrix per clip (time axis first)
y = []  # integer genre label for each clip, aligned with X
for label, genre in enumerate(genres):
    genre_folder = './genres/' + genre
    print(f'Loading audio files from {genre_folder}...')
    # os.listdir() returns entries in arbitrary order; sorting keeps the order of X/y
    # (and therefore the seeded train/test split done later) the same on every run.
    for filename in sorted(os.listdir(genre_folder)):
        if not filename.endswith('.au'):
            continue
        filepath = os.path.join(genre_folder, filename)
        # Resample every clip to 22050 Hz so all MFCC matrices share the same frame rate
        signal, sr = librosa.load(filepath, sr=22050)
        mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_features)
        # Transpose so that each row is one analysis frame
        X.append(mfccs.T)
        y.append(label)
    # Running total across all genres processed so far
    print(f'{len(X)} audio files loaded.')


Loading audio files from ./genres/blues...
100 audio files loaded.
Loading audio files from ./genres/classical...
200 audio files loaded.
Loading audio files from ./genres/country...
300 audio files loaded.
Loading audio files from ./genres/disco...
400 audio files loaded.
Loading audio files from ./genres/hiphop...
500 audio files loaded.
Loading audio files from ./genres/jazz...
600 audio files loaded.
Loading audio files from ./genres/metal...
700 audio files loaded.
Loading audio files from ./genres/pop...
800 audio files loaded.
Loading audio files from ./genres/reggae...
900 audio files loaded.
Loading audio files from ./genres/rock...
1000 audio files loaded.


In [None]:

# Bring every clip to a common length: shorter MFCC sequences receive trailing
# zeros (padding='post') and the result is stored as a single float32 array.
X = pad_sequences(sequences=X, dtype='float32', padding='post')


In [None]:

# Hold out 20% of the clips as the validation/test portion; the fixed seed
# makes the shuffle repeatable for a given ordering of X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:
# Convert both label collections to NumPy arrays
y_train, y_test = (np.array(labels) for labels in (y_train, y_test))

In [None]:
# Reshape data for Conv2D layer: append a single channel axis.
# Guarded on ndim so that re-running this cell does not keep stacking extra axes.
if X_train.ndim == 3:
    X_train = X_train[..., np.newaxis]
if X_test.ndim == 3:
    X_test = X_test[..., np.newaxis]

# One-hot encode the integer labels once and reuse them for fit() and evaluate()
n_classes = len(genres)
y_train_onehot = to_categorical(y_train, num_classes=n_classes)
y_test_onehot = to_categorical(y_test, num_classes=n_classes)

# Define the model architecture
model = Sequential()
# No normalisation step appears before this cell, so the MFCCs arrive unscaled.
# Normalising per coefficient (axis 2) inside the model means the scaling is saved
# with the model and applied automatically at inference time.
# NOTE(review): presumably the missing scaling is why the earlier run stayed at
# chance level (loss ~2.305, accuracy 0.065) -- confirm after re-training.
model.add(BatchNormalization(axis=2, input_shape=X_train.shape[1:]))
model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(n_classes))
model.add(Activation('softmax'))

# Compile the model.
# `learning_rate` is the supported argument name; the old `lr` alias is deprecated
# and, depending on the Keras version, is ignored or rejected.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train_onehot,
                    batch_size=32, epochs=50, validation_data=(X_test, y_test_onehot))

# Evaluate the model on the test set.
# Note: X_test is also the validation data passed to fit() above, so this score
# is not measured on data that was unseen during model development.
score = model.evaluate(X_test, y_test_onehot, verbose=0)
print(f'Test loss: {score[0]}')
print(f'Test accuracy: {score[1]}')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test loss: 2.305236577987671
Test accuracy: 0.06499999761581421


In [None]:
# Persist the trained model to an .h5 file (relative path, so it lands in the working directory)
model_path = 'music_genre_identification.h5'
model.save(model_path)

In [None]:
from keras.models import load_model

# Load the trained model
model = load_model('music_genre_identification.h5')

# Load and extract features from a single audio file
file_path = 'audio_file.wav'
signal, sr = librosa.load(file_path, sr=22050)
n_features = 20
mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_features)

# The network only accepts the input shape it was built with:
# (batch, frames, n_mfcc, 1). Read the expected frame count back from the model,
# zero-pad or cut the clip at the end (the same 'post' padding used for the
# training data), then append the channel axis. pad_sequences on a one-element
# list also supplies the leading batch axis.
n_frames = model.input_shape[1]
mfccs = pad_sequences([mfccs.T], maxlen=n_frames, padding='post',
                      truncating='post', dtype='float32')
mfccs = mfccs[..., np.newaxis]

# Predict the genre of the audio file using the trained model
# (the list order must match the label order used during training)
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
predictions = model.predict(mfccs)
# predictions has one row per input clip; take the most probable class of the only row
predicted_genre = genres[np.argmax(predictions[0])]

print(f'The predicted genre of {file_path} is: {predicted_genre}')
