Instruments: lead guitar, rhythm guitar, bass guitar, and drums
Features: spectral features, rhythm features, timbre features, harmony features, lyrics, and metadata

# Music genre classification using spectral features

In [None]:
import numpy as np
import librosa
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Sub-genres to classify
sub_genres = [
    'Instrumental Rock',
    'Punk Rock',
    'Electric Rock',
    'Metal Rock',
    'Soft Rock',
    'Jazz Rock',
    'House Rock',
    'Experimental Rock',
]

# Analysis settings used by the feature extraction in this cell
n_mels = 128       # number of mel frequency bands
hop_length = 512   # hop length, in samples
n_fft = 2048       # window size, in samples
n_mfcc = 20        # number of MFCCs

# Define a function to extract the features from an audio file
def extract_features(filename):
    """Extract MFCCs and their first- and second-order deltas from an audio file.

    Parameters
    ----------
    filename : str
        Path of the audio file; passed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The MFCCs, their deltas and their delta-deltas concatenated along
        axis 0 (in that order).
    """
    # Load the audio file (sr=None asks librosa to keep the file's own sampling rate)
    y, sr = librosa.load(filename, sr=None)

    # Extract the mel spectrogram. The signal is passed by keyword because
    # recent librosa releases (0.10+) no longer accept it positionally.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length, n_fft=n_fft)

    # Convert the mel spectrogram to decibels (dB), relative to its peak value
    log_S = librosa.power_to_db(S, ref=np.max)

    # Extract the MFCCs from the log-power mel spectrogram
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=n_mfcc)

    # Compute the first-order and second-order differences of the MFCCs
    delta1_mfcc = librosa.feature.delta(mfcc, order=1)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    # Concatenate the MFCCs and their differences
    features = np.concatenate((mfcc, delta1_mfcc, delta2_mfcc), axis=0)

    # Return the extracted features
    return features

# Define a function to load the dataset and extract the features
def load_dataset():
    """Build a train/test split of fixed-length feature vectors and labels.

    Looks in ``music/<sub_genre>/`` for every entry of ``sub_genres``,
    extracts features from each ``.mp3`` file found there and summarizes
    them over time, so that every track contributes one row of the same
    length regardless of its duration.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split`` with ``test_size=0.2`` and ``random_state=0``.
        Each label is the index of the track's sub-genre in ``sub_genres``.
    """
    # Collect (sub_genre, path) pairs. Directory listings are sorted because
    # os.listdir returns entries in arbitrary order, which would make the
    # seeded split below differ from one machine to another.
    audio_files = []
    for sub_genre in sub_genres:
        sub_genre_dir = os.path.join('music', sub_genre)
        for filename in sorted(os.listdir(sub_genre_dir)):
            if filename.endswith('.mp3'):
                audio_files.append((sub_genre, os.path.join(sub_genre_dir, filename)))

    # Extract the features from the audio files
    X = []
    y = []
    for sub_genre, filename in audio_files:
        features = extract_features(filename)
        # The number of frames (axis 1) depends on the track length, so the
        # raw matrices cannot be stacked into the 2-D array a classifier
        # expects. Collapse the time axis into per-coefficient mean and
        # standard deviation to get one fixed-length vector per track.
        X.append(np.concatenate((features.mean(axis=1), features.std(axis=1))))
        y.append(sub_genres.index(sub_genre))

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        np.array(X), np.array(y), test_size=0.2, random_state=0)

    # Return the dataset
    return X_train, X_test, y_train, y_test

# Load the dataset and extract the features
X_train, X_test, y_train, y_test = load_dataset()

# Train a SVM classifier using the extracted features
# NOTE(review): gamma is a coefficient of the non-linear kernels; with
# kernel='linear' it should have no effect — confirm and drop if so.
clf = SVC(kernel='linear', C=1, gamma='scale')
clf.fit(X_train, y_train)

# Test the classifier on the testing set
y_pred = clf.predict(X_test)

# Print the classification report and confusion matrix.
# The full label set is passed explicitly: without it, a sub-genre that
# happens to be absent from the test split makes the number of classes
# disagree with target_names (classification_report then raises) and
# changes the size of the confusion matrix.
label_ids = list(range(len(sub_genres)))
print(classification_report(y_test, y_pred, labels=label_ids, target_names=sub_genres))
print(confusion_matrix(y_test, y_pred, labels=label_ids))


# Music genre classification using rhythm features

In [None]:
import numpy as np
import librosa
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Sub-genres to classify
sub_genres = [
    'Instrumental Rock',
    'Punk Rock',
    'Electric Rock',
    'Metal Rock',
    'Soft Rock',
    'Jazz Rock',
    'House Rock',
    'Experimental Rock',
]

# Analysis settings used by the feature extraction in this cell
hop_length = 512   # hop length, in samples
n_fft = 2048       # window size, in samples
n_mfcc = 20        # number of MFCCs

# Define a function to extract the rhythm features from an audio file
def extract_features(filename):
    """Summarize an audio file as a single one-dimensional feature vector.

    Parameters
    ----------
    filename : str
        Path of the audio file; passed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Concatenation, in this order, of the mean and standard deviation of:
        the MFCCs (per coefficient), the RMS energy, the spectral centroid,
        the spectral bandwidth, the spectral contrast (per band) and the
        spectral roll-off.
    """
    # Load the audio file (sr=None asks librosa to keep the file's own sampling rate)
    y, sr = librosa.load(filename, sr=None)

    # Extract the onset envelope. The signal is passed by keyword because
    # recent librosa releases (0.10+) no longer accept it positionally.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length, n_fft=n_fft)

    # Compute the tempo and beat positions
    # NOTE(review): tempo and beats are not included in the returned vector,
    # so no beat-derived quantity reaches the classifier. Adding them would
    # change the vector length, so that is left as a deliberate follow-up.
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=hop_length)

    # Extract the frame-wise features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_fft=n_fft, n_mfcc=n_mfcc)
    # librosa.feature.rmse was renamed to librosa.feature.rms and the old
    # name was removed in librosa 0.7.
    rms = librosa.feature.rms(y=y, hop_length=hop_length, frame_length=n_fft)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=hop_length, n_fft=n_fft)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr, hop_length=hop_length, n_fft=n_fft)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=hop_length, n_fft=n_fft)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, hop_length=hop_length, n_fft=n_fft)

    # Compute the mean and standard deviation of each feature.
    # MFCC and spectral contrast keep one value per row (axis=1); the other
    # features are reduced to a single scalar each.
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_std = np.std(mfcc, axis=1)
    rms_mean = np.mean(rms)
    rms_std = np.std(rms)
    spectral_centroid_mean = np.mean(spectral_centroid)
    spectral_centroid_std = np.std(spectral_centroid)
    spectral_bandwidth_mean = np.mean(spectral_bandwidth)
    spectral_bandwidth_std = np.std(spectral_bandwidth)
    spectral_contrast_mean = np.mean(spectral_contrast, axis=1)
    spectral_contrast_std = np.std(spectral_contrast, axis=1)
    spectral_rolloff_mean = np.mean(spectral_rolloff)
    spectral_rolloff_std = np.std(spectral_rolloff)

    # Concatenate the summary statistics into one vector
    features = np.concatenate(
        (
            mfcc_mean,
            mfcc_std,
            [rms_mean, rms_std],
            [spectral_centroid_mean, spectral_centroid_std],
            [spectral_bandwidth_mean, spectral_bandwidth_std],
            spectral_contrast_mean,
            spectral_contrast_std,
            [spectral_rolloff_mean, spectral_rolloff_std],
        ),
        axis=0,
    )

    # Return the extracted features
    return features

# Define a function to load the dataset and extract the rhythm features
def load_dataset():
    # Define the list of audio files
    audio_files = []
    for sub_genre in sub_gen


# Extracting timbre features using Mel-Frequency Cepstral Coefficients (MFCC) from an audio file, and using them to train a machine learning model for music genre classification

In [None]:
import librosa
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Genres to classify; each one is also the name of a sub-directory of dataset/
genres = [
    'Instrumental Rock',
    'Punk Rock',
    'Electric Rock',
    'Metal Rock',
    'Soft Rock',
    'Jazz Rock',
    'House Rock',
    'Experimental Rock',
]

# Define a function to extract MFCC features from an audio file
def extract_features(file_path):
    """Return the time-averaged MFCCs of an audio file, or None on failure.

    Parameters
    ----------
    file_path : str
        Path of the audio file; passed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray or None
        Mean of each of the 20 MFCCs over all frames, or ``None`` if the
        file could not be loaded or processed (a message is printed).
    """
    try:
        audio, sr = librosa.load(file_path, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
        # Transposing puts frames on axis 0, so this averages over time
        mfccs_mean = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole run. The cause is reported, though — otherwise a systematic
        # problem (e.g. a missing decoder or resampler) would look like
        # every file being corrupt.
        print("Error encountered while parsing file: ", file_path, f"({e!r})")
        return None
    return mfccs_mean

# Create dictionaries to store the MFCC features and genre labels.
# Both are keyed by the file's full path: keying by the bare file name
# would let same-named files in different genre directories (for example
# "track01.mp3") overwrite each other and silently drop samples.
features = {}
labels = {}

# Iterate through each audio file in the dataset directory
for genre in genres:
    audio_files = os.listdir(f'dataset/{genre}')
    for file in audio_files:
        file_path = f'dataset/{genre}/{file}'
        mfccs = extract_features(file_path)
        # Files that could not be processed are skipped
        if mfccs is not None:
            features[file_path] = mfccs
            labels[file_path] = genre

# Split the data into training and testing sets.
# Both dicts receive the same keys in the same order, so the rows of X and
# the entries of y line up.
X = np.array(list(features.values()))
y = np.array(list(labels.values()))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train a machine learning model (here, we use a multi-layer perceptron classifier)
model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)
model.fit(X_train, y_train)

# Evaluate the accuracy of the model on the testing set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


This code loads audio files from a directory named `dataset`, where each subdirectory is named after a genre and contains the audio files belonging to that genre. The `extract_features` function uses Librosa to load an audio file, compute its MFCCs, and return the mean of each coefficient over time (one 20-dimensional vector per file); if a file cannot be processed, it prints a message and returns `None`, and that file is skipped. The features and genre labels are stored in two dictionaries. The code then splits the data into training (80%) and testing (20%) sets, trains a multi-layer perceptron classifier on the training data, and evaluates its accuracy on the testing data.

# Music genre classification using harmony features

In [None]:
import librosa
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Define the list of genres
genres = [
    'Instrumental Rock',
    'Punk Rock',
    'Electric Rock',
    'Metal Rock',
    'Soft Rock',
    'Jazz Rock',
    'House Rock',
    'Experimental Rock',
]

# Define a function to extract chroma features from an audio file
def extract_features(file_path):
    """Return the time-averaged chroma vector of an audio file, or None on failure.

    Parameters
    ----------
    file_path : str
        Path of the audio file; passed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray or None
        Mean of each of the 12 chroma bins over all frames, or ``None`` if
        the file could not be loaded or processed (a message is printed).
    """
    try:
        audio, sr = librosa.load(file_path, res_type='kaiser_fast')
        chroma = librosa.feature.chroma_stft(y=audio, sr=sr, n_chroma=12)
        # Transposing puts frames on axis 0, so this averages over time
        chroma_mean = np.mean(chroma.T, axis=0)
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole run. The cause is reported, though — otherwise a systematic
        # problem (e.g. a missing decoder or resampler) would look like
        # every file being corrupt.
        print("Error encountered while parsing file: ", file_path, f"({e!r})")
        return None
    return chroma_mean

# Create dictionaries to store the chroma features and genre labels.
# Both are keyed by the file's full path: keying by the bare file name
# would let same-named files in different genre directories (for example
# "track01.mp3") overwrite each other and silently drop samples.
features = {}
labels = {}

# Iterate through each audio file in the dataset directory
for genre in genres:
    audio_files = os.listdir(f'dataset/{genre}')
    for file in audio_files:
        file_path = f'dataset/{genre}/{file}'
        chroma = extract_features(file_path)
        # Files that could not be processed are skipped
        if chroma is not None:
            features[file_path] = chroma
            labels[file_path] = genre

# Split the data into training and testing sets.
# Both dicts receive the same keys in the same order, so the rows of X and
# the entries of y line up.
X = np.array(list(features.values()))
y = np.array(list(labels.values()))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train a machine learning model (here, we use a multi-layer perceptron classifier)
model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500)
model.fit(X_train, y_train)

# Evaluate the accuracy of the model on the testing set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


This code is similar to the previous sample code for timbre features, but it uses `librosa.feature.chroma_stft` to extract chroma features from each audio file. Chroma features are 12-dimensional vectors that represent the pitch-class profile of the music in each frame; the code averages them over time to obtain a single 12-dimensional vector per file. The code then trains a machine learning model (again, a multi-layer perceptron classifier) and evaluates its accuracy on the testing data.