In [6]:
%%writefile genre_classifier.py
import warnings
from io import BytesIO

import joblib
import librosa
import numpy as np
import pandas as pd
import streamlit as st
from pydub import AudioSegment
from scipy import stats

# Serialized estimator and feature scaler, read from the working directory
# every time this script is executed.
model = joblib.load("logistic_regression_model.pkl")
scaler = joblib.load("scaler.pkl")

# Lookup table built from genres.csv: genre_id -> title.
genres = pd.read_csv("genres.csv")
id_to_title = {
    genre_id: title for genre_id, title in zip(genres["genre_id"], genres["title"])
}


def columns():
    """Build the sorted three-level index that labels the feature vector.

    Every feature contributes one entry per (statistic, component) pair;
    components are numbered from "01" as zero-padded two-digit strings.

    Returns:
        pd.MultiIndex with levels ("feature", "statistics", "number"),
        sorted lexicographically.
    """
    # Number of components produced by each feature extractor.
    dims_per_feature = {
        "chroma_stft": 12,
        "chroma_cqt": 12,
        "chroma_cens": 12,
        "tonnetz": 6,
        "mfcc": 20,
        "rmse": 1,
        "zcr": 1,
        "spectral_centroid": 1,
        "spectral_bandwidth": 1,
        "spectral_contrast": 7,
        "spectral_rolloff": 1,
    }
    statistics = ("mean", "std", "skew", "kurtosis", "median", "min", "max")

    labels = [
        (feature, statistic, f"{component:02d}")
        for feature, n_components in dims_per_feature.items()
        for statistic in statistics
        for component in range(1, n_components + 1)
    ]

    index = pd.MultiIndex.from_tuples(
        labels, names=("feature", "statistics", "number")
    )
    return index.sort_values()


def compute_features(audio_object: BytesIO, audio_format=None):
    """Decode an audio file and summarise its librosa features.

    Each feature matrix is reduced along the time axis to seven statistics
    (mean, std, skew, kurtosis, median, min, max) and written into a Series
    indexed by ``columns()``.

    Args:
        audio_object: Seekable binary file-like object holding the encoded
            audio (for example a Streamlit upload).
        audio_format: Container format handed to pydub (e.g. "mp3", "wav").
            When omitted it is taken from the extension of
            ``audio_object.name`` if that attribute exists, otherwise "mp3".

    Returns:
        pd.Series of float32 named "input_audio". If decoding or feature
        extraction fails, the error is shown with ``st.error`` and the
        entries that were not computed remain NaN.
    """
    features = pd.Series(index=columns(), dtype=np.float32, name="input_audio")
    # Escalate librosa warnings to exceptions so they are reported by the
    # handler below. Note that this filter is installed process-wide.
    warnings.filterwarnings("error", module="librosa")

    def feature_stats(name, values):
        # Reduce a (components, frames) matrix to per-component statistics.
        features[name, "mean"] = np.mean(values, axis=1)
        features[name, "std"] = np.std(values, axis=1)
        features[name, "skew"] = stats.skew(values, axis=1)
        features[name, "kurtosis"] = stats.kurtosis(values, axis=1)
        features[name, "median"] = np.median(values, axis=1)
        features[name, "min"] = np.min(values, axis=1)
        features[name, "max"] = np.max(values, axis=1)

    if audio_format is None:
        # The uploader accepts more than MP3, so prefer the real extension
        # over a hard-coded format when the object carries a filename.
        filename = getattr(audio_object, "name", "") or ""
        if "." in filename:
            audio_format = filename.rsplit(".", 1)[-1].lower()
        else:
            audio_format = "mp3"

    try:
        audio_object.seek(0)
        audio = AudioSegment.from_file(audio_object, format=audio_format)

        # Convert the decoded PCM samples to a float32 numpy array.
        samples = np.array(audio.get_array_of_samples()).astype(np.float32)
        if samples.size == 0:
            raise ValueError("audio file contains no samples")

        # Samples are interleaved per channel; average the channels to mono.
        if audio.channels > 1:
            samples = samples.reshape((-1, audio.channels)).mean(axis=1)

        # Scale by the largest value of the integer sample type.
        samples /= np.iinfo(audio.array_type).max

        # The signal is analysed at its native rate; the former resample to
        # the same rate was a no-op and has been dropped.
        sr = audio.frame_rate
        x = samples

        f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
        feature_stats("zcr", f)

        cqt = np.abs(
            librosa.cqt(
                x, sr=sr, hop_length=512, bins_per_octave=12, n_bins=7 * 12, tuning=None
            )
        )
        f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats("chroma_cqt", f)
        f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats("chroma_cens", f)
        # Tonnetz is derived from the CENS chroma computed just above.
        f = librosa.feature.tonnetz(chroma=f)
        feature_stats("tonnetz", f)

        del cqt
        stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))

        f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
        feature_stats("chroma_stft", f)

        # `rmse` was renamed to `rms` in newer librosa releases; use whichever
        # this installation provides. The column keeps its "rmse" label.
        # NOTE(review): `rms` and the legacy `rmse` may not be numerically
        # identical for spectrogram input -- confirm against the librosa
        # version used to build the features the scaler/model were fitted on.
        rms_fn = getattr(librosa.feature, "rms", None) or librosa.feature.rmse
        f = rms_fn(S=stft)
        feature_stats("rmse", f)

        f = librosa.feature.spectral_centroid(S=stft)
        feature_stats("spectral_centroid", f)
        f = librosa.feature.spectral_bandwidth(S=stft)
        feature_stats("spectral_bandwidth", f)
        f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
        feature_stats("spectral_contrast", f)
        f = librosa.feature.spectral_rolloff(S=stft)
        feature_stats("spectral_rolloff", f)

        mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
        del stft
        f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
        feature_stats("mfcc", f)

    except Exception as e:
        # Best effort: report the problem in the UI and hand back whatever
        # was computed; callers should check for NaN before predicting.
        st.error(f"Error processing audio: {e}")

    return features


# streamlit
# Page layout: one placeholder tab for statistics and one tab that classifies
# an uploaded audio file with the loaded scaler and model.

st.set_page_config(page_title="Music Genre Classifier", layout="wide")
st.title(" Genre Classifier ")

tab1, tab2 = st.tabs(["Visual Stats", " Audio file classification"])

with tab1:
    st.header("Visualized Statistics")

with tab2:

    st.header("Upload an audio file for genre classification")

    # The label now names both formats that the `type` filter accepts.
    uploaded_audio = st.file_uploader(
        "Upload an audio file (MP3 or WAV)", type=["mp3", "wav"]
    )

    if uploaded_audio:
        # Play back with the MIME type reported for the upload instead of
        # always claiming MP3; fall back to the previous value if unset.
        st.audio(
            uploaded_audio,
            format=getattr(uploaded_audio, "type", None) or "audio/mp3",
        )

        # A spinner disappears when the work is done, unlike a static
        # info box that would linger next to the result.
        with st.spinner("Extracting features and predicting genre..."):
            features = compute_features(uploaded_audio)

            if features.isna().any():
                # compute_features leaves NaN entries when extraction fails;
                # feeding those to the scaler would raise or give a
                # meaningless prediction.
                predicted_genre_id = None
            else:
                X_input = features.values.reshape(1, -1)
                X_input_scaled = scaler.transform(X_input)
                predicted_genre_id = model.predict(X_input_scaled)[0]

        if predicted_genre_id is None:
            st.error(
                "Could not extract a complete feature vector from this file, "
                "so no prediction was made."
            )
        else:
            genre_name = id_to_title.get(predicted_genre_id, "Unknown Genre")
            st.success(f"Predicted Genre: **{genre_name}**")


Writing genre_classifier.py


In [9]:
# Launch the app file written by the %%writefile cell. The server keeps
# running until the cell is interrupted.
! streamlit run genre_classifier.py


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.75.121.99:8501[0m
[0m
[34m  Stopping...[0m
^C


In [8]:
! pip install streamlit

Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m66.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m84.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hI