<a href="https://colab.research.google.com/github/uday8897-oss/musical-instrument-identification/blob/main/musical_instrument_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install numpy librosa scikit-learn tqdm matplotlib



In [3]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import warnings
import librosa
import librosa.display
import tkinter as tk
from tkinter import filedialog

warnings.filterwarnings("ignore")

In [4]:

import soundfile as sf

# Sampling rate and duration
sr = 22050  # Standard audio sampling rate
duration = 2  # 2 seconds per sample
t = np.linspace(0, duration, int(sr * duration), endpoint=False)

# Define instrument characteristics (frequency, modulation, harmonics)
instrument_params = {
    "piano": {"freq": 261.63, "harmonics": [1, 2, 3], "am_mod": False},
    "guitar": {"freq": 196.00, "harmonics": [1, 2.5, 3.5], "am_mod": False},
    "violin": {"freq": 440.00, "harmonics": [1, 2, 3, 4], "am_mod": True},
    "flute": {"freq": 880.00, "harmonics": [1], "am_mod": False},
    "saxophone": {"freq": 220.00, "harmonics": [1, 2, 3], "am_mod": True},
    "trumpet": {"freq": 233.08, "harmonics": [1, 2, 3, 4], "am_mod": True},
    "cello": {"freq": 130.81, "harmonics": [1, 2, 3], "am_mod": True},
    "clarinet": {"freq": 147.83, "harmonics": [1, 3, 5], "am_mod": False},
    "drums": {"freq": None, "noise": True, "am_mod": False},
    "trombone": {"freq": 174.61, "harmonics": [1, 2, 3], "am_mod": True},
    "harp": {"freq": 329.63, "harmonics": [1, 2], "am_mod": False},
    "banjo": {"freq": 220.00, "harmonics": [1, 2, 4], "am_mod": True},
    "xylophone": {"freq": 523.25, "harmonics": [1, 2], "am_mod": False},
    "oboe": {"freq": 392.00, "harmonics": [1, 2, 3], "am_mod": True},
    "bassoon": {"freq": 98.00, "harmonics": [1, 2, 3], "am_mod": False},
    "accordion": {"freq": 440.00, "harmonics": [1, 2], "am_mod": True},
    "mandolin": {"freq": 293.66, "harmonics": [1, 2, 3], "am_mod": False},
    "sitar": {"freq": 246.94, "harmonics": [1, 2, 3, 5], "am_mod": True},
    "tabla": {"freq": None, "noise": True, "am_mod": False},
    "harmonica": {"freq": 466.16, "harmonics": [1, 2], "am_mod": True},
}

# Dataset directory
dataset_dir = "synthetic_dataset"
os.makedirs(dataset_dir, exist_ok=True)

# Generate synthetic audio for each instrument
for instrument, params in instrument_params.items():
    instrument_dir = os.path.join(dataset_dir, instrument)
    os.makedirs(instrument_dir, exist_ok=True)

    for i in range(10):  # 10 samples per instrument
        if params.get("noise"):
            signal = np.random.randn(len(t)) * 0.3  # White noise for percussive instruments
        else:
            signal = np.zeros_like(t)
            for h in params["harmonics"]:
                signal += np.sin(2 * np.pi * params["freq"] * h * t)

            # Apply amplitude modulation if required
            if params["am_mod"]:
                signal *= (1 + 0.3 * np.sin(2 * np.pi * 3 * t))

            signal /= np.max(np.abs(signal))  # Normalize

        # Save audio file
        file_path = os.path.join(instrument_dir, f"{instrument}_{i+1}.wav")
        sf.write(file_path, signal, sr)

dataset_dir


'synthetic_dataset'

In [5]:
# Define dataset path (Update with your dataset location)
DATASET_PATH = "synthetic_dataset"  # Change this to your dataset directory

# Define a broader set of musical instruments
INSTRUMENTS = [
    "piano", "guitar", "violin", "flute", "saxophone", "trumpet", "cello", "clarinet", "drums", "trombone",
    "harp", "banjo", "xylophone", "oboe", "bassoon", "accordion", "mandolin", "sitar", "tabla", "harmonica"
]

In [6]:
# Function to extract audio features
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, sr=22050)
        if y is None or len(y) == 0:
            return None  # Skip empty files
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        return np.hstack([
            np.mean(mfccs, axis=1),
            np.mean(spectral_centroid),
            np.mean(zcr)
        ])
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None


In [7]:
# Load dataset and extract features
data = []
labels = []
for instrument in INSTRUMENTS:
    instrument_path = os.path.join(DATASET_PATH, instrument)
    if not os.path.isdir(instrument_path):
        print(f"Warning: {instrument_path} does not exist.")
        continue

    for file in tqdm(os.listdir(instrument_path), desc=f"Processing {instrument}"):
        if file.endswith(".wav"):
            file_path = os.path.join(instrument_path, file)
            features = extract_features(file_path)
            if features is not None:
                data.append(features)
                labels.append(instrument)

Processing piano: 100%|██████████| 10/10 [00:23<00:00,  2.37s/it]
Processing guitar: 100%|██████████| 10/10 [00:00<00:00, 63.10it/s]
Processing violin: 100%|██████████| 10/10 [00:00<00:00, 66.13it/s]
Processing flute: 100%|██████████| 10/10 [00:00<00:00, 60.40it/s]
Processing saxophone: 100%|██████████| 10/10 [00:00<00:00, 66.62it/s]
Processing trumpet: 100%|██████████| 10/10 [00:00<00:00, 66.69it/s]
Processing cello: 100%|██████████| 10/10 [00:00<00:00, 68.02it/s]
Processing clarinet: 100%|██████████| 10/10 [00:00<00:00, 69.19it/s]
Processing drums: 100%|██████████| 10/10 [00:00<00:00, 69.48it/s]
Processing trombone: 100%|██████████| 10/10 [00:00<00:00, 70.13it/s]
Processing harp: 100%|██████████| 10/10 [00:00<00:00, 59.52it/s]
Processing banjo: 100%|██████████| 10/10 [00:00<00:00, 71.11it/s]
Processing xylophone: 100%|██████████| 10/10 [00:00<00:00, 59.92it/s]
Processing oboe: 100%|██████████| 10/10 [00:00<00:00, 67.58it/s]
Processing bassoon: 100%|██████████| 10/10 [00:00<00:00, 71.

In [8]:
# Check if data is loaded
if len(data) == 0:
    raise ValueError("No audio data found. Check your dataset path and ensure WAV files exist.")

# Convert lists to NumPy arrays
data = np.array(data)
labels = np.array(labels)

# Encode labels
encoder = LabelEncoder()
labels_encoded = encoder.fit_transform(labels)

# Split dataset safely
if len(data) < 2:
    raise ValueError("Not enough data samples to split. Ensure your dataset contains multiple audio files.")
X_train, X_test, y_train, y_test = train_test_split(data, labels_encoded, test_size=0.2, random_state=42)

# Train classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate model
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Get the unique labels present in the test data
unique_labels = np.unique(np.concatenate([y_test, y_pred]))

# Filter encoder.classes_ to include only the unique labels
target_names = [encoder.classes_[i] for i in unique_labels]

print(classification_report(y_test, y_pred, target_names=target_names))

# Function to classify an uploaded audio file
def classify_audio(  audio_file_path  ):
    # root = tk.Tk()
    # root.withdraw()
    # file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav")])
    #if not file_path:
    #    print("No file selected.")
     #   return

    features = extract_features( audio_file_path)
    if features is None:
        print("Error extracting features from file.")
        return

    features = np.array(features).reshape(1, -1)
    prediction = clf.predict(features)
    predicted_instrument = encoder.inverse_transform(prediction)[0]
    print(f"Predicted Instrument: {predicted_instrument}")



Accuracy: 0.95
              precision    recall  f1-score   support

   accordion       1.00      1.00      1.00         3
       banjo       1.00      1.00      1.00         1
     bassoon       1.00      1.00      1.00         1
       cello       1.00      1.00      1.00         6
    clarinet       1.00      1.00      1.00         3
       drums       0.50      1.00      0.67         2
       flute       1.00      1.00      1.00         1
      guitar       1.00      1.00      1.00         3
        harp       1.00      1.00      1.00         1
    mandolin       1.00      1.00      1.00         2
        oboe       1.00      1.00      1.00         3
       piano       1.00      1.00      1.00         1
   saxophone       1.00      1.00      1.00         1
       sitar       1.00      1.00      1.00         3
       tabla       0.00      0.00      0.00         2
    trombone       1.00      1.00      1.00         2
     trumpet       1.00      1.00      1.00         2
   xylophone

In [9]:
!pip install gradio librosa numpy joblib
import os
import gradio as gr
import librosa
import numpy as np
import joblib

# Define the model file path
model_file_path = os.path.join("synthetic_dataset", "trained_model.joblib")

# ... (After training the model) ...

# Save the model
joblib.dump((clf, encoder), model_file_path)

# ... (Later, when you want to load the model) ...

# Load the model
clf, encoder = joblib.load(model_file_path)

# Function to extract features
def predict_instrument(audio_file):
    y, sr = librosa.load(audio_file, sr=22050)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    features = np.hstack([np.mean(mfccs, axis=1), np.mean(spectral_centroid), np.mean(zcr)]).reshape(1, -1)

    prediction = clf.predict(features)
    return encoder.inverse_transform(prediction)[0]

# Gradio Interface
iface = gr.Interface(
    fn=predict_instrument,
    inputs=gr.Audio( type="filepath"),
    outputs="text",
    title="🎵 Music Instrument Identifier",
    description="Upload a audio file to identify the instruments in it."
)

iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9fe5c770a3bd824fa1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


