In [None]:
# Bibliotheques audio / midi
import mido
import threading

# Bibliotheques pour le traitement de signal et l'analyse
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# List the MIDI *input* ports: the next cell opens an input port, so the
# device name has to be looked up among the inputs, not the outputs.
mido.get_input_names()

In [None]:
# Connect to the MIDI keyboard; every collection cell below reads from this port
input_port = mido.open_input(name='LPK25')

# Example 1 - Clustering Piano Notes

In [None]:
'''Objective: Cluster MIDI notes played on a piano keyboard in real-time based on pitch and velocity.'''

### Get Notes

In [None]:
# Notes captured by collect_midi_data(), stored as [pitch, velocity] pairs
notes = []

def collect_midi_data(nb_notes=100, port=None):
    """Record key presses from a MIDI port into the module-level ``notes`` list.

    Args:
        nb_notes: number of key presses to record before returning.
        port: iterable of MIDI messages (objects exposing ``type``, ``note``
            and ``velocity``). Defaults to the notebook-level ``input_port``.

    Returns:
        The module-level ``notes`` list, holding one ``[pitch, velocity]``
        entry per recorded key press.
    """
    source = input_port if port is None else port

    # Start from an empty list: leftovers from a previous run would otherwise
    # satisfy the length check immediately and end the recording after one note.
    notes.clear()

    for msg in source:
        # A note_on with velocity 0 is a key release, so it is skipped
        if msg.type == 'note_on' and msg.velocity > 0:
            notes.append([msg.note, msg.velocity])
            print(f"Note: {msg.note}, Velocity: {msg.velocity}")
            if len(notes) >= nb_notes:  # Stop once nb_notes presses were recorded
                break
    return notes


In [None]:
# Collect the notes in the foreground. The previous version started a worker
# thread and joined it immediately, which blocks exactly as long as a direct
# call but leaves the thread reading the port if the cell is interrupted.
collect_midi_data()

print("MIDI data collection complete.")


### Clustering MIDI notes

In [None]:

# Convert the collected notes to a NumPy array (one row per note)
notes_array = np.array(notes)

# Perform KMeans clustering.
# random_state makes the cluster assignment reproducible between runs;
# n_init is set explicitly because its default differs across scikit-learn versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(notes_array)

# Get the cluster labels (one integer per note)
labels = kmeans.labels_

labels


### Plot Midi Notes

In [None]:

# Scatter the notes (pitch vs. velocity), one colour per cluster
fig, ax = plt.subplots()
points = ax.scatter(notes_array[:, 0], notes_array[:, 1], c=labels, cmap='viridis')
ax.set_xlabel('Note (Pitch)')
ax.set_ylabel('Velocity')
ax.set_title('MIDI Note Clustering')
fig.colorbar(points, ax=ax, label='Cluster')
plt.show()

## Clustering on live piano data

In [26]:
import mido
import threading
from sklearn.cluster import KMeans
import numpy as np
import time
import plotly.express as px
import pandas as pd


In [28]:
# List the MIDI *input* ports: the next cell opens an input port, so the
# device name has to be looked up among the inputs, not the outputs.
mido.get_input_names()

['IAC_Driver python_to_logic', 'IAC_Driver logic_to_python', 'LPK25']

In [29]:
# Connect to the MIDI keyboard; the collection cell below reads from this port
input_port = mido.open_input(name='LPK25')

In [30]:

# State shared with collect_midi_data():
#   note_on_times   - pitch -> clock reading taken when the key went down
#   note_velocities - pitch -> velocity of that key press
#   notes           - finished notes as [pitch, length_in_seconds, velocity]
note_on_times = {}
note_velocities = {}
notes = []

def collect_midi_data(nb_notes=100, port=None):
    """Record complete notes (press followed by release) from a MIDI port.

    Args:
        nb_notes: number of finished notes to record before returning.
        port: iterable of MIDI messages (objects exposing ``type``, ``note``
            and ``velocity``). Defaults to the notebook-level ``input_port``.

    Returns:
        The module-level ``notes`` list, holding one
        ``[pitch, length_in_seconds, velocity]`` entry per finished note.
    """
    source = input_port if port is None else port

    # Reset the shared state so a re-run does not inherit half-open notes or
    # already-recorded notes from a previous recording.
    note_on_times.clear()
    note_velocities.clear()
    notes.clear()

    for msg in source:
        # Monotonic clock: durations stay non-negative even if the wall clock is adjusted
        current_time = time.monotonic()
        if msg.type == 'note_on' and msg.velocity > 0:  # Key press
            note_on_times[msg.note] = current_time
            note_velocities[msg.note] = msg.velocity
        elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):  # Key release
            if msg.note in note_on_times:  # Ignore releases without a recorded press
                note_length = current_time - note_on_times.pop(msg.note)
                velocity = note_velocities.pop(msg.note)
                notes.append([msg.note, note_length, velocity])
                print(f"Note: {msg.note}, Length: {note_length:.2f}, Velocity: {velocity}")
                if len(notes) >= nb_notes:  # Stop once nb_notes notes were recorded
                    break
    return notes

# Collect the notes in the foreground. The previous version started a worker
# thread and joined it immediately, which blocks exactly as long as a direct
# call but leaves the thread reading the port if the cell is interrupted.
collect_midi_data()

print("MIDI data collection complete.")

# Convert the collected notes to a NumPy array with columns (note, length, velocity)
notes_array = np.array(notes)


Note: 0, Length: 0.17, Velocity: 84
Note: 7, Length: 0.15, Velocity: 100
Note: 9, Length: 0.17, Velocity: 96
Note: 4, Length: 0.20, Velocity: 86
Note: 7, Length: 0.11, Velocity: 98
Note: 9, Length: 0.11, Velocity: 98
Note: 4, Length: 0.13, Velocity: 97
Note: 0, Length: 0.14, Velocity: 102
Note: 9, Length: 0.14, Velocity: 97
Note: 0, Length: 0.16, Velocity: 100
Note: 4, Length: 0.17, Velocity: 102
Note: 4, Length: 0.18, Velocity: 109
Note: 0, Length: 0.19, Velocity: 108
Note: 9, Length: 0.20, Velocity: 100
Note: 4, Length: 0.16, Velocity: 60
Note: 0, Length: 0.18, Velocity: 65
Note: 9, Length: 0.14, Velocity: 21
Note: 9, Length: 3.14, Velocity: 78
Note: 0, Length: 3.17, Velocity: 90
Note: 4, Length: 3.18, Velocity: 63
Note: 117, Length: 0.21, Velocity: 94
Note: 120, Length: 0.24, Velocity: 99
Note: 113, Length: 0.24, Velocity: 102
Note: 117, Length: 0.23, Velocity: 106
Note: 120, Length: 0.24, Velocity: 96
Note: 113, Length: 0.25, Velocity: 103
Note: 105, Length: 0.20, Velocity: 105
Not

In [31]:
# Interactive 3D view of the raw notes: pitch, duration and velocity on the three axes
df = pd.DataFrame(data=notes_array, columns=['Note', 'Length', 'Velocity'])
fig = px.scatter_3d(
    df,
    x='Note',
    y='Length',
    z='Velocity',
    title='MIDI Notes in 3D Space',
    labels={'Note': 'Note (Pitch)', 'Length': 'Length', 'Velocity': 'Velocity'},
)
fig.show()

In [32]:

# Perform KMeans clustering.
# random_state makes the cluster assignment reproducible between runs;
# n_init is set explicitly because its default differs across scikit-learn versions.
# NOTE(review): the features are not scaled. Length (seconds) spans a far smaller
# numeric range than pitch and velocity, so it barely affects the distances -
# consider standardising the columns before clustering.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(notes_array)

# Get the cluster labels (one integer per note)
labels = kmeans.labels_

print("Clustering complete.")


Clustering complete.


In [33]:
labels

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

In [34]:

# Convert the notes array to a pandas DataFrame for Plotly
df = pd.DataFrame(notes_array, columns=['Note', 'Length', 'Velocity'])
# Cluster ids are categories, not quantities: store them as strings so Plotly
# draws one discrete colour per cluster instead of a continuous colour scale.
df['Cluster'] = np.asarray(labels).astype(str)

# Plot the clusters in 3D using Plotly Express
fig = px.scatter_3d(df, x='Note', y='Length', z='Velocity', color='Cluster',
                    labels={'Note': 'Note (Pitch)', 'Length': 'Length', 'Velocity': 'Velocity'},
                    title='MIDI Note Clustering')

fig.show()

# Let's dig deeper : Play with chords

In [None]:
# import the chord mapping dictionary
from chord_mapping import chord_mapping

In [None]:
# Chord identification against the imported chord_mapping table

def identify_chord(notes):
    """Name the first chord of ``chord_mapping`` whose notes are all in ``notes``.

    Returns the chord name, or 'Unknown' when no entry of the table is fully
    contained in the given notes.
    """
    played = list(notes)
    played.sort()
    matching_names = (
        name
        for name, members in chord_mapping.items()
        if set(members).issubset(played)
    )
    return next(matching_names, 'Unknown')

# Extract chords from the collected notes: pitches are accumulated until the
# running group matches a known chord, then the group is reset.
chords = []
current_chord = []
for note in notes:
    current_chord.append(note[0])
    chord_name = identify_chord(current_chord)
    if chord_name != 'Unknown':
        chords.append((note, chord_name))  # keep the note that completed the chord
        current_chord = []  # Reset for the next chord

# Print identified chords.
# Velocity is the LAST field of a note record in both layouts collected above
# ([note, velocity] and [note, length, velocity]); index 1 would print the
# note length for the three-field records.
for chord in chords:
    print(f"Chord: {chord[1]}, Note: {chord[0][0]}, Velocity: {chord[0][-1]}")

In [None]:
# Clustering and Visualization
# Pitches go on the x axis; velocity is the last field of every note record
# (index 1 is the note length for [note, length, velocity] records).
notes_array = np.array([note[0] for note in notes])
velocities = [note[-1] for note in notes]

# Perform KMeans clustering on pitch alone (seeded so the result is reproducible)
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(notes_array.reshape(-1, 1))

# Get the cluster labels
labels = kmeans.labels_

# Create a list of (pitch, velocity, cluster, chord name) tuples
labeled_data = []
for i, note in enumerate(notes):
    # NOTE(review): a single pitch is looked up here, so the label is 'Unknown'
    # unless chord_mapping contains one-note entries - confirm this is intended.
    chord_name = identify_chord([note[0]])
    labeled_data.append((note[0], velocities[i], labels[i], chord_name))

# Plot the clusters with chord labels
fig, ax = plt.subplots()
scatter = ax.scatter(notes_array, velocities, c=labels, cmap='viridis')
for i, txt in enumerate([ld[3] for ld in labeled_data]):
    ax.annotate(txt, (notes_array[i], velocities[i]), fontsize=8, ha='right')
ax.set_xlabel('Note (Pitch)')
ax.set_ylabel('Velocity')
ax.set_title('MIDI Note Clustering with Chord Labels')
fig.colorbar(scatter, ax=ax, label='Cluster')
plt.show()

# What would a supervised method look like?

In [None]:
import mido
import threading
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import time
from chord_mapping import chord_mapping  # Import the chord mapping dictionary


In [None]:

def collect_midi_data(input_port, num_notes=100):
    """Read key presses from ``input_port`` until ``num_notes`` were captured.

    Only ``note_on`` messages with a non-zero velocity count as key presses.

    Returns:
        A list of ``(note, velocity, timestamp)`` tuples, where ``timestamp``
        is the number of seconds elapsed since the function was called.
    """
    captured = []
    started_at = time.time()
    for msg in input_port:
        is_key_press = msg.type == 'note_on' and msg.velocity > 0
        if not is_key_press:
            continue
        timestamp = time.time() - started_at
        captured.append((msg.note, msg.velocity, timestamp))
        print(f"Note: {msg.note}, Velocity: {msg.velocity}, Timestamp: {timestamp}")
        if len(captured) >= num_notes:
            # Requested number of notes reached
            break
    return captured


In [None]:
def group_notes(notes, max_gap=0.05):
    """Group notes that sound together into chords.

    Args:
        notes: sequence of note records. Index 2 of a record is its start
            time; an optional index 3 is its end time.
        max_gap: when a record carries no end time (three-field records such
            as ``(note, velocity, timestamp)``), the note is treated as
            lasting ``max_gap`` seconds, so notes whose onsets follow each
            other within that window land in the same chord.

    Returns:
        A list of chords, each chord being a list of the original note
        records ordered by start time. The input sequence is not modified.
    """
    def _end_time(note):
        # Explicit end time when recorded, otherwise onset plus the grouping window
        return note[3] if len(note) > 3 else note[2] + max_gap

    # Sort a copy by start time; the caller's list keeps its order
    ordered = sorted(notes, key=lambda note: note[2])

    chords = []
    current_chord = []
    latest_end = None  # latest end time among the notes of current_chord

    for note in ordered:
        # A note starting after everything in the current chord has ended opens a new chord
        if current_chord and note[2] > latest_end:
            chords.append(current_chord)
            current_chord = []
            latest_end = None
        current_chord.append(note)
        end = _end_time(note)
        latest_end = end if latest_end is None else max(latest_end, end)

    if current_chord:
        chords.append(current_chord)

    return chords

In [None]:
def normalize_notes(notes):
    """Fold every note number into one octave (value modulo 12), keeping the input order."""
    pitch_classes = []
    for pitch in notes:
        pitch_classes.append(pitch % 12)
    return pitch_classes

In [None]:

def identify_chord(notes):
    """Look up the chord formed by a group of note records.

    ``notes`` holds records whose first field is the note number. The note
    numbers are folded into one octave and compared, as a set, with every
    entry of ``chord_mapping`` folded the same way.

    Returns the name of the first matching entry, or 'Unknown'.
    """
    played = set(normalize_notes([record[0] for record in notes]))
    for chord_name, chord_notes in chord_mapping.items():
        reference = set(normalize_notes(chord_notes))
        if reference == played:
            return chord_name
    return 'Unknown'


In [None]:

def label_chords(chords):
    """Attach a chord name to every group of notes that forms a known chord.

    Args:
        chords: list of chords, each a list of note records whose first
            field is the note number.

    Returns:
        A list of ``(chord, chord_name)`` pairs; groups identified as
        'Unknown' are left out.
    """
    labeled_chords = []
    for chord in chords:
        # identify_chord() reads the note number (index 0) of each record
        # itself, so it needs the full records: handing it bare note numbers
        # fails because an int cannot be indexed.
        chord_name = identify_chord(chord)
        if chord_name != 'Unknown':
            labeled_chords.append((chord, chord_name))
    return labeled_chords


In [None]:

def prepare_data(labeled_chords):
    """Turn labeled chords into a feature matrix and a label vector.

    Each row holds the note numbers of a chord followed by the velocities of
    the same notes. Chords with fewer notes than the largest chord are padded
    with zeros (in both halves) so that every row has the same width; without
    padding, chords of different sizes produce ragged rows that cannot form a
    2-D array.

    Args:
        labeled_chords: iterable of ``(chord, chord_name)`` pairs, where
            ``chord`` is a list of records with the note number at index 0
            and the velocity at index 1.

    Returns:
        ``(features, labels)`` as NumPy arrays.
    """
    labeled_chords = list(labeled_chords)
    # Width of the note half (and of the velocity half) of every row
    width = max((len(chord) for chord, _ in labeled_chords), default=0)

    features = []
    labels = []
    for chord, chord_name in labeled_chords:
        padding = [0] * (width - len(chord))
        pitches = [note[0] for note in chord] + padding
        velocities = [note[1] for note in chord] + padding
        features.append(pitches + velocities)
        labels.append(chord_name)
    return np.array(features), np.array(labels)


In [None]:

def train_classifier(X_train, y_train):
    """Fit a 100-tree random forest (seed 42) on the training data and return it."""
    forest = RandomForestClassifier(random_state=42, n_estimators=100)
    forest.fit(X_train, y_train)
    return forest


In [None]:

def evaluate_classifier(classifier, X_test, y_test):
    """Score ``classifier`` on the test set.

    Returns a ``(accuracy, report)`` pair: the accuracy score and the
    classification report computed from the classifier's predictions.
    """
    predictions = classifier.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )


In [None]:
from sklearn.decomposition import PCA
def plot_chord_clusters(features, labels, classifier):
    """Scatter the chord features in 2D (via PCA), one colour per chord name.

    Args:
        features: feature matrix, one row per chord.
        labels: chord name of every row of ``features``.
        classifier: kept so existing calls keep working; the plot is coloured
            by the given ``labels`` and does not use the classifier.

    Prints a message instead of plotting when ``features`` is empty.
    """
    if len(features) == 0:
        print("No identified chords to plot.")
        return

    # Reduce dimensions to 2D for visualization using PCA
    pca = PCA(n_components=2)
    reduced_features = pca.fit_transform(features)

    label_array = np.asarray(labels)
    # Sorted so every chord keeps the same colour and legend position between runs
    unique_chords = sorted(set(labels))
    colors = plt.get_cmap('tab20')(np.linspace(0, 1, len(unique_chords)))
    chord_color_map = {chord: colors[i] for i, chord in enumerate(unique_chords)}

    fig, ax = plt.subplots()
    for chord in unique_chords:
        mask = label_array == chord  # rows belonging to this chord
        ax.scatter(reduced_features[mask, 0], reduced_features[mask, 1],
                   color=chord_color_map[chord], label=chord, alpha=0.6, edgecolors='w', s=100)

    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')
    ax.set_title('MIDI Note Clustering with Chord Labels')
    ax.legend(title='Chords')
    plt.show()


In [None]:

def main():
    """Record notes from the LPK25 keyboard, group them into chords and label them.

    Returns:
        The list of ``(chord, chord_name)`` pairs produced by ``label_chords``.
    """
    # The context manager closes the port once the notes are collected, so the
    # cells further down can open 'LPK25' again.
    with mido.open_input('LPK25') as input_port:
        notes = collect_midi_data(input_port)

    chords = group_notes(notes)
    return label_chords(chords)

if __name__ == "__main__":
    # Keep the result so the following cells can use it
    labeled_chords = main()

# Example 3 : Live chord identification

In [None]:
# Chord is unknown because octaves are not considered
# We'll normalize the notes to a single octave

#### New approach 

In [None]:
'''
Let's generate the chords on the fly based on the note intervals
'''

In [None]:
import mido

# Define intervals for common chords (semitones above the root; values of 12
# or more denote tones stacked above the first octave)
CHORD_INTERVALS = {
    'major': [0, 4, 7],
    'minor': [0, 3, 7],
    'diminished': [0, 3, 6],
    'augmented': [0, 4, 8],
    'sus2': [0, 2, 7],
    'sus4': [0, 5, 7],
    'add9': [0, 4, 7, 14],
    'add11': [0, 4, 7, 17],
    'major_6': [0, 4, 7, 9],
    'minor_6': [0, 3, 7, 9],
    'major_7': [0, 4, 7, 11],
    'minor_7': [0, 3, 7, 10],
    'dominant_7': [0, 4, 7, 10],
    'diminished_7': [0, 3, 6, 9],
    'half_diminished_7': [0, 3, 6, 10],
    'minor_major_7': [0, 3, 7, 11],
    'major_9': [0, 4, 7, 11, 14],
    'minor_9': [0, 3, 7, 10, 14],
    'dominant_9': [0, 4, 7, 10, 14],
    'minor_11': [0, 3, 7, 10, 14, 17],
    'dominant_11': [0, 4, 7, 10, 14, 17],
    'major_13': [0, 4, 7, 11, 14, 21],
    'minor_13': [0, 3, 7, 10, 14, 21],
    'dominant_13': [0, 4, 7, 10, 14, 21],
    'augmented_7': [0, 4, 8, 10],
    'augmented_major_7': [0, 4, 8, 11],
    'dominant_7_b5': [0, 4, 6, 10],
    'dominant_7_sharp5': [0, 4, 8, 10],
    'dominant_7_b9': [0, 4, 7, 10, 13],
    'dominant_7_sharp9': [0, 4, 7, 10, 15],
    'dominant_7_b5_b9': [0, 4, 6, 10, 13],
    'dominant_7_sharp5_sharp9': [0, 4, 8, 10, 15],
}

# Pitch-class names, indexed by note number modulo 12
NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# The definitions below appeared twice, verbatim, in this cell; a single copy is kept.

def normalize_notes(notes):
    """Normalize notes to a single octave and sort them."""
    return sorted([note % 12 for note in notes])

def intervals_from_root(notes):
    """Calculate intervals (modulo 12) from the first note of ``notes``."""
    root = notes[0]
    return [(note - root) % 12 for note in notes]

def identify_chord(notes):
    """Identify the chord formed by the given note records.

    ``notes`` holds records whose first field is the note number. The lowest
    played note is tried as the root first, then every other pitch class that
    was played, so a chord is recognised in any key and in any voicing.
    Octave doublings are ignored and the intervals of ``CHORD_INTERVALS`` are
    compared modulo 12, which lets entries with tones above the octave
    (9ths, 11ths, 13ths) match as well.

    Returns:
        ``"<root name> <chord name>"`` for the first match, else 'Unknown'
        (also for an empty input).
    """
    pitches = sorted(note[0] for note in notes)
    if not pitches:
        return 'Unknown'

    pitch_classes = sorted(set(normalize_notes(pitches)))

    # Candidate roots: the bass note first, then the remaining pitch classes
    bass = pitches[0] % 12
    start = pitch_classes.index(bass)
    candidate_roots = pitch_classes[start:] + pitch_classes[:start]

    # Fold every reference chord into one octave once, outside the root loop
    folded_chords = [
        (chord_name, sorted({step % 12 for step in chord_intervals}))
        for chord_name, chord_intervals in CHORD_INTERVALS.items()
    ]

    for root in candidate_roots:
        intervals = sorted(intervals_from_root([root] + pitch_classes)[1:])
        for chord_name, chord_tones in folded_chords:
            if intervals == chord_tones:
                return f"{NOTE_NAMES[root]} {chord_name}"
    return 'Unknown'

def process_midi_input(port_name):
    """Listen on a MIDI input port and print the chord formed by the held keys.

    The chord is re-evaluated after every key press or release while more
    than two keys are held. Interrupting the kernel stops the listener.

    Returns:
        The list of ``(chord_name, held_notes)`` pairs seen while listening,
        where ``held_notes`` is a snapshot of ``(note, velocity, time)`` tuples.
    """
    played_chords = []  # Store identified chords
    with mido.open_input(port_name) as port:
        notes_on = []  # Store currently pressed notes

        print(f"Listening on {port_name}...")

        try:
            for msg in port:
                if msg.type == 'note_on' and msg.velocity > 0:
                    notes_on.append((msg.note, msg.velocity, msg.time))
                elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
                    notes_on = [note for note in notes_on if note[0] != msg.note]
                else:
                    # Not a key event: the held notes did not change
                    continue

                if len(notes_on) > 2:  # Consider chords only if more than two notes are pressed
                    chord_name = identify_chord(notes_on)
                    print(f"Chord: {chord_name}")
                    # Store a copy: notes_on keeps being appended to afterwards
                    played_chords.append((chord_name, list(notes_on)))
        except KeyboardInterrupt:
            print("Stopped listening.")
    return played_chords

if __name__ == "__main__":
    available_ports = mido.get_input_names()
    lpk25_port = None
    for port in available_ports:
        if 'LPK25' in port:
            lpk25_port = port
            break

    if lpk25_port:
        process_midi_input(lpk25_port)
    else:
        print("LPK25 MIDI port not found.")

In [None]:
import chord_mapping # imports the content of chord_mapping.py

In [None]:
CHORD_INTERVALS

# Avec inversions

In [None]:
''' Les inversions ne sont pas affichées, mais détectées comme la note fondamentale'''

In [None]:
import mido


# Pitch-class names, indexed by note number modulo 12
NOTE_NAMES = 'C C# D D# E F F# G G# A A# B'.split()

def normalize_notes(notes):
    """Fold the notes into one octave (value modulo 12) and return them in ascending order."""
    pitch_classes = [pitch % 12 for pitch in notes]
    pitch_classes.sort()
    return pitch_classes

def intervals_from_root(notes):
    """Return, for every note, its distance modulo 12 above the first note of ``notes``."""
    root = notes[0]
    offsets = []
    for pitch in notes:
        offsets.append((pitch - root) % 12)
    return offsets

def identify_chord(notes):
    """Identify the chord formed by the given note records, including inversions.

    ``notes`` holds records whose first field is the note number. Every played
    pitch class is tried as the root, starting with the lowest played note.

    ``CHORD_INTERVALS`` may have either of two shapes:

    * ``{chord_name: [intervals]}`` - intervals are compared modulo 12 and the
      inversion is derived from the chord tone that lies in the bass;
    * ``{chord_name: {inversion_name: [intervals]}}`` - the intervals above
      the candidate root are compared with every listed variant.

    Returns:
        ``"<root name> <chord name> (<inversion>)"`` for the first match, else
        'Unknown' (also for an empty input).
    """
    pitches = sorted(note[0] for note in notes)
    if not pitches:
        return 'Unknown'

    # Distinct pitch classes, so that octave doublings do not prevent a match
    pitch_classes = sorted({pitch % 12 for pitch in pitches})

    # Candidate roots: the bass note first, then the remaining pitch classes
    bass = pitches[0] % 12
    start = pitch_classes.index(bass)
    candidate_roots = pitch_classes[start:] + pitch_classes[:start]

    inversion_labels = ('root position', '1st inversion', '2nd inversion', '3rd inversion')

    for root in candidate_roots:
        intervals = sorted((pitch_class - root) % 12 for pitch_class in pitch_classes)
        for chord_name, chord_variants in CHORD_INTERVALS.items():
            if isinstance(chord_variants, dict):
                # Table with one interval list per named inversion
                for inversion_name, chord_intervals in chord_variants.items():
                    if intervals == list(chord_intervals):
                        return f"{NOTE_NAMES[root]} {chord_name} ({inversion_name})"
            else:
                # Flat table: a plain list of intervals above the root
                chord_tones = [step % 12 for step in chord_variants]
                if intervals == sorted(set(chord_tones)):
                    # Position of the bass note among the chord tones = inversion number
                    position = chord_tones.index((bass - root) % 12)
                    if position < len(inversion_labels):
                        inversion_name = inversion_labels[position]
                    else:
                        inversion_name = f"{position}th inversion"
                    return f"{NOTE_NAMES[root]} {chord_name} ({inversion_name})"

    return 'Unknown'

def process_midi_input(port_name):
    """Listen on a MIDI input port and print the chord formed by the held keys.

    The chord is re-evaluated after every key press or release while more
    than two keys are held. Interrupting the kernel stops the listener.

    Returns:
        The list of ``(chord_name, held_notes)`` pairs seen while listening,
        where ``held_notes`` is a snapshot of ``(note, velocity, time)`` tuples.
    """
    played_chords = []  # Store identified chords
    with mido.open_input(port_name) as port:
        notes_on = []  # Store currently pressed notes

        print(f"Listening on {port_name}...")

        try:
            for msg in port:
                if msg.type == 'note_on' and msg.velocity > 0:
                    notes_on.append((msg.note, msg.velocity, msg.time))
                elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
                    notes_on = [note for note in notes_on if note[0] != msg.note]
                else:
                    # Not a key event: the held notes did not change
                    continue

                if len(notes_on) > 2:  # Consider chords only if more than two notes are pressed
                    chord_name = identify_chord(notes_on)
                    print(f"Chord: {chord_name}")
                    # Store a copy: notes_on keeps being appended to afterwards
                    played_chords.append((chord_name, list(notes_on)))
        except KeyboardInterrupt:
            print("Stopped listening.")
    return played_chords

if __name__ == "__main__":
    available_ports = mido.get_input_names()
    # First input port whose name mentions the LPK25 keyboard, or None when there is none
    lpk25_port = next((name for name in available_ports if 'LPK25' in name), None)

    if not lpk25_port:
        print("LPK25 MIDI port not found.")
    else:
        process_midi_input(lpk25_port)

In [None]:
# Clustering chord progressions by music genre

In [None]:
pip install plotly

In [None]:
pip install nbformat