In [1]:
import music21 as m21
from collections import Counter
from scipy.stats import entropy
import numpy as np
import pandas as pd

import os

In [7]:
# def get_file_metrics(part, key, scale):
def get_file_metrics(part):

    
    # Initialize metrics
    total_empty_bars = 0
    pitch_counts = []
    # pitch_entropies = [0]*4
    # chord_frequencies = []
    # melody_pitches = [{}]*4

    measures = part.getElementsByClass(m21.stream.Measure)
    total_bars = len(measures)
    
    part_name = measures[0].getInstruments()[0].partName

    for measure in measures:
        # print(f"processing measure number {measure.measureNumber}, offset {measure.offset}")
        measure_idx = measure.measureNumber - 1
        notes = measure.notes
        voices = measure.voices
        if len(voices) > 0:
            notes_in_voices = [note for voice in voices for note in voice.notes]
        else:
            notes_in_voices = []
        
        if len(notes) + len(notes_in_voices) == 0:
            total_empty_bars += 1
        else:
            # Collect unique pitch classes in the measure (ignoring octave differences)
            note_pitches = [note for note in notes if note.isNote]
            
            voice_note_pitches = [note for note in notes_in_voices if note.isNote]
            voice_chords_pitches = [note for chord in notes_in_voices if chord.isChord for note in chord.notes]

            
            measure_notes = note_pitches + voice_note_pitches + voice_chords_pitches
            measure_pitch_classes = [note.pitch.pitchClass for note in measure_notes]
            
            
            # unique_pitch_classes = set(note_pitch_classes + voice_note_pitch_classes + voice_chords_pitch_classes)
            unique_pitch_classes = set(measure_pitch_classes)
            # print(f"unique pitches in bar: {unique_pitch_classes}")
            pitch_counts.append(len(unique_pitch_classes))
            
            
            # Calculate entropy of pitches in the measure
            # pitch_counter = Counter(note.pitch.midi for note in measure_notes)
            # pitch_probs = np.array(list(pitch_counter.values())) / sum(pitch_counter.values())
            # if not pitch_probs.all():
            #     print(f"pitch_probs empty, notes: {measure_notes}")
            # pitch_entropies[measure_idx] = entropy(pitch_probs)
            
#             if part_name == 'Melody':
#                 # Identify melody pitches in context of I chord
#                 melody_pitch_set = set()
#                 tonic_pitch_class = key.tonic.pitchClass
#                 melody_pitch_count = {f'{i+1}': 0 for i in range(0, 7)}
#                 for note in notes:
#                     if note.isChord:
#                         curr_notes = note.notes
#                     else:
#                         curr_notes = [note]
                    
#                     for curr_note in curr_notes:
#                         scale_degree = scale.getScaleDegreeFromPitch(curr_note.pitch)
#                         if scale_degree and 1 <= scale_degree <= 7:
#                             melody_pitch_count[str(scale_degree)] += 1
#                             # melody_pitch_set.add(scale_degree)
#                 melody_pitches[measure_idx] = melody_pitch_count
    
    # Ratio of empty bars
    empty_bar_ratio = total_empty_bars / total_bars
    # Calculate average number of pitch classes per bar
    avg_pitch_classes_per_bar = sum(pitch_counts) / total_bars if total_bars > 0 else 0
        
    
    return part_name, {
        "empty_bar_ratio": empty_bar_ratio,
        "apc_per_bar": avg_pitch_classes_per_bar,
        # "pitch_entropies": pitch_entropies,
        # "chord_freq": chord_frequencies,
        # "melody_pitches": melody_pitches,
    }

In [8]:
def get_metrics(midi_file_path):
    # Load the MIDI file using music21
    score = m21.converter.parse(midi_file_path)
    
    # Determine the key signature
    # key = score.analyze('key')
    # scale = key.getScale()
    
    # Get the parts (tracks) of the score
    parts = score.parts
    # print(f"score has {len(parts)} parts")
    
    # metrics_all = {"chord_freqs": [{}]*4}
    metrics_all = {}
    
    # chord_frequencies = []
    # chords = score.chordify()
    # for chords_in_measure in chords:
    #     measure_idx = chords_in_measure.measureNumber - 1
    #     measure_chord_frequencies = {f'{i}': 0 for i in ["i", "ii", "iii", "iv", "v", "vi", "vii"]}
    #     for chord in chords_in_measure.recurse().getElementsByClass('Chord'):
    #         roman = m21.roman.romanNumeralFromChord(chord, key)
    #         # print(roman.figure)
    #         if roman.figure in measure_chord_frequencies:
    #             measure_chord_frequencies[roman.figure] += 1
    #     metrics_all["chord_freqs"][measure_idx] = measure_chord_frequencies
    
    for part in parts:
        # part_name, metrics = get_file_metrics(part, key, scale)
        part_name, metrics = get_file_metrics(part)
        metrics_all[part_name] = metrics
        
    return metrics_all

In [9]:
def get_track_metrics(track_metrics):

        
    df = pd.DataFrame(track_metrics)
    # print(df)
    empty_bar_ratio = df.empty_bar_ratio.mean()
    apc_per_bar = df.apc_per_bar.mean()
    # pitch_entropies_per_bar = pd.DataFrame(df.pitch_entropies.tolist()).mean().values
    
    # melody_pitches_per_bar = pd.DataFrame(df.melody_pitches.tolist())
    # melody_pitch_all = melody_pitches_per_bar.apply(lambda x: pd.DataFrame([p for p in x.tolist() if p]).sum().to_dict(), axis=0)

    return empty_bar_ratio, apc_per_bar
# , pitch_entropies_per_bar, melody_pitch_all

In [10]:
def get_all_metrics(file_dir):
    
    files = os.listdir(file_dir)

    track_metrics = {
        "drums": [],
        "bass": [],
        "strings": [],
        "guitar": [],
        # "chord_freqs":[],
    }
    

    for file in files:
        midi_file_path = f"{file_dir}/{file}/generated.mid"
        # midi_file_path = f"{file_dir}/{file}/{file}.mid"
        print(f"processing {midi_file_path}")
        file_metrics = get_metrics(midi_file_path)
        # print(file_metrics)
    
        for track_name, file_track_metric in file_metrics.items():
            track_metrics[track_name.lower()].append(file_track_metric)
            
    # return track_metrics

    metrics_avg = {}
    # print(track_metrics)
    for track_name, metrics in track_metrics.items():
        # if track_name == 'chord_freqs':
        #     # print(metrics)
        #     all_chord_merge = pd.DataFrame(metrics)
        #     chord_freq_all = all_chord_merge.apply(lambda x: pd.DataFrame([p for p in x.tolist() if p]).sum().to_dict(), axis=0)

    
    
            # all_chord_merge = [d for track_metric in track_metrics['chord_freqs'] for d in track_metric]
            # chord_freq_all = pd.DataFrame(all_chord_merge).sum().to_dict()
            # metrics_avg[track_name] = chord_freq_all
        # else:
            print(metrics)
            # empty_bar_ratio, apc_per_bar, pitch_entropies_per_bar, melody_pitch_all = get_track_metrics(metrics)
            empty_bar_ratio, apc_per_bar= get_track_metrics(metrics)
            metrics_avg[track_name] = {
                "empty_bar_ratio": empty_bar_ratio,
                "apc_per_bar": apc_per_bar,
                # "pitch_entropies_per_bar": pitch_entropies_per_bar,
                # "melody_pitch_all": melody_pitch_all,
            }
        
    # all_chord_merge = [d for track_metric in track_metrics['chord_freqs'] for d in track_metric]
    # chord_freq_all = pd.DataFrame(all_chord_merge).sum().to_dict()

    return metrics_avg

In [11]:
track_metrics = get_all_metrics("music_LMD4")


processing music_LMD4/6/generated.mid
processing music_LMD4/56/generated.mid
processing music_LMD4/98/generated.mid
processing music_LMD4/52/generated.mid
processing music_LMD4/10/generated.mid
processing music_LMD4/72/generated.mid
processing music_LMD4/99/generated.mid
processing music_LMD4/45/generated.mid
processing music_LMD4/3/generated.mid
processing music_LMD4/17/generated.mid
processing music_LMD4/48/generated.mid
processing music_LMD4/96/generated.mid
processing music_LMD4/46/generated.mid
processing music_LMD4/60/generated.mid
processing music_LMD4/65/generated.mid
processing music_LMD4/40/generated.mid
processing music_LMD4/58/generated.mid
processing music_LMD4/34/generated.mid
processing music_LMD4/22/generated.mid
processing music_LMD4/50/generated.mid
processing music_LMD4/91/generated.mid
processing music_LMD4/69/generated.mid
processing music_LMD4/71/generated.mid
processing music_LMD4/13/generated.mid
processing music_LMD4/82/generated.mid
processing music_LMD4/80/ge

In [12]:
track_metrics

{'drums': {'empty_bar_ratio': 0.0025, 'apc_per_bar': 0.0},
 'bass': {'empty_bar_ratio': 0.005, 'apc_per_bar': 2.135},
 'strings': {'empty_bar_ratio': 0.06313131313131314,
  'apc_per_bar': 1.244949494949495},
 'guitar': {'empty_bar_ratio': 0.05697278911564625,
  'apc_per_bar': 1.7040816326530612}}

In [14]:
lmd_metrics = get_all_metrics("LMD4_phrases")


processing LMD4_phrases/5101/generated.mid
processing LMD4_phrases/1342/generated.mid
processing LMD4_phrases/2900/generated.mid
processing LMD4_phrases/5520/generated.mid
processing LMD4_phrases/4493/generated.mid
processing LMD4_phrases/3923/generated.mid
processing LMD4_phrases/3407/generated.mid
processing LMD4_phrases/3854/generated.mid
processing LMD4_phrases/5768/generated.mid
processing LMD4_phrases/295/generated.mid
processing LMD4_phrases/3768/generated.mid
processing LMD4_phrases/3252/generated.mid
processing LMD4_phrases/6096/generated.mid
processing LMD4_phrases/1870/generated.mid
processing LMD4_phrases/6990/generated.mid
processing LMD4_phrases/2917/generated.mid
processing LMD4_phrases/2371/generated.mid
processing LMD4_phrases/4818/generated.mid
processing LMD4_phrases/5535/generated.mid
processing LMD4_phrases/183/generated.mid
processing LMD4_phrases/5213/generated.mid
processing LMD4_phrases/4662/generated.mid
processing LMD4_phrases/5296/generated.mid
processing LM

In [15]:
lmd_metrics

{'drums': {'empty_bar_ratio': 0.007914651620992297, 'apc_per_bar': 0.0},
 'bass': {'empty_bar_ratio': 0.006459340856170517,
  'apc_per_bar': 2.272837184309511},
 'strings': {'empty_bar_ratio': 0.011261866380082391,
  'apc_per_bar': 2.914382948235715},
 'guitar': {'empty_bar_ratio': 0.01545987820168368,
  'apc_per_bar': 1.9085840945728103}}