In [1]:
import mido
import heapq # merge (i.e. from mergesort)
import ripser
import numpy as np

In [35]:
from tqdm import tqdm_notebook as tqdm
from sklearn.manifold import MDS
from sklearn.manifold import TSNE
import sklearn_tda as tda

Gudhi not found--GraphInducedComplex not available


In [2]:
# Open one MIDI file, print the name of each track, and collect every message
# from every track (in track order) into the flat list `msgArray`.
mid = mido.MidiFile('data/011.mid')
msgArray = []
for i, track in enumerate(mid.tracks):
    print('Track {}: {}'.format(i, track.name))
    # A track iterates over its messages, so extend() appends them one by one.
    msgArray.extend(track)

Track 0: 
Track 1: 


In [3]:
# put together midi files 
def convertToAbsoluteTimeAndMerge(mid):
    """Merge the sounding note-on events of all tracks into one time-ordered list.

    Args:
        mid: Object exposing ``tracks``; each track is an iterable of messages
            with ``time`` (delta since the previous message on that track) and
            ``type`` attributes, plus ``velocity`` on 'note_on' messages.

    Returns:
        list of ``(abs_time, message)`` pairs sorted by ``abs_time``, where
        ``abs_time`` is the running sum of ``time`` values within the message's
        own track. Only 'note_on' messages with velocity > 0 are kept. Events
        with equal times keep track order (earlier track first).
    """
    perTrack = []
    for track in mid.tracks:
        clock = 0
        sounding = []
        for event in track:
            # Every message advances the clock, even the ones filtered out below.
            clock += event.time
            if event.type != 'note_on' or not (event.velocity > 0):
                continue
            sounding.append((clock, event))
        perTrack.append(sounding)

    # Each per-track list is already sorted by time, so a k-way merge suffices.
    return list(heapq.merge(*perTrack, key=lambda pair: pair[0]))

In [4]:
# return one dimensional point cloud -> n nodes
def ExtractSingleNotesUnique(notes, getNote=lambda x: x):
    """Return the distinct note values (modulo 12) found in ``notes``.

    Args:
        notes: Iterable of note records, in any order.
        getNote: Callable that extracts the numeric note value from one record.
            Defaults to the identity, i.e. ``notes`` already holds numbers.

    Returns:
        1-D numpy array of the unique ``getNote(note) % 12`` values in
        ascending order (empty array when ``notes`` is empty).
    """
    noteSet = {getNote(note) % 12 for note in notes}
    # np.array(<set>) would wrap the set in a 0-d object array; converting to a
    # sorted list first gives a real 1-D numeric array with a stable order.
    return np.array(sorted(noteSet))

In [5]:
# N-dimensional point cloud: slide a window of N consecutive notes over the sequence (note 1..N, note 2..N+1, ...), which is the easiest way to preserve the time series. Returns an M x N point cloud of the M unique windows.
def ExtractNotesTimeSeriesUnique(notes, N, getNote=lambda x: x):
    """Collect the distinct length-N windows of consecutive notes (modulo 12).

    Args:
        notes: Indexable, ordered sequence of note records.
        N: Window length, i.e. how many consecutive notes form one point.
        getNote: Callable that extracts the numeric note value from one record.

    Returns:
        numpy array with one row per unique window and N columns. Row order
        follows set iteration order and carries no meaning.
    """
    offsets = range(N)
    uniqueWindows = {
        tuple(getNote(notes[start + k]) % 12 for k in offsets)
        for start in range(len(notes) - N + 1)
    }
    return np.array([np.array(window) for window in uniqueWindows])

In [6]:
# defines the distance function between every pair of points in the point cloud
def ComputeNNoteSeriesDistances(noteSeriesSet):
    """Build the symmetric pairwise distance matrix of a note-window point cloud.

    The distance between two rows is the sum over positions of the circular
    difference on a 12-step cycle: ``min(|a - b|, 12 - |a - b|)``.

    Args:
        noteSeriesSet: numpy array with one point per row.

    Returns:
        Float numpy array of shape (M, M), M being the number of rows, with
        zeros on the diagonal.
    """
    size = len(noteSeriesSet)
    dists = np.zeros((size, size))
    rowCount = noteSeriesSet.shape[0]
    for a in range(rowCount):
        # Only the upper triangle is computed; each value is mirrored below.
        for b in range(a, rowCount):
            gap = np.abs(noteSeriesSet[a] - noteSeriesSet[b])
            wrapped = np.minimum(gap, 12 - gap)
            dists[a, b] = dists[b, a] = np.sum(wrapped)
    return dists

In [7]:
# Spot-check one message: read the .note attribute of entry 20 on track 0.
mid.tracks[0][20].note

93

In [8]:
# Display the ticks_per_beat value stored on the loaded MidiFile.
mid.ticks_per_beat

480

In [19]:
mid = mido.MidiFile('data/013.mid')
# Print how many unique note windows the file has for window lengths 1, 2 and 3.
# After the loop `notes` holds the length-3 result.
for windowLen in (1, 2, 3):
    notes = ExtractNotesTimeSeriesUnique(
        convertToAbsoluteTimeAndMerge(mid), windowLen, getNote=lambda x: x[1].note)
    print(len(notes))

7
41
130


In [10]:
# Inspect the shape of `notes`: one row per unique window, one column per window position.
notes.shape

(130, 3)

In [11]:
# Look at the first row of `notes`.
notes[0]

array([8, 6, 6])

In [20]:
# Rebuild `notes` with window length 2, print how many unique windows there are,
# and display the shape of a single row.
notes = ExtractNotesTimeSeriesUnique(convertToAbsoluteTimeAndMerge(mid), 2, getNote=lambda x: x[1].note)
print(len(notes))
notes[0].shape

41


(2,)

In [21]:
# Pairwise distance matrix for the rows of `notes` (see ComputeNNoteSeriesDistances).
dists = ComputeNNoteSeriesDistances(notes)

In [22]:
# Show two of the unique windows: rows 2 and 16 of `notes`.
print(notes[2])
print(notes[16])

[11 11]
[5 5]


In [23]:
# Run ripser on the precomputed distance matrix (distance_matrix=True) with
# maxdim=1 and keep the 'dgms' entry of the result; then display how many
# diagrams came back.
dgms1 = ripser.ripser(dists, distance_matrix=True, maxdim=1)['dgms']
len(dgms1)

2

In [24]:
# dgms

In [25]:
#dgms[0]

In [26]:
# Load a different MIDI file; the following cells recompute notes and dists for it.
mid = mido.MidiFile('data/001.mid')

In [27]:
# Unique length-2 note windows (notes taken modulo 12) of the currently loaded file.
notes = ExtractNotesTimeSeriesUnique(convertToAbsoluteTimeAndMerge(mid), 2, getNote=lambda x: x[1].note)

In [28]:
# Pairwise distance matrix for the rows of `notes` (see ComputeNNoteSeriesDistances).
dists = ComputeNNoteSeriesDistances(notes)

In [29]:
# Run ripser on the precomputed distance matrix with maxdim=1 and keep the
# 'dgms' entry, stored separately from dgms1.
dgms2 = ripser.ripser(dists, distance_matrix=True, maxdim=1)['dgms']

In [34]:
## Compute the distances between the 0 and 1 dimensional persistence diagrams and plot it using TSNE
NUM_FILES = 20  # the files are named data/001.mid ... data/020.mid

# np.zeros takes the shape as ONE tuple argument; np.zeros(20, 20) would pass
# the second 20 as the dtype and raise a TypeError.
bottleneck_distances_0 = np.zeros((NUM_FILES, NUM_FILES))
bottleneck_distances_1 = np.zeros((NUM_FILES, NUM_FILES))
dgmArr0 = []  # dimension-0 diagram of each file, in file order
dgmArr1 = []  # dimension-1 diagram of each file, in file order

for i in range(1, NUM_FILES + 1):
    # Zero-pad the index to three digits: 1 -> "001", 10 -> "010".
    path = "data/{:03d}.mid".format(i)
    mid = mido.MidiFile(path)
    # Same pipeline as the single-file cells: length-2 note windows,
    # pairwise distances, then persistence diagrams up to dimension 1.
    notes = ExtractNotesTimeSeriesUnique(convertToAbsoluteTimeAndMerge(mid), 2, getNote=lambda x: x[1].note)
    fileDists = ComputeNNoteSeriesDistances(notes)
    fileDgms = ripser.ripser(fileDists, distance_matrix=True, maxdim=1)['dgms']
    dgmArr0.append(fileDgms[0])
    dgmArr1.append(fileDgms[1])

# TODO: fill bottleneck_distances_0 / bottleneck_distances_1 from the pairwise
# distances between the collected diagrams, then embed them with TSNE.

In [34]:
## For dimension 0 - ignore the last row
dgms1_0 = dgms1[0][:-1]
dgms2_0 = dgms2[0][:-1]