# Bachelor project notebook
### Musical patterns for prediction

Before continuing, make sure to download and install <a href="https://abjad.github.io/">abjad</a>, <a href="http://lilypond.org">LilyPond</a> (needed by abjad) and <a href="https://github.com/craffel/pretty-midi">pretty_midi</a>.

First we'll need to import the libraries we will be using throughout this notebook.

In [1]:
import os
import random

import abjad
import numpy as np
import pretty_midi
# We can safely ignore the warning if there is one.

NOTE: The Pärt demo requires abjad-ext-tonality


Then we'll need some utility functions that will be used by the three models.

In [2]:
def find_closest(values, val):
    """
    values: non-empty array-like.
    val: some value of the same type as the items of values.

    Returns the item of "values" whose absolute difference to "val" is the smallest.
    When several items are equally close, the one that comes first in "values" wins.
    """
    best = values[0]
    best_gap = float("inf")
    for candidate in values:
        gap = abs(candidate - val)
        # only a strictly smaller gap replaces the current best, so ties keep the earlier item
        if not gap < best_gap:
            continue
        best, best_gap = candidate, gap
    return best

def parse_midi(notes,round_durations=4):
    """
    notes: array-like of pretty_midi Notes
    round_durations: number of decimals each duration is rounded to

    Splits the notes into four parallel float arrays, returned in this order:
    pitches, onsets (note starts), velocities and (rounded) durations.
    """
    pitches = np.array([note.pitch for note in notes], dtype=float)
    onsets = np.array([note.start for note in notes], dtype=float)
    velocities = np.array([note.velocity for note in notes], dtype=float)
    durations = np.array(
        [round(note.get_duration(), round_durations) for note in notes],
        dtype=float,
    )
    return pitches, onsets, velocities, durations

def markov_model_first_order(table,with_smoothing=False,probability_known_states=0.9):
    """
    table: non-empty array-like of hashable items to calculate a 1-order markov model from
    with_smoothing: if set to true, every row is mixed with the histogram of "table", so that
        every state of the model can be reached from every state
    probability_known_states: weight given to the observed transitions in that mix (the
        histogram gets 1 - probability_known_states), must be between 0 and 1.

    Returns a nested dictionary {event: {next event: probability of that next event}}.
    If the last item of "table" is never followed by anything, its row is the histogram of "table".
    """
    assert probability_known_states>=0 and probability_known_states<=1
    length = len(table)
    assert length > 0

    # count every observed transition current -> following
    transitions = {}
    outgoing_totals = {}
    for position in range(1, length):
        current, following = table[position-1], table[position]
        successors = transitions.setdefault(current, {})
        successors[following] = successors.get(following, 0) + 1
        outgoing_totals[current] = outgoing_totals.get(current, 0) + 1

    # turn the counts of each row into probabilities
    for state, successors in transitions.items():
        for following in successors:
            successors[following] /= outgoing_totals[state]

    # the last item may have no successor at all: fall back to the overall histogram
    last_item = table[length-1]
    if last_item not in transitions:
        fallback = {}
        for candidate in table:
            fallback[candidate] = fallback.get(candidate, 0) + 1
        for candidate in fallback:
            fallback[candidate] /= length
        transitions[last_item] = fallback

    if with_smoothing:
        histogram = {}
        for item in table:
            histogram[item] = histogram.get(item, 0) + 1
        for item in histogram:
            histogram[item] /= length

        weight_known = probability_known_states
        weight_unknown = 1-weight_known
        all_states = list(transitions.keys())
        for successors in transitions.values():
            # states never observed after this one only get the histogram share
            unseen = [state for state in all_states if state not in successors]
            for following in successors:
                successors[following] = successors[following]*weight_known + histogram[following]*weight_unknown
            for state in unseen:
                successors[state] = histogram[state]*weight_unknown
    return transitions

And two functions to write a list of notes to a CSV file and to read notes back from a CSV file.

In [3]:
def midi_to_csv(notes,filename):
    """
    notes: array-like of pretty_midi notes
    filename: string, name of the file to write to (overwritten if it exists)

    Writes each note on its own line into a csv file. Each line looks like:
    start,pitch,morph_pitch,duration,channel
    The morph pitch is simply the pitch again and the channel is always 4; both are unused.
    """
    lines = [
        ",".join((
            str(note.start),
            str(note.pitch),
            str(note.pitch),
            str(note.get_duration()),
            str(4),
        )) + "\n"
        for note in notes
    ]
    # the context manager closes the file even if writing fails
    with open(filename, "w") as csv_file:
        csv_file.writelines(lines)
    
def csv_to_notes(filename):
    """
    filename: string, name of the file to read from

    Reads one note per line from a csv file whose lines look like
    start,pitch,morph_pitch,duration,channel (only start, pitch and duration are used).
    Blank lines are skipped. Every note gets a velocity of 80.

    Returns a list of pretty_midi Notes.
    """
    import csv
    notes = list()
    # newline="" lets the csv module do its own line-ending handling
    with open(filename, newline="") as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if not row:
                # csv.reader yields an empty list for a blank line
                continue
            onset = float(row[0])
            notes.append(pretty_midi.Note(velocity=80,start=onset,pitch=int(row[1]),end=onset+float(row[3])))
    return notes

### Simple first-order Markov model
With the utility functions now written, we can start the generation process with the simplest method: a first-order Markov model, where each attribute of a new note (duration, pitch and velocity) depends only on the same attribute of the previous note.

In [4]:
def generate_prediction_with_simple_markov(filename, patterns_to_generate = 4,with_smoothing=False,probability_known_states=0.9):
    """
    filename: string of the filename to read, has to be a midi (.mid) file. Only the notes of
        its first instrument are used.
    patterns_to_generate: number of notes to generate after the original notes.
    with_smoothing: passed on to markov_model_first_order.
    probability_known_states: passed on to markov_model_first_order.

    Builds three independent first-order markov models (pitches, velocities and differences of
    onsets) and uses them to draw the new notes one by one. The original notes followed by the
    generated ones are written to "result_<name of the input file>.mid" in the current
    directory and shown with abjad.

    Raises ValueError if the instrument has less than two notes.
    """
    notes = pretty_midi.PrettyMIDI(filename).instruments[0].notes
    if len(notes) < 2:
        # with a single note there is no difference of onsets to learn from
        raise ValueError("at least two notes are needed to build the markov models")

    result = pretty_midi.PrettyMIDI()
    result_program = pretty_midi.instrument_name_to_program("Acoustic Grand Piano")
    result_instrument = pretty_midi.Instrument(program=result_program)

    # Statistic model with first order markov model
    pitches,onsets,velocities,_ = parse_midi(notes)

    # difference of onsets, will be used as durations
    diff_onsets = onsets[1:] - onsets[:-1]

    markov_pitches = markov_model_first_order(pitches,with_smoothing,probability_known_states)
    markov_velocities = markov_model_first_order(velocities,with_smoothing,probability_known_states)
    markov_diff_onsets = markov_model_first_order(diff_onsets,with_smoothing,probability_known_states)
    known_durations = list(markov_diff_onsets.keys())

    # write current notes, each note ends when the next note starts
    for note, next_note in zip(notes, notes[1:]):
        result_instrument.notes.append(pretty_midi.Note(velocity=note.velocity,pitch=note.pitch,start=note.start,end=next_note.start))
    # special case for last note, as there isn't a next note
    last_note = notes[-1]
    result_instrument.notes.append(pretty_midi.Note(velocity=last_note.velocity,pitch=last_note.pitch,start=last_note.start,end=last_note.start + find_closest(known_durations,last_note.get_duration())))

    for _ in range(patterns_to_generate):
        last_note = result_instrument.notes[-1]
        # duration using difference of onsets; end - start may differ slightly from the
        # state it was drawn from, so snap it back onto a known state first
        diff_start = find_closest(known_durations,last_note.get_duration())
        duration_row = markov_diff_onsets[diff_start]
        new_note_duration = random.choices(list(duration_row.keys()),weights=list(duration_row.values()))[0]
        # velocity
        velocity_row = markov_velocities[last_note.velocity]
        new_note_velocity = int(random.choices(list(velocity_row.keys()),weights=list(velocity_row.values()))[0])
        # pitch
        pitch_row = markov_pitches[last_note.pitch]
        new_note_pitch = int(random.choices(list(pitch_row.keys()),weights=list(pitch_row.values()))[0])
        # the new note starts where the previous one ends
        new_note = pretty_midi.Note(velocity=new_note_velocity,pitch=new_note_pitch,start=last_note.end,end=last_note.end+new_note_duration)
        result_instrument.notes.append(new_note)
    result.instruments.append(result_instrument)
    # 4) Write results, named after the input file whatever its directory or extension
    stem = os.path.splitext(os.path.basename(filename))[0]
    result.write("result_" + stem + ".mid")
    # 5) Show results using abjad
    notes_abjad = list()
    for n in result_instrument.notes:
        notes_abjad.append(abjad.Note(n.pitch-5*12,abjad.Duration(n.get_duration()/2).equal_or_greater_assignable))
    staff = abjad.Staff(notes_abjad)
    abjad.show(staff)

In [5]:
# Continue the sample melody by ten notes with the simple markov model, without smoothing.
generate_prediction_with_simple_markov(
    "midi_sample_c_major.mid",
    patterns_to_generate=10,
    with_smoothing=False,
)



### String-based pattern recognition
We can now continue with the methods that find patterns. We'll start with exact pattern finding (the string-based approach).

In [7]:
def is_note_equal(this,that):
    """
    this: pretty_midi Note (or None)
    that: pretty_midi Note (or None)

    Returns True if both notes exist and have the same pitch and exactly the same duration.
    None is never equal to anything, not even to None.
    """
    # identity test: a missing note must be detected without calling the note's own ==
    if this is None or that is None:
        return False
    return this.pitch == that.pitch and this.get_duration() == that.get_duration()

def find_biggest_recurring_pattern(seq):
    """
    seq: array-like of pretty_midi Note, entries may be None.

    Returns the biggest pattern (sublist of notes) that appears at least twice without
    overlapping itself, as well as the index of its first appearance in "seq".
    Returns an empty list and 0 when nothing is repeated.
    """
    size = len(seq)
    # match_length[i][j]: length of the common run ending at seq[i-1] and at seq[j-1] (i < j)
    match_length = np.zeros((size+1,size+1),dtype=int)
    best_length = 0
    end_index = 0
    for i in range(1,size+1):
        left = seq[i-1]
        for j in range(i+1,size+1):
            right = seq[j-1]
            # the run may only grow while the two occurrences don't overlap
            extends_run = (
                left is not None
                and right is not None
                and is_note_equal(left,right)
                and (j-i) > match_length[i-1][j-1]
            )
            if not extends_run:
                match_length[i][j] = 0
                continue
            match_length[i][j] = match_length[i-1][j-1] + 1
            if match_length[i][j] > best_length:
                best_length = match_length[i][j]
                end_index = max(i,end_index)
    pattern = list()
    if best_length > 0:
        pattern.extend(seq[k-1] for k in range(end_index-best_length+1, end_index+1))
    return pattern, end_index-best_length

def find_occurrences_and_indexes(seq):
    """
    seq: list of pretty_midi Note, entries may be None

    Looks for the biggest recurring pattern and for every place where it occurs.
    Returns a copy of the sequence where each note of each occurrence is replaced by None,
    the pattern itself, and the index at which each occurrence starts.
    When no pattern recurs, returns "seq" itself, None and an empty list.
    """
    pattern, first_start = find_biggest_recurring_pattern(seq)
    if len(pattern)==0:
        return seq, None, list()
    pattern_length = len(pattern)
    # nothing before the first occurrence can match, so it is copied as is
    remaining = seq[0:first_start]
    occurrence_starts = list()
    position = first_start
    while position < len(seq):
        occurs_here = all(
            position+offset < len(seq) and is_note_equal(seq[position+offset],pattern[offset])
            for offset in range(pattern_length)
        )
        if occurs_here:
            occurrence_starts.append(position)
            remaining.extend([None]*pattern_length)
            position += pattern_length
        else:
            remaining.append(seq[position])
            position += 1
    return remaining, pattern, occurrence_starts

def find_all_occurrences_and_indexes(seq):
    """
    seq: list of pretty_midi Note

    Repeatedly extracts the biggest recurring pattern until nothing recurs anymore.
    Returns two parallel lists: the patterns, and for each pattern the indexes where it starts.
    Every note left over at the end becomes a pattern of one note with a single index.
    """
    list_patterns = list()
    list_indexes = list()
    remaining = seq
    while True:
        remaining, pattern, starts = find_occurrences_and_indexes(remaining)
        if pattern is None:
            break
        list_patterns.append(pattern)
        list_indexes.append(starts)
    # special case for non recurring patterns: notes that appear only once
    for position, note in enumerate(remaining):
        if note is None:
            continue
        list_patterns.append([note])
        list_indexes.append([position])
    return list_patterns,list_indexes

def first_order_markov_with_patterns(seq,with_smoothing=False,probability_known_patterns=0.9):
    """
    seq: list of pretty_midi Note.
    with_smoothing, probability_known_patterns: passed on to markov_model_first_order.

    Returns a first-order Markov model of the patterns found in the sequence of notes,
        the list of patterns, the list of indexes, and the sequence of notes rewritten as a
        sequence of pattern numbers (positions in the list of patterns).
    """
    list_patterns, list_indexes = find_all_occurrences_and_indexes(seq)
    # index of a note in seq -> number of the pattern that starts there
    start_to_pattern = {}
    for pattern_number, starts in enumerate(list_indexes):
        for start in starts:
            start_to_pattern[start] = pattern_number
    pattern_indexes_seq = list()
    if start_to_pattern:
        # walk through seq, jumping from the start of one occurrence to the start of the next
        head = 0
        while head < len(seq):
            pattern_number = start_to_pattern[head]
            pattern_indexes_seq.append(pattern_number)
            head += len(list_patterns[pattern_number])
    model = markov_model_first_order(pattern_indexes_seq,with_smoothing,probability_known_patterns)
    return model,list_patterns,list_indexes,pattern_indexes_seq


Now let's try it out!

In [8]:
def generate_prediction_with_string_based(filename, patterns_to_generate = 4,with_smoothing=False,probability_known_patterns=0.9):
    """
    filename: string of the filename to read, has to be a midi (.mid) file. Only the notes of
        its first instrument are used.
    patterns_to_generate: number of patterns to generate after the original notes.
    with_smoothing: passed on to first_order_markov_with_patterns.
    probability_known_patterns: passed on to first_order_markov_with_patterns.

    Rewrites the notes as a sequence of exact (string-based) patterns, continues that sequence
    with a first-order markov model and turns it back into notes. The result is written to
    "result_<name of the input file>.mid" in the current directory and shown with abjad.

    Raises ValueError if the instrument has less than two notes.
    """
    seq_temp = pretty_midi.PrettyMIDI(filename).instruments[0].notes
    if len(seq_temp) < 2:
        # with a single note there is no difference of onsets to take a duration from
        raise ValueError("at least two notes are needed to find patterns")

    # 0) Transform seq_temp so it has correct durations
    _,onsets,_,_ = parse_midi(seq_temp)
    diff_onsets = onsets[1:] - onsets[:-1]
    seq = list()
    # write current notes, each note ends when the next note starts
    for note, next_note in zip(seq_temp, seq_temp[1:]):
        seq.append(pretty_midi.Note(velocity=note.velocity,pitch=note.pitch,start=note.start,end=next_note.start))
    # special case for last note, as there isn't a next note
    last_note = seq_temp[-1]
    seq.append(pretty_midi.Note(velocity=last_note.velocity,pitch=last_note.pitch,start=last_note.start,end=last_note.start + find_closest(diff_onsets,last_note.get_duration())))

    # 1) Transform sequence of notes into sequence of patterns
    markov,patterns,_,transformed_seq = first_order_markov_with_patterns(seq,with_smoothing,probability_known_patterns)
    # 2) Generate next patterns
    for _ in range(patterns_to_generate):
        last_pattern = transformed_seq[-1]
        next_pattern = random.choices(list(markov[last_pattern].keys()),weights=list(markov[last_pattern].values()))[0]
        transformed_seq.append(next_pattern)

    # 3) Transform back into notes
    pattern_notes = [note for pattern_index in transformed_seq for note in patterns[pattern_index]]
    # the very first note is kept as it is, every other note starts where the previous one ends
    notes = [pattern_notes[0]]
    for current_note in pattern_notes[1:]:
        previous_note = notes[-1]
        new_note = pretty_midi.Note(velocity=current_note.velocity,pitch=current_note.pitch,start=previous_note.end,end=previous_note.end+current_note.get_duration())
        notes.append(new_note)
    # 4) Write results, named after the input file whatever its directory or extension
    result = pretty_midi.PrettyMIDI()
    result_program = pretty_midi.instrument_name_to_program("Acoustic Grand Piano")
    result_instrument = pretty_midi.Instrument(program=result_program)
    result_instrument.notes = notes
    result.instruments.append(result_instrument)
    stem = os.path.splitext(os.path.basename(filename))[0]
    result.write("result_" + stem + ".mid")
    # 5) Show results using abjad
    notes_abjad = list()
    for n in notes:
        notes_abjad.append(abjad.Note(n.pitch-5*12,abjad.Duration(n.get_duration()/2).equal_or_greater_assignable))
    staff = abjad.Staff(notes_abjad)
    abjad.show(staff)

In [12]:
# Continue the sample melody by ten patterns with the string-based model, with smoothing.
generate_prediction_with_string_based(
    "midi_sample_c_major.mid",
    patterns_to_generate=10,
    with_smoothing=True,
)

### Translation-based pattern recognition
Now we'll go with the non-exact pattern approach (translation-based). Again, we'll need some utility functions.

In [54]:
def find_approximate_patterns(seq_notes):
    """
    Based on SIA(TEC) algorithm
    seq_notes: list of (onset,pitch) elements, entries may be None (they are ignored)

    Returns a dictionary whose keys are translation vectors (onset difference, pitch difference)
    and whose values are the lists of notes that land on a later note of "seq_notes" when
    translated by that vector. The vector (0,0) is never a key.
    """
    result = {}
    for i, target in enumerate(seq_notes):
        if target is None:
            continue
        # only pairs (earlier note, later note) produce a vector
        for j in range(i):
            origin = seq_notes[j]
            if origin is None:
                continue
            vector = (target[0]-origin[0],target[1]-origin[1])
            if vector[0]==0 and vector[1]==0:
                continue
            result.setdefault(vector, list()).append(origin)
    return result

def is_note_in_seq(note,seq):
    """
    note: (onset,pitch) tuple
    seq: list of (onset,pitch) tuples

    Returns True if some element of seq has the same onset and the same pitch as note.
    """
    return any(
        candidate[0] == note[0] and candidate[1] == note[1]
        for candidate in seq
    )

def are_seqs_equal(seq_1,seq_2):
    """
    seq_1: list of (onset,pitch) tuples
    seq_2: list of (onset,pitch) tuples

    Returns True if the two lists have the same length and the same onset and pitch at
    every position.
    """
    if len(seq_1)!=len(seq_2):
        return False
    return all(
        left[0]==right[0] and left[1]==right[1]
        for left, right in zip(seq_1,seq_2)
    )

def filter_patterns(patterns, notes):
    """
    patterns: dictionary, keys are translation vectors, values are lists of (onset,pitch) tuples
        which can be transformed into other tuples using the key
    notes: list of (onset,pitch) tuples the patterns were found in, entries may be None

    Returns a filtered version of the patterns found. Keeps only the values which don't overlap:
    ex: pattern = [(0,1),(1,2)], translation vector =(1,1), the first note can be turned into a
    note within the same pattern, so we remove that note.

    And values whose notes are continuous, let's say we have four notes:
    [(0,0),(1,1),(2,0),(3,1)], a pattern would be [(0,0),(2,0)] and its corresponding translation vector is
    (1,1). However, the notes in the pattern are not continuous, so only the continuous start
    of the pattern is kept.

    Patterns left with less than two notes are dropped from the result.
    """
    new_patterns = {}
    for key, pattern in patterns.items():
        pattern_onsets = {n[0] for n in pattern}
        # filter patterns so that a translated note never lands on an onset of the same pattern
        temp_pattern = [n for n in pattern if n[0]+key[0] not in pattern_onsets]
        new_patterns[key] = list()
        if not temp_pattern:
            # every note overlaps (e.g. a vector with no onset shift): nothing left to keep
            continue
        first = temp_pattern[0]
        # now we need to check if all elements are contiguous within "notes"
        for i, note in enumerate(notes):
            if note is None:
                continue
            if note[0] != first[0] or note[1] != first[1]:
                continue
            # never read past the end of "notes"
            for j in range(min(len(notes)-i,len(temp_pattern))):
                candidate = notes[i+j]
                if candidate is None:
                    continue
                expected = temp_pattern[j]
                if candidate[0] == expected[0] and candidate[1] == expected[1]:
                    new_patterns[key].append(expected)
                else:
                    break
    # now remove entries containing 0 or 1 note
    return {key: kept for key, kept in new_patterns.items() if len(kept)>1}

def find_biggest_pattern_in_patterns(dict):
    """
    dict: dictionary of translation vector->pattern

    Returns the biggest pattern and its corresponding translation vector.
    When several patterns are equally big, the first one of the dictionary is returned.
    Returns (None, None) for an empty dictionary.
    """
    if not dict:
        return None, None
    # max() keeps the first of several equally long patterns
    trans_vector = max(dict, key=lambda vector: len(dict[vector]))
    return dict[trans_vector], trans_vector

def find_all_trans_vector_with_pattern(dict,pattern):
    """
    dict: dictionary of translation vector->pattern
    pattern: list of (onset,pitch)

    Returns the list of all translation vectors whose pattern is equal to "pattern",
    in the order of the dictionary.
    """
    return [vector for vector in dict if are_seqs_equal(pattern,dict[vector])]

def find_pattern_with_indices(seq,list_patterns,pattern_to_indices,index_pattern):
    """
    seq: list of (onset,pitch) elements; the entries of every located occurrence are
        overwritten with None (modified in place)
    list_patterns: list receiving the notes of every located occurrence (modified in place)
    pattern_to_indices: already existent patterns, maps an occurrence number to the index in
        "seq" where that occurrence starts (modified in place)
    index_pattern: key where to add the new pattern, i.e. pattern_to_indices[index_pattern] = ...
    This method is supposed to find one pattern.

    Returns the pattern that was found (None if there is none), the next free key of
    pattern_to_indices, and the translation vectors of the located occurrences (None if no
    pattern was found).
    """
    result = find_approximate_patterns(seq)
    result_filter = filter_patterns(result,seq)
    pattern,trans_vector = find_biggest_pattern_in_patterns(result_filter)
    if pattern!=None:
        all_trans_vectors = find_all_trans_vector_with_pattern(result_filter,pattern)
        # the untranslated pattern itself is looked up first, as translation (0,0)
        all_trans_vectors.insert(0,(0,0))
        # NOTE(review): index_before is never read afterwards
        index_before = index_pattern
        ret_trans_vectors = list()
        for trans in all_trans_vectors:

            # an occurrence is located through the translated first note of the pattern
            first_trans_note = (pattern[0][0]+trans[0],pattern[0][1]+trans[1])
            length_pattern = len(pattern)
            i = 0
            current_pattern = list()
            while i < len(seq):
                current_note = seq[i]
                if current_note!=None and current_note[0]==first_trans_note[0] and current_note[1] == first_trans_note[1]:
                    pattern_to_indices[index_pattern] = i
                    for j in range(length_pattern):
                        trans_note = (pattern[j][0]+trans[0],pattern[j][1]+trans[1])
                        current_pattern.append(trans_note)
                        # NOTE(review): assumes the occurrence fills length_pattern consecutive
                        # entries of seq starting at i; i+j is not checked against len(seq)
                        seq[i+j] = None
                    # only the first match of this translation is consumed
                    break
                else:
                    i+=1
            # translations whose first note was not found in seq are not recorded
            if len(current_pattern)!=0:
                index_pattern+=1
                ret_trans_vectors.append(trans)
                list_patterns.append(current_pattern)
        return pattern,index_pattern,ret_trans_vectors
    return pattern,index_pattern,None

def is_list_empty(seq):
    """
    Checks whether "seq" contains only None values (an empty "seq" counts as empty too).
    """
    return all(entry is None for entry in seq)

def find_all_patterns(seq):
    """
    finds all patterns in a list of (onset,pitch) tuples. "seq" is consumed: the notes of
    every pattern that is found are replaced by None in it.
    Ex: let a certain sequence be 0,1,2,0,1,2,2,3,4,1,3,1,3
    Then a first pattern would be [0,1,2], appearing three times,
    the third time being shifted up by 2 ([2,3,4]).
    A second pattern would be [1,3], appearing twice.
    In this case, the function should return:
    list_patterns = [[0,1,2],[0,1,2],[2,3,4],[1,3],[1,3]]
    patterns_to_indices = {
        0: 0,
        1: 3,
        2: 6,
        3: 9,
        4: 11
    }
    together with the list of the translation vector of each of those occurrences.
    """
    list_patterns = list()
    pattern_to_indices = {}
    trans_vectors = list()
    next_key = 0
    while not is_list_empty(seq):
        pattern, next_key, found_vectors = find_pattern_with_indices(seq,list_patterns,pattern_to_indices,next_key)
        if pattern is None:
            # nothing recurs anymore: every note left becomes its own untranslated pattern
            for position, note in enumerate(seq):
                if note is None:
                    continue
                list_patterns.append([note])
                pattern_to_indices[next_key] = position
                next_key += 1
                trans_vectors.append((0,0))
            break
        trans_vectors.extend(found_vectors)
    return list_patterns,pattern_to_indices,trans_vectors


def collapse_pattern_to_indices(trans_vectors):
    """
    trans_vectors: list of translation vectors (left untouched)

    Let's say we have the following translation vectors:
    [(0,0),(1,0),(0,0),(2,1),(0,0)], then for the generation process, we'll
    need to have different patterns if the translation vectors are different in their second value.
    So technically, (0,0) and (1,0) are the same patterns, only time-shifted. However,
    (0,0) and (2,1) are different, as they are vertically shifted.

    Every (0,0) vector opens a new family of occurrences; inside a family the occurrences are
    grouped by the second value of their vector. Vectors located before the first (0,0) are ignored.

    Returns a dictionary numbering the groups from 0 in order of appearance; each value is the
    list of positions in "trans_vectors" that belong to the group. For the example above:
    {0: [0,1], 1: [2], 2: [3], 3: [4]}.
    """
    families = list()
    current_family = None
    for index, trans_v in enumerate(trans_vectors):
        if trans_v[0]==0 and trans_v[1]==0:
            # the untranslated occurrence is the first member of the "no vertical shift" group
            current_family = {0.0: [index]}
            families.append(current_family)
        elif current_family is not None:
            current_family.setdefault(trans_v[1], list()).append(index)
    collapsed_indices_to_pattern = {}
    for family in families:
        for members in family.values():
            collapsed_indices_to_pattern[len(collapsed_indices_to_pattern)] = members
    return collapsed_indices_to_pattern


def transform_collapsed_and_indices(collapsed,pattern_to_indices):
    """
    collapsed: result of collapse_pattern_to_indices, group -> list of occurrence numbers
    pattern_to_indices: occurrence number -> index within the sequence of notes

    Returns a dictionary group -> list of the indices, within the sequence of notes,
    of the occurrences of that group.
    """
    return {
        group: [pattern_to_indices[occurrence] for occurrence in occurrences]
        for group, occurrences in collapsed.items()
    }

def transform_back_into_seq(true_indices):
    """
    true_indices: dictionary group -> list of indices within the sequence of notes

    Returns the list of groups ordered by the index at which they occur. If an index is
    listed for several groups, the group that comes last in the dictionary is kept.
    """
    group_at_index = {}
    for group, indices in true_indices.items():
        for index in indices:
            group_at_index[index] = group
    return [group_at_index[index] for index in sorted(group_at_index)]

def midi_notes_to_tuples(notes):
    """
    notes: array-like of pretty_midi notes

    Returns the list of the (onset,pitch) tuple of each note, in the same order.
    """
    return [(note.start,note.pitch) for note in notes]


Now that the utility functions are written, we can generate a continuation.

In [55]:
def generate_prediction_with_translation_based(filename, patterns_to_generate = 4,with_smoothing=False,probability_known_patterns=0.9):
    """
    filename: string of the filename to read, has to be a midi (.mid) file. Only the notes of
        its first instrument are used.
    patterns_to_generate: number of patterns to generate after the original notes.
    with_smoothing: passed on to markov_model_first_order.
    probability_known_patterns: passed on to markov_model_first_order.

    Rewrites the notes as a sequence of translation-based patterns, continues that sequence
    with a first-order markov model and turns it back into notes. The result is written to
    "result_<name of the input file>.mid" in the current directory and shown with abjad.

    Raises ValueError if the instrument has less than two notes.
    """
    seq_temp = pretty_midi.PrettyMIDI(filename).instruments[0].notes
    if len(seq_temp) < 2:
        # with a single note there is no difference of onsets to take a duration from
        raise ValueError("at least two notes are needed to find patterns")

    # 0) Transform seq_temp so it has correct durations
    _,onsets,_,_ = parse_midi(seq_temp)
    diff_onsets = onsets[1:] - onsets[:-1]
    notes = list()
    # write current notes, each note ends when the next note starts
    for note, next_note in zip(seq_temp, seq_temp[1:]):
        notes.append(pretty_midi.Note(velocity=note.velocity,pitch=note.pitch,start=note.start,end=next_note.start))
    # special case for last note, as there isn't a next note
    last_note = seq_temp[-1]
    notes.append(pretty_midi.Note(velocity=last_note.velocity,pitch=last_note.pitch,start=last_note.start,end=last_note.start + find_closest(diff_onsets,last_note.get_duration())))

    tuples = midi_notes_to_tuples(seq_temp)

    # 1) Transform sequence of notes into sequence of patterns
    list_patterns,pattern_to_indices,trans_vectors = find_all_patterns(tuples)
    collapsed = collapse_pattern_to_indices(trans_vectors)
    true_indices = transform_collapsed_and_indices(collapsed,pattern_to_indices)
    seq = transform_back_into_seq(true_indices)
    mm1 = markov_model_first_order(seq,with_smoothing,probability_known_patterns)
    # 2) Generate next patterns
    for _ in range(patterns_to_generate):
        last_pattern = seq[-1]
        next_pattern = random.choices(list(mm1[last_pattern].keys()),weights=list(mm1[last_pattern].values()))[0]
        seq.append(next_pattern)
    # 3) Transform back into notes
    # a pattern is played back as the notes of its first occurrence: they start at the index
    # of that occurrence and there are as many of them as notes in the pattern
    pattern_notes = list()
    for pattern in seq:
        first_index = true_indices[pattern][0]
        length = len(list_patterns[collapsed[pattern][0]])
        pattern_notes.extend(notes[first_index:first_index+length])
    # the very first note is kept as it is, every other note starts where the previous one ends
    notes_to_write = [pattern_notes[0]]
    for current_note in pattern_notes[1:]:
        previous_note = notes_to_write[-1]
        new_note = pretty_midi.Note(velocity=current_note.velocity,pitch=current_note.pitch,start=previous_note.end,end=previous_note.end+current_note.get_duration())
        notes_to_write.append(new_note)
    # 4) Write results, named after the input file whatever its directory or extension
    result = pretty_midi.PrettyMIDI()
    result_program = pretty_midi.instrument_name_to_program("Acoustic Grand Piano")
    result_instrument = pretty_midi.Instrument(program=result_program)
    result_instrument.notes = notes_to_write
    result.instruments.append(result_instrument)
    stem = os.path.splitext(os.path.basename(filename))[0]
    result.write("result_" + stem + ".mid")
    # 5) Show results using abjad
    notes_abjad = list()
    for n in notes_to_write:
        notes_abjad.append(abjad.Note(n.pitch-5*12,abjad.Duration(n.get_duration()/2).equal_or_greater_assignable))
    staff = abjad.Staff(notes_abjad)
    abjad.show(staff)

In [58]:
# Continue the sample melody by twenty patterns with the translation-based model, with smoothing.
generate_prediction_with_translation_based(
    "midi_sample_c_major.mid",
    patterns_to_generate=20,
    with_smoothing=True,
)