In [23]:
import numpy as np
import pandas as pd
import music21

In [12]:
data = pd.read_csv('data_mapped.csv')
data.head()

Unnamed: 0,piece,notes
0,./abc/1b651a94a8f8680da3cef1e60705194d.abc,"[[1, 3, 5, 6, 7, 8, 9, 11], [1, 3, 5, 6, 7, 8,..."
1,./abc/3679c385a618c89f00d159f418022b39.abc,"[[1, 3, 4, 5, 7, 8, 9, 11], [1, 4, 5, 6, 7, 8,..."
2,./abc/60ae81952628613e03d563b0dae8bbc2.abc,"[[1, 4, 5, 7, 9, 10, 11, 12], [1, 3, 5, 9, 11]]"
3,./abc/6a7d307d1f0712090e4043a6b4637efa.abc,"[[1, 5, 9], [1, 1, 3, 4, 5, 7, 9, 11], [1, 4, ..."
4,./abc/8c6965e3c831e8cb769e578423c12d1e.abc,"[[1, 5, 7, 8, 9], [1, 3, 4, 5, 6, 7, 8, 9, 10,..."


In [None]:
import ast
# change the note column to list
data['notes'] = data['notes'].apply(ast.literal_eval)

In [42]:
from collections import defaultdict
from collections import Counter

# get the bigram of the music data
bigram = []
bigram_freq = defaultdict(int)

# get the bigram of each song
for i in range(len(data)):
    for bar in data.loc[i,'notes']:
        bigram.extend(zip(bar, bar[1:]))
    
print(bigram)

# get the frequency of every bigram
bigram_freq = dict(Counter(bigram))
print(len(bigram_freq))


[(1, 3), (3, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 11), (1, 3), (3, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 11), (1, 3), (3, 4), (4, 5), (5, 7), (7, 8), (8, 9), (9, 11), (1, 3), (3, 4), (4, 5), (5, 7), (7, 8), (8, 9), (9, 11), (1, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (1, 4), (4, 5), (5, 7), (7, 9), (9, 11), (1, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (1, 3), (3, 4), (4, 5), (5, 8), (8, 9), (9, 11), (1, 3), (3, 4), (4, 5), (5, 8), (8, 9), (9, 11), (1, 3), (3, 4), (4, 5), (5, 8), (8, 9), (9, 11), (1, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 11), (1, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (1, 3), (3, 4), (4, 5), (5, 8), (8, 9), (9, 11), (1, 2), (2, 3), (3, 4), (4, 5), (1, 4), (4, 5), (5, 7), (7, 9), (9, 10), (10, 11), (11, 12), (1, 3), (3, 5), (5, 9), (9, 11), (1, 5), (5, 9), (1, 1), (1, 3), (3, 4), (4, 5), (5, 7), (7, 9), (9, 11), (1, 

In [44]:
# remove the identical bigram
for key in bigram_freq:
    if key[0] == key[1]:
        key = (0,0)
print(len(bigram_freq))

49


In [15]:
def predict_next_state(bigram_freq, current_state):
    """
    predict the next state based on the bigram frequency
    """
    # get the possible next states
    possible_next_states = [k[1] for k, v in bigram_freq.items() if k[0] == current_state]
    # get the frequency of each possible next state
    freq = [v for k, v in bigram_freq.items() if k[0] == current_state]
    # normalize the frequency
    freq = np.array(freq) / np.sum(freq)
    # predict the next state
    next_state = np.random.choice(possible_next_states, p=freq)
    return next_state

In [50]:
def generate_sequence(bigram_freq, start_state, length, num_bars):
    """
    generate a sequence based on the bigram frequency
    """
    # initialize the sequence
    sequence = [[start_state] for i in range(num_bars)]
    print(sequence)
    # generate the sequence
    for i in range(len(sequence)):
        for j in range(length):
            next_state = predict_next_state(bigram_freq, sequence[i][-1])
            if next_state == 12:
                break
            # randomly stop if the length has reached 4
            if len(sequence[i]) >= 4 and j >= 8:
                if np.random.random() > 0.5:
                    break
            sequence[i].append(next_state)
    return sequence

In [51]:
# generate a sequence based on the bigram frequency
sequence = generate_sequence(bigram_freq, 1, 12,8)
print(sequence)

[[1], [1], [1], [1], [1], [1], [1], [1]]
[[1, 2, 3, 4, 5, 9, 11], [1, 2, 3, 4, 5, 6, 7, 9, 11], [1, 1, 2, 3, 4, 5, 7, 9, 11], [1, 3, 4, 5, 6, 7, 8, 9, 11], [1, 2, 3, 4, 5, 7, 8, 9, 11], [1, 2, 3, 7, 9, 11], [1, 2, 3, 4, 5, 9, 11], [1, 4, 5, 7, 8, 9, 11]]


In [25]:
def convert_to_duration(random_song_list):
    last = 4.0
    # read the list backwards
    random_song_list.reverse()
    # create a list with the duration of each note
    duration_list = []
    for bar in random_song_list:
        # reverse the inner list
        bar.reverse()
        for note in bar:
            duration_list.append(last - note)
            last = note
        last += 4.0
    # reverse the list again
    duration_list.reverse()
    return duration_list

def convert_to_stream(duration_list):
    # create an empty stream
    stream = music21.stream.Stream()
    # define the tempo as 3/4
    stream.append(music21.meter.TimeSignature('3/4'))
    # create note
    # assign random pitch (C4)
    # assign duration (quarter)
    # append the note into empty stream
    for duration in duration_list:
        note = music21.note.Note(pitch='C4', quarterLength=duration)
        stream.append(note)
    return stream

In [33]:
# generate 10 sequences based on the bigram frequency
for i in range(1):
    sequence = generate_sequence(bigram_freq, 1, 12, 8)
    print(sequence)
    duration_list = convert_to_duration(sequence)
    stream = convert_to_stream(duration_list)
    stream.show()

[[1], [1], [1], [1], [1], [1], [1], [1]]
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 11], [1, 3, 4, 5, 7, 8, 9, 10, 11], [1, 3, 4, 5, 6, 7, 7, 8, 9, 11], [1, 4, 5, 7, 9, 11], [1, 2, 3, 4, 5, 7, 9, 11], [1, 3, 4, 5, 7, 9, 11], [1, 4, 5, 7, 8, 9, 11], [1, 2, 3, 4, 5, 8, 9, 11]]


StreamException: cannot place element <music21.note.Note C> with start/end 39.0/32.0 within any measures