# CIS 519/419 Final Project
Team: Ruxuan (Cici) Ji (519, Pooja’s cohort), Joan Shaho (419, Pooja’s Cohort), Jiacheng Wei (519, Shubham’s cohort)

Project Mentor TA: Pooja




Instructions:

1.   Go to https://magenta.tensorflow.org/datasets/maestro#download
2.   Download maestro-v3.0.0-midi.zip (this may take some time)
3.   Unzip the folder
4.   Upload the unzipped folder to your Google Drive
5.   You are ready to run this colab doc


In [None]:
!pip install mido==1.2.9
!pip install pretty_midi

First, we import the required packages and libraries.

In [None]:
import pretty_midi
import numpy as np
import os
from mido import MidiFile

Here, we mount Google Drive so that the dataset can be read from it. Note that the MAESTRO dataset must be uploaded to Google Drive first.

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset folder used later in this
# notebook lives under '/content/drive/My Drive/'.
drive.mount('/content/drive')

Here, we extract the notes from the piano instrument. After we get the piano roll array of each song, we store the piano rolls in a dictionary and convert the notes sounding at each timestep into a string, which is mapped to an integer id. The resulting sequences of ids are used as inputs for our models. The notes present in the next timestep act as the target.

In [None]:
def generate_dict_time_notes(file_names, fs=1.5):
  """ Load MIDI files and map each file's position in ``file_names`` to the
  piano roll of its first instrument.

  A file that cannot be loaded is reported on stdout and skipped, so the
  returned dictionary may be missing some indices.

  Parameters
  ==========
  file_names : list
      List of midi file names
  fs : float
    Sampling frequency of the columns, i.e. each column is spaced apart
      by ``1./fs`` seconds.

  Returns
  =======
  dictionary of file index (position in ``file_names``) to piano_roll
  (in np.array)
  """

  rolls_by_index = {}

  for position, path in enumerate(file_names):
      try:
          # only the first instrument track of the file is used
          first_track = pretty_midi.PrettyMIDI(path).instruments[0]
          rolls_by_index[position] = first_track.get_piano_roll(fs=fs)
      except Exception as err:
          # best effort: report the unreadable file and carry on with the rest
          print(err)
          print("broken file : {}".format(path))

  return rolls_by_index

def toString(arr):
  """Render the elements of ``arr`` as one space-terminated string.

  Every element is converted with ``str`` and followed by a single space, so
  ``[60, 64]`` becomes ``"60 64 "`` and an empty input becomes ``""``. The
  trailing space is kept on purpose: these strings are used as dictionary
  keys elsewhere in this notebook and must stay byte-identical.

  Parameters
  ==========
  arr : iterable
    elements to render (e.g. a 1D numpy array of pitch numbers)

  Returns
  =======
  str with one ``"<element> "`` chunk per element
  """
  # join builds the result in one pass instead of repeated concatenation
  return "".join(str(el) + " " for el in arr)

def process_notes_in_song(piano_roll_dict, dictionary, reverse_dict):
  """
  Encode every timestep of every piano roll as an integer id that stands for
  the combination of notes sounding at that timestep.

  Parameters
  ==========
  piano_roll_dict : dict
    dictionary whose values are piano rolls (np.array with one row per pitch
    and one column per timestep). This is the output of the function
    generate_dict_time_notes.

  dictionary: dict
    a dictionary from note-combination string (as produced by toString) to
    integer id. It is updated in place: each combination not seen before is
    given the next free id, ``len(dictionary)``.

  reverse_dict: dict
    a dictionary from integer id to the corresponding note combination
    (in array form). It is updated in place together with ``dictionary``.

  Returns
  =======
  List with one entry per piano roll. Each entry is a list of d integer ids,
  where d is the number of timesteps of that roll; id number i
  (where 0 <= i < d) stands for the notes played at timestep i.
  """

  encoded_songs = []

  for roll in piano_roll_dict.values():
      encoded_steps = []

      # The roll has one row per pitch and one column per timestep, so
      # iterating over the transpose visits the timesteps one by one.
      for frame in roll.T:
        # pitches whose cell value is greater than 1 count as being played
        # NOTE(review): a cell value of exactly 1 is excluded by this test
        # -- confirm that this is intended.
        active = np.where(frame > 1)[0]
        combo = toString(active)
        if combo not in dictionary:
          fresh_id = len(dictionary)
          dictionary[combo] = fresh_id
          reverse_dict[fresh_id] = active
        encoded_steps.append(dictionary[combo])

      encoded_songs.append(encoded_steps)
  return encoded_songs

def create_train_set(list_of_keys_time, reverse_dict, window_length = 50):
  """
  Slice every encoded song into fixed-size windows and build the targets for
  predicting the timestep that follows each window.

  Parameters
  ==========
  list_of_keys_time : list
    list with one sequence of note-combination ids per song (this is the
    output of the function process_notes_in_song)

  reverse_dict: dict
    a dictionary from integers to the corresponding note combination
    (in array form)

  window_length : int
    the number of timesteps to be used for predicting the next timestep

  Returns
  =======
  tuple ``(X, y, y_comb)`` where

  * ``X`` is an array with one row of ``window_length`` ids per sample,
  * ``y`` is a dictionary from pitch (0..127) to a list with one 0/1 flag per
    sample, telling whether that pitch sounds in the timestep right after
    the window (``y`` is empty when no sample could be built),
  * ``y_comb`` is an array with the note-combination id of the timestep
    right after each window.
  """
  X = []
  y = {}
  y_comb = []

  for inner_list in list_of_keys_time:
    # A window starting at j needs its target at index j + window_length, so
    # the valid starts are 0 .. len(inner_list) - window_length - 1. Songs
    # that are not longer than the window contribute no sample.
    for j in range(len(inner_list) - window_length):
      target = inner_list[j + window_length]
      X.append(inner_list[j:j + window_length])
      y_comb.append(target)

      # look the target combination up once instead of once per pitch
      sounding = set(reverse_dict[target])
      for pitch in range(128):
        y.setdefault(pitch, []).append(int(pitch in sounding))

  return np.array(X), y, np.array(y_comb)

def get_midi_files(percent = 1.0, folder_path = '/content/drive/My Drive/maestro-v3.0.0/2018/'):
  """
  Get the paths of the files in the dataset folder

  Parameters
  ==========
  percent : float
    the fraction (between 0.0 and 1.0) of the files to use; the count is
    rounded to the nearest integer and the first files in sorted name order
    are taken

  folder_path : str
    the folder holding the midi files

  Returns
  =======
  a list with the full path of every selected file

  Notes
  =====
  The working directory is changed to ``folder_path``, so files written
  afterwards with a relative name end up in that folder.
  """
  os.chdir(folder_path)

  # Plain-Python equivalent of the notebook-only `!ls`: list the folder that
  # is now the working directory, skip hidden entries and sort by name.
  entries = sorted(name for name in os.listdir() if not name.startswith('.'))

  num_files = int(np.round(percent * len(entries)))
  return [os.path.join(folder_path, name) for name in entries[:num_files]]

We build two dictionaries to represent note combinations by integers. Then we choose the portion of songs to use for training. Lastly, we separate the training songs into timesteps and combine the timesteps into fixed-size windows, which form the training and validation sets.

In [None]:
# note-combination string -> integer id (filled by process_notes_in_song)
dictionary = {}
# integer id -> array of pitches (filled by process_notes_in_song)
reverse_dict = {}
# use 1% of the files found in the dataset folder
midi_file_names = get_midi_files(percent=0.01)
# piano rolls sampled at 1.5 columns per second
piano_rolls = generate_dict_time_notes(midi_file_names, fs = 1.5)
# one sequence of note-combination ids per song
timestep_notes_list = process_notes_in_song(piano_rolls, dictionary, reverse_dict)
# windows of 100 timesteps, each paired with the timestep that follows it
X, y, y_comb = create_train_set(timestep_notes_list, reverse_dict, window_length=100)

The following forms the seed for music generation

In [None]:
# The training window at index 40 is the starting context for generation.
# NOTE(review): this raises IndexError when X holds fewer than 41 windows.
seed = X[40]

We train 128 logistic regression models, one per pitch. Each model is evaluated on 10 repeated random 90/10 train/test splits, and we record the mean accuracy and F1 scores. Then we generate new music using the trained models.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, f1_score
import pandas as pd

# Variable setup
# Work on a copy of the vocabulary so that the ids minted during generation
# do not alter the mapping the training data was encoded with.
new_dict = dict(dictionary)
log_model = [0 for x in range(0,128)]  # 0 marks a pitch without a fitted model
y_train = {}
y_test = {}
train_accuracy = []
test_accuracy = []
train_f1 = []
test_f1 = []
train_score = {}
test_score = {}
train_score_list = []
test_score_list = []

# Train one logistic regression model per pitch. Every model is fitted and
# scored on 10 independent random 90/10 splits (repeated hold-out validation).
for i in range(128):
  train_score_i = 0
  test_score_i = 0
  all_splits_trained = True
  for j in range(10):
    X_train, X_test, y_train[i], y_test[i] = train_test_split(X, y[i], test_size=0.1)  # split into training data and testing data

    # The classifier needs both classes in the training labels; stop working
    # on this pitch as soon as a split contains only one of them.
    if (max(y_train[i]) == min(y_train[i])):
      all_splits_trained = False
      break

    log_model[i] = LogisticRegression(random_state=0,tol=1e-4,max_iter=50000).fit(X_train, y_train[i])

    y_pred_train = log_model[i].predict(X_train)  # Make predictions using the training set
    y_pred_test = log_model[i].predict(X_test)  # Make predictions using the testing set

    train_accuracy.append(log_model[i].score(X_train, y_train[i]))
    test_accuracy.append(log_model[i].score(X_test, y_test[i]))

    train_f1.append(f1_score(y_train[i], y_pred_train, zero_division=0))
    test_f1.append(f1_score(y_test[i], y_pred_test, zero_division=0))

    train_score_i += r2_score(y_train[i], y_pred_train)  # Calculate R2 scores
    test_score_i += r2_score(y_test[i], y_pred_test)

  # R2 averages are only recorded for pitches whose 10 splits all trained
  if all_splits_trained:
    train_score_i = train_score_i/10
    test_score_i = test_score_i/10
    train_score[i] = train_score_i
    train_score_list.append(train_score_i)
    test_score[i] = test_score_i
    test_score_list.append(test_score_i)

average_train_score = np.mean(train_score_list)  # Calculate average of train score
median_train_score = np.median(train_score_list)   # Calculate median of train score
average_test_score = np.mean(test_score_list)  # Calculate average of test score
median_test_score = np.median(test_score_list)   # Calculate median of test score

print("mean train accuracy:",np.mean(train_accuracy))
print("mean test accuracy:",np.mean(test_accuracy))
print("mean train f1 score:",np.mean(train_f1))
print("mean test f1 score:",np.mean(test_f1))

# Generate 200 new time steps from the given seed
window = len(seed)        # the models expect exactly this many input timesteps
generated = list(seed)    # seed followed by every generated timestep
yhat = [0 for x in range(128)]
yhat_note = []

print('length of initial dict:', len(new_dict))
count = len(new_dict)  # next free id: the existing ids are 0 .. len(new_dict) - 1
el = 0 # Record how many new note combinations are generated
for j in range(200):
  union = []
  # the most recent `window` timesteps are the model input
  features = np.array(generated[-window:]).reshape(1,-1)
  for k in range(128):
    if (log_model[k] == 0):
      continue
    yhat[k] = log_model[k].predict(features)
    if yhat[k] == 1:
      yhat[k] = k
      union.append(k)

  # encode the predicted pitches exactly like the training combinations
  union = toString(np.array(union))
  if union not in new_dict:
    # a combination never seen before gets a fresh id
    new_dict[union] = count
    count += 1
    el += 1
  generated.append(new_dict[union])

# skip the seed
X_new = np.array(generated[window:])

print('length of new dict:',len(new_dict))
print(X_new)
print('# of new note combinations', el)

Add one pitch at the end as the signal to stop if the notes generated are all empty.

In [None]:
# Append the id 1 after the last generated timestep.
# NOTE(review): X_n is not what the final write call in this notebook uses
# (it passes X_new) -- confirm which of the two was meant to be written.
X_n = np.hstack((X_new,[1]))

The following two blocks transform the sequence of piano rolls to a midi file.

In [None]:
def piano_roll_to_pretty_midi(piano_roll, fs=1.5, program=0):
    '''Build a single-instrument PrettyMIDI object from a piano roll.

    A note starts where a cell goes from zero to a positive value and ends
    where the cell returns to zero. A change between two positive values does
    not split the note; the note keeps the velocity it started with.

    Parameters
    ----------
    piano_roll : np.ndarray, shape=(128,frames), dtype=int
        Piano roll of one instrument
    fs : float
        Sampling frequency of the columns, i.e. each column is spaced apart
        by ``1./fs`` seconds.
    program : int
        The program number of the instrument.
    Returns
    -------
    midi_object : pretty_midi.PrettyMIDI
        A pretty_midi.PrettyMIDI class instance describing
        the piano roll.
    '''
    num_pitches, _ = piano_roll.shape
    midi = pretty_midi.PrettyMIDI()
    track = pretty_midi.Instrument(program=program)

    # one silent column on each side, so that notes sounding in the first or
    # the last frame still produce an onset / release event
    padded = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # (frame, pitch) positions where the value differs from the previous frame
    change_frames, change_pitches = np.nonzero(np.diff(padded).T)

    # per pitch: velocity of the note currently sounding (0 = silent) and the
    # time at which it started
    sounding_velocity = np.zeros(num_pitches, dtype=int)
    onset_time = np.zeros(num_pitches)

    for frame, pitch in zip(change_frames, change_pitches):
        # + 1 compensates for the padding column added in front
        level = padded[pitch, frame + 1]
        moment = frame / fs

        if level > 0:
            # only a silent pitch starts a new note
            if sounding_velocity[pitch] == 0:
                onset_time[pitch] = moment
                sounding_velocity[pitch] = level
            continue

        # the value dropped to zero: close the note that was sounding
        track.notes.append(pretty_midi.Note(
            velocity=sounding_velocity[pitch],
            pitch=pitch,
            start=onset_time[pitch],
            end=moment))
        sounding_velocity[pitch] = 0

    midi.instruments.append(track)
    return midi

In [None]:
def write_midi_file_from_generated(generate, midi_file_name = "result.mid", start_index=0, fs=1.5, max_generated=100):
  """
  Turn a sequence of note-combination ids into a midi file.

  The ids are translated back to note-combination strings through the global
  ``new_dict``, painted into a piano roll with one column per timestep and
  written out with every note at velocity 100.

  Parameters
  ==========
  generate : iterable
    sequence of note-combination ids; every id must be a value of the global
    ``new_dict``

  midi_file_name : str
    name of the midi file to write

  start_index : int
    number of leading timesteps of ``generate`` to leave out

  fs : float
    sampling frequency of the piano roll, i.e. each timestep lasts
    ``1./fs`` seconds

  max_generated : int
    the piano roll has ``max_generated + 1`` columns; timesteps that do not
    fit are dropped

  Returns
  =======
  None; the result is the file written to ``midi_file_name``
  """
  # invert the vocabulary: integer id -> note-combination string
  reverse_dict = {ind_note: note for note, ind_note in new_dict.items()}
  note_string = [reverse_dict[ind_note] for ind_note in generate]

  num_frames = max_generated + 1
  array_piano_roll = np.zeros((128,num_frames), dtype=np.int16)

  # Cap the sequence at the width of the roll so that a sequence longer than
  # max_generated + 1 steps is truncated instead of raising IndexError.
  for index, note in enumerate(note_string[start_index:][:num_frames]):
    # an empty combination splits into no pitches and leaves the column silent
    for j in note.split():
      array_piano_roll[int(j),index] = 1

  generate_to_midi = piano_roll_to_pretty_midi(array_piano_roll, fs=fs)
  # the roll only stores on/off, so give every note a fixed velocity
  for note in generate_to_midi.instruments[0].notes:
    note.velocity = 100
  generate_to_midi.write(midi_file_name)

# Write the generated sequence X_new to "1_song_fs_1.5.mid", using the same
# sampling frequency (1.5) that the piano rolls were extracted with.
write_midi_file_from_generated(X_new, midi_file_name = "1_song_fs_1.5.mid", start_index=0 , fs=1.5, max_generated = 201)