### Dataset Preprocessing
Extract each piece's events, without the skyline, from the pre-processed pop1k7 dataset provided by the original Compose & Embellish repo, and convert them to event IDs for simba training.

In [2]:
import os
import pickle

def write_pkl(obj, filename):
    """Pickle ``obj`` into the file at ``filename``.

    The file is opened in binary write mode, so existing content is
    overwritten.
    """
    with open(filename, mode='wb') as handle:
        pickle.Pickler(handle).dump(obj)

def read_pkl(filename):
    """Return the object unpickled from the file at ``filename``.

    NOTE: unpickling can execute arbitrary code, so only use this on
    trusted files.
    """
    with open(filename, mode='rb') as handle:
        payload = pickle.Unpickler(handle).load()
    return payload

In [4]:
import os
# Source directory with the pre-processed piece pickles, destination directory
# for the converted event-id pickles, and the vocabulary pickle.
original_path = "/home/yihsin/dataset/compose-and-embellish-pop1k7/datasets/stage02_embellish/pop1k7_leedsheet2midi"
new_path = "/home/yihsin/dataset/pop1k7_cne"
dict_path = "/home/yihsin/mamba/vocab/skyline2midi_vocab.pkl"

# Load the vocabulary file once instead of unpickling it separately for each
# element: index 0 maps event strings to ids, index 1 maps ids back to
# event strings.
vocab = read_pkl(dict_path)
event2idx, idx2event = vocab[0], vocab[1]

In [5]:
# Display the id -> event-string mapping (notebook cell output).
idx2event

{0: 'Bar_None',
 1: 'Beat_0',
 2: 'Beat_1',
 3: 'Beat_10',
 4: 'Beat_11',
 5: 'Beat_12',
 6: 'Beat_13',
 7: 'Beat_14',
 8: 'Beat_15',
 9: 'Beat_2',
 10: 'Beat_3',
 11: 'Beat_4',
 12: 'Beat_5',
 13: 'Beat_6',
 14: 'Beat_7',
 15: 'Beat_8',
 16: 'Beat_9',
 17: 'Chord_0_+',
 18: 'Chord_0_/o7',
 19: 'Chord_0_7',
 20: 'Chord_0_M',
 21: 'Chord_0_M7',
 22: 'Chord_0_m',
 23: 'Chord_0_m7',
 24: 'Chord_0_o',
 25: 'Chord_0_o7',
 26: 'Chord_0_sus2',
 27: 'Chord_0_sus4',
 28: 'Chord_10_+',
 29: 'Chord_10_/o7',
 30: 'Chord_10_7',
 31: 'Chord_10_M',
 32: 'Chord_10_M7',
 33: 'Chord_10_m',
 34: 'Chord_10_m7',
 35: 'Chord_10_o',
 36: 'Chord_10_o7',
 37: 'Chord_10_sus2',
 38: 'Chord_10_sus4',
 39: 'Chord_11_+',
 40: 'Chord_11_/o7',
 41: 'Chord_11_7',
 42: 'Chord_11_M',
 43: 'Chord_11_M7',
 44: 'Chord_11_m',
 45: 'Chord_11_m7',
 46: 'Chord_11_o',
 47: 'Chord_11_o7',
 48: 'Chord_11_sus2',
 49: 'Chord_11_sus4',
 50: 'Chord_1_+',
 51: 'Chord_1_/o7',
 52: 'Chord_1_7',
 53: 'Chord_1_M',
 54: 'Chord_1_M7',
 55: 

In [None]:
# Convert every piece pickle under original_path into a flat list of event ids
# and store it under the same file name in new_path.
os.makedirs(new_path, exist_ok=True)  # write_pkl does not create directories
for pkl_file in sorted(os.listdir(original_path)):
    # os.listdir also returns sub-directories and stray files
    # (e.g. .ipynb_checkpoints); only the piece pickles can be unpickled.
    if not pkl_file.endswith(".pkl"):
        continue
    data = read_pkl(os.path.join(original_path, pkl_file))
    piece_events = []
    # data[1] holds (start, end) index pairs into the event list data[2]
    for start, end in data[1]:  # full song positions
        # NOTE(review): the event at index `start` itself is skipped --
        # presumably a segment marker; confirm against the dataset format.
        piece_events.extend(data[2][start + 1:end])
    # vocabulary keys have the form "<name>_<value>"
    idxs = [event2idx["{}_{}".format(ev["name"], ev["value"])] for ev in piece_events]
    write_pkl(idxs, os.path.join(new_path, pkl_file))

### Event2Midi functions

In [1]:
import miditoolkit

##############################
# constants
##############################
DEFAULT_BEAT_RESOL = 480                    # ticks per beat
DEFAULT_BAR_RESOL = DEFAULT_BEAT_RESOL * 4  # ticks per bar (4 beats)
DEFAULT_FRACTION = 16                       # positions a bar is divided into


##############################
# containers for conversion
##############################
class ConversionEvent(object):
  """Name/value view of a single event, used while converting events to MIDI.

  Args:
    event: an event string such as ``'Beat_0'``, ``'Chord_8_7'`` or
      ``'Note_Pitch_79'`` when ``is_full_event`` is false; otherwise a
      mapping with ``'name'`` and ``'value'`` keys.
    is_full_event: whether ``event`` is a name/value mapping instead of a
      string.

  Attributes:
    name: the event type (e.g. ``'Beat'``, ``'Chord'``, ``'Note_Pitch'``).
    value: the event value; a string when parsed from an event string, or
      ``None`` for a string without any underscore (e.g. ``'EOS'``).
  """
  def __init__(self, event, is_full_event=False):
    if is_full_event:
      self.name, self.value = event['name'], event['value']
      return

    parts = event.split('_')
    if 'Note' in event:
      # note names contain an underscore: 'Note_Pitch_79' -> ('Note_Pitch', '79')
      name, value = '_'.join(parts[:-1]), parts[-1]
    elif 'Chord' in event:
      # chord values contain an underscore: 'Chord_8_7' -> ('Chord', '8_7')
      name, value = parts[0], '_'.join(parts[1:])
    elif len(parts) == 2:
      name, value = parts
    elif len(parts) == 1:
      # bare tokens such as 'EOS' / 'PAD' previously left name and value
      # unset, which made __repr__ and every later attribute access fail
      name, value = event, None
    else:
      # fallback for other multi-part tokens: the last part is the value
      name, value = '_'.join(parts[:-1]), parts[-1]
    self.name, self.value = name, value

  def __repr__(self):
    return 'Event(name: {} | value: {})'.format(self.name, self.value)

class NoteEvent(object):
  """A note with its absolute start tick.

  The start tick is ``bar`` whole bars plus ``position`` steps of
  ``DEFAULT_BAR_RESOL // DEFAULT_FRACTION`` ticks, shifted by ``microtiming``
  when one is given.
  """
  def __init__(self, pitch, bar, position, duration, velocity, microtiming=None):
    ticks_per_position = DEFAULT_BAR_RESOL // DEFAULT_FRACTION
    onset = bar * DEFAULT_BAR_RESOL + position * ticks_per_position
    if microtiming is not None:
      onset += microtiming

    self.pitch = pitch
    self.start_tick = onset
    self.duration = duration
    self.velocity = velocity

  def set_microtiming(self, microtiming):
    # the offset is added on top of the current start tick, not replaced
    self.start_tick = self.start_tick + microtiming

  def set_velocity(self, velocity):
    self.velocity = velocity

  def __repr__(self):
    template = 'Note(pitch = {}, duration = {}, start_tick = {})'
    return template.format(self.pitch, self.duration, self.start_tick)
  
class TempoEvent(object):
  """A tempo value with its absolute start tick.

  The start tick is ``bar`` whole bars plus ``position`` steps of
  ``DEFAULT_BAR_RESOL // DEFAULT_FRACTION`` ticks.
  """
  def __init__(self, tempo, bar, position):
    ticks_per_position = DEFAULT_BAR_RESOL // DEFAULT_FRACTION
    self.tempo = tempo
    self.start_tick = bar * DEFAULT_BAR_RESOL + position * ticks_per_position

  def set_tempo(self, tempo):
    self.tempo = tempo

  def __repr__(self):
    template = 'Tempo(tempo = {}, start_tick = {})'
    return template.format(self.tempo, self.start_tick)

class ChordEvent(object):
  """A chord value with its absolute start tick.

  The start tick is ``bar`` whole bars plus ``position`` steps of
  ``DEFAULT_BAR_RESOL // DEFAULT_FRACTION`` ticks.
  """
  def __init__(self, chord_val, bar, position):
    self.chord_val = chord_val
    self.start_tick = bar * DEFAULT_BAR_RESOL + position * (DEFAULT_BAR_RESOL // DEFAULT_FRACTION)

  def __repr__(self):
    # mirrors the debugging representation of NoteEvent / TempoEvent
    return 'Chord(chord_val = {}, start_tick = {})'.format(
      self.chord_val, self.start_tick
    )


##############################
# conversion functions
##############################
def event_to_midi(events, mode, output_midi_path=None, is_full_event=False, 
                  return_tempos=False, enforce_tempo=False, enforce_tempo_evs=None):
  """Convert a sequence of events into a single-instrument miditoolkit MIDI object.

  Args:
    events: the events to convert; strings such as ``'Beat_0'`` or, when
      ``is_full_event`` is true, mappings with ``'name'`` / ``'value'`` keys.
    mode: ``'full'`` builds a note from each Note_Pitch, Note_Duration,
      Note_Velocity triple; ``'skyline'`` builds a note from each Note_Pitch,
      Note_Duration pair with a fixed velocity of 80. Any other value
      produces no notes.
    output_midi_path: when not None, the MIDI object is dumped to this path.
    is_full_event: forwarded to ``ConversionEvent``.
    return_tempos: when true, the parsed tempo events are returned as well.
    enforce_tempo: when ``False`` all parsed tempo changes are written;
      otherwise only ``enforce_tempo_evs`` are written.
    enforce_tempo_evs: iterable of objects with ``tempo`` and ``start_tick``
      attributes. When None (and ``enforce_tempo`` is set) the second parsed
      tempo event is used, falling back to the first one if there is only
      one, or to no tempo change if none was parsed.

  Returns:
    The MIDI object, or ``(midi_obj, temp_tempos)`` when ``return_tempos``
    is true.
  """
  events = [ConversionEvent(ev, is_full_event=is_full_event) for ev in events]
  print (events[:20])

  # assert events[0].name == 'Bar'
  temp_notes = []
  temp_tempos = []
  temp_chords = []

  cur_bar = -1
  cur_position = 0
  n_events = len(events)

  for i, ev in enumerate(events):
    # Events seen before the first 'Bar' are placed in bar 0; with bar -1
    # notes and chords would get a negative tick (tempos were already
    # clamped this way).
    bar = max(cur_bar, 0)
    # str() because values of full events are not necessarily strings,
    # and `'Conti' in <int>` raises TypeError
    is_conti = 'Conti' in str(ev.value)

    if ev.name == 'Bar':
      cur_bar += 1
    elif ev.name == 'Beat':
      cur_position = int(ev.value)
      assert cur_position >= 0 and cur_position < DEFAULT_FRACTION
    elif ev.name == 'Tempo' and not is_conti:
      temp_tempos.append(TempoEvent(int(ev.value), bar, cur_position))
    elif 'Note_Pitch' in ev.name:
      has_duration = (i + 1) < n_events and 'Note_Duration' in events[i + 1].name
      if mode == 'full' and has_duration and \
         (i + 2) < n_events and 'Note_Velocity' in events[i + 2].name:
        temp_notes.append(
          NoteEvent(
            pitch=int(ev.value),
            bar=bar, position=cur_position,
            duration=int(events[i + 1].value), velocity=int(events[i + 2].value)
          )
        )
      elif mode == 'skyline' and has_duration:
        # skyline notes carry no velocity event; a fixed velocity is used
        temp_notes.append(
          NoteEvent(
            pitch=int(ev.value),
            bar=bar, position=cur_position,
            duration=int(events[i + 1].value), velocity=80
          )
        )
    elif 'Chord' in ev.name and not is_conti:
      temp_chords.append(ChordEvent(ev.value, bar, cur_position))
    elif ev.name in ['EOS', 'PAD']:
      continue

  print ('# tempo changes:', len(temp_tempos), '| # notes:', len(temp_notes))
  midi_obj = miditoolkit.midi.parser.MidiFile()
  midi_obj.instruments = [
    miditoolkit.Instrument(program=0, is_drum=False, name='Piano')
  ]

  for n in temp_notes:
    midi_obj.instruments[0].notes.append(
      miditoolkit.Note(int(n.velocity), n.pitch, int(n.start_tick), int(n.start_tick + n.duration))
    )

  if enforce_tempo is False:
    tempos_to_write = temp_tempos
  else:
    if enforce_tempo_evs is None:
      # The default used to be the single object temp_tempos[1], which is
      # not iterable (and does not exist with fewer than two tempo events).
      # Use a one-element list: the second tempo if present, else the first.
      enforce_tempo_evs = temp_tempos[1:2] or temp_tempos[:1]
    tempos_to_write = enforce_tempo_evs
  for t in tempos_to_write:
    midi_obj.tempo_changes.append(
      miditoolkit.TempoChange(t.tempo, int(t.start_tick))
    )

  for c in temp_chords:
    midi_obj.markers.append(
      miditoolkit.Marker('Chord-{}'.format(c.chord_val), int(c.start_tick))
    )
  # NOTE(review): cur_bar is the 0-based index of the last bar, so this writes
  # no marker for the last bar -- confirm whether that is intended.
  for b in range(cur_bar):
    midi_obj.markers.append(
      miditoolkit.Marker('Bar-{}'.format(b+1), int(DEFAULT_BAR_RESOL * b))
    )

  if output_midi_path is not None:
    midi_obj.dump(output_midi_path)

  if return_tempos:
    return midi_obj, temp_tempos
  return midi_obj

### Test Generation

In [5]:
# Load one converted piece and map its event ids back to event strings.
sample_file = new_path+"/001_001.pkl"
idxs = read_pkl(sample_file)
events = [idx2event[token_id] for token_id in idxs]

In [6]:
# Render the decoded piece in 'full' mode and write it to test.mid.
event_to_midi(events, mode='full', output_midi_path="test.mid")

[Event(name: Bar | value: None), Event(name: Beat | value: 0), Event(name: Tempo | value: 110), Event(name: Chord | value: None_None), Event(name: Beat | value: 8), Event(name: Tempo | value: Conti), Event(name: Chord | value: Conti_Conti), Event(name: Beat | value: 12), Event(name: Tempo | value: Conti), Event(name: Chord | value: Conti_Conti), Event(name: Bar | value: None), Event(name: Beat | value: 0), Event(name: Tempo | value: 86), Event(name: Chord | value: 8_7), Event(name: Note_Pitch | value: 79), Event(name: Note_Duration | value: 120), Event(name: Note_Velocity | value: 79), Event(name: Note_Pitch | value: 70), Event(name: Note_Duration | value: 120), Event(name: Note_Velocity | value: 70)]
# tempo changes: 260 | # notes: 2937


ticks per beat: 480
max tick: 0
tempo changes: 260
time sig: 0
key sig: 0
markers: 305
lyrics: False
instruments: 1