# Pre-process POP909 for Polyphemus
- split 909 into phrases instead of bars
- Phrases are of varying bar-length --> Add padding??
- Use 4/4 only

In [9]:
import re
import os
import time
import sys
import multiprocessing
import itertools
import argparse
from itertools import product

import numpy as np
import tqdm
import pypianoroll as pproll
import muspy

In [311]:
filepath = "POP909/001/001.mid"
structure_path = "POP909_structure/001/human_label2.txt"

In [312]:
pproll_song = pproll.read(filepath, resolution=8)
muspy_song = muspy.read(filepath)

In [313]:
def split_string(s):
    # This regex pattern matches a letter followed by one or more digits
    pattern = re.compile(r'[a-zA-Z]\d+')
    # Find all matches in the string
    matches = pattern.findall(s)
    return matches

In [314]:
f = open(f"{structure_path}", "r")
structure = f.read()
phrases = split_string(structure)

In [315]:
phrases

['i4',
 'A4',
 'B4',
 'B4',
 'A4',
 'A4',
 'b4',
 'B4',
 'B4',
 'A4',
 'A4',
 'b4',
 'b4',
 'A4',
 'A4',
 'b4',
 'A4',
 'o3']

In [316]:
MAX_PHRASE_LEN = 8


def split_song_into_phrases(pproll_song, phrases):
    muspy_conv = muspy.from_pypianoroll(pproll_song)

    n_beats = [track.notes[-1].time//8 for track in muspy_conv.tracks]
    num_beats_per_phrase = 4*MAX_PHRASE_LEN
    num_bars = max(n_beats)//4

    tracks = []
    # For each piece, interate through each track
    for track in muspy_conv.tracks:
        # For each track, split track into bars
        bars = [[] for i in range(num_bars+1)]

        for note in track.notes:
            bar_num = note.time//(4*8)
            bars[bar_num].append(note)
        tracks.append(bars)
        
    # Split one song into phrases
    start_bar_idx = 0
    phrase_songs=[]
    for phrase in phrases: 
        num_bars = int(phrase[1:])
        end_bar_idx = start_bar_idx+num_bars
        phrase_song = muspy.Music(resolution=8)
        phrase_tracks = [muspy.Track(notes=[note for t in track[start_bar_idx:end_bar_idx] for note in t]) for track in tracks]
        phrase_song.tracks = phrase_tracks
        for idx, track_type in enumerate(["MELODY", "BRIDGE", "PIANO"]):
            phrase_song.tracks[idx].name = track_type
        phrase_songs.append(phrase_song)
        start_bar_idx = end_bar_idx
        
    return phrase_songs

In [319]:
len(phrases)

18

In [320]:
phrase_songs = split_song_into_phrases(pproll_song, phrases)
len(phrase_songs)

18

In [323]:
phrase_songs[0].tracks

[Track(program=0, is_drum=False, name='MELODY'),
 Track(program=0, is_drum=False, name='BRIDGE', notes=[Note(time=29, pitch=66, duration=3, velocity=64), Note(time=33, pitch=75, duration=3, velocity=64), Note(time=37, pitch=73, duration=1, velocity=64), ...]),
 Track(program=0, is_drum=False, name='PIANO', notes=[Note(time=33, pitch=47, duration=10, velocity=64), Note(time=35, pitch=54, duration=9, velocity=64), Note(time=37, pitch=59, duration=5, velocity=64), ...])]

In [324]:
tracks_notes = [track.notes for track in phrase_songs[0].tracks]

In [327]:
# Obtain length of subsong (maximum of each track's length)
length = 0
for notes in tracks_notes:
    track_length = max(note.end for note in notes) if notes else 0
    length = max(length, track_length)
length += 1


# Add timesteps until length is a multiple of resolution
length = length if length % (4*resolution) == 0 \
    else length + (4*resolution-(length % (4*resolution)))

In [329]:
resolution

8

In [334]:
import constants
from constants import PitchToken, DurationToken


In [377]:
def process_track_notes(tracks_notes):
    tracks_content = []
    tracks_structure = []

    max_phrase_len_res = 4*resolution*MAX_PHRASE_LEN

    for notes in tracks_notes:

        # track_content: length x MAX_SIMU_TOKENS x 2
        # This is used as a basis to build the final content tensors for
        # each sequence.
        # The last dimension contains pitches and durations. int16 is enough
        # to encode small to medium duration values.
        track_content = np.zeros((max_phrase_len_res, constants.MAX_SIMU_TOKENS, 2), 
                                np.int16)

        track_content[:, :, 0] = PitchToken.PAD.value
        track_content[:, 0, 0] = PitchToken.SOS.value
        track_content[:, :, 1] = DurationToken.PAD.value
        track_content[:, 0, 1] = DurationToken.SOS.value

        # Keeps track of how many notes have been stored in each timestep
        # (int8 imposes MAX_SIMU_TOKENS < 256)
        notes_counter = np.ones(max_phrase_len_res, dtype=np.int8)

        # Todo: np.put_along_axis?
        for note in notes:
            # Insert note in the lowest position available in the timestep

            t = note.time

            if notes_counter[t] >= constants.MAX_SIMU_TOKENS-1:
                # Skip note if there is no more space
                continue

            pitch = max(min(note.pitch, constants.MAX_PITCH_TOKEN), 0)
            track_content[t, notes_counter[t], 0] = pitch
            dur = max(min(note.duration, constants.MAX_DUR_TOKEN + 1), 1)
            track_content[t, notes_counter[t], 1] = dur-1
            notes_counter[t] += 1
        # print(f"num notes: {notes_counter}")
        # Add EOS token
        t_range = np.arange(0, max_phrase_len_res)
        track_content[t_range, notes_counter, 0] = PitchToken.EOS.value
        track_content[t_range, notes_counter, 1] = DurationToken.EOS.value

        # Get track activations, a boolean tensor indicating whether notes
        # are being played in a timestep (sustain does not count)
        # (needed for graph rep.)
        activations = np.array(notes_counter-1, dtype=bool)

        tracks_content.append(track_content)
        tracks_structure.append(activations)
        
        # n_tracks x length x MAX_SIMU_TOKENS x 2
        c_tensor = np.stack(tracks_content, axis=0)

        # n_tracks x length
        s_tensor = np.stack(tracks_structure, axis=0)
    
    return c_tensor, s_tensor


num notes: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
num notes: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1
 2 1 1 1 2 1 1 1 2 1 2 1 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 1 1
 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 1 1 1 1 1 2 1
 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1

In [None]:
c_tensor, s_tensor = process_track_notes(tracks_notes)

In [421]:
# Save sample (content and structure) to file
saved_samples=0
dest_dir = "preprocessed_909"
filename = os.path.basename(filepath)

sample_filepath = os.path.join(
    dest_dir, filename+str(saved_samples))
np.savez(sample_filepath, c_tensor=c_tensor, s_tensor=s_tensor)
print(f"saved to {sample_filepath}")

saved to preprocessed_909/001.mid0


In [101]:
tracks[1]

[Music(metadata=Metadata(schema_version='0.1'), resolution=480, tracks=[Track(program=0, is_drum=False, notes=[Note(time=1720, pitch=66, duration=879, velocity=118), Note(time=1720, pitch=66, duration=204, velocity=121)])]),
 Music(metadata=Metadata(schema_version='0.1'), resolution=480, tracks=[Track(program=0, is_drum=False, notes=[Note(time=40, pitch=75, duration=237, velocity=121)])]),
 Music(metadata=Metadata(schema_version='0.1'), resolution=480, tracks=[Track(program=0, is_drum=False, notes=[Note(time=280, pitch=73, duration=131, velocity=118)])]),
 Music(metadata=Metadata(schema_version='0.1'), resolution=480, tracks=[Track(program=0, is_drum=False, notes=[Note(time=520, pitch=71, duration=148, velocity=123)])]),
 Music(metadata=Metadata(schema_version='0.1'), resolution=480, tracks=[Track(program=0, is_drum=False, notes=[Note(time=760, pitch=80, duration=95, velocity=118)])]),
 Music(metadata=Metadata(schema_version='0.1'), resolution=480, tracks=[Track(program=0, is_drum=Fals

In [87]:
muspy_song.tracks[1].notes

[Note(time=1720, pitch=66, duration=204, velocity=121),
 Note(time=1960, pitch=75, duration=237, velocity=121),
 Note(time=2200, pitch=73, duration=131, velocity=118),
 Note(time=2440, pitch=71, duration=148, velocity=123),
 Note(time=2680, pitch=80, duration=95, velocity=118),
 Note(time=2800, pitch=82, duration=115, velocity=104),
 Note(time=2920, pitch=80, duration=597, velocity=121),
 Note(time=3640, pitch=66, duration=164, velocity=118),
 Note(time=3880, pitch=75, duration=248, velocity=118),
 Note(time=4120, pitch=73, duration=374, velocity=118),
 Note(time=4600, pitch=70, duration=746, velocity=118),
 Note(time=5560, pitch=73, duration=142, velocity=112),
 Note(time=5800, pitch=78, duration=122, velocity=118),
 Note(time=5920, pitch=80, duration=59, velocity=118),
 Note(time=6040, pitch=82, duration=421, velocity=118),
 Note(time=6520, pitch=73, duration=99, velocity=78),
 Note(time=6760, pitch=78, duration=104, velocity=115),
 Note(time=6880, pitch=80, duration=61, velocity=118

In [56]:
tracks_notes = [track.notes for track in muspy_song.tracks]

# Obtain length of subsong (maximum of each track's length)
length = 0
for notes in tracks_notes:
    track_length = max(note.end for note in notes) if notes else 0
    length = max(length, track_length)
length += 1

In [57]:
length

134562

In [58]:
resolution=8
# Add timesteps until length is a multiple of resolution
length = length if length % (4*resolution) == 0 \
    else length + (4*resolution-(length % (4*resolution)))

In [59]:
length

134592

In [23]:
muspy_song.tracks[0].notes[0]

Note(time=9160, pitch=61, duration=69, velocity=115)

In [15]:
pproll_song

Multitrack(name=None, resolution=8, tempo=array(shape=(7080, 1), dtype=float64), downbeat=array(shape=(2360, 1), dtype=bool), tracks=[StandardTrack(name='MELODY', program=0, is_drum=False, pianoroll=array(shape=(2360, 128), dtype=uint8)), StandardTrack(name='BRIDGE', program=0, is_drum=False, pianoroll=array(shape=(2360, 128), dtype=uint8)), StandardTrack(name='PIANO', program=0, is_drum=False, pianoroll=array(shape=(2360, 128), dtype=uint8))])

In [63]:
for i in range(10,100):
    filepath = f"POP909/0{i}/0{i}.mid"
    print(f"Processing {filepath}")
    pproll_song = pproll.read(filepath, resolution=8)
    muspy_song = muspy.read(filepath)
    # Only accept songs that have a time signature of 4/4 and no time changes
    for t in muspy_song.time_signatures:
        # print(t)
        if t.numerator != 4 or t.denominator != 4:
            # continue
            print("Song skipped ({}/{} time signature)".
                  format(t.numerator, t.denominator))
        else:
            print(f"Song accepted! {filepath}")

Processing POP909/010/010.mid
Song skipped (2/4 time signature)
Song skipped (1/4 time signature)
Song skipped (2/4 time signature)
Processing POP909/011/011.mid
Song skipped (2/4 time signature)
Processing POP909/012/012.mid
Song skipped (2/4 time signature)
Processing POP909/013/013.mid
Song skipped (2/4 time signature)
Processing POP909/014/014.mid
Song skipped (1/4 time signature)
Processing POP909/015/015.mid
Song skipped (2/4 time signature)
Processing POP909/016/016.mid
Song skipped (2/4 time signature)
Processing POP909/017/017.mid
Song skipped (2/4 time signature)
Processing POP909/018/018.mid
Song skipped (2/4 time signature)
Processing POP909/019/019.mid
Song skipped (2/4 time signature)
Processing POP909/020/020.mid
Song skipped (2/4 time signature)
Processing POP909/021/021.mid
Song skipped (2/4 time signature)
Processing POP909/022/022.mid
Song accepted! POP909/022/022.mid
Song skipped (1/4 time signature)
Processing POP909/023/023.mid
Song skipped (1/4 time signature)
Pr

In [16]:
pproll_song.tracks

[StandardTrack(name='MELODY', program=0, is_drum=False, pianoroll=array(shape=(3792, 128), dtype=uint8)),
 StandardTrack(name='BRIDGE', program=0, is_drum=False, pianoroll=array(shape=(3792, 128), dtype=uint8)),
 StandardTrack(name='PIANO', program=0, is_drum=False, pianoroll=array(shape=(3792, 128), dtype=uint8))]

### TODO:
1. Figure out how to process 2/4 time signature in midi files