#### Utils setup

In [None]:
# define miditok tokenizer config
from miditok import REMI, TokenizerConfig
from symusic import Score
import numpy as np

# Beat-range -> resolution table handed to miditok as `beat_res`.
# NOTE(review): per miditok's documented format the keys are (start_beat, end_beat)
# ranges and the values are samples per beat -- confirm against the installed version.
BEAT_RES = {
    (0, 1): 24,
    (1, 2): 8,
    (2, 4): 4,
    (4, 8): 2,
}

# Keyword arguments forwarded verbatim to TokenizerConfig.
TOKENIZER_PARAMS = dict(
    pitch_range=(21, 109),
    beat_res=BEAT_RES,
    num_velocities=24,
    special_tokens=["PAD", "BOS", "EOS"],
    use_chords=True,
    use_rests=True,
    use_tempos=True,
    use_time_signatures=True,
    use_programs=False,  # no multitrack here
    num_tempos=32,
    tempo_range=(50, 200),  # (min_tempo, max_tempo)
)

# Build the REMI tokenizer from the parameters above (program tokens are disabled,
# so this is not a multitrack setup); see the miditok docs for all parameters.
config = TokenizerConfig(**TOKENIZER_PARAMS)
tokenizer = REMI(config)

import pickle
def read_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it.

    NOTE(review): unpickling can execute arbitrary code, so only point this at
    files produced by this project.
    """
    with open(path, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload

# Load the vocabulary pickle; it unpacks into exactly two objects
# (presumably event-string -> index and index -> event-string -- confirm).
dict_path = "./stage02_embellish/vocab/skyline_miditok_vocab.pkl"
vocab_pair = read_pickle(dict_path)
event2idx, idx2event = vocab_pair

def midi2txt(midi_path, txt_path):
    """Tokenize a MIDI file and dump its first token sequence as text.

    Args:
        midi_path: Path of the MIDI file, loaded through ``Score``.
        txt_path: Destination text file; it is overwritten and receives one
            token string per line.

    Only the first element of the tokenizer's result is written.
    """
    score = Score(midi_path)
    # The tokenizer dispatches on its input type (score, path or tokens).
    sequences = tokenizer(score)
    with open(txt_path, "w") as out:
        out.writelines(token + "\n" for token in sequences[0].tokens)

def gnpy2midi(npy_path, midi_path="/content/test_from_npy.mid"):
    """Decode a saved array of event names into a MIDI file.

    Args:
        npy_path: ``.npy`` file whose entries are keys of ``event2idx``.
        midi_path: Where the decoded MIDI is written.

    Each entry is mapped to its vocabulary index, the indices are shaped into
    a single row, and the row is decoded by the module-level ``tokenizer``.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle objects, which can
    # execute arbitrary code -- only load arrays produced by this project.
    event_names = np.load(npy_path, allow_pickle=True)

    token_ids = []
    for event_name in event_names:
        token_ids.append(event2idx[event_name])
    id_row = np.array(token_ids).reshape(1, -1)

    decoded = tokenizer(id_row)
    decoded.dump_midi(midi_path)  # Save the MIDI file

def pkl2txt(pkl_path, txt_path, max_events=1000):
    """Write the skyline events of a parsed piece to a text file.

    The pickle at ``pkl_path`` must unpack into ``(skyline_pos, midi_pos,
    all_events)``. For every ``pos`` in ``skyline_pos`` the events
    ``all_events[pos[0] + 1:pos[1]]`` are collected (the event at ``pos[0]``
    itself is skipped), concatenated, and written one per line as
    ``"<name>_<value>"``.

    Args:
        pkl_path: Path of the parsed-piece pickle.
        txt_path: Destination text file; it is overwritten.
        max_events: Keep at most this many events (default 1000, the
            truncation previously hard-coded for testing). Pass ``None`` to
            write every event.

    Raises:
        ValueError: If ``max_events`` is negative.
    """
    if max_events is not None and max_events < 0:
        raise ValueError("max_events must be non-negative or None")

    skyline_pos, _midi_pos, all_events = read_pickle(pkl_path)
    segments = [all_events[pos[0] + 1:pos[1]] for pos in skyline_pos]
    flattened = [event for segment in segments for event in segment]

    if max_events is not None:
        flattened = flattened[:max_events]

    with open(txt_path, "w") as file:
        for item in flattened:
            file.write(item["name"] + "_" + item["value"] + "\n")

: 

In [2]:
def pkl2orig(pkl_path, orig_path):
    """Rebuild a MIDI file from the full-event spans of a parsed piece.

    The pickle at ``pkl_path`` must unpack into ``(skyline_pos, midi_pos,
    all_events)``. For every ``span`` in ``midi_pos`` the events
    ``all_events[span[0] + 1:span[1]]`` are taken, each is looked up in
    ``event2idx`` under the key ``"<name>_<value>"``, and the resulting single
    row of indices is decoded by ``tokenizer`` and saved to ``orig_path``.
    """
    _skyline_pos, midi_pos, all_events = read_pickle(pkl_path)

    segments = []
    for span in midi_pos:
        segments.append(all_events[span[0] + 1:span[1]])

    token_ids = []
    for segment in segments:
        for event in segment:
            token_ids.append(event2idx[event["name"] + "_" + event["value"]])

    id_row = np.array(token_ids).reshape(1, -1)
    decoded = tokenizer(id_row)
    decoded.dump_midi(orig_path)  # Save the MIDI file

In [None]:
# test pkl2orig
# pkl2orig("../dataset/gp-piano-parsed/ _60cemeCu6E.pkl","test.mid")

In [18]:
# Tokenize the 46414 skyline demo into the stage-01 test-piece folder.
midi2txt(
    midi_path="../demos/46414_skyline.mid",
    txt_path="generation/stage01_testpieces/46414.txt",
)

In [4]:
# Load the train / validation split listings and the per-composer piece index.
PKL_DIR = "stage02_embellish/pkl"
train_split = read_pickle(f"{PKL_DIR}/train.pkl")
valid_split = read_pickle(f"{PKL_DIR}/valid.pkl")
compo_split = read_pickle(f"{PKL_DIR}/composer_split.pkl")

In [5]:
import os
import random

# Composers whose pieces are sampled for the generation tests.
to_test = ["Bach_JohannSebastian", "Mozart_WolfgangAmadeus", "Beethoven_Ludwigvan"]

# Sampling configuration. A dedicated seeded RNG makes the selection reproducible
# across kernel restarts without touching the global `random` state.
N_TRAIN_SAMPLES = 3
N_VALID_SAMPLES = 2
SAMPLE_SEED = 42
_sample_rng = random.Random(SAMPLE_SEED)

generation_split = {}

for composer in to_test:
    all_songs = compo_split[composer]
    # Split entries are matched as " <id>.pkl" -- the leading space is intentional
    # and mirrors the file names used when the parsed pieces are loaded.
    train_pool = [s for s in all_songs if f" {s}.pkl" in train_split]
    valid_pool = [s for s in all_songs if f" {s}.pkl" in valid_split]
    # random.Random.sample raises ValueError if a pool is smaller than the request.
    temp_train = _sample_rng.sample(train_pool, N_TRAIN_SAMPLES)
    temp_valid = _sample_rng.sample(valid_pool, N_VALID_SAMPLES)
    generation_split[composer] = {"train":temp_train, "valid":temp_valid, "all":temp_train+temp_valid}


In [6]:
generation_split

{'Bach_JohannSebastian': {'train': ['7Dc3en1ntpM',
   '6n3n6Ouw_4c',
   'X9Z_FVjMSWc'],
  'valid': ['feikrhaRFTk', 'dBj7TNg4uWs'],
  'all': ['7Dc3en1ntpM',
   '6n3n6Ouw_4c',
   'X9Z_FVjMSWc',
   'feikrhaRFTk',
   'dBj7TNg4uWs']},
 'Mozart_WolfgangAmadeus': {'train': ['5nwML8h89tw',
   'SItm3YOLjmc',
   'yHSPuZu0z4k'],
  'valid': ['J9866zX07iw', '49oiE8Tj1UU'],
  'all': ['5nwML8h89tw',
   'SItm3YOLjmc',
   'yHSPuZu0z4k',
   'J9866zX07iw',
   '49oiE8Tj1UU']},
 'Beethoven_Ludwigvan': {'train': ['noAU3qDS1dA',
   '0_5iQCV62S4',
   'yibghhX9TdA'],
  'valid': ['LFZxrkiWvMU', 'wxR-khJsx3s'],
  'all': ['noAU3qDS1dA',
   '0_5iQCV62S4',
   'yibghhX9TdA',
   'LFZxrkiWvMU',
   'wxR-khJsx3s']}}

In [2]:
# Tags combined into the per-run output directory names used below,
# i.e. "stage01_{test_composer}{postfix}" and "stage02_{test_composer}{postfix}".
test_composer = "mozart"
postfix = "finetuned"

In [11]:
# For every selected Mozart piece, write its skyline events as text (stage 01)
# and rebuild its full MIDI (stage 02) next to where the generations are stored.
generation_home = "/home/yihsin/MidiStyleTransfer/Compose_and_Embellish_classical/generation"
stage01_dir = f"{generation_home}/stage01_{test_composer}{postfix}"
stage02_dir = f"{generation_home}/stage02_{test_composer}{postfix}"

# Create the output folders once; exist_ok avoids the separate existence check.
os.makedirs(stage01_dir, exist_ok=True)
os.makedirs(stage02_dir, exist_ok=True)

for piece in generation_split["Mozart_WolfgangAmadeus"]["all"]:
    # The leading space in the file name is part of the dataset's naming.
    pkl_file = f"/home/yihsin/MidiStyleTransfer/dataset/gp-piano-parsed/ {piece}.pkl"

    pkl2txt(pkl_file, f"{stage01_dir}/{piece}.txt")
    pkl2orig(pkl_file, f"{stage02_dir}/{piece}.mid")

In [2]:
# Run the stage-02 inference script with three positional arguments: the model
# config YAML and two directories (presumably the stage-01 input folder and the
# stage-02 output folder -- confirm against inference.py).
!python3 stage02_embellish/inference.py \
  stage02_embellish/config/gp_gpt2.yaml \
  generation/stage01_testpieces\
  generation/stage02_test

[preparing data] now at #0
[preparing data] now at #200
2025-04-16 10:01:10.764062: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-16 10:01:10.769544: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda/lib64
2025-04-16 10:01:10.769568: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
[info] model init completed
[info] temp = 1.2 | top_p = 0.97
loading check point from: ckpt/random_mask_mozart/params/ep700_loss0.912_params.pt
[info] model loaded
[# pieces] 1
[info] generated 1 bars

In [3]:
import os
# Convert every generated .npy file of the current run into a MIDI file.
generation_home = "/home/yihsin/MidiStyleTransfer/Compose_and_Embellish_classical/generation"
npy_dir = f"{generation_home}/stage02_{test_composer}{postfix}"
midi_dir = f"{generation_home}/midi_samples_0413/finetune"

# Make sure the destination exists before any MIDI file is written into it.
os.makedirs(midi_dir, exist_ok=True)

for g in sorted(os.listdir(npy_dir)):
    # splitext copes with names that have no dot (no IndexError) and only
    # accepts files whose final extension really is ".npy".
    idx, ext = os.path.splitext(g)
    if ext != ".npy":
        continue
    gnpy2midi(
        f"{npy_dir}/{g}",
        f"{midi_dir}/{idx}.mid"
    )

In [3]:
import os
# Convert every generated .npy file in the stage-02 test folder into a MIDI file.
generation_home = "/home/yihsin/MidiStyleTransfer/Compose_and_Embellish_classical/generation"
test_npy_dir = f"{generation_home}/stage02_test"
test_midi_dir = f"{generation_home}/midi_samples_0413"

# Make sure the destination exists before any MIDI file is written into it.
os.makedirs(test_midi_dir, exist_ok=True)

for g in sorted(os.listdir(test_npy_dir)):
    idx, ext = os.path.splitext(g)
    # gnpy2midi loads its input with np.load, so skip anything that is not a .npy file.
    if ext != ".npy":
        continue
    gnpy2midi(
        f"{test_npy_dir}/{g}",
        # Name the output after the stem: "_<name>.mid" rather than "_<name>.npy.mid".
        f"{test_midi_dir}/_{idx}.mid"
    )

In [23]:
# Tokenize the C-major sample into the stage-01 test text file.
midi2txt(
    midi_path="generation/midi_samples/cmaj.mid",
    txt_path="generation/stage01/test.txt",
)

In [6]:
# Decode the Mozart generation and its skyline condition into MIDI files.
# Note: here generation_home ends with a trailing slash, so paths are joined by concatenation.
generation_home = "/home/yihsin/MidiStyleTransfer/Compose_and_Embellish_classical/generation/"
for npy_rel, midi_rel in (
    ("stage02_gpt2_new/qjk_2stage_samp01.npy", "midi_samples/mozart_generated.mid"),
    ("stage02_gpt2_new/qjk_skyline.npy", "midi_samples/mozart_skyline_condition.mid"),
):
    gnpy2midi(generation_home + npy_rel, generation_home + midi_rel)

In [7]:
# Decode the Bach generation and its skyline condition into MIDI files.
# generation_home is expected to end with a trailing slash (paths are concatenated).
for npy_rel, midi_rel in (
    ("stage02_gpt2_new/bach_2stage_samp01.npy", "midi_samples/bach_generated.mid"),
    ("stage02_gpt2_new/bach_skyline.npy", "midi_samples/bach_skyline_condition.mid"),
):
    gnpy2midi(generation_home + npy_rel, generation_home + midi_rel)