In [None]:
import datetime
import sys
import os
import torch
import numpy as np
import random
from pathlib import Path
from collections import namedtuple
from miditok import REMIPlus, MMM
from miditok.constants import ADDITIONAL_TOKENS
from torch.nn import functional as F
from importlib import reload

In [None]:
# Runtime configuration shared by the rest of the notebook.
CTX_LEN = 1024
PRECISION = 'bf16'
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Context length and float mode are published through the environment
# (presumably read by the RWKV model code when it is imported -- TODO confirm).
os.environ.update({
    'RWKV_T_MAX': str(CTX_LEN),
    'RWKV_FLOAT_MODE': PRECISION,
})

# Extend the import search path with the project's model directory.
sys.path.append('./src/model')

# Display the working directory that the relative paths resolve against.
os.getcwd()

In [None]:
from runner import RWKV_RNN

In [None]:
PROJ_NAME = 'techno'
IS_BPE = True
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
TOKENS_PATH = f"/home/nico/data/ai/models/midi/{PROJ_NAME}{'/bpe' if IS_BPE else ''}"

# Make sure the output directory for this project exists.
Path(f'./out/{PROJ_NAME}').mkdir(parents=True, exist_ok=True)

# Plain integers (the parentheses used previously did not create tuples).
BINS_VELOCITY = 24
BINS_TEMPO = 24

# Build the tokenizer options as a *copy* of miditok's defaults. Assigning
# ADDITIONAL_TOKENS directly and then setting keys on it would mutate the
# library's module-level dict for everything else running in this kernel.
additional_tokens = {
    **ADDITIONAL_TOKENS,
    'Chord': True,
    'TimeSignature': True,
    'Program': True,
    'nb_tempos': BINS_TEMPO,
}
TOKENIZER = MMM(
    additional_tokens=additional_tokens,
    params=f'{TOKENS_PATH}/token_params.cfg',
    nb_velocities=BINS_VELOCITY
)

ORIG_VOCAB_SIZE = len(TOKENIZER.vocab)
BPE_VOCAB_SIZE = int(ORIG_VOCAB_SIZE * 1.25)

# Displayed for a quick sanity check of the vocabulary sizes.
(ORIG_VOCAB_SIZE, BPE_VOCAB_SIZE, len(TOKENIZER))

In [None]:
# Model hyper-parameters used when building the RNN runner.
N_EMBED = 768
N_LAYER = 10
CTX_LEN = 1024

params = dict(ctx_len=CTX_LEN, n_embd=N_EMBED, n_layer=N_LAYER)

# Immutable, attribute-style view of the same settings.
params_obj = namedtuple('RWKVParams', list(params))(**params)

In [None]:
import matplotlib.pyplot as plt

# Number of sampling steps; the schedules generated from these constants
# have exactly this many entries.
MAX_ITER = 4 * 1024

# Knobs controlling the randomness injected into the sampling schedules.
NOISE_LEVEL = 0.55
NOISE_FREQ = 10
PHASE = 0


def gen_sin_wave(total_iterations, min_value, max_value, noise_scale, noise_frequency, main_phase):
    """Build a clipped sine schedule with a superimposed sine "noise" ripple.

    One full sine period (shifted by ``main_phase``) is sampled at
    ``total_iterations`` evenly spaced points and rescaled to span
    ``[min_value, max_value]``. A second sine with ``noise_frequency`` cycles,
    amplitude ``noise_scale`` and phase ``-main_phase / 2`` is added on top,
    and the sum is clipped back into ``[min_value, max_value]``.

    Returns:
        A Python list of ``total_iterations`` floats.
    """
    t = np.linspace(0, 1, total_iterations)
    span = max_value - min_value

    carrier = np.sin(2 * np.pi * t + main_phase)
    ripple = noise_scale * np.sin(2 * np.pi * noise_frequency * t - main_phase / 2)

    # Same evaluation order as a single expression: offset + scaled carrier + ripple.
    unclipped = min_value + span * (1 + carrier) / 2 + ripple

    # Keep every value inside the requested range.
    return np.clip(unclipped, min_value, max_value).tolist()


# Per-step sampling schedules: one value for every generation step.
temp_values = gen_sin_wave(MAX_ITER, 0.25, 0.7, NOISE_LEVEL, NOISE_FREQ, PHASE)
top_p_values = gen_sin_wave(MAX_ITER, 0.5, 0.95, NOISE_LEVEL, NOISE_FREQ*2, PHASE+6)

# Plot both schedules with labels so the figure can be read on its own;
# the trailing semicolon suppresses the repr of the last call in the cell output.
fig, ax = plt.subplots()
ax.plot(temp_values, label='temperature')
ax.plot(top_p_values, label='top_p')
ax.set(title='Sampling schedules', xlabel='iteration', ylabel='value')
ax.legend();

In [None]:
import types

def sample_logits(logits, temperature=1.0, top_p=0.8):
    """Draw one token index from ``logits`` with top-p filtering and temperature.

    The logits are converted to probabilities, every token less likely than
    the one at which the sorted cumulative mass first exceeds ``top_p`` is
    zeroed, the survivors are raised to ``1 / temperature``, renormalised and
    a single index is drawn with ``np.random.choice`` (so the result follows
    NumPy's global seed).

    Args:
        logits: CPU tensor of unnormalised scores, expected to be 1-D.
            Half-precision dtypes (e.g. bfloat16) are upcast to float32.
        temperature: Positive number; values below 1 sharpen the
            distribution, values above 1 flatten it.
        top_p: Cumulative probability mass to keep. If the cumulative sum
            never exceeds it (e.g. ``top_p >= 1``) no token is filtered out.

    Returns:
        ``(index, sorted_probs)`` where ``index`` is the sampled position and
        ``sorted_probs`` are the unfiltered probabilities in descending order.

    Raises:
        ValueError: if ``temperature`` is not strictly positive.
    """
    # apply repetition penalty beforehand

    if temperature <= 0:
        raise ValueError(f'temperature must be > 0, got {temperature}')

    # NumPy has no bfloat16, so `.numpy()` fails on such tensors; upcast first.
    if logits.dtype not in (torch.float32, torch.float64):
        logits = logits.float()

    probs = F.softmax(logits, dim=-1).numpy()
    sorted_probs = np.sort(probs)[::-1]
    cumulative_probs = np.cumsum(sorted_probs)

    # Only truncate when the nucleus boundary actually exists. With an
    # all-False mask `argmax` would return 0 and wrongly keep just the
    # single most likely token.
    over_budget = cumulative_probs > top_p
    if over_budget.any():
        cutoff = float(sorted_probs[np.argmax(over_budget)])
        probs[probs < cutoff] = 0

    if temperature != 1.0:
        probs = np.power(probs, 1.0 / temperature)
    probs = probs / np.sum(probs)
    out = np.random.choice(a=len(probs), p=probs)

    return (out, sorted_probs)

# A fresh seed is drawn for every run, then applied to all random number
# generators the notebook touches (`random` for the prompt shuffles, NumPy for
# token sampling, torch in case the model draws random numbers) and printed so
# that a particular run can be reproduced by pinning SEED to the printed value.
SEED = random.randint(1000, 10000)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
print(f'SEED = {SEED}')

args = types.SimpleNamespace()
# Follow the device and precision selected in the configuration cell instead
# of hard-coding "cuda"/"bf16", so the CPU fallback of DEVICE is honoured.
args.RUN_DEVICE = DEVICE.type
args.FLOAT_MODE = PRECISION
args.map_location = 'cpu'
# NOTE(review): absolute, machine-specific checkpoint path -- consider making it configurable.
args.base_model = f'/home/nico/dev/projects/ai/musai/out/{PROJ_NAME}/rwkv-4'
args.n_layer = params['n_layer']
args.n_embd = params['n_embd']
args.ctx_len = int(params['ctx_len'])

model_rnn = RWKV_RNN(args)
model_rnn.to(DEVICE)

In [None]:
# initial state

# Names of all pitch tokens, in random order.
pitches = [name for name in TOKENIZER.vocab.keys() if 'Pitch_' in name]
random.shuffle(pitches)

# REMIPlus' note seq: Program-Pitch-Velocity-Duration
# Initial state should include at least one of such sequence to be reasonable.
# NOTE(review): the tokenizer in use appears to be MMM, not REMIPlus -- confirm the note above.

# Ids of all program tokens, in random order.
programs = [token_id
            for name, token_id in TOKENIZER.vocab.items() if 'Program' in name]
random.shuffle(programs)

# Fresh recurrent state, empty output buffer and the prompt fed to the model:
# track start, a random program, a note density, a bar start and a random pitch.
init_state, out_tokens = None, []
init_tokens = [
    TOKENIZER.vocab['Track_Start'],
    programs[0],
    *(TOKENIZER.vocab[name] for name in ('NoteDensity_0', 'Bar_Start', pitches[0])),
]

In [None]:
# Start from a clean slate so re-running this cell regenerates the sequence
# instead of continuing from whatever state a previous run left behind.
# The output begins with the prompt itself: the model is conditioned on these
# tokens, so they must also be part of the sequence that is decoded later.
init_state = None
out_tokens = list(init_tokens)

# Inference only: no autograd graph is needed.
with torch.no_grad():
    # Prime the recurrent state with the prompt. Nothing is sampled here;
    # only the logits produced after the last prompt token are used.
    for prompt_token in init_tokens:
        init_out, init_state = model_rnn.forward([prompt_token], init_state)

    # Generate MAX_ITER tokens: sample from the current logits with the
    # scheduled temperature/top_p, record the token, then feed it back so the
    # state always reflects every token in `out_tokens`.
    for i in range(MAX_ITER):
        out_token, sorted_probs = sample_logits(
            init_out.detach().cpu(), temperature=temp_values[i], top_p=top_p_values[i])

        out_tokens.append(out_token)
        init_out, init_state = model_rnn.forward(out_tokens[-1:], init_state)


In [None]:
# Names of the distinct vocabulary entries that occur in the generated
# sequence, listed in vocabulary order (not generation order). Membership is
# tested against a set so each lookup is O(1) instead of a scan of the list.
generated_ids = set(out_tokens)
vocab_tokens = [key for key, value in TOKENIZER.vocab.items() if value in generated_ids]
vocab_tokens

In [None]:
import re

# File-name timestamp: the current time in ISO format with every run of
# characters other than digits and "T" collapsed into a single dash.
d = re.sub(r'[^\dT]{1,}', '-', datetime.datetime.now().isoformat())
fname = f'out/{PROJ_NAME}/{d}.mid'

# Hand the generated tokens to the tokenizer and dump the result to `fname`
# (presumably a MIDI object -- TODO confirm against the miditok version in use).
TOKENIZER(out_tokens).dump(fname)