<a href="https://colab.research.google.com/github/ypghanate/NEA/blob/main/NEA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files, drive
import pretty_midi
import numpy as np
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from music21 import converter
from scipy import ndimage

!pip install pretty_midi music21 torch scipy --quiet

# Select the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using:", device)

# Mount Google Drive at /content/drive; the dataset path used later in this cell lives under it.
drive.mount('/content/drive')

def midi_to_pianoroll(midi_file, fs=16):
    """Rasterise a MIDI file into an 88-column piano roll.

    Args:
        midi_file: MIDI source handed straight to ``pretty_midi.PrettyMIDI``.
        fs: Number of time steps per second in the resulting grid.

    Returns:
        A float32 array of shape ``(n_steps, 88)``. Column ``k`` holds MIDI
        pitch ``k + 21``; cells covered by a note contain ``velocity / 127``.
        Notes from every instrument are written into the same grid.
    """
    song = pretty_midi.PrettyMIDI(midi_file)
    total_steps = int(song.get_end_time() * fs) + 1
    grid = np.zeros((total_steps, 88), dtype=np.float32)
    for instrument in song.instruments:
        for note in instrument.notes:
            key = note.pitch - 21
            if key < 0 or key >= 88:
                # Pitch has no column in the 88-wide roll.
                continue
            onset, release = int(note.start * fs), int(note.end * fs)
            grid[onset:release, key] = note.velocity / 127.0
    return grid

def smooth_roll_temporal(roll, window=3):
    """Apply a moving average along the time axis of every pitch column.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.
        window: Width of the averaging window in time steps.

    Returns:
        An array shaped and typed like ``roll``. Values beyond either end of
        the roll are treated as zero (``mode='constant'``).
    """
    result = np.zeros_like(roll)
    n_pitches = roll.shape[1]
    for col in range(n_pitches):
        column = roll[:, col]
        result[:, col] = ndimage.uniform_filter1d(
            column, size=window, mode='constant')
    return result

def threshold_and_clean_roll(roll, threshold=0.5, min_duration_steps=3):
    """Keep only runs of above-threshold cells that last long enough.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.
        threshold: A cell counts as active when its value is strictly greater
            than this.
        min_duration_steps: Minimum number of consecutive active steps a run
            needs in order to be kept.

    Returns:
        A float32 array with the shape of ``roll``. Kept runs carry the
        original ``roll`` values; everything else is zero.
    """
    mask = roll > threshold
    n_steps, n_pitches = mask.shape
    kept = np.zeros(mask.shape, dtype=np.float32)
    for col in range(n_pitches):
        # Pad with an inactive cell on both sides so every run has a rising
        # and a falling edge, then locate those edges.
        padded = np.concatenate(([0], mask[:, col].astype(np.int8), [0]))
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_stops = np.flatnonzero(edges == -1)  # exclusive end indices
        for begin, finish in zip(run_starts, run_stops):
            if finish - begin >= min_duration_steps:
                kept[begin:finish, col] = roll[begin:finish, col]
    return kept

def reduce_simultaneous_notes(roll, max_simultaneous=6):
    """Limit how many notes may sound at once in each time step.

    For every row, at most ``max_simultaneous`` positive cells are kept,
    chosen by largest value; all other cells become zero.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.
        max_simultaneous: Maximum number of positive cells kept per row.
            Zero or a negative number yields an all-zero roll.

    Returns:
        A new array shaped and typed like ``roll``.
    """
    out = np.zeros_like(roll)
    if max_simultaneous <= 0:
        # ``values[-0:]`` selects *everything*, so the "no notes allowed"
        # case has to be handled before slicing.
        return out
    for t, frame in enumerate(roll):
        active = np.flatnonzero(frame > 0)
        if len(active) > max_simultaneous:
            # argsort is ascending, so the tail holds the largest values.
            active = active[np.argsort(frame[active])[-max_simultaneous:]]
        out[t, active] = frame[active]
    return out

def pianoroll_to_midi(roll, output_file, fs=16, velocity_threshold=0.1):
    """Convert a piano roll into a single-instrument MIDI file.

    A note starts when a cell rises above ``velocity_threshold`` and ends at
    the first later step where it no longer does. Notes still sounding at the
    last step are closed at the end of the roll instead of being dropped.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``; column ``p`` is written
            as MIDI pitch ``p + 21``.
        output_file: Destination passed to ``PrettyMIDI.write``.
        fs: Time steps per second used to convert step indices to seconds.
        velocity_threshold: Cells strictly above this value count as sounding.

    Returns:
        The number of notes written.
    """
    midi = pretty_midi.PrettyMIDI(initial_tempo=120)
    piano = pretty_midi.Instrument(program=0)
    min_length = 0.03  # seconds; shorter notes are discarded

    n_steps, n_notes = roll.shape
    active = {}  # pitch column -> (start time in seconds, MIDI velocity)

    def close_note(p, end):
        start, vel = active.pop(p)
        if end > start + min_length:
            piano.notes.append(pretty_midi.Note(
                velocity=vel, pitch=p + 21, start=start, end=end))

    for t in range(n_steps):
        for p in range(n_notes):
            v = roll[t, p]
            if v > velocity_threshold:
                if p not in active:
                    # The velocity is fixed at the onset and clamped to the
                    # valid MIDI maximum.
                    active[p] = (t / fs, min(127, int(v * 127)))
            elif p in active:
                close_note(p, t / fs)

    # Flush notes that were still sounding when the roll ended.
    for p in list(active):
        close_note(p, n_steps / fs)

    midi.instruments.append(piano)
    midi.write(output_file)
    return len(piano.notes)

def note_density(roll):
    """Return the mean number of positive cells per row of ``roll`` as a float."""
    notes_per_step = np.count_nonzero(roll > 0, axis=1)
    return float(notes_per_step.mean())

def key_complexity(midi_file):
    """Score the detected key of a MIDI file: 0 for major, 1 otherwise.

    Key detection is best effort: if parsing or analysis fails, the function
    returns 0 rather than raising.

    Args:
        midi_file: Path handed to ``music21.converter.parse``.

    Returns:
        0 when the analysed key's mode is ``"major"`` or when analysis fails,
        1 for any other mode.
    """
    try:
        k = converter.parse(midi_file).analyze("key")
        return 0 if k.mode == "major" else 1
    except Exception:
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the notebook can still be interrupted.
        return 0

def pitch_complexity(roll):
    """Return 1 when the active pitch columns span more than 12, else 0.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.

    Returns:
        0 for a roll with no positive cells or a span of at most 12 columns,
        1 otherwise.
    """
    _, pitch_cols = np.nonzero(roll > 0)
    if pitch_cols.size == 0:
        return 0
    span = pitch_cols.max() - pitch_cols.min()
    return int(span > 12)

def overallComplexity(midi_file, roll):
    """Classify a piece as ``"hard"`` or ``"easy"``.

    The score is the sum of ``note_density(roll)``,
    ``key_complexity(midi_file)`` and ``pitch_complexity(roll)``; a score of
    3 or more is ``"hard"``.
    """
    components = (
        note_density(roll),
        key_complexity(midi_file),
        pitch_complexity(roll),
    )
    if sum(components) >= 3:
        return "hard"
    return "easy"

def pad(roll, length=600):
    """Force ``roll`` to exactly ``length`` rows.

    Longer rolls are truncated; shorter ones get zero rows appended at the
    end. Columns are never padded.
    """
    missing = length - roll.shape[0]
    if missing < 0:
        return roll[:length]
    return np.pad(roll, ((0, missing), (0, 0)))

def add_difficulty_channel(roll, target_diff):
    """Append one constant column that encodes the target difficulty.

    The extra float32 column is all ones when ``target_diff == "hard"`` and
    all zeros for any other value.
    """
    flag = 1 if target_diff == "hard" else 0
    channel = np.full((roll.shape[0], 1), flag, dtype=np.float32)
    return np.append(roll, channel, axis=1)

class PianoRollDataset(Dataset):
    """Paired input/target piano rolls served as float32 tensors.

    ``diffs`` is stored alongside the rolls but is not part of the items
    returned by indexing.
    """

    def __init__(self, inp, tgt, diffs):
        self.inputs, self.targets, self.diffs = inp, tgt, diffs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, i):
        features = torch.tensor(self.inputs[i], dtype=torch.float32)
        labels = torch.tensor(self.targets[i], dtype=torch.float32)
        return features, labels

class SeqSimplifier(nn.Module):
    """LSTM followed by a linear layer and a sigmoid, applied per time step.

    Args:
        input_size: Feature width of each input time step.
        hidden: LSTM hidden size.
        layers: Number of stacked LSTM layers.

    ``forward`` takes a batch-first tensor ``(batch, time, input_size)`` and
    returns ``(batch, time, 88)`` values in the open interval (0, 1).
    """

    def __init__(self, input_size=89, hidden=256, layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden,
            num_layers=layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden, out_features=88)
        self.sig = nn.Sigmoid()

    def forward(self, x):
        # Only the per-step outputs are used; the final (h, c) state is dropped.
        hidden_states = self.lstm(x)[0]
        logits = self.fc(hidden_states)
        return self.sig(logits)
def midi_prep(folder):
    """Build the training dataset from ``easy``/``medium``/``hard`` subfolders.

    Every ``.mid`` file in ``<folder>/medium`` is the source piece. For each
    difficulty whose counterpart exists (same filename with ``"medium"``
    replaced by the difficulty name), one pair is added: the padded source
    roll plus difficulty channel as input, the padded counterpart as target.

    Args:
        folder: Directory containing the three difficulty subfolders.

    Returns:
        A ``PianoRollDataset`` over the collected pairs.
    """
    inputs, targets, diffs = [], [], []
    source_dir = os.path.join(folder, "medium")
    variant_dirs = {
        "easy": os.path.join(folder, "easy"),
        "hard": os.path.join(folder, "hard"),
    }

    for name in os.listdir(source_dir):
        if not name.endswith(".mid"):
            continue
        roll_norm = pad(midi_to_pianoroll(os.path.join(source_dir, name)))

        for label, variant_dir in variant_dirs.items():
            variant_path = os.path.join(variant_dir, name.replace("medium", label))
            if not os.path.exists(variant_path):
                continue
            roll_variant = pad(midi_to_pianoroll(variant_path))
            inputs.append(add_difficulty_channel(roll_norm, label))
            targets.append(roll_variant)
            diffs.append(label)

    return PianoRollDataset(inputs, targets, diffs)

# Build the training pairs from the dataset folder on the mounted Drive.
midi_folder = "/content/drive/MyDrive/dataset"
dataset = midi_prep(midi_folder)
loader = DataLoader(dataset, batch_size=4, shuffle=True)

model = SeqSimplifier().to(device)

# SeqSimplifier already ends in a sigmoid, so BCELoss receives probabilities.
crit = nn.BCELoss()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

EPOCHS = 20
for epoch in range(EPOCHS):
    model.train()
    total = 0  # running sum of the per-batch losses for this epoch
    for x, y in loader:
        x, y = x.to(device), y.to(device)

        opt.zero_grad()
        out = model(x)
        loss = crit(out, y)
        loss.backward()
        opt.step()

        total += loss.item()
    # NOTE(review): len(loader) is 0 when midi_prep finds no pairs, which makes this division fail.
    print(f"Epoch {epoch+1}: loss = {total/len(loader):.4f}")

# Only the weights (state_dict) are saved; loading them needs a SeqSimplifier instance first.
MODEL_PATH = "/content/simplifier_model.pth"
torch.save(model.state_dict(), MODEL_PATH)
print("Saved model.")




Using: cpu
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1: loss = 0.6111
Epoch 2: loss = 0.1472
Epoch 3: loss = 0.0674
Epoch 4: loss = 0.0633
Epoch 5: loss = 0.0619
Epoch 6: loss = 0.0602
Epoch 7: loss = 0.0612
Epoch 8: loss = 0.0596
Epoch 9: loss = 0.0597
Epoch 10: loss = 0.0595
Epoch 11: loss = 0.0595
Epoch 12: loss = 0.0593
Epoch 13: loss = 0.0588
Epoch 14: loss = 0.0588
Epoch 15: loss = 0.0599
Epoch 16: loss = 0.0593
Epoch 17: loss = 0.0591
Epoch 18: loss = 0.0596
Epoch 19: loss = 0.0591
Epoch 20: loss = 0.0589
Saved model.


In [None]:
from google.colab import files, drive
import pretty_midi
import numpy as np
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from music21 import converter
from scipy import ndimage

!pip install pretty_midi music21 torch scipy --quiet

drive.mount('/content/drive')
# Ask for a file upload; the keys of the result are used as file paths below.
uploaded = files.upload()
# Use the first uploaded file as the piece to convert.
# NOTE(review): raises IndexError when nothing was uploaded.
midi_path = list(uploaded.keys())[0]

def midi_to_pianoroll(midi_file, fs=16):
    """Convert a MIDI file into a ``(n_steps, 88)`` float32 piano roll.

    Args:
        midi_file: MIDI source handed to ``pretty_midi.PrettyMIDI``.
        fs: Time steps per second.

    Returns:
        Array whose column ``k`` is MIDI pitch ``k + 21`` and whose cells hold
        ``velocity / 127`` while a note sounds, zero otherwise.
    """
    pm = pretty_midi.PrettyMIDI(midi_file)
    length = int(pm.get_end_time() * fs) + 1
    pianoroll = np.zeros((length, 88), dtype=np.float32)
    # Flatten the notes of all instruments; they share one grid.
    notes = [note for track in pm.instruments for note in track.notes]
    for note in notes:
        column = note.pitch - 21
        if not 0 <= column < 88:
            continue
        first_step = int(note.start * fs)
        last_step = int(note.end * fs)
        pianoroll[first_step:last_step, column] = note.velocity / 127.0
    return pianoroll

def smooth_roll_temporal(roll, window=3):
    """Smooth each pitch column over time with a uniform (box) filter.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.
        window: Filter width in time steps.

    Returns:
        Array shaped and typed like ``roll``; samples outside the roll are
        taken as zero.
    """
    output = np.zeros_like(roll)
    pitch_count = roll.shape[1]
    for pitch in range(pitch_count):
        series = roll[:, pitch]
        filtered = ndimage.uniform_filter1d(series, size=window, mode='constant')
        output[:, pitch] = filtered
    return output

def threshold_and_clean_roll(roll, threshold=0.5, min_duration_steps=3):
    """Zero out everything except sufficiently long above-threshold runs.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.
        threshold: Cells strictly greater than this are active.
        min_duration_steps: Runs with fewer consecutive active steps are
            removed.

    Returns:
        A float32 array with the shape of ``roll``; surviving runs keep their
        original values.
    """
    active_mask = roll > threshold
    n_steps, n_notes = active_mask.shape
    result = np.zeros(active_mask.shape, dtype=np.float32)
    for pitch in range(n_notes):
        # Surround the column with inactive cells so that runs touching
        # either end of the roll still produce a start and a stop edge.
        framed = np.concatenate(([0], active_mask[:, pitch].astype(np.int8), [0]))
        steps = np.diff(framed)
        onsets = np.flatnonzero(steps == 1)
        offsets = np.flatnonzero(steps == -1)  # exclusive run ends
        for onset, offset in zip(onsets, offsets):
            if offset - onset < min_duration_steps:
                continue
            result[onset:offset, pitch] = roll[onset:offset, pitch]
    return result

def reduce_simultaneous_notes(roll, max_simultaneous=6):
    """Cap the number of simultaneously sounding notes per time step.

    Each row keeps at most ``max_simultaneous`` positive cells, preferring
    the largest values; the remaining cells are zeroed.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.
        max_simultaneous: Maximum positive cells kept per row. Zero or a
            negative value produces an all-zero roll.

    Returns:
        A new array shaped and typed like ``roll``.
    """
    reduced = np.zeros_like(roll)
    if max_simultaneous <= 0:
        # Slicing with ``[-0:]`` would keep every note rather than none.
        return reduced
    for t, frame in enumerate(roll):
        active = np.flatnonzero(frame > 0)
        if len(active) > max_simultaneous:
            # Ascending argsort: the last entries are the largest values.
            active = active[np.argsort(frame[active])[-max_simultaneous:]]
        reduced[t, active] = frame[active]
    return reduced

def pianoroll_to_midi(roll, output_file, fs=16, velocity_threshold=0.1):
    """Write a piano roll to a MIDI file with one piano track.

    A note begins when a cell rises above ``velocity_threshold`` and ends at
    the first later step where it does not; notes still sounding at the end
    are closed at ``n_steps / fs``. Notes of 0.05 s or less are discarded and
    velocities are floored at 30. If no note survives, a single placeholder
    note (pitch 60, one second) is written instead.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``; column ``p`` maps to
            MIDI pitch ``p + 21``.
        output_file: Destination passed to ``PrettyMIDI.write``.
        fs: Time steps per second.
        velocity_threshold: Cells strictly above this count as sounding.

    Returns:
        Number of notes in the written track (including the placeholder).
    """
    pm = pretty_midi.PrettyMIDI(initial_tempo=120)
    pm.time_signature_changes.append(
        pretty_midi.TimeSignature(numerator=4, denominator=4, time=0))
    track = pretty_midi.Instrument(program=0, is_drum=False, name='Piano')
    total_steps, n_keys = roll.shape
    pending = {}  # pitch column -> (onset in seconds, MIDI velocity)

    def emit(key, onset, loudness, release):
        # Drop notes that are too short to be worth writing.
        if release > onset + 0.05:
            track.notes.append(pretty_midi.Note(
                velocity=loudness, pitch=key + 21, start=onset, end=release))

    for step in range(total_steps):
        for key in range(n_keys):
            level = roll[step, key]
            if level > velocity_threshold:
                if key not in pending:
                    pending[key] = (step / fs, max(30, int(level * 127)))
                continue
            if key in pending:
                onset, loudness = pending.pop(key)
                emit(key, onset, loudness, step / fs)

    closing_time = total_steps / fs
    for key, (onset, loudness) in pending.items():
        emit(key, onset, loudness, closing_time)

    if not track.notes:
        track.notes.append(pretty_midi.Note(velocity=64, pitch=60, start=0.0, end=1.0))
    track.notes.sort(key=lambda note: note.start)
    pm.instruments.append(track)
    pm.write(output_file)
    return len(track.notes)

def note_density(roll):
    """Return, as a float, the average count of positive cells per row."""
    sounding = roll > 0
    per_step = sounding.sum(axis=1)
    return float(per_step.mean())

def key_complexity(midi_file):
    """Return 0 when the detected key is major, 1 otherwise.

    Detection is best effort: any failure while parsing or analysing the
    file yields 0 instead of an exception.

    Args:
        midi_file: Path handed to ``music21.converter.parse``.

    Returns:
        0 for a major key or on failure, 1 for any other mode.
    """
    try:
        k = converter.parse(midi_file).analyze('key')
        return 0 if k.mode == 'major' else 1
    except Exception:
        # Narrower than a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        return 0

def pitch_complexity(roll):
    """Return 1 if the used pitch columns span more than 12, otherwise 0.

    A roll without any positive cell scores 0.
    """
    pitch_cols = np.nonzero(roll > 0)[1]
    if not pitch_cols.size:
        return 0
    lowest, highest = pitch_cols.min(), pitch_cols.max()
    return 1 if int(highest - lowest) > 12 else 0

def overallComplexity(midi_file, roll):
    """Label a piece ``"hard"`` when its combined complexity score reaches 3.

    The score adds ``note_density(roll)``, ``key_complexity(midi_file)`` and
    ``pitch_complexity(roll)``; anything below 3 is ``"easy"``.
    """
    total = note_density(roll)
    total = total + key_complexity(midi_file)
    total = total + pitch_complexity(roll)
    return "hard" if total >= 3 else "easy"

def pad(roll, length=600):
    """Return ``roll`` with exactly ``length`` rows.

    Longer rolls are cut, shorter ones get zero rows appended, and a roll
    that already has the right length is returned unchanged (same object).
    """
    current = roll.shape[0]
    if current == length:
        return roll
    if current > length:
        return roll[:length]
    return np.pad(roll, ((0, length - current), (0, 0)))

def add_difficulty_channel(roll, target_diff):
    """Append a float32 column flagging the requested difficulty.

    The column is 1.0 everywhere for ``'hard'`` and 0.0 for anything else.
    """
    is_hard = target_diff == 'hard'
    flag_column = np.full((roll.shape[0], 1), 1.0 if is_hard else 0.0, dtype=np.float32)
    return np.concatenate((roll, flag_column), axis=1)

class PianoRollDataset(Dataset):
    """Dataset of (input roll, target roll, target difficulty) triples.

    Rolls are converted with ``torch.tensor`` on access, keeping their
    original dtype; the difficulty label is returned as stored.
    """

    def __init__(self, input_rolls, target_rolls, target_diffs):
        self.inputs, self.targets = input_rolls, target_rolls
        self.target_diffs = target_diffs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, i):
        source = torch.tensor(self.inputs[i])
        target = torch.tensor(self.targets[i])
        return source, target, self.target_diffs[i]

class SeqSimplifier(nn.Module):
    """Sequence model: stacked LSTM, linear projection to 88 values, sigmoid.

    Args:
        input_size: Feature width of each input time step.
        hidden: LSTM hidden size.
        layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=89, hidden=256, layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden,
            num_layers=layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden, out_features=88)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map ``(batch, time, input_size)`` to ``(batch, time, 88)`` values in (0, 1)."""
        per_step, _state = self.lstm(x)
        projected = self.fc(per_step)
        return self.sigmoid(projected)

def midi_prep(midi_folder):
    """Collect (medium -> easy/hard) roll pairs from a dataset folder.

    Each ``.mid`` file in ``<midi_folder>/medium`` is paired with the file of
    the same name (``"medium"`` replaced by the difficulty) in the ``easy``
    and ``hard`` subfolders, when that file exists. All rolls are padded or
    truncated to 600 steps.

    Args:
        midi_folder: Directory containing the three difficulty subfolders.

    Returns:
        A ``PianoRollDataset`` of inputs (source roll plus difficulty
        channel), targets and difficulty labels.
    """
    max_length = 600
    normal_folder = os.path.join(midi_folder, "medium")
    variant_folders = (
        ("easy", os.path.join(midi_folder, "easy")),
        ("hard", os.path.join(midi_folder, "hard")),
    )
    inputs, targets, target_diffs = [], [], []

    for file in os.listdir(normal_folder):
        if not file.endswith(".mid"):
            continue

        source_path = os.path.join(normal_folder, file)
        normal_roll = pad(midi_to_pianoroll(source_path), max_length)

        for label, variant_folder in variant_folders:
            variant_path = os.path.join(variant_folder, file.replace("medium", label))
            if not os.path.exists(variant_path):
                continue
            variant_roll = pad(midi_to_pianoroll(variant_path), max_length)
            inputs.append(add_difficulty_channel(normal_roll, label))
            targets.append(variant_roll)
            target_diffs.append(label)

    return PianoRollDataset(inputs, targets, target_diffs)

midi_folder = "/content/drive/MyDrive/dataset"
dataset = midi_prep(midi_folder)
loader = DataLoader(dataset, batch_size=4, shuffle=True)
# NOTE(review): dataset and loader are not referenced again in this cell.

model = SeqSimplifier()
MODEL_PATH = "/content/simplifier_model.pth"
# NOTE(review): when the checkpoint is missing, the model keeps its random initial weights without any warning.
if os.path.exists(MODEL_PATH):
    model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))
model.eval()

roll = midi_to_pianoroll(midi_path)
user_skill = input("Enter your skill level: ").strip().lower()

# Estimated difficulty of the uploaded piece ("hard" or "easy").
sheet_diff = overallComplexity(midi_path, roll)

# Choose the difficulty to generate: simplify a hard piece for easy/medium
# players, harden an easy piece for hard players, otherwise keep it as is.
if sheet_diff == 'hard' and user_skill in ['easy', 'medium']:
    target = 'easy'
elif sheet_diff == 'easy' and user_skill == 'hard':
    target = 'hard'
else:
    target = sheet_diff

# Append the difficulty channel and add a leading batch dimension of 1.
roll_input = add_difficulty_channel(roll, target)
roll_tensor = torch.tensor(roll_input[None, :, :]).float()

with torch.no_grad():
    simplified = model(roll_tensor).squeeze(0).numpy()

# Post-process the raw model output: smooth over time, drop short or weak
# runs, then cap the number of simultaneous notes.
simplified = smooth_roll_temporal(simplified, window=3)
simplified = threshold_and_clean_roll(simplified, threshold=0.5, min_duration_steps=5)
simplified = reduce_simultaneous_notes(simplified, max_simultaneous=4)

num_notes = pianoroll_to_midi(simplified, "generated_song.mid", fs=16)
# Fallback: if the model produced fewer than 10 notes, overwrite the file with
# the original roll at half velocity.
if num_notes < 10:
    pianoroll_to_midi(roll * 0.5, "generated_song.mid", fs=16)

# Convert the generated MIDI to MusicXML and offer it for download.
score = converter.parse("generated_song.mid")
xml_file = "generated_song.xml"
score.write('musicxml', fp=xml_file)
files.download(xml_file)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Saving abcdeg.mid to abcdeg (1).mid
Enter your skill level: hard
this is target hard


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# def smooth_roll_temporal(roll, window=3):
#     smoothed = np.zeros_like(roll)
#     for pitch_idx in range(roll.shape[1]):
#         smoothed[:, pitch_idx] = ndimage.uniform_filter1d(roll[:, pitch_idx], size=window, mode='constant')
#     return smoothed

# def threshold_and_clean_roll(roll, threshold=0.5, min_duration_steps=3):
#     binary_roll = (roll > threshold).astype(np.float32)
#     n_steps, n_notes = binary_roll.shape
#     cleaned_roll = np.zeros_like(binary_roll)
#     for pitch_idx in range(n_notes):
#         i = 0
#         while i < n_steps:
#             if binary_roll[i, pitch_idx] > 0:
#                 j = i
#                 while j < n_steps and binary_roll[j, pitch_idx] > 0:
#                     j += 1
#                 duration = j - i
#                 if duration >= min_duration_steps:
#                     cleaned_roll[i:j, pitch_idx] = roll[i:j, pitch_idx]
#                 i = j
#             else:
#                 i += 1
#     return cleaned_roll

# def reduce_simultaneous_notes(roll, max_simultaneous=6):
#     n_steps, n_notes = roll.shape
#     reduced = np.zeros_like(roll)
#     for t in range(n_steps):
#         active = np.where(roll[t, :] > 0)[0]
#         if len(active) <= max_simultaneous:
#             reduced[t, active] = roll[t, active]
#         else:
#             top_indices = active[np.argsort(roll[t, active])[-max_simultaneous:]]
#             reduced[t, top_indices] = roll[t, top_indices]
#     return reduced

# def adaptive_threshold(roll, min_notes=20, max_notes=80):
#     best_thresh = None
#     best_score = float("inf")
#     best_cleaned = None
#     center = (min_notes + max_notes) / 2.0
#     for thresh in [0.6, 0.5, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1]:
#         cleaned = threshold_and_clean_roll(roll, threshold=thresh, min_duration_steps=4)
#         cleaned = reduce_simultaneous_notes(cleaned, max_simultaneous=6)
#         note_count = int(np.sum(np.any(cleaned > 0, axis=0)))
#         avg_polyphony = float(np.mean(np.sum(cleaned > 0, axis=1)))
#         score = abs(note_count - center) + max(0.0, avg_polyphony - 8.0) * 10.0
#         if score < best_score:
#             best_score = score
#             best_thresh = thresh
#             best_cleaned = cleaned
#     return best_cleaned, best_thresh

In [None]:
!pip install pretty_midi music21 torch --quiet

import os, numpy as np, torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pretty_midi
from music21 import converter

def midi_to_pianoroll(midi_file, fs=16):
    """Load a MIDI file and render it as a ``(n_steps, 88)`` float32 roll.

    Args:
        midi_file: MIDI source handed to ``pretty_midi.PrettyMIDI``.
        fs: Time steps per second.

    Returns:
        Array in which column ``k`` stands for MIDI pitch ``k + 21`` and a
        sounding note is stored as ``velocity / 127``.
    """
    parsed = pretty_midi.PrettyMIDI(midi_file)
    step_count = int(parsed.get_end_time() * fs) + 1
    matrix = np.zeros((step_count, 88), dtype=np.float32)
    for track in parsed.instruments:
        # Keep only the notes that fit into the 88 columns of the roll.
        playable = (note for note in track.notes if 0 <= note.pitch - 21 < 88)
        for note in playable:
            begin = int(note.start * fs)
            stop = int(note.end * fs)
            matrix[begin:stop, note.pitch - 21] = note.velocity / 127.0
    return matrix


def note_density(roll):
    """Return the mean number of positive cells per row of ``roll``."""
    active = roll > 0
    return active.sum(axis=1).mean()

def key_complexity(midi_file):
    """Return 0 when the analysed key of ``midi_file`` is major, else 1.

    Errors raised while parsing or analysing the file propagate to the caller.
    """
    parsed = converter.parse(midi_file)
    detected = parsed.analyze('key')
    if detected.mode == 'major':
        return 0
    return 1

def pitch_complexity(roll):
    """Return 1 when the active pitch columns span more than 12, else 0.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``.

    Returns:
        0 for a roll without positive cells or with a span of at most 12
        columns, 1 otherwise.
    """
    pitch_cols = np.where(roll > 0)[1]
    if pitch_cols.size == 0:
        # np.ptp raises ValueError on an empty array; a silent roll has no
        # pitch spread, so it scores 0.
        return 0
    return int(np.ptp(pitch_cols) > 12)

def overallComplexity(midi_file, roll):
    """Return ``"hard"`` when density + key + pitch complexity is at least 3.

    Otherwise the piece is labelled ``"easy"``.
    """
    parts = [note_density(roll), key_complexity(midi_file), pitch_complexity(roll)]
    if sum(parts) >= 3:
        return "hard"
    return "easy"

def add_difficulty_channel(roll, target_diff):
    """Append a float32 difficulty column to ``roll``.

    The column is 0 for ``'easy'`` and 1 for every other value (including
    ``'hard'``).
    """
    fill = 0 if target_diff == 'easy' else 1
    flag_column = np.full((roll.shape[0], 1), fill, dtype=np.float32)
    return np.concatenate((roll, flag_column), axis=1)

def pad(roll, length=600):
    """Truncate or zero-pad ``roll`` along axis 0 to ``length`` rows.

    A roll that already has ``length`` rows is returned as the same object.
    """
    shortfall = length - roll.shape[0]
    if shortfall == 0:
        return roll
    if shortfall < 0:
        return roll[:length]
    return np.pad(roll, ((0, shortfall), (0, 0)))

class PianoRollDataset(Dataset):
    """Indexable collection of (input roll, target roll, difficulty) samples.

    Rolls are wrapped with ``torch.tensor`` when an item is fetched; the
    difficulty label is passed through unchanged.
    """

    def __init__(self, input_rolls, target_rolls, target_diffs):
        self.inputs = input_rolls
        self.targets = target_rolls
        self.target_diffs = target_diffs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, i):
        x = torch.tensor(self.inputs[i])
        y = torch.tensor(self.targets[i])
        label = self.target_diffs[i]
        return x, y, label

def midi_prep(midi_folder):
    """Collect (medium -> easy/hard) roll pairs from a dataset folder.

    Each ``.mid`` file in ``<midi_folder>/medium`` is paired with the file of
    the same name (``"medium"`` replaced by the difficulty) in the ``easy``
    and ``hard`` subfolders. A difficulty whose file does not exist is simply
    skipped. All rolls are padded or truncated to 600 steps.

    Args:
        midi_folder: Directory containing the three difficulty subfolders.

    Returns:
        A ``PianoRollDataset`` of inputs (source roll plus difficulty
        channel), targets and difficulty labels.
    """
    inputs, targets, target_diffs = [], [], []

    normal_folder = os.path.join(midi_folder, "medium")
    variant_folders = {
        "easy": os.path.join(midi_folder, "easy"),
        "hard": os.path.join(midi_folder, "hard"),
    }
    max_length = 600

    for file in os.listdir(normal_folder):
        if not file.endswith(".mid"):
            continue

        normal_path = os.path.join(normal_folder, file)
        normal_roll = pad(midi_to_pianoroll(normal_path), max_length)

        for label, variant_folder in variant_folders.items():
            variant_path = os.path.join(variant_folder, file.replace("medium", label))
            # Only parse counterparts that exist. The rolls used to be parsed
            # a second time unconditionally at the end of the loop, which
            # crashed on any missing file and whose results were never used.
            if not os.path.exists(variant_path):
                continue
            variant_roll = pad(midi_to_pianoroll(variant_path), max_length)
            inputs.append(add_difficulty_channel(normal_roll, label))
            targets.append(variant_roll)
            target_diffs.append(label)

    return PianoRollDataset(inputs, targets, target_diffs)

class SeqSimplifier(nn.Module):
    """Stacked LSTM with a per-step linear head and sigmoid activation.

    Args:
        input_size: Feature width of each input time step.
        hidden: LSTM hidden size.
        layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=89, hidden=256, layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden, num_layers=layers, batch_first=True)
        self.fc = nn.Linear(hidden, out_features=88)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``(batch, time, 88)`` values in (0, 1) for a batch-first input."""
        sequence_out = self.lstm(x)[0]  # discard the final (h, c) state
        return self.sigmoid(self.fc(sequence_out))


# NOTE(review): this cell does not mount Drive itself; the path below only
# resolves if an earlier cell already mounted it.
midi_folder = "/content/drive/MyDrive/dataset/"
dataset = midi_prep(midi_folder)
loader = DataLoader(dataset, batch_size=4, shuffle=True)

model = SeqSimplifier()
# SeqSimplifier already ends in a sigmoid, so BCELoss receives probabilities.
crit = nn.BCELoss()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

print(f"Training on {len(dataset)} pairs...")

for epoch in range(20):
    total_loss = 0  # running sum of per-batch losses for this epoch
    # The dataset yields the difficulty label as a third item; it is not used in the loop.
    for x, y, target_diffs in loader:
        opt.zero_grad()
        out = model(x)
        loss = crit(out, y)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    # NOTE(review): len(loader) is 0 when no pairs were found, which makes this division fail.
    print(f"Epoch {epoch+1:02d} | loss = {total_loss / len(loader):.4f}")

print("Training complete.")
# Saved relative to the current working directory.
torch.save(model.state_dict(), "simplifier_model.pth")
print("Model saved as simplifier_model.pth")


Training on 30 pairs...
Epoch 01 | loss = 0.6092


KeyboardInterrupt: 

In [None]:
!git clone https://github.com/HemantKArya/Melodfy.git
%cd Melodfy

!pip install --upgrade pip setuptools wheel
!pip install -r requirements.txt

!pip install .

!ffmpeg -version

from melodfy import audio_to_midi

# Presumably the input recording and the MIDI file to produce — confirm against the Melodfy API.
audio_file = "my_piano.wav"
output_midi = "output.mid"

# NOTE(review): depends on the `melodfy` import above; the recorded run of this cell
# stopped with ModuleNotFoundError before reaching this call.
audio_to_midi(audio_file, output_midi)
print("Conversion complete! Saved as:", output_midi)

fatal: destination path 'Melodfy' already exists and is not an empty directory.
/content/Melodfy
Collecting audioread@ git+https://github.com/HemantKArya/audioread.git (from -r requirements.txt (line 1))
  Cloning https://github.com/HemantKArya/audioread.git to /tmp/pip-install-32e1pp1s/audioread_aa01ed2dbb1c4a3b9885be3df2131cd4
  Running command git clone --filter=blob:none --quiet https://github.com/HemantKArya/audioread.git /tmp/pip-install-32e1pp1s/audioread_aa01ed2dbb1c4a3b9885be3df2131cd4
  Resolved https://github.com/HemantKArya/audioread.git to commit 8ead66d8156a18ca1a2e39752c4c4165162124fd
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting librosa==0.9.2 (from -r requirements.txt (line 2))
  Downloading librosa-0.9.2-py3-none-any.whl.metadata (8.2 kB)
Collecting mido==1.3.2 (from -r requirements.txt (line 3))
  Downloading mido-1.3.2-py3-none-any.

ModuleNotFoundError: No module named 'melodfy'

In [None]:
import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``."""
    delta = x1 - x2
    return np.sqrt(np.sum(np.square(delta)))

class KNN:
    """k-nearest-neighbours classifier using ``euclidean_distance``.

    Args:
        k: Number of neighbours that vote on each prediction.
    """

    def __init__(self, k=3):
        self.k = k

    def fit(self, X, y):
        """Store the training samples and their labels; no computation happens here."""
        self.X_train, self.y_train = X, y

    def predict(self, X):
        """Return a list with one predicted label per sample in ``X``."""
        return list(map(self._predict, X))

    def _predict(self, x):
        """Return the most frequent label among the ``k`` closest training samples."""
        dists = []
        for sample in self.X_train:
            dists.append(euclidean_distance(x, sample))

        nearest = np.argsort(dists)[:self.k]
        votes = Counter(self.y_train[idx] for idx in nearest)

        # most_common() orders by descending count; take the top label.
        return votes.most_common()[0][0]

In [None]:
import pretty_midi
from music21 import converter, stream, midi
import os

# ------------------------
# Step 1: Convert piano roll → MIDI
# ------------------------
def pianoroll_to_midi_pm(roll, output_file, fs=16):
    """Write a piano roll to a MIDI file using pretty_midi.

    For every pitch column, each run of consecutive steps whose scaled
    velocity (``int(value * 127)``) is positive becomes one note. The note's
    velocity is taken from the first step of the run, raised to at least 60
    and capped at 127. Runs that reach the end of the roll are closed at
    ``n_steps / fs``.

    Args:
        roll: 2-D array indexed as ``[time, pitch]``; column ``p`` is written
            as MIDI pitch ``p + 21``.
        output_file: Destination passed to ``PrettyMIDI.write``.
        fs: Time steps per second.
    """
    midi_data = pretty_midi.PrettyMIDI()
    piano = pretty_midi.Instrument(program=0)
    n_steps, n_notes = roll.shape

    def add_note(pitch_idx, start_step, end_step, onset_vel):
        piano.notes.append(pretty_midi.Note(
            velocity=min(127, max(onset_vel, 60)),
            pitch=pitch_idx + 21,
            start=start_step / fs,
            end=end_step / fs))

    for pitch_idx in range(n_notes):
        start_step = None  # step index where the current run began, if any
        onset_vel = 0
        for t in range(n_steps):
            vel = int(roll[t, pitch_idx] * 127)
            if vel > 0:
                if start_step is None:
                    # Remember the onset velocity; at the step where the run
                    # ends the value is already zero.
                    start_step, onset_vel = t, vel
            elif start_step is not None:
                add_note(pitch_idx, start_step, t, onset_vel)
                start_step = None
        if start_step is not None:
            # The run was still sounding on the last step, including a note
            # that only started there.
            add_note(pitch_idx, start_step, n_steps, onset_vel)

    midi_data.instruments.append(piano)
    midi_data.write(output_file)
    print("Saved MIDI:", output_file)

# ------------------------
# Step 2: Convert MIDI → MusicXML
# ------------------------
def midi_to_musicxml(midi_file, xml_file):
    """Parse ``midi_file`` with music21 and write it to ``xml_file`` as MusicXML."""
    converter.parse(midi_file).write('musicxml', fp=xml_file)
    print("Saved MusicXML:", xml_file)

# ------------------------
# Step 3: Test with model output
# ------------------------
os.makedirs("test_output", exist_ok=True)

midi_file = "test_output/test_bar.mid"
xml_file  = "test_output/test_bar.xml"

# `output` is not defined in this cell: it must already hold a 2-D piano-roll
# array (for example a model output) when this cell runs.
pianoroll_to_midi_pm(output, midi_file)
midi_to_musicxml(midi_file, xml_file)


Saved MIDI: test_output/test_bar.mid
Saved MusicXML: test_output/test_bar.xml


In [None]:
# Run the model without gradient tracking and drop the leading batch dimension.
# `model` and `roll_tensor` must already exist from an earlier cell.
with torch.no_grad():
    output = model(roll_tensor).squeeze(0).numpy()

print("Output shape:", output.shape)
# Counts cells strictly greater than 0. NOTE(review): if `model` ends in a
# sigmoid, every cell is positive, so this equals the total number of cells.
print("Non-zero notes:", np.sum(output > 0))
print("Max value in output:", np.max(output))

Output shape: (16, 88)
Non-zero notes: 1408
Max value in output: 0.51992184
