With rhythmic synthetix here we are defining new musical notes from informational music.


. Taking data about musical instrument


. Training the data with deeplearning recurrent neural networks LSTM model


. Generating future musical notes using current training data


. Converting the newly generated musical notes into audio playback using Synthesizer


1. INSTALLING REQUIRED LIBRARIES AND SYNTHESIZER.

In [None]:
# pretty_midi -> It is a python toolkit which will be used for processing of MIDI Files.
!pip install pretty_midi

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# fluidsynth -> It is a software synthesize based on the SoundFont2  that generates digital audio from informational musical notes.
!sudo apt install -y fluidsynth

Reading package lists... Done
Building dependency tree       
Reading state information... Done
fluidsynth is already the newest version (2.1.1-2).
0 upgraded, 0 newly installed, 0 to remove and 24 not upgraded.


In [None]:
!pip install --upgrade pyfluidsynth

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install IPython



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


2. IMPORTING LIBRARIES.

In [None]:
import pretty_midi
import fluidsynth
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
# glob(GLOBAL) -> function which is used to search files in data 
import glob 
# pathlib -> function used for representing file paths with dedicated path objects.
import pathlib
import collections
# display -> function used for diaplaying data in a more organised and visually appealing format .
from IPython import display 
from typing import Optional
import collections 
import time
import random
from random import shuffle



In [None]:
# Setting some parameters for synthesizer 
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
# Sampling rate for audio playback
_sampling_rate = 16000

 DOWNLOADING THE DATASET

In [None]:
dataset = pd.read_csv("MAESTRO_MIDI.csv")

FileNotFoundError: ignored

In [None]:
# MAESTRO DATASET
data_dir = pathlib.Path('data/maestro-v2.0.0')
if not data_dir.exists():
  else_data_dir = tf.keras.utils.get_file(
    fname = 'maestro-v2.0.0-midi.zip',
    origin = 'https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip',
    extract = True,
    cache_dir ='.',
    cache_subdir = 'data', 


      )

In [None]:
# Printing total MIDI files in dataset
files = glob.glob(str(data_dir/'**/*.mid*'))
# files = glob.glob('dataset/**/.mid')
print('Total MIDI Files in dataset :', len(files))

Total MIDI Files in dataset : 1282


In [None]:
sample_file = files[2:20]
print(sample_file)

['data/maestro-v2.0.0/2011/MIDI-Unprocessed_02_R1_2011_MID--AUDIO_R1-D1_09_Track09_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_09_R1_2011_MID--AUDIO_R1-D3_14_Track14_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_17_R3_2011_MID--AUDIO_R3-D6_02_Track02_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_25_R3_2011_MID--AUDIO_R3-D9_02_Track02_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_11_R1_2011_MID--AUDIO_R1-D4_10_Track10_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_06_R3_2011_MID--AUDIO_R3-D3_02_Track02_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_19_R1_2011_MID--AUDIO_R1-D7_13_Track13_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_17_R1_2011_MID--AUDIO_R1-D7_04_Track04_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_25_R3_2011_MID--AUDIO_R3-D9_04_Track04_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_21_R1_2011_MID--AUDIO_R1-D8_10_Track10_wav.midi', 'data/maestro-v2.0.0/2011/MIDI-Unprocessed_02_R1_2011_MID--AUDIO_R1-D

In [None]:
# Generate a Prettymidi file
pm = pretty_midi.PrettyMIDI(files[1])
print(pm)

<pretty_midi.pretty_midi.PrettyMIDI object at 0x7f7e6b947d90>


In [None]:
# Playing the input midi file from dataset by converting it to audio playback using synthesizer fluidsynth.
def display_audio(pm: pretty_midi.PrettyMIDI, seconds = 60):
  waveform = pm.fluidsynth(fs = _sampling_rate)
# Taking a sample of the generated waveform to mitigate kernel resets
  waveform_short = waveform[:seconds*_sampling_rate]
  return display.Audio(waveform_short, rate = _sampling_rate)



In [None]:
display_audio(pm)

PARSING: EXTRACTING THE RELEVANT NOTES FROM MIDI FILES

EXTRACTING THREE FEATURES: 
. PITCH
. STEP
. DURATION

In [None]:
instrument = pm.instruments[0]
for i, note in enumerate(instrument.notes[:10]):
  note_name = pretty_midi.note_number_to_name(note.pitch)
  duration = note.end - note.start
  print(f'{i}: pitch={note.pitch}, note_name={note_name},'
        f' duration={duration:.4f}')

0: pitch=65, note_name=F4, duration=1.0469
1: pitch=50, note_name=D3, duration=1.1380
2: pitch=62, note_name=D4, duration=1.1484
3: pitch=64, note_name=E4, duration=0.3477
4: pitch=60, note_name=C4, duration=0.3555
5: pitch=69, note_name=A4, duration=2.2760
6: pitch=59, note_name=B3, duration=0.7331
7: pitch=62, note_name=D4, duration=0.8112
8: pitch=52, note_name=E3, duration=0.7734
9: pitch=68, note_name=G#4, duration=0.3789


In [None]:
def extract_from_midi(midi_file: str) -> pd.DataFrame:
  pm = pretty_midi.PrettyMIDI(midi_file)
  instrument = pm.instruments[0]
  notes = collections.defaultdict(list)

  # sorting the notes w.r.t start time
  sort_notes = sorted(instrument.notes, key = lambda note: note.start)
  prev_start = sort_notes[0].start

  for note in sort_notes:
    start = note.start
    end = note.end
    notes['pitch'].append(note.pitch)
    notes['start'].append(start)
    notes['end'].append(end)
    notes['step'].append(start - prev_start)
    notes['duration'].append(end-start)
    prev_start = start

  return pd.DataFrame({name: np.array(value) for name, value in notes.items()})

In [None]:
extracted_notes = extract_from_midi(files[1])
extracted_notes.head()

Unnamed: 0,pitch,start,end,step,duration
0,69,0.997396,3.273438,0.0,2.276042
1,62,1.020833,2.169271,0.023438,1.148437
2,65,1.022135,2.06901,0.001302,1.046875
3,50,1.023438,2.161458,0.001302,1.138021
4,60,1.947917,2.303385,0.924479,0.355469


In [None]:
get_note_names = np.vectorize(pretty_midi.note_number_to_name)
sample_note_names = get_note_names(extracted_notes['pitch'])
sample_note_names[:10]

array(['A4', 'D4', 'F4', 'D3', 'C4', 'E4', 'D4', 'B3', 'E3', 'G#4'],
      dtype='<U3')

CREATING THE TRAINING DATASET

In [None]:
# creating a training set
# train_set = extracted_notes.iloc[: , :].values
num_files = 5
train_set = []
for f in files[:num_files]:
  notes = extract_from_midi(f)
  train_set.append(notes)

train_set = pd.concat(train_set)

train_length= len(train_set)
print('Number of notes parsed:', train_length)

Number of notes parsed: 24655


In [None]:
# creating training dataset from the extracted notes from MIDI Files.
key_order = ['pitch', 'step', 'duration']
train_notes = np.stack([train_set[key] for key in key_order], axis= 1) 
train_dataset = tf.data.Dataset.from_tensor_slices(train_notes)



We will train the model by providing batches of sequences of notes as input value and next note as label

Here we are training the model to predict the next note of a sequence of note .


In [None]:
# creating sequence of notes in batches to provide it to the training model.
# informally it is forming a dataset of sequence of nots.
def create_sequences(
    dataset: tf.data.Dataset,
    seq_length: int,
    vocab_size = 128,
) -> tf.data.Dataset:

 seq_length = seq_length+1

#  
 windows = dataset.window(seq_length, shift=1, stride=1, drop_remainder = True)


 
# lambda is used to define anonymous function.It is similar to user defined function but without a name. syntax -> lambda x:  
# x.batch is a function that gives batches of data.
# flat_map is a transformation operation which flattens the dataset by applying the function on every element and returns a new dataset.  
 flatten = lambda x: x.batch(seq_length, drop_remainder = True)
 sequences = windows.flat_map(flatten)

  # Normalize note pitch
def scale_pitch(x):
    x = x/[vocab_size, 1.0, 1.0]
    return x

  # Split the labels. syntax -> def split_label(pmids, fold):   ["pmids-> a list of same label", "fold-> number of splits"] 
    def split_label(sequences):
      inputs = sequences[:-1]
      labels_dense = sequence[-1]
      labels = {key:labels_dense[i] for i, key in enumerate(key_order)}

      return scale_pitch(inputs), label
      
      return sequence.map(split_labels, num_paralell_call=tf.data.AUTOTUNE)




In [None]:
seq_length = 25
vocab_size = 128
sequence_dataset = create_sequences(train_dataset, seq_length, vocab_size)

In [None]:
for seq, target in sequence_dataset.take(1):
  print('sequence shape:', seq.shape)
  print('sequence elements (first 10):', seq[0: 10])
  print()
  print('target:', target)

AttributeError: ignored

In [None]:
# creating batch size -> number of batches of sequence of notes to be given to the training model
batch_size = 64
# buffer size -> number of items in the dataset.
buffer_size = train_length - seq_length
# redefing the training dataset

# train_dataset = (sequence_dataset
                #  .shuffle(buffer_size)
                #  .batch(batch_size, drop_remainder=True)
                #  .cache()
                #  .prefetch(tf.data.experimental.AUTOTUNE))

CREATING AND TRAINING THE MODEL ON THE TRAINING DATASET

In [None]:
#  Model will have three outputs for all three features: PITCH, STEP, DURATION.
#  For STEP and DURATION we will use customized loss function based on mean squared error(mse) which will only print non-negative values as output.

# defining mean square error
def mse_with_positive_values(y_true: tf.Tensor, y_pred: tf.Tensor):
  mse = (y_true - y_pred)**2
  positive_values = 10*tf.maximum(-y_pred, 0.0)
  return tf.reduce_mean(mse + positive_pressure)

In [2]:
# creating RNN's LSTM Model
input_shape = (seq_length, 3)
learning_rate = 0.005

inputs = tf.keras.Input(input_shape)
x = tf.keras.layers.LSTM(128)(inputs)

outputs = {
    'pitch': tf.keras.layers.Dense(128, name = 'pitch')(x),
    'step': tf.keras.layers.Dense(1, name = 'step')(x),
    'duration': tf.keras.layers.Dense(1, name = 'duration')(x),
}

model = tf.keras.Model(inputs, outputs)

loss = {
    'pitch': tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True),
    'step': mse_with_positive_values,
    'duration': mse_with_positive_values,
    
}

optimizer = tf.keras.optimizers.Adam(learning_rate = 0.005)

model.compile(loss= loss, optimizer= optimizer)
model.summary()

NameError: ignored

In [None]:
# Training the model 
callbacks = [
    
    
    tf.keras.callbacks.ModelCheckpoint(
        filepath= './training_checkpoints/ckpt_{epoch}',
        save_weights_only= True),


    tf.keras.callbacks.EarlyStopping(
        monitor = 'loss',
        patience = 5,
        verbose = 1,
        restore_best_weights = True),
    
    
]

epochs = 50

model.fit(train_dataset, epochs = epochs, callbacks = callbacks) 

Epoch 1/50


ValueError: ignored

GENERATING NOTES.

Since we are predicting the next note in a sequence of note we need to provide a sequence of note in output so that the model can predict next note. 

In [None]:
# To generate new notes initially we need to provide a sequence of notes.

def predict_next_note(
    notes: np.ndarray,
    keras_model: tf.keras.Model,
    # generating note as a tuple of (pitch, step, duration)
    temperature: float = 1.0) -> tuple[int, float, float]:
    assert temperature > 0
    

In [None]:
# Generating new notes
temperature = 2.0
num_predictions = 120

# Initial sequence of notes to predict future notes
# pitch is normalized
initial_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

input_notes = (
    initial_notes[:seq_length] /np.array([vocab_size, 1, 1]))

generated_notes = []
prev_start = 0
for _ in range(num_prediction):
  pitch, step, duration = predict_next_note(input_notes, model, temperature)
  start = prev_start + step
  end = start + duration
  input_note = (pitch, step, duration)
  generated_notes.appen(*input_note, start, end))
  input_notes = np.delete(input_notes, 0, axis=0)
  input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axos=0)
  prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end')) 



In [None]:
# output
output_file = 
output_pm = extract_from_midi(
    generated_notes, output_file = output_file, instrument_name = instrument_name)
display_audio(output_pm))


PICKLING THE MODEL FOR THE DEPLOYMENT

In [1]:
import pickle
pickle.dump(model,open('model.pkl', 'wb'))

NameError: ignored