# All Installations

In [None]:
# All Installations.
# Third-party packages for the cells below, installed through shell `pip` commands.
# NOTE(review): zipfile36 is not imported anywhere in the visible notebook — confirm it is still needed.
!pip install zipfile36
!pip install pydub
!pip install basic-pitch
!pip install note_seq
# NOTE(review): the imports cell takes music_pb2 from note_seq.protobuf — confirm this separate install is required.
!pip install music_pb2

# All Imports

In [None]:
# All Imports
import os
from pydub import AudioSegment
from basic_pitch.inference import predict_and_save
import pretty_midi
import librosa
import numpy as np
import note_seq
from note_seq.protobuf import music_pb2
from google.colab import files
import os
from contextlib import redirect_stdout

# Audio Slicing Function

In [None]:
# @title
def clip_audio_into_parts(input_folder, output_folder, clip_duration_ms=30000):
    """Split every audio file in ``input_folder`` into fixed-length MP3 clips.

    Each regular file whose name ends in .mp3, .wav, .ogg or .flac (matched
    case-insensitively) is cut into consecutive parts of ``clip_duration_ms``
    milliseconds. The last part holds whatever audio remains and may be
    shorter. Parts are exported to ``output_folder`` as
    ``<original file name>_part<k>.mp3`` with ``k`` starting at 1.

    Args:
        input_folder: Directory scanned (non-recursively) for audio files.
        output_folder: Directory that receives the clips; created if missing.
        clip_duration_ms: Length of each clip in milliseconds; must be > 0.

    Raises:
        ValueError: If ``clip_duration_ms`` is not positive.
    """
    # A zero duration would divide by zero and a negative one would silently
    # produce no clips, so reject both up front.
    if clip_duration_ms <= 0:
        raise ValueError(
            f"clip_duration_ms must be a positive number of milliseconds, got {clip_duration_ms!r}"
        )

    os.makedirs(output_folder, exist_ok=True)

    audio_extensions = ('.mp3', '.wav', '.ogg', '.flac')

    # Sorted so the processing order does not depend on the file system.
    for file_name in sorted(os.listdir(input_folder)):
        file_path = os.path.join(input_folder, file_name)

        # Skip non-audio names and anything that is not a regular file
        # (for example a sub-directory that happens to be called "x.mp3").
        if not file_name.lower().endswith(audio_extensions):
            continue
        if not os.path.isfile(file_path):
            continue

        audio = AudioSegment.from_file(file_path)

        # range() yields one start offset per clip, including the shorter
        # trailing clip when the length is not a multiple of the duration.
        clip_starts = range(0, len(audio), clip_duration_ms)
        for part_number, start_time in enumerate(clip_starts, start=1):
            audio_part = audio[start_time:start_time + clip_duration_ms]
            output_file = os.path.join(output_folder, f"{file_name}_part{part_number}.mp3")
            audio_part.export(output_file, format="mp3")

# WAV to MIDI

In [None]:
# @title
# def get_file_paths(directory_path):
#     file_paths = []
#     for root, dirs, files in os.walk(directory_path):
#         for file in files:
#             file_path = os.path.join(root, file)
#             file_paths.append(file_path)
#     return file_paths

# def wav_to_midi_conversion(file_paths, output_directory):
#   #     <input-audio-path-list>,
#   #     <output-directory>,
#   #     <save-midi>,
#   #     <sonify-midi>,
#   #     <save-model-outputs>,
#   #     <save-notes>,
#   predict_and_save(
#       file_paths,
#       output_directory,
#       "false",
#       "true",
#       "true",
#       "true",
#   )

# from predict_module import predict_and_save  # Make sure to import the appropriate module/function

def get_file_paths(directory_path):
    """Return the path of every file found under ``directory_path``.

    The tree is traversed recursively with ``os.walk``. Each entry is the
    containing directory joined with the file name, in traversal order.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory_path)
        for name in names
    ]

def wav_to_midi_conversion(file_paths, output_directory):
    """Run basic-pitch's ``predict_and_save`` on ``file_paths``.

    Results are written to ``output_directory``. Everything the call prints
    to stdout is discarded.

    The four flags are passed as real booleans. The non-empty strings that
    used to be passed here ("false", "true") are all truthy in Python, so
    "false" could not switch anything off. They are all set to ``True``,
    which is what those strings evaluate to in a boolean context.

    Args:
        file_paths: List of input audio paths.
        output_directory: Directory that receives the generated files.
    """
    # Send stdout to the null device for the duration of the call.
    with open(os.devnull, 'w') as null_file, redirect_stdout(null_file):
        predict_and_save(
            file_paths,
            output_directory,
            True,  # save-midi
            True,  # sonify-midi
            True,  # save-model-outputs
            True,  # save-notes
        )

# Dataset Generation (Note Sequences)

In [None]:
def midi_to_note_sequence(path):
  """Load the MIDI file at ``path`` into a freshly built ``NoteSequence``.

  Only the notes, total time, tempos, key signatures and time signatures of
  the parsed sequence are copied into the returned object.
  """
  parsed = note_seq.midi_file_to_note_sequence(path)
  trimmed = music_pb2.NoteSequence()
  trimmed.total_time = parsed.total_time
  # Copy each repeated field across by name.
  for field_name in ('notes', 'tempos', 'key_signatures', 'time_signatures'):
    getattr(trimmed, field_name).extend(getattr(parsed, field_name))
  return trimmed

def note_sequence_to_note_list(note_sequence):
  """Return one dict per note in ``note_sequence.notes``.

  Each dict has the keys 'pitch', 'velocity', 'start_time' and 'end_time',
  read from the attributes of the same name on the note.
  """
  return [
      {
          'pitch': entry.pitch,
          'velocity': entry.velocity,
          'start_time': entry.start_time,
          'end_time': entry.end_time,
      }
      for entry in note_sequence.notes
  ]

def convert_note_list_to_string(note_list, max_end_time=12, time_scale=4):
    """Encode a list of note dicts as a single token string.

    The result starts with
    "PIECE_START TRACK_START INST=0 DENSITY=0 BAR_START " and ends with
    "BAR_END TRACK_END". In between, each note contributes
    "NOTE_ON=<pitch> TIME_DELTA=<d> NOTE_OFF=<pitch> ", where ``d`` is
    ``int((end_time - start_time) * time_scale)``. Notes whose ``d`` is 0 are
    left out.

    Args:
        note_list: Iterable of dicts with 'pitch', 'start_time' and
            'end_time' keys.
        max_end_time: Encoding stops after the first note whose 'end_time'
            exceeds this value; that note itself is still encoded.
        time_scale: Multiplier applied to the note duration before it is
            truncated to an integer.

    Returns:
        The encoded string.
    """
    tokens = []

    for note in note_list:
        pitch = note['pitch']
        # Computed once and reused for both the filter and the token.
        time_delta = int((note['end_time'] - note['start_time']) * time_scale)
        if time_delta != 0:
            tokens.append(f"NOTE_ON={pitch} TIME_DELTA={time_delta} NOTE_OFF={pitch} ")
        # Checked after the note is handled, so the note that crosses the
        # limit is the last one included.
        if note['end_time'] > max_end_time:
            break

    return (
        "PIECE_START TRACK_START INST=0 DENSITY=0 BAR_START "
        + "".join(tokens)
        + "BAR_END TRACK_END"
    )

def get_initial_sequence_from_midi(midi_path):
  """Convert the MIDI file at ``midi_path`` into its token-string form.

  Chains the three helpers: MIDI file -> note sequence -> list of note
  dicts -> encoded string.
  """
  return convert_note_list_to_string(
      note_sequence_to_note_list(midi_to_note_sequence(midi_path))
  )

In [None]:
def get_dataset(library_path):
  """Encode every MIDI file under ``library_path`` as a token string.

  The directory tree is walked recursively. A file counts as MIDI when its
  name ends in ".mid" or ".midi" (case-insensitive). Directories and files
  are visited in sorted order so the result is reproducible.

  Args:
      library_path: Root directory to search.

  Returns:
      A list with one encoded string per MIDI file; empty when none is found.
  """
  dataset = []
  for root, dirs, file_names in os.walk(library_path):
      # Sorting in place also fixes the order in which os.walk descends.
      dirs.sort()
      for file_name in sorted(file_names):
          # Require the dot so extension-less names such as "pyramid" are
          # not mistaken for MIDI files.
          if file_name.lower().endswith(('.mid', '.midi')):
            # Join with `root`, not `library_path`, so files found in
            # sub-directories resolve to their real location.
            path = os.path.join(root, file_name)
            dataset.append(get_initial_sequence_from_midi(path))
  return dataset

# Download Dataset

In [None]:
def downloadDataset(file_path='/content/DATASET.txt'):
  """Trigger a download of ``file_path`` through ``files.download``.

  Args:
      file_path: Path of the file to download. Defaults to
          '/content/DATASET.txt', the path this function used before it
          took a parameter.
  """
  files.download(file_path)




In [None]:
  # Encode the MIDI files under the basic-pitch output folder and store the
  # sequences in DATASET.txt, each one followed by an empty line.
  library_path = "/content/basic_pitch_output"
  dataset = get_dataset(library_path)
  with open('DATASET.txt', 'w') as dataset_file:
    dataset_file.writelines(sequence + '\n' + '\n' for sequence in dataset)

# PIPELINE

In [None]:
# PIPELINE CODE
#
# Interactive driver: upload the data as a zip archive, choose one stage (or
# all of them) and run it on the extracted files.
from google.colab import files
import shutil

UPLOAD_DIR = '/content/uploaded_directory'
SLICED_DIR = '/content/slicid_audio'
MIDI_DIR = '/content/basic_pitch_output'
# Absolute, so the file written below is the same one downloadDataset() downloads.
DATASET_PATH = '/content/DATASET.txt'

# Upload the directory as a zip file
uploaded = files.upload()

# Extract the first uploaded zip file
for file_name in uploaded.keys():
    if file_name.endswith('.zip'):
        shutil.unpack_archive(file_name, UPLOAD_DIR)
        print('Directory uploaded and extracted successfully!')
        break

# Stop with a clear message when there is nothing to work on, instead of
# failing later or silently producing an empty dataset.
if not os.path.isdir(UPLOAD_DIR):
    raise FileNotFoundError(
        f"{UPLOAD_DIR} does not exist; upload a .zip archive of the data first."
    )

# Ask until one of the four menu entries is chosen. Input that is not a
# number is treated like any other wrong choice rather than crashing.
while True:
    print("Choose the operation you want to perform on the data : " )
    print("1. Audio Slicing")
    print("2. Conversion from wav to midi")
    print("3. Conversion from midi to text")
    print("4. All in One")
    try:
        choice = int(input())
    except ValueError:
        choice = None
    if choice in (1, 2, 3, 4):
        break
    print("Wrong choice")

# Each stage reads from `stage_input`. With choice 4 every stage feeds its
# output folder to the next one; otherwise the single selected stage reads
# the uploaded data directly.
stage_input = UPLOAD_DIR

if choice in (1, 4):
    # Stage 1: audio slicing
    os.makedirs(SLICED_DIR, exist_ok=True)
    while True:
        try:
            clip_duration = int(input("Enter the cliping duration in s: ", ))
        except ValueError:
            clip_duration = 0
        if clip_duration > 0:
            break
        print("Please enter a positive whole number of seconds.")
    # clip_audio_into_parts expects milliseconds.
    clip_audio_into_parts(stage_input, SLICED_DIR, clip_duration*1000)
    stage_input = SLICED_DIR

if choice in (2, 4):
    # Stage 2: conversion from wav to midi
    file_paths = get_file_paths(stage_input)
    os.makedirs(MIDI_DIR, exist_ok=True)
    wav_to_midi_conversion(file_paths, MIDI_DIR)
    stage_input = MIDI_DIR

if choice in (3, 4):
    # Stage 3: conversion from midi to text, then download
    dataset = get_dataset(stage_input)
    with open(DATASET_PATH, 'w') as file:
        for element in dataset:
            # Each sequence is followed by an empty line.
            file.write(element + '\n\n')
    downloadDataset()
    print("Datset Downloaded succesfully!!!")


Saving data.zip to data.zip
Directory uploaded and extracted successfully!
Choose the operation you want to perform on the data : 
1. Audio Slicing
2. Conversion from wav to midi
3. Conversion from midi to text
4. All in One
3


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Datset Downloaded succesfully!!!


In [None]:
import os
import shutil

# Specify the directory path you want to delete
directory_to_delete = '/content/uploaded_directory'

# Remove the directory and its contents. The existence check keeps this cell
# re-runnable: shutil.rmtree() raises when the path is already gone.
if os.path.isdir(directory_to_delete):
    shutil.rmtree(directory_to_delete)


In [None]:
# Encode the MIDI files in the uploaded directory as text and download the result.
# NOTE(review): the preceding cell deletes this directory, so on a
# top-to-bottom run get_dataset() finds nothing here — confirm the intended order.
library_path = "/content/uploaded_directory"
dataset = get_dataset(library_path)
# Written to the absolute path that downloadDataset() downloads, so the two
# agree regardless of the current working directory.
with open('/content/DATASET.txt', 'w') as file:
  for element in dataset:
      # Each sequence is followed by an empty line.
      file.write(element + '\n\n')
downloadDataset()
print("Datset Downloaded succesfully!!!")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Datset Downloaded succesfully!!!
