In [None]:
import numpy as np

# Run parse_data
%run ~/violin-renderer/src/data/parse_data.ipynb

In [None]:
# helper functions to normalize data
def scale_timings(column):
    """Normalize timing values by the fixed window of 64 * 24 ticks.

    Args:
        column: a number or array-like supporting true division (the callers
            in this notebook pass one column of a note matrix).

    Returns:
        ``column`` divided by 64 * 24; values below that window size map into
        [0, 1) for non-negative input.
    """
    beats = 64
    resolution = 24
    window_ticks = beats * resolution
    return column / window_ticks

def scale_pitch(column):
    """Return the normalized pitch feature, which is currently a constant 0.

    Args:
        column: pitch values; accepted for interface symmetry but not read.

    Returns:
        The scalar ``0`` regardless of input, so assigning the result to an
        array column zeroes that column out.
    """
    # Pitch is ignored for now; the previous scaling was `column / 128`.
    ignored_pitch = 0
    return ignored_pitch

# helper functions to convert to duration and use (onset, duration) instead of (onset, offset)
def set_duration(start, end):
    """Compute a duration as the difference between an end and a start value.

    Args:
        start: onset value(s); a number or anything supporting subtraction.
        end: offset value(s) of the same kind as ``start``.

    Returns:
        ``end - start``.
    """
    duration = end - start
    return duration

In [None]:
# compile all training data into one big matrix and normalize
# @return: np array of normalized notes of source input and np array of ground truth
def processed_training_datasets():
    """Compile every training song into one matrix and normalize the inputs.

    Loads the songs with ``load_training_data()``, pairs each source note with
    the ground-truth row at the same position, and keeps only the pairs whose
    source ``note[1]`` (the offset, per the filter's intent) is below 64 * 24.
    Columns 0 and 1 of the source matrix are scaled with ``scale_timings`` and
    column 2 with ``scale_pitch``. The ground truth is returned unscaled.

    Returns:
        tuple: ``(training_source_inputs, training_ground_truths)``. The first
        is a float np array of normalized source notes, the second an np array
        of the matching ground-truth rows. Both are empty 1-D arrays when no
        note passes the filter.
    """
    max_offset = 64 * 24  # beats * resolution

    # Load the training dataset
    training_X, training_y = load_training_data()

    # combine into a matrix
    training_source_inputs = []
    training_ground_truths = []
    for source_input_song, ground_truth_song in zip(training_X.values(), training_y.values()):
        for note, timing in zip(source_input_song, ground_truth_song):
            # Cheat: Remove notes that have offset greater than a set threshold so we can normalize better
            if note[1] < max_offset:
                training_source_inputs.append(note)
                training_ground_truths.append(timing)

    # Convert to np arrays. The inputs are forced to float: with integer notes
    # np.array() would infer an integer dtype, and the in-place column
    # assignments below would then truncate every normalized value to 0.
    training_source_inputs = np.array(training_source_inputs, dtype=float)
    training_ground_truths = np.array(training_ground_truths)

    # changing the offset feature to duration (currently disabled)
    # training_source_inputs[:, 1] = set_duration(training_source_inputs[:, 0], training_source_inputs[:, 1])
    # training_ground_truths[:, 1] = set_duration(training_ground_truths[:, 0], training_ground_truths[:, 1])

    # Normalize the input. Skipped when nothing passed the filter: an empty
    # array is 1-D, so column indexing would raise IndexError.
    if training_source_inputs.size:
        training_source_inputs[:, 0] = scale_timings(training_source_inputs[:, 0])
        training_source_inputs[:, 1] = scale_timings(training_source_inputs[:, 1])
        training_source_inputs[:, 2] = scale_pitch(training_source_inputs[:, 2])

    return training_source_inputs, training_ground_truths

In [None]:
# compile all testing data into one big matrix and normalize
# @return: map of { path : np array of normalized notes of source input } and { path : np array of ground truth }
def processed_testing_datasets():
    """Filter and normalize the testing data, one song at a time.

    Loads the songs with ``load_testing_data()`` and walks the source and
    ground-truth maps in parallel (paired by iteration order, not by key).
    Within each song, a source note and the ground-truth row at the same
    position are kept only when the source ``note[1]`` (the offset, per the
    filter's intent) is below 64 * 24. Columns 0 and 1 of each source matrix
    are scaled with ``scale_timings`` and column 2 with ``scale_pitch``. The
    ground truth is returned unscaled.

    Returns:
        tuple: ``(testing_source_inputs, testing_ground_truths)``. The first
        maps each source path to a float np array of normalized notes, the
        second maps each ground-truth path to an np array of the matching
        rows. A song whose notes are all filtered out maps to empty 1-D arrays.
    """
    max_offset = 64 * 24  # beats * resolution

    # Load the testing dataset
    testing_X, testing_y = load_testing_data()

    testing_source_inputs = {}
    testing_ground_truths = {}

    # normalize each song separately
    for source_input_pair, ground_truth_pair in zip(testing_X.items(), testing_y.items()):
        source_input_song_path, source_input_song = source_input_pair
        ground_truth_song_path, ground_truth_song = ground_truth_pair

        filtered_source_input = []
        filtered_ground_truth = []
        for note, timing in zip(source_input_song, ground_truth_song):
            # Cheat: Remove notes that have offset greater than a set threshold so we can normalize better
            if note[1] < max_offset:
                filtered_source_input.append(note)
                filtered_ground_truth.append(timing)

        # Convert to np arrays. The inputs are forced to float: with integer
        # notes np.array() would infer an integer dtype, and the in-place
        # column assignments below would truncate every normalized value to 0.
        filtered_source_input = np.array(filtered_source_input, dtype=float)
        filtered_ground_truth = np.array(filtered_ground_truth)

        # changing the offset feature to duration (currently disabled)
        # filtered_source_input[:, 1] = set_duration(filtered_source_input[:, 0], filtered_source_input[:, 1])
        # filtered_ground_truth[:, 1] = set_duration(filtered_ground_truth[:, 0], filtered_ground_truth[:, 1])

        # Normalize the input. Skipped when the whole song was filtered out:
        # an empty array is 1-D, so column indexing would raise IndexError and
        # abort the processing of every remaining song.
        if filtered_source_input.size:
            filtered_source_input[:, 0] = scale_timings(filtered_source_input[:, 0])
            filtered_source_input[:, 1] = scale_timings(filtered_source_input[:, 1])
            filtered_source_input[:, 2] = scale_pitch(filtered_source_input[:, 2])

        # append to map
        testing_source_inputs[source_input_song_path] = filtered_source_input
        testing_ground_truths[ground_truth_song_path] = filtered_ground_truth

    return testing_source_inputs, testing_ground_truths