In [1]:
import os
from sklearn import linear_model
from collections import defaultdict
from sklearn.metrics import mean_squared_error

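# Run the data_processing notebook so its definitions are available here
# (processed_testing_datasets, used below, is presumably defined there)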
%run ~/violin-renderer/src/models/linear-regression/data_processing.ipynb

In [2]:
# Initialize linear regression and train based on dataset
# @param X_dataset: source input data in an array format
# @param y_dataset: ground truth data in an array format
# @return: instance of linear regression model that takes (onset, pitch) -> start
def train_model(X_dataset, y_dataset):
    model = linear_model.LinearRegression()
    model.fit(X_dataset, y_dataset)
    return model

In [3]:
# @param start_model: linear regression model to generate start values
# @param end_model: linear regression model to generate end values
# @return: an array of MSE values for each song in the testing dataset
def MSE_error(start_model, end_model):
    _ , _ , testing_y_start, testing_y_end, _ = processed_testing_datasets()

    errors = []
    testing_results = generate_all_testing_data(start_model, end_model)
    for i, generated_song in enumerate(testing_results.values()):
        start = [pair[0] for pair in generated_song]
        end = [pair[1] for pair in generated_song]
        onset_error = mean_squared_error(testing_y_start[i], start)
        offset_error = mean_squared_error(testing_y_end[i], end)
        errors.append((onset_error + offset_error) / 2)
    
    return errors

In [4]:
# @param start_model: linear regression model to generate start values
# @param end_model: linear regression model to generate end values
# @returns a dictionary { path : [(start, end), ...]}
def generate_all_testing_data(start_model, end_model):
    # Initialize all testing datasets
    testing_X_onset, testing_X_offset, _ , _ , testing_paths = processed_testing_datasets()
    testing_results = defaultdict(list)

    # Generating output for each piece in the testing input dataset
    for i, song_path in enumerate(testing_paths):
        onset_song = testing_X_onset[i]
        offset_song = testing_X_offset[i]

        y_onset_pred = start_model.predict(onset_song)
        y_offset_pred = end_model.predict(offset_song)

        # Add to dictionary
        for start, end in zip(y_onset_pred, y_offset_pred):
            testing_results[song_path].append((start, end))

    return testing_results

# model_onset = LR_onset()
# model_offset = LR_offset()
# generate_all_testing_data(model_onset, model_offset)