<a href="https://colab.research.google.com/github/wolfisberg/zhaw-ba-online/blob/main/scripts/experiment_player/experiment_player.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab setup cell: install the extra packages, import the libraries used
# throughout the notebook and mount Google Drive.
!pip install mir_eval
!pip install rt_pie_lib
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import math

# The .npy files loaded in the "Load Data" cells live under /content/drive,
# so the drive has to be mounted before any of those cells is executed.
from google.colab import drive
drive.mount('/content/drive')

# Load Data

## CREPE

### CREPE 2048 / 512

In [None]:
# NOTE(review): placeholder cell for the CREPE 2048 / 512 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

### CREPE 1024 / 512

In [None]:
# Load the saved CREPE 1024 / 512 arrays from the mounted Google Drive.
# Every "Load Data" cell assigns the same five global names, so whichever
# load cell ran last decides what the RUN cell evaluates.
# NOTE(review): presumably *_hz hold pitch values in Hz and *_vector hold
# per-frame salience vectors — confirm against the code that saved them.
true_hz = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/crepe_1024/crepe_true_hz_1024_512.npy')
predicted_hz = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/crepe_1024/crepe_predicted_hz_1024_512.npy')
true_vector = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/crepe_1024/crepe_true_vector_1024_512.npy')
pred_vector = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/crepe_1024/crepe_pred_vector_1024_512.npy')
diff = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/crepe_1024/crepe_diff_1024_512.npy')

### CREPE 512 / 256

In [None]:
# NOTE(review): placeholder cell for the CREPE 512 / 256 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

### CREPE 256 / 128

In [None]:
# NOTE(review): placeholder cell for the CREPE 256 / 128 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

## DeepF0

### DeepF0 2048 / 1024

In [None]:
# NOTE(review): placeholder cell for the DeepF0 2048 / 1024 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

### DeepF0 1024 / 512

In [None]:
# Load the saved DeepF0 1024 / 512 arrays from the mounted Google Drive.
# Every "Load Data" cell assigns the same five global names, so whichever
# load cell ran last decides what the RUN cell evaluates.
# NOTE(review): presumably *_hz hold pitch values in Hz and *_vector hold
# per-frame salience vectors — confirm against the code that saved them.
true_hz = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/deepf0_1024/deepf0_true_hz_1024_512.npy')
predicted_hz = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/deepf0_1024/deepf0_predicted_hz_1024_512.npy')
true_vector = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/deepf0_1024/deepf0_true_vector_1024_512.npy')
pred_vector = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/deepf0_1024/deepf0_pred_vector_1024_512.npy')
diff = np.load('/content/drive/MyDrive/BA_2021/saved_vectors/deepf0_1024/deepf0_diff_1024_512.npy')

### DeepF0 512 / 256

In [None]:
# NOTE(review): placeholder cell for the DeepF0 512 / 256 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

### DeepF0 256 / 128

In [None]:
# NOTE(review): placeholder cell for the DeepF0 256 / 128 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

## LSTM

### LSTM 1024 / 512

In [None]:
# NOTE(review): placeholder cell for the LSTM 1024 / 512 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

### LSTM 512 / 256

In [None]:
# NOTE(review): placeholder cell for the LSTM 512 / 256 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

### LSTM 256 / 128

In [None]:
# NOTE(review): placeholder cell for the LSTM 256 / 128 vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

### LSTM reduced Layers

In [None]:
# NOTE(review): placeholder cell for the LSTM "reduced layers" vectors.
# np.load() is called without a file path and raises TypeError as written;
# fill in the .npy paths before running this cell.
true_hz = np.load()
predicted_hz = np.load()
true_vector = np.load()
pred_vector = np.load()
diff = np.load()

# Load Local Average Functions

In [None]:
from scipy.signal import argrelextrema
def convert_bin_to_local_average_cents(salience, center=None):
    """
    Find the salience-weighted average cents in a window around a center bin.

    For a 1-D ``salience`` the window spans up to 4 bins below and 4 bins
    above ``center`` (clipped to the array bounds). The result is the sum of
    ``salience * cents`` over the window divided by the sum of the salience
    in the window.

    Parameters
    ----------
    salience : np.ndarray
        1-D array of bin activations, or a 2-D array with one such vector per
        row. Bins are mapped to cents via ``linspace(0, 7180, 360) +
        2051.14876287``, i.e. the mapping has 360 entries.
    center : int, optional
        Bin index to center the window on. When ``None`` (the default) the
        argmax of ``salience`` is used. Only applied to 1-D input; for 2-D
        input every row uses its own argmax.

    Returns
    -------
    float or np.ndarray
        A scalar for 1-D input, a 1-D array with one value per row for 2-D
        input. If the window's salience sums to zero the division yields NaN.

    Raises
    ------
    Exception
        If ``salience`` is neither 1-D nor 2-D.
    """
    if not hasattr(convert_bin_to_local_average_cents, 'cents_mapping'):
        # the bin number-to-cents mapping, built once and cached on the function
        convert_bin_to_local_average_cents.cents_mapping = (
                np.linspace(0, 7180, 360) + 2051.14876287)
    if salience.ndim == 1:
        # Honor an explicitly supplied center; previously the argument was
        # always overwritten by the argmax and therefore had no effect.
        if center is None:
            center = int(np.argmax(salience))
        start = max(0, center - 4)
        end = min(len(salience), center + 5)
        window = salience[start:end]
        product_sum = np.sum(
            window * convert_bin_to_local_average_cents.cents_mapping[start:end])
        weight_sum = np.sum(window)
        return product_sum / weight_sum
    if salience.ndim == 2:
        return np.array([convert_bin_to_local_average_cents(salience[i, :]) for i in
                         range(salience.shape[0])])
    raise Exception("Label should be either 1d or 2d ndarray.")


def convert_bin_to_local_average_cents_lowest_maxima(salience, center=None, maxima_order=5, maxima_minval=0.2, tolerance=0.1):
    """
    Convert a salience vector to cents, preferring the lower of two local
    maxima that lie roughly one octave apart.

    For 1-D input:

    1. If ``salience[0] > 0.2`` the result is the cents value of a one-hot
       vector at bin 0.
    2. Otherwise the local maxima of ``salience`` are collected with
       ``argrelextrema(..., np.greater, order=maxima_order)`` and those with
       a salience below ``maxima_minval`` are discarded.
    3. If more than one maximum remains, ``__try_find_f0_in_maxima`` looks
       for two neighbouring maxima (sorted by frequency) where the higher one
       is within ``tolerance`` of twice the lower one. On success the
       salience is replaced by a one-hot vector at the lower maximum.
    4. The (possibly replaced) salience is passed to
       ``convert_bin_to_local_average_cents``.

    For 2-D input the function is applied row by row.

    Relies on ``converters`` (``from rt_pie_lib import converters``) being
    imported before the function is called.

    Parameters
    ----------
    salience : np.ndarray
        1-D activation vector or 2-D array with one vector per row.
    center : int, optional
        Forwarded to ``convert_bin_to_local_average_cents`` for 1-D input.
    maxima_order : int
        ``order`` argument for ``scipy.signal.argrelextrema``.
    maxima_minval : float
        Minimum salience a local maximum needs to be considered.
    tolerance : float
        Maximum relative deviation from an exact factor of two.

    Returns
    -------
    float or np.ndarray
        Scalar for 1-D input, 1-D array with one value per row for 2-D input.

    Raises
    ------
    Exception
        If ``salience`` is neither 1-D nor 2-D.
    """
    if salience.ndim == 1:
        # NOTE(review): this threshold is hard-coded to 0.2 and does not follow
        # ``maxima_minval`` — confirm whether the two are meant to be coupled.
        if salience[0] > 0.2:
            return convert_bin_to_local_average_cents(__create_maximum_bin(0), center=center)

        maxima = argrelextrema(salience, np.greater, order=maxima_order)[0]
        # (bin index, frequency in Hz of that bin) for every strong enough maximum
        maxima = [(x, converters.convert_cent_to_hz(convert_bin_to_local_average_cents(__create_maximum_bin(x))))
                  for x in maxima if salience[x] >= maxima_minval]
        if len(maxima) > 1:
            # The helper sorts ``maxima`` in place by frequency, so ``idx``
            # refers to the sorted list used on the next lines.
            success, idx = __try_find_f0_in_maxima(maxima, tolerance=tolerance)
            if success:
                salience = __create_maximum_bin(maxima[idx][0])
        return convert_bin_to_local_average_cents(salience, center=center)

    if salience.ndim == 2:
        # Forward the tuning parameters; previously rows were always processed
        # with the defaults regardless of what the caller passed in.
        return np.array([
            convert_bin_to_local_average_cents_lowest_maxima(
                salience[i, :],
                maxima_order=maxima_order,
                maxima_minval=maxima_minval,
                tolerance=tolerance,
            )
            for i in range(salience.shape[0])
        ])

    raise Exception("Label should be either 1d or 2d ndarray.")


def __create_maximum_bin(index):
    """Return a length-360 float vector that is 1 at ``index`` and 0 elsewhere."""
    one_hot = np.zeros(360)
    one_hot[index] = 1
    return one_hot

def __try_find_f0_in_maxima_true_negativs(maxima):
    """
    Unfinished stub: sorts ``maxima`` in place by each entry's second element
    and walks all but the last entry without producing a result.

    Always returns ``None``.
    """
    maxima.sort(key=lambda x: x[1])
    for entry in maxima[:-1]:
        max_current = entry[1]


def __try_find_f0_in_maxima(maxima, tolerance=0.1):
    """
    Look for two neighbouring maxima whose frequencies are about a factor of
    two apart.

    ``maxima`` is a list of ``(bin, frequency)`` pairs. It is sorted IN PLACE
    by frequency, and the returned index refers to that sorted order, so the
    caller must index the same list afterwards.

    Returns ``(True, i)`` for the first ``i`` where
    ``abs(2 * f[i] - f[i + 1]) / f[i + 1] <= tolerance``, otherwise
    ``(False, None)``.
    """
    maxima.sort(key=lambda x: x[1])
    for position, (lower, upper) in enumerate(zip(maxima, maxima[1:])):
        octave_error = abs(lower[1] * 2 - upper[1]) / upper[1]
        if octave_error <= tolerance:
            return True, position
    return False, None

# Load all other methods

In [None]:
from rt_pie_lib import converters
def vector_to_hz(pred_vector):
    """
    Convert a sequence of salience vectors to frequencies in Hz.

    Each entry of ``pred_vector`` is turned into cents with
    ``convert_bin_to_local_average_cents_lowest_maxima``; the collected cents
    array is then converted with ``converters.convert_cent_to_hz``.
    """
    cents_per_frame = np.array([
        convert_bin_to_local_average_cents_lowest_maxima(frame)
        for frame in pred_vector
    ])
    return converters.convert_cent_to_hz(cents_per_frame)

def calc_new_diff(true_hz, pred_hz):
    """Return the error ``true_hz - pred_hz`` (ground truth minus prediction)."""
    return true_hz - pred_hz

def histogram(diff):
    """
    Plot a 250-bin histogram of ``diff`` with vertical marker lines for the
    median, mean, 5% quantile and 95% quantile.

    The y axis is fixed to [0, 30000]. The global matplotlib font size is set
    to 22 for the plot and set to 12 afterwards (not restored to its previous
    value).
    """
    plt.rcParams.update({'font.size': 22})

    plt.figure(figsize=[16,9])
    plt.hist(diff, bins=250)
    # x axis is left on auto scale
    plt.ylim([0, 30000])

    # (x position, line style, legend label) — drawn in this order so the
    # legend entries keep their sequence
    reference_lines = (
        (np.median(diff), 'dashed', 'MED'),
        (np.mean(diff), 'solid', 'MEAN'),
        (np.quantile(diff, 0.05), 'dotted', '5% quantile'),
        (np.quantile(diff, 0.95), 'dashdot', '95% quantile'),
    )
    for position, line_style, caption in reference_lines:
        plt.axvline(position, color='k', linestyle=line_style, linewidth=2, label=caption)

    plt.xlabel("Error in Hertz")
    plt.ylabel("Number of Errors")
    plt.legend()
    plt.show()
    plt.rcParams.update({'font.size': 12})

def _print_zero_pitch_report(true_zero, true_voiced, pred_zero, pred_voiced):
    """
    Count the confusion-matrix cells for "zero pitch" detection and print the
    derived metrics.

    "Positive" means zero pitch. All four arguments are boolean arrays of the
    same length; frames that are neither zero nor voiced in the ground truth
    or in the prediction are not counted at all.

    Prints a single error line instead of the report when any of the ratios
    would divide by zero (e.g. no frames counted, no predicted zeros).
    """
    tp = int(np.count_nonzero(true_zero & pred_zero))
    tn = int(np.count_nonzero(true_voiced & pred_voiced))
    fn = int(np.count_nonzero(true_zero & pred_voiced))
    fp = int(np.count_nonzero(true_voiced & pred_zero))

    try:
        total = tp + fp + tn + fn
        percentage_zero_truth = (tp + fn) / total * 100
        percentage_zero_predicted = (tp + fp) / total * 100
        precision = tp / (tp + fp) * 100  # share of our zero estimates that are correct
        recall = tp / (tp + fn) * 100  # share of the actual zeros that we caught
        accuracy = (tp + tn) / total * 100  # share of correct predictions
        f1 = 2 * (precision * recall) / (precision + recall)

        tn_percentage = tn / total * 100
        tp_percentage = tp / total * 100
        fp_percentage = fp / total * 100
        fn_percentage = fn / total * 100

        print("ZERO PITCH ANALYSIS")
        print("Sample size (test data set): ", total)
        print("0 - % in ground truth: ", "%.2f" % percentage_zero_truth)
        print("0 - % in predictions: ",  "%.2f" % percentage_zero_predicted)
        print("Accuarcy: ", "%.2f" % accuracy)
        print("Precision: ", "%.2f" % precision)
        print("Recall: ", "%.2f" % recall)
        print("F1-Score", "%.2f" % f1)
        print("True Negatives: ", "%.2f" % tn_percentage)
        print("True Positives: ", "%.2f" % tp_percentage)
        print("False Positives: ", "%.2f" % fp_percentage)
        print("False Negatives: ", "%.2f" % fn_percentage)

    except ZeroDivisionError:
        print("Zero Pitch Analysis not possible: divide by zero")


def zero_pitch_analysis_cnn_models(true_hz, predicted_hz):
    """
    Print zero-pitch detection metrics for the CNN models.

    A frame counts as "zero pitch" when its value lies in [30, 35] Hz and as
    voiced when it is above 35 Hz. Values below 30 Hz fall into neither class
    and are ignored.

    Parameters
    ----------
    true_hz : array-like
        Ground-truth frequencies, one per frame.
    predicted_hz : array-like
        Predicted frequencies; entries beyond ``len(true_hz)`` are ignored.
    """
    true_hz = np.asarray(true_hz)
    predicted_hz = np.asarray(predicted_hz)[:len(true_hz)]

    _print_zero_pitch_report(
        true_zero=(true_hz >= 30) & (true_hz <= 35),
        true_voiced=true_hz > 35,
        pred_zero=(predicted_hz >= 30) & (predicted_hz <= 35),
        pred_voiced=predicted_hz > 35,
    )


def zero_pitch_analysis_lstm(true_hz=None, predicted_hz=None):
    """
    Print zero-pitch detection metrics for the LSTM models.

    A frame counts as "zero pitch" when its value is exactly 0 and as voiced
    when it is above 0.

    Parameters
    ----------
    true_hz : array-like, optional
        Ground-truth frequencies, one per frame.
    predicted_hz : array-like, optional
        Predicted frequencies; entries beyond ``len(true_hz)`` are ignored.

    The function used to take no arguments and read the notebook globals
    ``true_hz`` / ``predicted_hz``, while ``main`` calls it with two
    arguments (a TypeError). Both call styles work now: an omitted argument
    falls back to the global of the same name.
    """
    if true_hz is None:
        true_hz = globals()['true_hz']
    if predicted_hz is None:
        predicted_hz = globals()['predicted_hz']

    true_hz = np.asarray(true_hz)
    predicted_hz = np.asarray(predicted_hz)[:len(true_hz)]

    _print_zero_pitch_report(
        true_zero=true_hz == 0,
        true_voiced=true_hz > 0,
        pred_zero=predicted_hz == 0,
        pred_voiced=predicted_hz > 0,
    )

def filter_values(true_hz, pred_hz, filter_lower_bound, filter_upper_bound, filter_method):
    """
    Keep only the frames whose frequency lies strictly between the bounds.

    Parameters
    ----------
    true_hz : array-like
        Ground-truth frequencies, one per frame.
    pred_hz : array-like
        Predicted frequencies, one per frame. If the two inputs differ in
        length, both are truncated to the shorter one.
    filter_lower_bound, filter_upper_bound : float
        Exclusive bounds; a value ``v`` passes when ``lower < v < upper``.
    filter_method : str
        ``'gt'``   – test the ground truth only,
        ``'pred'`` – test the prediction only,
        ``'both'`` or ``'none'`` – both must pass.

    Returns
    -------
    tuple of np.ndarray
        ``(true_hz_filtered, pred_hz_filtered, diff_filtered)`` where
        ``diff_filtered = true_hz_filtered - pred_hz_filtered``. All three
        are empty arrays when no frame passes the filter.

    Raises
    ------
    ValueError
        If ``filter_method`` is not one of the supported values.
    """
    true_arr = np.asarray(true_hz)
    # Use the ``pred_hz`` argument; the previous version zipped with the
    # notebook global ``predicted_hz`` and silently ignored what was passed in.
    pred_arr = np.asarray(pred_hz)

    # Same pairing as zip(): stop at the shorter input.
    n_frames = min(len(true_arr), len(pred_arr))
    true_arr = true_arr[:n_frames]
    pred_arr = pred_arr[:n_frames]

    true_in_range = (true_arr > filter_lower_bound) & (true_arr < filter_upper_bound)
    pred_in_range = (pred_arr > filter_lower_bound) & (pred_arr < filter_upper_bound)

    if filter_method == 'gt':
        keep = true_in_range
    elif filter_method == 'pred':
        keep = pred_in_range
    elif filter_method in ('both', 'none'):
        keep = true_in_range & pred_in_range
    else:
        raise ValueError(
            "filter_method must be one of 'gt', 'pred', 'both' or 'none', got %r" % (filter_method,))

    true_hz_filtered = true_arr[keep]
    pred_hz_filtered = pred_arr[keep]
    diff_filtered = true_hz_filtered - pred_hz_filtered
    return true_hz_filtered, pred_hz_filtered, diff_filtered

# Main

In [None]:
from rt_pie_lib import metrics
def main(true_hz, pred_vector, model, filter=0, filter_lower_bound=60, filter_upper_bound=400, filter_method='none'):
    """
    Re-derive Hz predictions from ``pred_vector`` and report on them.

    Steps: convert ``pred_vector`` with ``vector_to_hz``, optionally restrict
    the frames with ``filter_values`` (when ``filter == 1``), plot the error
    histogram, print ``metrics.get_hz_metrics`` and run the zero-pitch
    analysis that matches ``model`` (``'cnn'`` or ``'lstm'``; any other value
    skips that analysis).

    ``filter_lower_bound``, ``filter_upper_bound`` and ``filter_method`` are
    passed through to ``filter_values`` and only matter when ``filter == 1``.
    """
    pred_hz_new = vector_to_hz(pred_vector)
    diff_new = calc_new_diff(true_hz, pred_hz_new)

    # Pick the data set to evaluate: filtered frames or everything.
    if filter == 1:
        eval_true, eval_pred, eval_diff = filter_values(
            true_hz, pred_hz_new, filter_lower_bound, filter_upper_bound, filter_method)
    else:
        eval_true, eval_pred, eval_diff = true_hz, pred_hz_new, diff_new

    histogram(eval_diff)
    print('METRICS')
    metrics.get_hz_metrics(eval_true, eval_pred, print_output=True)
    print('\n')

    if model == 'cnn':
        zero_pitch_analysis_cnn_models(eval_true, eval_pred)
    elif model == 'lstm':
        zero_pitch_analysis_lstm(eval_true, eval_pred)

# RUN

In [None]:
# Evaluate the vectors from whichever "Load Data" cell was executed last
# (they all assign the same global names), treating the model as a CNN and
# with filtering enabled using main()'s default bounds and method.
main(true_hz=true_hz, pred_vector=pred_vector, model='cnn', filter=1)