Mean vs. median
---

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import os

Define a set of values
---

In [None]:
# Small sample whose last entry (25) lies far from the rest and acts as the outlier
x_values = [1, 2, 3, 5, 6, 25]

# Two candidate "summaries" of the sample: its mean and its median
mean = np.mean(x_values)
median = np.median(x_values)

In [None]:
# Show the sample on a single horizontal line and mark where the mean falls
fig, ax = plt.subplots()
baseline = np.zeros_like(x_values)  # every point is drawn at y = 0
ax.scatter(x_values, baseline, label='denote values')
ax.scatter(mean, 0, marker='*', s=160, label='is the mean')
ax.set_title('Mean value: {}'.format(mean))
ax.set_yticks([])  # the y axis carries no information here
ax.set_xlabel('values')
ax.set_xlim(0, 30)
ax.legend()
plt.show()

In [None]:
# Show the sample on a single horizontal line and mark where the median falls
fig, ax = plt.subplots()
baseline = np.zeros_like(x_values)  # every point is drawn at y = 0
ax.scatter(x_values, baseline, label='denote values')
ax.scatter(median, 0, marker='*', s=160, label='is the median')
ax.set_title('Median value: {}'.format(median))
ax.set_yticks([])  # the y axis carries no information here
ax.set_xlabel('values')
ax.set_xlim(0, 30)
ax.legend()
plt.show()

Mean and median as optimal solutions
---

In [None]:
def MSE(y, y_pred):
    """Return the mean squared error between `y` and `y_pred`.

    Both arguments are passed through ``np.asarray`` before subtracting, so
    plain Python lists and plain Python scalars are accepted in addition to
    numpy arrays/scalars; the subtraction follows numpy broadcasting rules.
    """
    # Without the conversion, `list - float` or `list - list` raises TypeError.
    residuals = np.asarray(y) - np.asarray(y_pred)
    return np.mean(np.square(residuals))

def MAE(y, y_pred):
    """Return the mean absolute error between `y` and `y_pred`.

    Both arguments are passed through ``np.asarray`` before subtracting, so
    plain Python lists and plain Python scalars are accepted in addition to
    numpy arrays/scalars; the subtraction follows numpy broadcasting rules.
    """
    # Without the conversion, `list - float` or `list - list` raises TypeError.
    residuals = np.asarray(y) - np.asarray(y_pred)
    return np.mean(np.abs(residuals))

In [None]:
# 50 evenly spaced candidate summaries between 0 and 25 (both ends included)
x_summaries = np.linspace(0, 25, num=50)

# Score every candidate against the sample with each metric
mse_values = [MSE(x_values, candidate) for candidate in x_summaries]
mae_values = [MAE(x_values, candidate) for candidate in x_summaries]

In [None]:
# Draw the MSE of each candidate summary and mark the mean on the curve
fig, ax = plt.subplots()
ax.plot(x_summaries, mse_values, zorder=1)
mean_label = 'MSE of the mean value (x={})'.format(mean)
# zorder=2 keeps the marker on top of the curve
ax.scatter(mean, MSE(x_values, mean), color='C3', label=mean_label, zorder=2)
ax.set_xlabel('x values')
ax.set_ylabel('MSE metric')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Plot the MAE of each candidate summary and mark the median on the curve
plt.plot(x_summaries, mae_values, zorder=1)
# zorder=2 keeps the marker on top of the curve
plt.scatter(median, MAE(x_values, median), color='C3', label='MAE of the median value (x={})'.format(median), zorder=2)
plt.xlabel('x values')
# This curve is the mean absolute error, so the axis must say MAE (was mislabelled 'MSE metric')
plt.ylabel('MAE metric')
plt.legend(loc='upper left')
plt.show()