In [1]:
# One observation per decade, 1950 through 2010 inclusive.
years = list(range(1950, 2011, 10))

# One value per entry in `years`, in the same order.
# NOTE(review): presumably GDP figures; units are not stated here — confirm.
gdp = [
    300.2,
    543.3,
    1075.9,
    2862.5,
    5979.6,
    10289.7,
    14958.3,
]

In [2]:
def dot(v, w):
    """Return the dot product v_1 * w_1 + ... + v_n * w_n.

    Both arguments must support len() and have the same length;
    otherwise an AssertionError is raised.
    """
    assert len(v) == len(w), "vectors must be same length"

    # Multiply matching components lazily, then let sum() add them up.
    products = (left * right for left, right in zip(v, w))
    return sum(products)

In [3]:
def sum_of_squares(v):
    """Return v_1 * v_1 + ... + v_n * v_n.

    Computed as the dot product of v with itself.
    """
    squared_total = dot(v, v)
    return squared_total

In [4]:
def mean(x):
    '''Return the arithmetic mean of x: the sum of its elements over their count.

    An empty x raises ZeroDivisionError, because the count is zero.
    '''
    total = sum(x)
    count = len(x)
    return total / count

In [5]:
def de_mean(xs):
    '''Return a new list holding each element of xs minus the mean of xs.

    The shifted values therefore have mean 0; xs itself is not modified.
    '''
    center = mean(xs)

    shifted = []
    for value in xs:
        shifted.append(value - center)
    return shifted

## Variance

In [6]:
def variance(xs):
    '''Return the sample variance of xs.

    This is the sum of squared deviations from the mean divided by
    n - 1 (not n), which is why at least two elements are required.
    '''
    count = len(xs)
    assert count >= 2, 'variance requires at least two elements'

    squared_total = sum_of_squares(de_mean(xs))
    return squared_total / (count - 1)

## Standard Deviation

In [7]:
import math

def standard_deviation(xs):
    '''Return the square root of variance(xs).

    Inherits the requirement of variance() that xs has at least two elements.
    '''
    spread_squared = variance(xs)
    return math.sqrt(spread_squared)

## Covariance

In [8]:
def covariance(xs, ys):
    '''Return the sample covariance of xs and ys.

    Computed as the dot product of the de-meaned xs and de-meaned ys,
    divided by n - 1 where n is the common length.

    xs and ys must have the same length and contain at least two
    elements; an AssertionError is raised otherwise.
    '''
    assert len(xs) == len(ys), 'xs and ys must have same number of elements'
    # The n - 1 divisor is zero for a single point (and mean() fails on empty
    # input), so reject short inputs up front with a clear message, the same
    # way variance() does.
    assert len(xs) >= 2, 'covariance requires at least two elements'

    return dot(de_mean(xs), de_mean(ys)) / (len(xs) - 1)


In [9]:
# Sample covariance between the `years` and `gdp` lists defined in the first cell.
covariance(years, gdp)

113951.33333333333

## Correlation

In [10]:
def correlation(xs, ys):
    '''Return the correlation of xs and ys.

    This is their covariance divided by both standard deviations.
    If either standard deviation is not positive, 0 is returned.
    '''
    stdev_x = standard_deviation(xs)
    stdev_y = standard_deviation(ys)

    both_vary = stdev_x > 0 and stdev_y > 0
    if not both_vary:
        return 0    # if no variation, correlation is zero

    return covariance(xs, ys) / stdev_x / stdev_y

In [11]:
# Correlation between the `years` and `gdp` lists defined in the first cell.
correlation(years, gdp)

0.9376732134541651