In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
teams = pd.read_csv("teams.csv")

In [3]:
train, test = train_test_split(teams, test_size=0.2, random_state=1)

In [7]:
predictors = ["athletes", "events"]
target = "medals"

In [8]:
X = train[predictors].copy()
y = train[[target]].copy()

In [10]:
X

Unnamed: 0,athletes,events
1794,2,2
104,224,105
693,2,2
1260,35,34
1489,5,5
...,...,...
960,29,28
905,81,43
1096,17,13
235,13,7


In [11]:
y

Unnamed: 0,medals
1794,0
104,51
693,0
1260,2
1489,0
...,...
960,1
905,1
1096,0
235,0


In [13]:
x_mean = X.mean()
x_std = X.std()

In [14]:
x = (X - x_mean) / x_std

In [15]:
X["intercept"] = 1

In [17]:
X = X[["intercept"] + predictors]

In [18]:
X.describe()

Unnamed: 0,intercept,athletes,events
count,1715.0,1715.0,1715.0
mean,1.0,73.201749,35.506706
std,0.0,128.291352,49.613104
min,1.0,1.0,1.0
25%,1.0,7.0,6.0
50%,1.0,19.0,13.0
75%,1.0,69.0,44.0
max,1.0,839.0,270.0


In [20]:
X.T

Unnamed: 0,1794,104,693,1260,1489,712,950,1733,1555,1691,...,1278,1300,1202,129,144,960,905,1096,235,1061
intercept,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
athletes,2,224,2,35,5,353,20,15,21,33,...,155,5,5,90,36,29,81,17,13,55
events,2,105,2,34,5,148,5,12,21,27,...,87,5,4,69,17,28,43,13,7,29


In [21]:
alpha = 2 
I = np.identity(X.shape[1])
penalty = alpha * I 

In [22]:
penalty

array([[2., 0., 0.],
       [0., 2., 0.],
       [0., 0., 2.]])

In [23]:
penalty[0][0] = 0

In [24]:
penalty

array([[0., 0., 0.],
       [0., 2., 0.],
       [0., 0., 2.]])

In [25]:
B = np.linalg.inv(X.T @ X + penalty) @ X.T @ y

In [26]:
B

Unnamed: 0,medals
0,0.357137
1,0.50571
2,-0.749151


In [28]:
B.index = ["intercept", "athletes", "events"]

In [30]:
test_X = test[predictors]
test_X = (test_X - x_mean) / x_std
test_X["intercept"] = 1
test_X = test_X[["intercept"] + predictors]

predictions = test_X @ B 

In [31]:
test_X

Unnamed: 0,intercept,athletes,events
808,1,-0.484848,-0.493956
2000,1,-0.531616,-0.614892
1114,1,-0.422490,-0.393176
2036,1,-0.453669,-0.433488
1217,1,-0.461463,-0.514112
...,...,...,...
1535,1,-0.461463,-0.655204
82,1,-0.274389,0.009943
1468,1,-0.547206,-0.655204
1944,1,-0.165262,0.090567


In [33]:
predictions

Unnamed: 0,medals
808,0.481993
2000,0.548940
1114,0.438028
2036,0.452461
1217,0.508918
...,...
1535,0.614617
82,0.210927
1468,0.571256
1944,0.205714


In [34]:
def ridge_fit(train, predictors, target, alpha):
    X = train[predictors].copy()
    y = train[[target]].copy()
    
    x_mean = X.mean()
    x_std  = X.std()
    
    X = (X - x_mean) / x_std 
    X["intercept"] = 1
    X = X[["intercept"] + predictors]
    
    penalty = alpha * np.identity(X.shape[1])
    penalty[0][0] = 0 
    
    B = np.linalg.inv(X.T @ X  + penalty) @ X.T @ y
    B.index = ["intercept", "athletes", "events"]
    return B, x_mean, x_std

In [35]:
B, x_mean, x_std = ridge_fit(train, predictors, target, alpha)


In [37]:
def ridge_predict(test, predictors, x_mean, x_std, B):
    test_X = test[predictors]
    test_X = (test_X - x_mean) / x_std
    test_X["intercept"] = 1
    test_X = test_X[["intercept"] + predictors]
    
    predictions = test_X @ B 
    return predictions


In [38]:
predictions = ridge_predict(test, predictors, x_mean, x_std, B)

In [39]:
from sklearn.linear_model import Ridge

ridge = Ridge(alpha=alpha)

In [40]:
ridge.fit(X[predictors], y)

In [42]:
ridge.coef_

array([[ 0.50570976, -0.74915096]])

In [43]:
ridge.intercept_

array([0.35713693])

In [44]:
sklearn_predictions = ridge.predict(test_X[predictors])

In [45]:
predictions - sklearn_predictions

Unnamed: 0,medals
808,-2.816880
2000,-1.602368
1114,-2.380789
2036,-2.941408
1217,-0.683550
...,...
1535,4.112476
82,-6.911890
1468,-1.197531
1944,-2.894285


In [48]:
from sklearn.metrics import mean_absolute_error

errors = []
alphas = [10**i for i in range(-2,4)]

for alpha in alphas:
    B, x_mean, x_std = ridge_fit(train, predictors, target, alpha)
    predictions = ridge_predict(test, predictors, x_mean, x_std, B)
    
    errors.append(mean_absolute_error(test[target], predictions))

In [49]:
errors

[6.271670503184052,
 6.268007576619091,
 6.23318596116777,
 6.063392242218684,
 7.247025659769505,
 6.899447907723189]

In [50]:
alphas

[0.01, 0.1, 1, 10, 100, 1000]