In [1]:
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Load the svmlight/libsvm-format file "data"; the feature matrix is forced
# to 14 columns via n_features.
X, y = load_svmlight_file("data", n_features=14)
X = X.toarray()  # work with a dense ndarray from here on
# n_features is taken BEFORE the bias column is appended, so it counts only
# the columns that came from the file.
n_samples, n_features = X.shape
# Append a constant column of ones so the last weight acts as the intercept.
X = np.column_stack((X, np.ones((n_samples, 1))))
y = y.reshape((-1, 1))  # column vector, shape (n_samples, 1)

# Fix the shuffling seed so the train/validation split (and therefore every
# loss reported further down) is reproducible across kernel restarts.
random_seed = 42
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.25, random_state=random_seed
)

In [4]:
# Hyper-parameters of the training run.
max_epoch = 200        # number of full-batch gradient steps
learning_rate = 4e-4   # step size applied to the gradient
penalty_factor = 0.5   # coefficient of the L2 regularization term

In [5]:
# Weight column vector, initialised to zeros: one row per input feature plus
# one extra row for the appended column of ones (the intercept).
w = np.zeros(shape=(n_features + 1, 1))

# Per-epoch loss histories, filled in by the training loop.
losses_train, losses_val = [], []

In [6]:
# Full-batch gradient descent on the L2-regularized squared error.
for epoch in range(max_epoch):
    # Residuals of the current model on the training split.
    residual = X_train.dot(w) - y_train
    # Gradient of 0.5 * penalty_factor * ||w||^2 + 0.5 * ||X_train w - y_train||^2.
    # The penalty covers every row of w, including the intercept row.
    gradient = X_train.T.dot(residual) + penalty_factor * w
    w -= learning_rate * gradient  # step against the gradient

    # The tracked metric is the mean absolute error (not the squared-error
    # objective being minimized), evaluated with the freshly updated weights.
    loss_train = np.mean(np.abs(X_train.dot(w) - y_train))
    losses_train.append(loss_train)

    loss_val = np.mean(np.abs(X_val.dot(w) - y_val))
    losses_val.append(loss_val)

In [7]:
# Report the losses measured after the last training epoch.
for label, value in (('loss_train:', loss_train), ('loss_val', loss_val)):
    print(label, value)

loss_train: 3.3148028495367776
loss_val 3.303657905733998


In [8]:
# Plot the per-epoch mean absolute error on the training and validation
# splits. Uses the explicit Figure/Axes interface instead of pyplot's
# implicit "current figure" state.
fig, ax = plt.subplots(figsize=(18, 6))
ax.plot(losses_train, "-", color="r", label="train loss")
ax.plot(losses_val, "-", color="b", label="validation loss")
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.legend()
# Title typo fixed: "varing" -> "varying".
ax.set_title("The graph of absolute diff value varying with the number of iterations")
plt.show()