# HW5. Linear Regression

In [16]:
import numpy as np

# Single seed for the notebook: it is the default random_state of
# SimpleLinearRegression and is also passed to train_test_split.
SEED = 17
# Seed NumPy's global random generator with the same value.
np.random.seed(SEED)

In [17]:
import pandas as pd
from sklearn import datasets

In [35]:
class SimpleLinearRegression:
    """
    One-feature linear regression y ~ w_1 * X + w_0 trained with batch
    gradient descent on the mean squared error.

    step         - learning rate: length of the move along the antigradient
    tol          - training stops once the gradient norm drops below this value
    max_iter     - upper bound on the number of gradient descent iterations
    verbose      - when True, print MSE and gradient norm on every iteration
    random_state - seed used to draw the initial weights
    """

    def __init__(self, step=0.01, tol=1e-4, max_iter=2000, verbose=False, random_state=SEED):
        self.max_iter = max_iter  # max iter count of gradient descent
        self.step = step  # step of descent in the direction of antigradient
        self.tol = tol  # we compare norm of gradient with that threshold
        self._w = None  # w_1 (slope); None until fit() is called
        self._intercept = None  # w_0 (bias); None until fit() is called
        self.random_state = random_state
        self.verbose = verbose

    def predict(self, X):
        """
        Estimate target variable "y" based on features X.

        X - feature values (anything supporting arithmetic with a scalar)

        Returns w_1 * X + w_0, with the same shape as X.
        Raises RuntimeError when called before fit().
        """
        if self._w is None or self._intercept is None:
            # Without this guard the failure is an opaque NoneType TypeError.
            raise RuntimeError('SimpleLinearRegression is not fitted yet; call fit() first')
        y_pred = self._w * X + self._intercept
        # Element-wise affine map, so the prediction must mirror X's shape.
        # np.shape (instead of .shape[0]) also accepts scalars and lists.
        assert np.shape(y_pred) == np.shape(X)
        return y_pred

    def score(self, X, y):
        """
        MSE of the model predictions.

        X - features
        y - true values of target variable
        """
        return np.mean((y - self.predict(X)) ** 2)

    def _gradient(self, X, y):
        """
        Compute the descent direction of MSE with respect to w_1, w_0.

        X - features
        y - true values of target variable

        Returns (grad_w, grad_intercept). The constant factor 2 of the exact
        MSE derivative is omitted, so the returned values are half of the true
        gradient; that factor is effectively absorbed by `step` and `tol`.
        """
        # The residual is shared by both partial derivatives: compute it once.
        residual = y - self._w * X - self._intercept
        grad_w = -np.mean(residual * X)
        grad_intercept = -np.mean(residual)
        return grad_w, grad_intercept

    def fit(self, X, y):
        """
        Train model with gradient descent.

        X - features
        y - true values of target variable

        Returns self. Prints 'model converged' when the gradient norm falls
        below `tol`, otherwise 'model did not converge' after `max_iter`
        iterations.
        """
        # For reproducible results, draw the initial weights from a private
        # generator seeded with random_state. This yields the same draws as
        # seeding the global generator, but leaves NumPy's global random
        # state untouched for the rest of the program.
        rng = np.random.RandomState(self.random_state)

        # initialize weights
        self._w, self._intercept = rng.randn(2)

        # perform gradient descent
        for iteration in range(self.max_iter):
            # compute gradient at current W
            grad_w, grad_intercept = self._gradient(X, y)

            # make step, update W
            self._w = self._w - self.step * grad_w
            self._intercept = self._intercept - self.step * grad_intercept

            # norm of the gradient that was just used for the step
            grad_norm = np.sqrt(grad_w ** 2 + grad_intercept ** 2)

            # people like to watch how the error is reducing during iterations
            # (the MSE is measured after the update)
            if self.verbose:
                current_mse = self.score(X, y)
                print('iteration %d, MSE = %f, ||grad|| = %f' % (iteration, current_mse, grad_norm))

            # compare gradient norm with threshold
            if grad_norm < self.tol:
                print('model converged')
                return self
        print('model did not converge')
        return self

In [37]:
# Load the Boston housing dataset through sklearn.datasets and list the
# fields of the returned object.
# NOTE(review): load_boston is reported as deprecated/removed in recent
# scikit-learn releases — confirm the installed version still provides it.
boston_data = datasets.load_boston()
boston_data.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR'])

In [38]:
# Build a DataFrame with one column per feature name, then add the
# regression target as a 'target' column.
df = pd.DataFrame(boston_data['data'], columns=boston_data['feature_names'])
df['target'] = boston_data['target']
# Preview the first rows.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [39]:
# Helper function that computes the mean squared error (MSE).
def mse_score(y_true, y_pred):
    """
    Mean of the squared differences between true and predicted targets.

    y_true - true values of target variable
    y_pred - predicted values of target variable
    """
    residuals = y_true - y_pred
    return np.mean(residuals ** 2)

In [40]:
from sklearn.model_selection import train_test_split

# Split the dataset 60:40 into train and test parts (shuffled, seeded with SEED).
df_train, df_test = train_test_split(df, test_size=0.4, random_state=SEED, shuffle=True)

In [46]:
# Train the model on df_train with verbose=True.
# Watch the debug output: the MSE should decrease with every iteration.
# The goal is to predict the value of target from the CRIM feature.
model = SimpleLinearRegression(verbose=True)
model.fit(X=df_train['CRIM'], y=df_train['target'])
mse_train_score = mse_score(df_train['target'], model.predict(df_train['CRIM']))
print('MSE on train:', mse_train_score)

iteration 0, MSE = 628.369508, ||grad|| = 43.878104
iteration 1, MSE = 617.535762, ||grad|| = 24.368769
iteration 2, MSE = 608.590002, ||grad|| = 21.309670
iteration 3, MSE = 599.959701, ||grad|| = 20.827030
iteration 4, MSE = 591.485412, ||grad|| = 20.627228
iteration 5, MSE = 583.149171, ||grad|| = 20.457445
iteration 6, MSE = 574.947211, ||grad|| = 20.291902
iteration 7, MSE = 566.877216, ||grad|| = 20.127986
iteration 8, MSE = 558.937048, ||grad|| = 19.965422
iteration 9, MSE = 551.124616, ||grad|| = 19.804175
iteration 10, MSE = 543.437867, ||grad|| = 19.644230
iteration 11, MSE = 535.874777, ||grad|| = 19.485577
iteration 12, MSE = 528.433358, ||grad|| = 19.328206
iteration 13, MSE = 521.111651, ||grad|| = 19.172105
iteration 14, MSE = 513.907732, ||grad|| = 19.017265
iteration 15, MSE = 506.819706, ||grad|| = 18.863676
iteration 16, MSE = 499.845707, ||grad|| = 18.711327
iteration 17, MSE = 492.983901, ||grad|| = 18.560208
iteration 18, MSE = 486.232485, ||grad|| = 18.410310
ite

iteration 234, MSE = 85.742843, ||grad|| = 3.194226
iteration 235, MSE = 85.542875, ||grad|| = 3.168428
iteration 236, MSE = 85.346124, ||grad|| = 3.142839
iteration 237, MSE = 85.152538, ||grad|| = 3.117457
iteration 238, MSE = 84.962067, ||grad|| = 3.092279
iteration 239, MSE = 84.774660, ||grad|| = 3.067305
iteration 240, MSE = 84.590267, ||grad|| = 3.042532
iteration 241, MSE = 84.408841, ||grad|| = 3.017960
iteration 242, MSE = 84.230334, ||grad|| = 2.993586
iteration 243, MSE = 84.054698, ||grad|| = 2.969409
iteration 244, MSE = 83.881888, ||grad|| = 2.945427
iteration 245, MSE = 83.711858, ||grad|| = 2.921639
iteration 246, MSE = 83.544563, ||grad|| = 2.898043
iteration 247, MSE = 83.379960, ||grad|| = 2.874637
iteration 248, MSE = 83.218004, ||grad|| = 2.851421
iteration 249, MSE = 83.058655, ||grad|| = 2.828392
iteration 250, MSE = 82.901868, ||grad|| = 2.805549
iteration 251, MSE = 82.747604, ||grad|| = 2.782890
iteration 252, MSE = 82.595822, ||grad|| = 2.760415
iteration 25

iteration 399, MSE = 74.168401, ||grad|| = 0.838067
iteration 400, MSE = 74.154636, ||grad|| = 0.831299
iteration 401, MSE = 74.141092, ||grad|| = 0.824585
iteration 402, MSE = 74.127766, ||grad|| = 0.817925
iteration 403, MSE = 74.114654, ||grad|| = 0.811319
iteration 404, MSE = 74.101754, ||grad|| = 0.804767
iteration 405, MSE = 74.089060, ||grad|| = 0.798267
iteration 406, MSE = 74.076572, ||grad|| = 0.791820
iteration 407, MSE = 74.064283, ||grad|| = 0.785425
iteration 408, MSE = 74.052193, ||grad|| = 0.779082
iteration 409, MSE = 74.040297, ||grad|| = 0.772790
iteration 410, MSE = 74.028593, ||grad|| = 0.766549
iteration 411, MSE = 74.017077, ||grad|| = 0.760358
iteration 412, MSE = 74.005746, ||grad|| = 0.754217
iteration 413, MSE = 73.994597, ||grad|| = 0.748126
iteration 414, MSE = 73.983628, ||grad|| = 0.742083
iteration 415, MSE = 73.972835, ||grad|| = 0.736090
iteration 416, MSE = 73.962216, ||grad|| = 0.730145
iteration 417, MSE = 73.951767, ||grad|| = 0.724248
iteration 41

iteration 622, MSE = 73.335735, ||grad|| = 0.137382
iteration 623, MSE = 73.335365, ||grad|| = 0.136273
iteration 624, MSE = 73.335001, ||grad|| = 0.135172
iteration 625, MSE = 73.334643, ||grad|| = 0.134081
iteration 626, MSE = 73.334291, ||grad|| = 0.132998
iteration 627, MSE = 73.333944, ||grad|| = 0.131924
iteration 628, MSE = 73.333603, ||grad|| = 0.130858
iteration 629, MSE = 73.333267, ||grad|| = 0.129801
iteration 630, MSE = 73.332937, ||grad|| = 0.128753
iteration 631, MSE = 73.332612, ||grad|| = 0.127713
iteration 632, MSE = 73.332293, ||grad|| = 0.126682
iteration 633, MSE = 73.331978, ||grad|| = 0.125659
iteration 634, MSE = 73.331669, ||grad|| = 0.124644
iteration 635, MSE = 73.331364, ||grad|| = 0.123637
iteration 636, MSE = 73.331065, ||grad|| = 0.122638
iteration 637, MSE = 73.330770, ||grad|| = 0.121648
iteration 638, MSE = 73.330480, ||grad|| = 0.120666
iteration 639, MSE = 73.330194, ||grad|| = 0.119691
iteration 640, MSE = 73.329914, ||grad|| = 0.118724
iteration 64

iteration 822, MSE = 73.313639, ||grad|| = 0.027138
iteration 823, MSE = 73.313624, ||grad|| = 0.026919
iteration 824, MSE = 73.313610, ||grad|| = 0.026702
iteration 825, MSE = 73.313596, ||grad|| = 0.026486
iteration 826, MSE = 73.313582, ||grad|| = 0.026272
iteration 827, MSE = 73.313569, ||grad|| = 0.026060
iteration 828, MSE = 73.313556, ||grad|| = 0.025850
iteration 829, MSE = 73.313543, ||grad|| = 0.025641
iteration 830, MSE = 73.313530, ||grad|| = 0.025434
iteration 831, MSE = 73.313517, ||grad|| = 0.025228
iteration 832, MSE = 73.313504, ||grad|| = 0.025025
iteration 833, MSE = 73.313492, ||grad|| = 0.024822
iteration 834, MSE = 73.313480, ||grad|| = 0.024622
iteration 835, MSE = 73.313468, ||grad|| = 0.024423
iteration 836, MSE = 73.313457, ||grad|| = 0.024226
iteration 837, MSE = 73.313445, ||grad|| = 0.024030
iteration 838, MSE = 73.313434, ||grad|| = 0.023836
iteration 839, MSE = 73.313423, ||grad|| = 0.023644
iteration 840, MSE = 73.313412, ||grad|| = 0.023453
iteration 84

iteration 983, MSE = 73.312807, ||grad|| = 0.007355
iteration 984, MSE = 73.312806, ||grad|| = 0.007296
iteration 985, MSE = 73.312805, ||grad|| = 0.007237
iteration 986, MSE = 73.312804, ||grad|| = 0.007178
iteration 987, MSE = 73.312803, ||grad|| = 0.007120
iteration 988, MSE = 73.312802, ||grad|| = 0.007063
iteration 989, MSE = 73.312801, ||grad|| = 0.007006
iteration 990, MSE = 73.312800, ||grad|| = 0.006949
iteration 991, MSE = 73.312799, ||grad|| = 0.006893
iteration 992, MSE = 73.312799, ||grad|| = 0.006837
iteration 993, MSE = 73.312798, ||grad|| = 0.006782
iteration 994, MSE = 73.312797, ||grad|| = 0.006727
iteration 995, MSE = 73.312796, ||grad|| = 0.006673
iteration 996, MSE = 73.312795, ||grad|| = 0.006619
iteration 997, MSE = 73.312794, ||grad|| = 0.006566
iteration 998, MSE = 73.312793, ||grad|| = 0.006513
iteration 999, MSE = 73.312792, ||grad|| = 0.006460
iteration 1000, MSE = 73.312792, ||grad|| = 0.006408
iteration 1001, MSE = 73.312791, ||grad|| = 0.006356
iteration 

iteration 1173, MSE = 73.312745, ||grad|| = 0.001576
iteration 1174, MSE = 73.312745, ||grad|| = 0.001563
iteration 1175, MSE = 73.312744, ||grad|| = 0.001550
iteration 1176, MSE = 73.312744, ||grad|| = 0.001538
iteration 1177, MSE = 73.312744, ||grad|| = 0.001525
iteration 1178, MSE = 73.312744, ||grad|| = 0.001513
iteration 1179, MSE = 73.312744, ||grad|| = 0.001501
iteration 1180, MSE = 73.312744, ||grad|| = 0.001489
iteration 1181, MSE = 73.312744, ||grad|| = 0.001477
iteration 1182, MSE = 73.312744, ||grad|| = 0.001465
iteration 1183, MSE = 73.312744, ||grad|| = 0.001453
iteration 1184, MSE = 73.312744, ||grad|| = 0.001441
iteration 1185, MSE = 73.312744, ||grad|| = 0.001430
iteration 1186, MSE = 73.312744, ||grad|| = 0.001418
iteration 1187, MSE = 73.312744, ||grad|| = 0.001407
iteration 1188, MSE = 73.312744, ||grad|| = 0.001395
iteration 1189, MSE = 73.312744, ||grad|| = 0.001384
iteration 1190, MSE = 73.312744, ||grad|| = 0.001373
iteration 1191, MSE = 73.312744, ||grad|| = 0.

iteration 1335, MSE = 73.312742, ||grad|| = 0.000424
iteration 1336, MSE = 73.312742, ||grad|| = 0.000420
iteration 1337, MSE = 73.312742, ||grad|| = 0.000417
iteration 1338, MSE = 73.312742, ||grad|| = 0.000413
iteration 1339, MSE = 73.312742, ||grad|| = 0.000410
iteration 1340, MSE = 73.312742, ||grad|| = 0.000407
iteration 1341, MSE = 73.312742, ||grad|| = 0.000403
iteration 1342, MSE = 73.312742, ||grad|| = 0.000400
iteration 1343, MSE = 73.312742, ||grad|| = 0.000397
iteration 1344, MSE = 73.312742, ||grad|| = 0.000394
iteration 1345, MSE = 73.312742, ||grad|| = 0.000391
iteration 1346, MSE = 73.312742, ||grad|| = 0.000387
iteration 1347, MSE = 73.312742, ||grad|| = 0.000384
iteration 1348, MSE = 73.312742, ||grad|| = 0.000381
iteration 1349, MSE = 73.312742, ||grad|| = 0.000378
iteration 1350, MSE = 73.312742, ||grad|| = 0.000375
iteration 1351, MSE = 73.312742, ||grad|| = 0.000372
iteration 1352, MSE = 73.312742, ||grad|| = 0.000369
iteration 1353, MSE = 73.312742, ||grad|| = 0.

iteration 1495, MSE = 73.312742, ||grad|| = 0.000116
iteration 1496, MSE = 73.312742, ||grad|| = 0.000115
iteration 1497, MSE = 73.312742, ||grad|| = 0.000114
iteration 1498, MSE = 73.312742, ||grad|| = 0.000113
iteration 1499, MSE = 73.312742, ||grad|| = 0.000112
iteration 1500, MSE = 73.312742, ||grad|| = 0.000111
iteration 1501, MSE = 73.312742, ||grad|| = 0.000110
iteration 1502, MSE = 73.312742, ||grad|| = 0.000109
iteration 1503, MSE = 73.312742, ||grad|| = 0.000108
iteration 1504, MSE = 73.312742, ||grad|| = 0.000108
iteration 1505, MSE = 73.312742, ||grad|| = 0.000107
iteration 1506, MSE = 73.312742, ||grad|| = 0.000106
iteration 1507, MSE = 73.312742, ||grad|| = 0.000105
iteration 1508, MSE = 73.312742, ||grad|| = 0.000104
iteration 1509, MSE = 73.312742, ||grad|| = 0.000103
iteration 1510, MSE = 73.312742, ||grad|| = 0.000102
iteration 1511, MSE = 73.312742, ||grad|| = 0.000102
iteration 1512, MSE = 73.312742, ||grad|| = 0.000101
iteration 1513, MSE = 73.312742, ||grad|| = 0.

### MSE on train: 73.31274157452383

In [44]:
# Evaluate the quality of the trained model on df_test.
mse_test_score = mse_score(df_test['target'], model.predict(df_test['CRIM']))
print('MSE on test:', mse_test_score)

MSE on test: 74.28787061058355


### MSE on test: 74.28787061058355