In [21]:
import numpy as np

class LinearRegression(object):
    """Ordinary least-squares linear regression built on NumPy.

    The fitted model is ``y_hat = X.dot(coef) + intercept``. Parameters are
    obtained by solving the least-squares problem on the design matrix, i.e.
    ``X`` with a leading column of ones when ``fit_intercept`` is true.

    Parameters
    ----------
    fit_intercept : bool, default True
        Whether to estimate an intercept (bias) term.
    copy_X : bool, default True
        If true, ``fit`` always works on a private copy of ``X``. If false,
        ``X`` is only copied when a conversion to a float array requires it.
        ``X`` is never modified in place either way.
    """

    def __init__(self, fit_intercept=True, copy_X=True):
        self.fit_intercept = fit_intercept  # whether an intercept term is estimated
        self.copy_X = copy_X  # whether fit() works on a copy of the given X

        self._coef = None       # per-feature weights, set by fit()
        self._intercept = None  # bias term, set by fit() when fit_intercept is true
        self._new_X = None      # design matrix used by the most recent fit()

    @staticmethod
    def _to_feature_matrix(X, copy):
        """Return ``X`` as a 2-D float array, forcing a copy when ``copy`` is true."""
        matrix = np.array(X, dtype=float) if copy else np.asarray(X, dtype=float)
        if matrix.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features); got %d dimension(s)"
                % matrix.ndim
            )
        return matrix

    def fit(self, X, y):
        """Estimate the coefficients (and intercept) from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like with n_samples elements
            Target values; any array-like is accepted and flattened to 1-D.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or its row count does
            not match the number of targets.
        """
        features = self._to_feature_matrix(X, self.copy_X)
        # np.asarray lets lists and pandas Series through; a bare y.reshape would not.
        target = np.asarray(y, dtype=float).reshape(-1)

        if len(features) == 0:
            raise ValueError("Cannot fit a model on zero samples")
        if len(features) != len(target):
            raise ValueError(
                "X and y have inconsistent sample counts: %d vs %d"
                % (len(features), len(target))
            )

        if self.fit_intercept:
            # A leading column of ones makes the first weight the intercept.
            ones_column = np.ones((len(features), 1))
            design = np.concatenate((ones_column, features), axis=1)
        else:
            design = features
        self._new_X = design

        # lstsq solves the same problem as inv(X^T X) X^T y, but stays stable and
        # well-defined (minimum-norm solution) when columns are collinear.
        weights = np.linalg.lstsq(design, target, rcond=None)[0]

        if self.fit_intercept:
            self._intercept = weights[0]
            self._coef = weights[1:]
        else:
            # Reset explicitly so a previous fit with an intercept cannot leak through.
            self._intercept = None
            self._coef = weights

    def predict(self, X):
        """Predict targets for ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)

        Raises
        ------
        ValueError
            If the model has not been fitted, ``X`` is not 2-D, or its number
            of columns differs from the number of fitted coefficients.
        """
        if self._coef is None:
            raise ValueError(
                "This LinearRegression instance is not fitted yet; call fit() first"
            )

        features = self._to_feature_matrix(X, copy=False)
        if features.shape[1] != len(self._coef):
            raise ValueError(
                "X has %d feature(s) but the model was fitted with %d"
                % (features.shape[1], len(self._coef))
            )

        predictions = features.dot(self._coef)
        if self.fit_intercept:
            predictions = predictions + self._intercept
        return predictions

    @property
    def coef(self):
        """Fitted per-feature weights, or ``None`` before ``fit`` is called."""
        return self._coef

    @property
    def intercept(self):
        """Fitted intercept; ``None`` before ``fit`` or when ``fit_intercept`` is false."""
        return self._intercept

In [22]:
import pandas as pd
import numpy as np

## Load Dataset - single variable

In [23]:
# Read the single-feature dataset and preview its first rows.
# NOTE(review): the path is absolute and tied to one Windows user account, so this
# cell fails on any other machine — consider a path relative to the notebook.
df = pd.read_csv("C:/Users/qual9/test.csv")
df.head()

Unnamed: 0,x,y
0,77,79.775152
1,21,23.177279
2,22,25.609262
3,20,17.857388
4,36,41.849864


In [24]:
# Selecting with a list of column names keeps the data 2-D, giving the
# (n_samples, 1) feature matrix directly.
X = df[["x"]].values
X

array([[ 77],
       [ 21],
       [ 22],
       [ 20],
       [ 36],
       [ 15],
       [ 62],
       [ 95],
       [ 20],
       [  5],
       [  4],
       [ 19],
       [ 96],
       [ 62],
       [ 36],
       [ 15],
       [ 65],
       [ 14],
       [ 87],
       [ 69],
       [ 89],
       [ 51],
       [ 89],
       [ 27],
       [ 97],
       [ 58],
       [ 79],
       [ 21],
       [ 93],
       [ 27],
       [ 99],
       [ 31],
       [ 33],
       [ 80],
       [ 28],
       [ 47],
       [ 53],
       [ 69],
       [ 28],
       [ 33],
       [ 91],
       [ 71],
       [ 50],
       [ 76],
       [  4],
       [ 37],
       [ 70],
       [ 68],
       [ 40],
       [ 35],
       [ 94],
       [ 88],
       [ 52],
       [ 31],
       [ 59],
       [  0],
       [ 39],
       [ 64],
       [ 69],
       [ 57],
       [ 13],
       [ 72],
       [ 76],
       [ 61],
       [ 82],
       [ 18],
       [ 41],
       [ 50],
       [ 55],
       [ 13],
       [ 46],
      

In [25]:
# Target vector: the "y" column as a 1-D NumPy array.
y = df["y"].values
y

array([ 79.77515201,  23.17727887,  25.60926156,  17.85738813,
        41.84986439,   9.80523488,  58.87465933,  97.61793701,
        18.39512747,   8.74674765,   2.81141583,  17.09537241,
        95.14907176,  61.38800663,  40.24701716,  14.82248589,
        66.95806869,  16.63507984,  90.65513736,  77.22982636,
        92.11906278,  46.91387709,  89.82634442,  21.71380347,
        97.41206981,  57.01631363,  78.31056542,  19.1315097 ,
        93.03483388,  26.59112396,  97.55155344,  31.43524822,
        35.12724777,  78.61042432,  33.07112825,  51.69967172,
        53.62235225,  69.46306072,  27.42497237,  36.34644189,
        95.06140858,  68.16724757,  50.96155532,  78.04237454,
         5.60766487,  36.11334779,  67.2352155 ,  65.01324035,
        38.14753871,  34.31141446,  95.28503937,  87.84749912,
        54.08170635,  31.93063515,  59.61247085,  -1.04011421,
        47.49374765,  62.60089773,  70.9146434 ,  56.14834113,
        14.05572877,  68.11367147,  75.59701346,  59.22

## Build Model

In [26]:
# Instantiate the NumPy-based LinearRegression class defined in this notebook,
# with an intercept term enabled.
lr = LinearRegression(fit_intercept=True)

In [27]:
# Estimate the intercept and slope from the single-feature data.
lr.fit(X,y)

In [28]:
lr.intercept

-0.46181077366111367

In [29]:
lr.coef

array([1.01433536])

In [30]:
lr.predict(X)[:10]

array([77.64201157, 20.83923168, 21.85356704, 19.82489633, 36.05426201,
       14.75321955, 62.42698124, 95.90004796, 19.82489633,  4.609866  ])

## Validation

In [32]:
from sklearn import linear_model

# Reference model used to validate the custom implementation.
# `normalize` is intentionally not passed: it was deprecated in scikit-learn 1.0
# and removed in 1.2 (passing it now raises TypeError). Its old default was False,
# so omitting it keeps the same behaviour on older releases.
sk_lr = linear_model.LinearRegression()
sk_lr.fit(X,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [33]:
sk_lr.intercept_

-0.4618107736611776

In [34]:
import numpy.testing as npt

# Check both fitted parameters against scikit-learn programmatically instead of
# comparing the coefficients by eye.
npt.assert_almost_equal(sk_lr.intercept_, lr.intercept)
npt.assert_almost_equal(sk_lr.coef_, lr.coef)

In [35]:
sk_lr.coef_

array([1.01433536])

## Load Dataset - multiple variables

In [38]:
# Read the multi-feature dataset and preview its first rows. This rebinds `df`,
# replacing the single-feature frame loaded earlier.
# NOTE(review): the path is absolute and tied to one Windows user account, so this
# cell fails on any other machine — consider a path relative to the notebook.
df = pd.read_csv("C:/Users/qual9/mlr09.csv")
df.head()

Unnamed: 0,height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored
0,6.8,225,0.442,0.672,9.2
1,6.3,180,0.435,0.797,11.7
2,6.4,190,0.456,0.761,15.8
3,6.2,180,0.416,0.651,8.6
4,6.9,205,0.449,0.9,23.2


In [40]:
# Target vector: the "average_points_scored" column as a 1-D NumPy array.
y = df["average_points_scored"].values
y

array([ 9.2, 11.7, 15.8,  8.6, 23.2, 27.4,  9.3, 16. ,  4.7, 12.5, 20.1,
        9.1,  8.1,  8.6, 20.3, 25. , 19.2,  3.3, 11.2, 10.5, 10.1,  7.2,
       13.6,  9. , 24.6, 12.6,  5.6,  8.7,  7.7, 24.1, 11.7,  7.7,  9.6,
        7.2, 12.3,  8.9, 13.6, 11.2,  2.8,  3.2,  9.4, 11.9, 15.4,  7.4,
       18.9,  7.9, 12.2, 11. ,  2.8, 11.8, 17.1, 11.6,  5.8,  8.3])

In [42]:
df.iloc[:,:-1].head()

Unnamed: 0,height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws
0,6.8,225,0.442,0.672
1,6.3,180,0.435,0.797
2,6.4,190,0.456,0.761
3,6.2,180,0.416,0.651
4,6.9,205,0.449,0.9


In [43]:
# Feature matrix: every column except the last one, which holds the target.
X = df.iloc[:,:-1].values

## Rescale features (standardization)

In [45]:
# Standardize each feature column: subtract its mean and divide by its
# standard deviation (NumPy's default ddof=0).
mu_X, std_X = X.mean(axis=0), X.std(axis=0)

rescaled_X = (X - mu_X) / std_X

In [46]:
rescaled_X[:5]

array([[ 0.46843663,  0.50336336, -0.12692668, -0.70404955],
       [-0.63137111, -0.99746237, -0.25187012,  0.55584824],
       [-0.41140956, -0.66394554,  0.12296022,  0.19299768],
       [-0.85133266, -0.99746237, -0.59100234, -0.91571238],
       [ 0.68839818, -0.1636703 , -0.00198323,  1.59400403]])

## Validation

In [48]:
# Refit the same `lr` instance on the standardized multi-feature data; this
# replaces the parameters learned from the single-feature dataset.
lr.fit(rescaled_X,y)

In [49]:
lr.coef

array([-1.67779283,  0.28359762,  2.68586629,  1.12816882])

In [50]:
lr.intercept

11.790740740740738

In [51]:
# Refit the scikit-learn reference model on the same standardized data so its
# coef_ / intercept_ can be compared with the custom implementation.
sk_lr.fit(rescaled_X, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [52]:

sk_lr.coef_

array([-1.67779283,  0.28359762,  2.68586629,  1.12816882])

In [54]:
sk_lr.intercept_

11.790740740740736