In [161]:
import numpy as np


class LinearRegression(object):
    """Ordinary least-squares linear regression implemented with NumPy.

    Parameters
    ----------
    fit_intercept : bool, default True
        When True, a column of ones is prepended to the design matrix so a
        bias term is estimated together with the feature weights.
    copy_X : bool, default True
        When True, ``fit`` works on a copy of ``X``; when False, ``X`` is only
        converted with ``np.asarray`` (no copy if it is already an ndarray).

    The fitted parameters are exposed through the read-only properties
    ``coef`` and ``intercept``; both are ``None`` until ``fit`` is called
    (``intercept`` stays ``None`` when ``fit_intercept`` is False).
    """

    def __init__(self, fit_intercept=True, copy_X=True):
        self.fit_intercept = fit_intercept
        self.copy_X = copy_X

        self._coef = None       # feature weights, set by fit()
        self._intercept = None  # bias term, set by fit() when fit_intercept is True
        self._new_X = None      # design matrix used by the most recent fit()

    def fit(self, X, y):
        """Estimate the weights that minimise the squared error on (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples elements (list, ndarray, Series, ...)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or its number of rows
            differs from the number of targets.
        """
        design = np.array(X) if self.copy_X else np.asarray(X)
        if design.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features); got %d dimension(s)"
                % design.ndim)

        # np.asarray lets y be any array-like, not only an ndarray.
        target = np.asarray(y).reshape(-1, 1)

        n_samples = design.shape[0]
        if n_samples == 0:
            raise ValueError("cannot fit a model on zero samples")
        if target.shape[0] != n_samples:
            raise ValueError(
                "X has %d samples but y has %d" % (n_samples, target.shape[0]))

        if self.fit_intercept:
            ones_column = np.ones((n_samples, 1))
            design = np.concatenate((ones_column, design), axis=1)
        self._new_X = design

        # lstsq solves the least-squares problem without forming or inverting
        # X^T X, so it stays accurate on ill-conditioned data and returns the
        # minimum-norm solution when columns are collinear instead of failing.
        weights = np.linalg.lstsq(design, target, rcond=None)[0].flatten()

        if self.fit_intercept:
            self._intercept = weights[0]
            self._coef = weights[1:]
        else:
            self._coef = weights

    def predict(self, X):
        """Return predictions for ``X`` as an array with one value per row.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if self._coef is None:
            raise ValueError("this LinearRegression instance is not fitted yet; "
                             "call fit() before predict()")

        samples = np.asarray(X)
        predictions = samples.dot(self._coef)
        if self.fit_intercept:
            # Adding the bias afterwards is equivalent to prepending a ones
            # column, without building an extended copy of X.
            predictions = predictions + self._intercept
        return predictions

    @property
    def coef(self):
        """Fitted feature weights (1-D array), or None before fit()."""
        return self._coef

    @property
    def intercept(self):
        """Fitted bias term, or None if not fitted / fit_intercept is False."""
        return self._intercept

In [162]:
import pandas as pd
import numpy as np

In [163]:
# Load the single-feature dataset (its `x` and `y` columns are used below) and preview it.
df = pd.read_csv("./test.csv")
df.head()

Unnamed: 0,x,y
0,77,79.775152
1,21,23.177279
2,22,25.609262
3,20,17.857388
4,36,41.849864


In [164]:
# The target stays 1-D; the single feature is reshaped into an (n_samples, 1)
# column because the model concatenates a ones column along axis 1.
y = df["y"].to_numpy()
X = df["x"].to_numpy().reshape(-1, 1)

In [165]:
# Re-import the on-disk solution module so edits made to the .py file are picked
# up without restarting the kernel. `imp` is deprecated and was removed in
# Python 3.12; `importlib.reload` is the supported replacement.
# NOTE(review): no later cell visible here references `solution_linear_model`;
# the `LinearRegression` instantiated next is the class defined in this
# notebook. Confirm whether the module's version was meant to be used.
import importlib

import solution_linear_model

importlib.reload(solution_linear_model)

<module 'solution_linear_model' from 'C:\\Users\\user\\2021 프로젝트\\solution_linear_model.py'>

In [166]:
# Build the custom model with a bias term (fit_intercept=True is also the default).
lr = LinearRegression(fit_intercept=True)

In [167]:
# Fit on the single-feature data; fit() has no return value, so nothing is displayed.
lr.fit(X, y)

In [168]:
# Bias term estimated by the custom model (compared with sk_lr.intercept_ further down).
lr.intercept

-0.4618107736611705

In [169]:
# Feature weights estimated by the custom model (one entry per column of X).
lr.coef

array([1.01433536])

In [170]:
# First ten in-sample predictions from the custom model.
lr.predict(X)[:10]

array([77.64201157, 20.83923168, 21.85356704, 19.82489633, 36.05426201,
       14.75321955, 62.42698124, 95.90004796, 19.82489633,  4.609866  ])

In [171]:
# Reference model: scikit-learn's OLS fitted on the same data as `lr`.
from sklearn import linear_model

# The `normalize` keyword was deprecated in scikit-learn 1.0 and removed in 1.2;
# False was its default, so omitting it keeps the same behaviour on every version.
sk_lr = linear_model.LinearRegression()
sk_lr.fit(X, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [172]:
# scikit-learn's intercept, for comparison with lr.intercept.
sk_lr.intercept_

-0.4618107736611776

In [193]:
# Raises AssertionError unless both intercepts agree to 7 decimal places
# (assert_almost_equal's default precision); silent on success.
import numpy.testing as npt
npt.assert_almost_equal(sk_lr.intercept_,lr.intercept)

In [174]:
# scikit-learn's feature weights, for comparison with the custom model's.
sk_lr.coef_

array([1.01433536])

In [175]:
# Custom model's weights, read through the public `coef` property
# (it returns the same underlying array as the private attribute).
lr.coef

array([1.01433536])

In [176]:
# Element-wise check that both implementations produced the same weights,
# reading the custom model through its public `coef` property.
np.isclose(lr.coef, sk_lr.coef_)

array([ True])

In [177]:
# Custom-model predictions again (same call as earlier), to set beside scikit-learn's below.
lr.predict(X)[:10]

array([77.64201157, 20.83923168, 21.85356704, 19.82489633, 36.05426201,
       14.75321955, 62.42698124, 95.90004796, 19.82489633,  4.609866  ])

In [194]:
# scikit-learn predictions for the same X, for comparison with lr.predict(X).
# NOTE(review): the output recorded under this cell does not match lr.predict(X)
# above even though the fitted intercepts and coefficients agree, and its
# execution count (194) is out of sequence. It looks like it was produced after
# sk_lr was refit further down; re-run top to bottom to confirm.
sk_lr.predict(X)[:5]

array([66.13649639, 54.33571994, 57.01970597, 54.28775511, 60.57278826])

In [179]:
# Load the multi-feature dataset; this rebinds `df`, replacing the test.csv frame.
df = pd.read_csv("./mlr09.csv")
df.head()

Unnamed: 0,height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws,average_points_scored
0,6.8,225,0.442,0.672,9.2
1,6.3,180,0.435,0.797,11.7
2,6.4,190,0.456,0.761,15.8
3,6.2,180,0.416,0.651,8.6
4,6.9,205,0.449,0.9,23.2


In [180]:
# Regression target: the last column of the frame, as a 1-D array.
y = df["average_points_scored"].to_numpy()

In [181]:
# Preview every column except the last one (the target).
df.iloc[:,:-1].head()
# These are the four predictors x1..x4: height, weight, field-goal rate, free-throw rate.

Unnamed: 0,height_in_feet,weight_in_pounds,successful_field_goals,percent_of_successful_free_throws
0,6.8,225,0.442,0.672
1,6.3,180,0.435,0.797
2,6.4,190,0.456,0.761
3,6.2,180,0.416,0.651
4,6.9,205,0.449,0.9


In [182]:
# Feature matrix: all columns but the last, as a 2-D array.
X = df.iloc[:, :-1].to_numpy()

In [183]:
# First five rows of the raw (unscaled) feature matrix.
X[:5]

array([[  6.8  , 225.   ,   0.442,   0.672],
       [  6.3  , 180.   ,   0.435,   0.797],
       [  6.4  , 190.   ,   0.456,   0.761],
       [  6.2  , 180.   ,   0.416,   0.651],
       [  6.9  , 205.   ,   0.449,   0.9  ]])

In [184]:
# Z-score every feature column: subtract the column mean, then divide by the
# column standard deviation (population form, ddof=0).
mu_X = X.mean(axis=0)
std_X = X.std(axis=0)
rescaled_X = (X - mu_X) / std_X

In [185]:
# First five rows after standardization.
rescaled_X[:5]

array([[ 0.46843663,  0.50336336, -0.12692668, -0.70404955],
       [-0.63137111, -0.99746237, -0.25187012,  0.55584824],
       [-0.41140956, -0.66394554,  0.12296022,  0.19299768],
       [-0.85133266, -0.99746237, -0.59100234, -0.91571238],
       [ 0.68839818, -0.1636703 , -0.00198323,  1.59400403]])

In [186]:
# Refit the same `lr` instance on the standardized features; this overwrites
# the parameters learned from the single-feature dataset.
lr.fit(rescaled_X,y)

In [187]:
# Custom-model weights for the four standardized predictors.
lr.coef

array([-1.67779283,  0.28359762,  2.68586629,  1.12816882])

In [188]:
# Custom-model intercept; with mean-centred features the OLS intercept equals the mean of y.
lr.intercept

11.790740740740738

In [189]:
# Refit the scikit-learn reference model on the same standardized data
# (replaces its earlier single-feature fit).
sk_lr.fit(rescaled_X,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [190]:
# scikit-learn weights on the standardized data, for comparison with lr.coef.
sk_lr.coef_

array([-1.67779283,  0.28359762,  2.68586629,  1.12816882])

In [191]:
# scikit-learn intercept on the standardized data, for comparison with lr.intercept.
sk_lr.intercept_

11.790740740740736