# Regularization

- 기존 회귀에서는 RSS를 최소화하는 것을 목표로 함
- 이 때 과적합을 피하기 위해 새로운 항을 더해 새로운 손실함수로 정의함
    - 이 것이 regularized regression
- 대표적으로 l1, l2 규제가 있음
    - l1: 기존 RSS항에 alpha x sum of abs(params)를 더해줌
    - l2: 위 항에서 abs(params) 대신 parmas^2 을 더해줌
- l1 regression을 lasso, l2 regression을 ridge, 결합한 것을 elasticnet이라 함

- 해당하는 alpha를 증가시키면 회귀계수의 크기를 감소시키는 효과를 얻을 수 있음

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.preprocessing import StandardScaler
import warnings

warnings.filterwarnings(action='ignore')

d = load_boston()

std = StandardScaler()
data = pd.DataFrame(std.fit_transform(d.data), columns=d.feature_names)
target = std.fit_transform(d.target.reshape(-1,1))
dd = data.iloc[:,0:3]

In [2]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression().fit(dd, target)
lr.coef_

array([[-0.23252837,  0.14834915, -0.30999003]])

In [3]:
## lasso
from sklearn.linear_model import Lasso

lasso = Lasso(alpha=0.1).fit(dd, target)
lasso.coef_

array([-0.16010667,  0.08147935, -0.27513247])

In [4]:
def update(w, X, y, lr=0.001, alpha=0.1):
    X['int'] = np.ones(len(X))
    f = (X.T@X @ w).values
    b = X.T@y
    bb = list(list(zip(*b.values))[0])
    bb = (np.multiply(-1, bb))
    w_update = np.add(f, bb)
    
    def abss(a, alpha=alpha):
        if a <= 0:
            return -1*alpha*len(X)
        else:
            return alpha*len(X)
    l1 = list(map(abss, w))
    w_update = np.add(w_update, l1)
    
    w = np.add(w, np.multiply(-1*lr, w_update))
   
    y_pred = pd.DataFrame((X @ w).T)    
    diff = y - y_pred
    return w, np.sum(np.square(y-y_pred))[0]+sum(l1)

w = [-0.1, 0.3, -0.1, 0]
update(w, dd, target)

(array([-0.14427778,  0.24282989, -0.14195669,  0.0506    ]),
 276.0544790042091)

In [5]:
e = 10000
for _ in range(1000):
    w, error = update(w, dd, target)
#     print(error)
#     if error > e:
#         break
#     else:
#         e = error

In [6]:
w

array([-0.16010601,  0.08147383, -0.27513569, -0.03386881])

In [7]:
## ridge
from sklearn.linear_model import Ridge

ridge = Ridge(alpha=10).fit(dd, target)
ridge.coef_


array([[-0.23002297,  0.14867902, -0.30480869,  0.        ]])

In [8]:
def update(w, X, y, lr=0.001, alpha=0.1):
    X['int'] = np.ones(len(X))
    f = (X.T@X @ w).values
    b = X.T@y
    bb = list(list(zip(*b.values))[0])
    bb = (np.multiply(-1, bb))
    w_update = np.add(f, bb)
    
    l2 = np.multiply(w, 2)
    w_update = np.add(w_update, l2)
    
    w = np.add(w, np.multiply(-1*lr, w_update))
   
    y_pred = pd.DataFrame((X @ w).T)    
    diff = y - y_pred
    return w, np.sum(np.square(y-y_pred))[0]+sum(l2)

w = [-0.1, 0.3, -0.1, 0]
update(w, dd, target)

(array([-1.94677784e-01,  2.92829895e-01, -1.92356692e-01, -3.36841666e-16]),
 367.41116905986775)

In [9]:
for _ in range(1000):
    w, error = update(w, dd, target)

In [10]:
w

array([-2.32026009e-01,  1.48428642e-01, -3.08930771e-01, -1.34708312e-15])

In [13]:
## elasticnet
## l1_ratio = a/(a+b), where a = parameter alpha
from sklearn.linear_model import ElasticNet

en = ElasticNet(alpha=0.05, l1_ratio=0.7).fit(dd, target)
en.coef_


array([-0.20564352,  0.12556627, -0.29367793,  0.        ])