## 보스턴 부동산 데이터의 특징들(Features)
1. CRIM: 도시별 범죄발생률
2. ZN: 25,000평을 넘는 토지의 비율
3. INDUS: 도시별 비상업 지구의 비유
4. CHAS: 찰스 강의 더미 변수(1 = 강의 경계, 0 = 나머지)
5. NOX: 일산화질소 농도
6. RM: 주거할 수 있는 평균 방의개수
7. AGE: 1940년 이전에 지어진 주택의 비율
8. DIS: 5개의 고용지원센터까지의 가중치가 고려된 거리
9. RAD: 고속도로의 접근 용이성에 대한 지표
10. TAX: 10,000달러당 재산세 비율
11. PTRATIO: 도시별 교사와 학생의 비율 
12. B: 도시의 흑인 거주 비유
13. LSTAT: 저소득층의 비율
14. MEDV: 본인 소유 주택 가격의 중앙값

# 필요한 라이브러리 임폴트

In [1]:
# 불필요한 경고 출력을 방지
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# 데이터 확인

In [4]:
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df.head(3)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03


# 데이터 정규화 - MinMaxScaler()
- 피처들의 데이터 수준을 맞춰주기 위해서 MinMaxScaler 수행

In [5]:
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

# make extend boston dataset
# Refernce : https://github.com/amueller/mglearn/blob/master/mglearn/datasets.py#L30
def load_extended_boston():
    boston = load_boston()
    X = boston.data

    X = MinMaxScaler().fit_transform(boston.data)
    #Feature Generation 수행 degree = 2
    #X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)

    return X, boston.target

In [6]:
# 특징이 확장된 보스턴 부동산 가격 데이터 불러오기
X, y = load_extended_boston()
print(X)
print(y)
print('Extended Feature Shape :', X.shape)

[[0.00000000e+00 1.80000000e-01 6.78152493e-02 ... 2.87234043e-01
  1.00000000e+00 8.96799117e-02]
 [2.35922539e-04 0.00000000e+00 2.42302053e-01 ... 5.53191489e-01
  1.00000000e+00 2.04470199e-01]
 [2.35697744e-04 0.00000000e+00 2.42302053e-01 ... 5.53191489e-01
  9.89737254e-01 6.34657837e-02]
 ...
 [6.11892474e-04 0.00000000e+00 4.20454545e-01 ... 8.93617021e-01
  1.00000000e+00 1.07891832e-01]
 [1.16072990e-03 0.00000000e+00 4.20454545e-01 ... 8.93617021e-01
  9.91300620e-01 1.31070640e-01]
 [4.61841693e-04 0.00000000e+00 4.20454545e-01 ... 8.93617021e-01
  1.00000000e+00 1.69701987e-01]]
[24.  21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 15.  18.9 21.7 20.4
 18.2 19.9 23.1 17.5 20.2 18.2 13.6 19.6 15.2 14.5 15.6 13.9 16.6 14.8
 18.4 21.  12.7 14.5 13.2 13.1 13.5 18.9 20.  21.  24.7 30.8 34.9 26.6
 25.3 24.7 21.2 19.3 20.  16.6 14.4 19.4 19.7 20.5 25.  23.4 18.9 35.4
 24.7 31.6 23.3 19.6 18.7 16.  22.2 25.  33.  23.5 19.4 22.  17.4 20.9
 24.2 21.7 22.8 23.4 24.1 21.4 20.  20.8 21.2

## baseline 성능
실습파일 : 3-3-2.Regression_boston_house_price_pred(EDA_Feature Selection)_cvs.ipynb
#### baseline #1 - Average MSE : 37.1318(기본 Linear Regression)
#### baseline #2 - Average MSE : 34.10008 (기본 Linear Regression + Feature Selection 적용)

In [7]:
from sklearn.model_selection import KFold

num_split = 5
# n_splits : validation split 갯수
# 매개변수 : shuffle = True, random_state = 40 
kf = KFold(n_splits=num_split)  

tot_MSE = 0.0
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # 선형회귀(Linear Regression) 모델 선언하기
    model_lr = LinearRegression()

    # 선형회귀(Linear Regression) 모델 학습하기
    model_lr.fit(X_train, y_train)

    # 테스트 데이터에 대한 예측 수행하기
    y_pred = model_lr.predict(X_test)

    # MSE(Mean Squared Error) 측정 수행하기
    tot_MSE = tot_MSE + mean_squared_error(y_test, y_pred)

# 평균 에러 구하기    
avg_MSE = tot_MSE / num_split 
print('Average MSE :', avg_MSE)
print('Avergae RMSE :', np.sqrt(avg_MSE))

Average MSE : 37.131807467698955
Avergae RMSE : 6.093587405436879


# KFold 교차검증 + L2 규제 알고리즘

In [8]:
from sklearn.linear_model import Ridge  # L2 규제

num_split = 5
# n_splits : validation split 갯수
# 매개변수 : shuffle = True, random_state = 40 
kf = KFold(n_splits=num_split)

tot_MSE = 0.0
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # 선형회귀(Linear Regression) 모델 선언하기
    ridge_reg = Ridge(alpha=0.8)
    #ridge_reg = Ridge(alpha=1)

    # 선형회귀(Linear Regression) 모델 학습하기
    ridge_reg.fit(X_train, y_train)

    # 테스트 데이터에 대한 예측 수행하기
    y_pred = ridge_reg.predict(X_test)

    # MSE(Mean Squared Error) 측정 수행하기
    tot_MSE = tot_MSE + mean_squared_error(y_test, y_pred)

# 평균 에러 구하기    
avg_MSE = tot_MSE / num_split 
print('Average MSE :', tot_MSE)
print('Avergae RMSE :', np.sqrt(tot_MSE))

Average MSE : 165.07708534131163
Avergae RMSE : 12.848232771136722


# KFold 교차검증 + L1 규제 알고리즘

In [9]:
from sklearn.linear_model import Lasso  # L1 규제 LASSO로 바꾸기만 하면됨

num_split = 5
# n_splits : validation split 갯수
# 매개변수 : shuffle = True, random_state = 40 
kf = KFold(n_splits=num_split)

tot_MSE = 0.0
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # 선형회귀(Linear Regression) 모델 선언하기
    #lasso_reg = Lasso(alpha=0.02)
    lasso_reg = Lasso(alpha=0.02)

    # 선형회귀(Linear Regression) 모델 학습하기
    lasso_reg.fit(X_train, y_train)

    # 테스트 데이터에 대한 예측 수행하기
    y_pred = lasso_reg.predict(X_test)

    # MSE(Mean Squared Error) 측정 수행하기
    tot_MSE = tot_MSE + mean_squared_error(y_test, y_pred)

# 평균 에러 구하기    
avg_MSE = tot_MSE / num_split 
print('Average MSE :', avg_MSE)
print('Avergae RMSE :', np.sqrt(avg_MSE))

Average MSE : 35.36971883670263
Avergae RMSE : 5.947244642412369


# KFold 교차검증 + ElasticNet(L1+ L2) 규제 알고리즘

In [10]:
from sklearn.linear_model import ElasticNet

num_split = 5
# n_splits : validation split 갯수
# 매개변수 : shuffle = True, random_state = 40 
kf = KFold(n_splits=num_split)

tot_MSE = 0.0
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # 선형회귀(Linear Regression) 모델 선언하기
    elasticnet_reg = ElasticNet(alpha=0.01)

    # 선형회귀(Linear Regression) 모델 학습하기
    elasticnet_reg.fit(X_train, y_train)

    # 테스트 데이터에 대한 예측을 수행합니다.
    y_pred = elasticnet_reg.predict(X_test)

    # MSE(Mean Squared Error)를 측정합니다.
    tot_MSE = tot_MSE + mean_squared_error(y_test, y_pred)

# 평균 에러 구하기    
avg_MSE = tot_MSE / num_split 
print('Average MSE :', avg_MSE)
print('Avergae RMSE :', np.sqrt(avg_MSE))

Average MSE : 31.72319469996399
Avergae RMSE : 5.632334746795861


#### 최적의 하이퍼 파라미터 찾는 방법

Classification과 Regression의 하이퍼 파라미터 최적화 하기
- https://machinelearningmastery.com/hyperparameter-optimization-with-random-search-and-grid-search/

### GridSearchCV 활용  
- 모델링시 필요한 하이퍼파라미터를 설정할 때 가장 최적의 파리미터값을 찾아주는 방법중 하나  
- https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html?highlight=gridsearchcv#sklearn.model_selection.GridSearchCV

#### GridSearchCV 주요 파라미터   
: estimator, param_grid, scoring=None, n_jobs=None, cv=None, refit=True 등이 있다.

- estimator : 평가할 모델을 전달  
- param_grid : 각 파라미터와 시험할 값들을 딕셔너리로 넣기  
  평가 방법은 scoring으로 측정하며 cv는 기본적으로 KFold의 횟수를 정하는 값

- https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
(평가 방식)

- refit=True : 생성된 GridSearchCV 객체를, 가장 좋은 파라미터를 전달한 estimator로 바꿔줌

In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
# random search linear regression model on the auto insurance dataset
from scipy.stats import loguniform
from sklearn.linear_model import Lasso
#from sklearn.metrics import r2_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV

In [12]:
model_lasso = Lasso()
#print(model_lasso.get_params().keys()) 

# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# define search space
param_grid = dict()
param_grid['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
param_grid['alpha'] = loguniform(1e-5, 100)
param_grid['fit_intercept'] = [True, False]
param_grid['normalize'] = [True, False]

##########모델 검증
param_grid = {'alpha':[0.01,0.1,1,10,100]}
#grid_search = GridSearchCV(model_lasso, param_grid=param_grid) 
grid_search = GridSearchCV(model_lasso, param_grid=param_grid, 
                           cv=cv, scoring='neg_mean_absolute_error',
                           refit=True, return_train_score=True, 
                           ) 
#grid_search = GridSearchCV(model_lasso, param_grid=param_grid, cv=KFold(n_splits=5), scoring='r2') 
#grid_search = GridSearchCV(model_lasso, param_grid=param_grid, cv=5, scoring=make_scorer(r2_score))
#grid_search = GridSearchCV(model_lasso, param_grid=param_grid, cv=KFold(n_splits=5), scoring=make_scorer(r2_score))
# define search
#search = GridSearchCV(model_lasso, param_grid, scoring='neg_mean_absolute_error', n_jobs=-1, cv=cv)

grid_search.fit(X_train, y_train)
print("1. 학습모델 best_estimator_ : ", grid_search.best_estimator_)
print("2. 학습모델 best_params_ : ", grid_search.best_params_)
print("3. 학습모델 best_score_ : ", -grid_search.best_score_)

y_grid_pred = grid_search.predict(X_test)

# MSE, RMSE 평가
MSE = mean_squared_error(y_test, y_grid_pred)
RMSE = np.sqrt(MSE)

print('4. 테스트 데이터 : MSE : {0:.5f}, RMSE : {1:.5f}'.format(MSE, RMSE))
print(grid_search.best_params_)
print(grid_search.best_score_) 
scores_df = pd.DataFrame(grid_search.cv_results_)
df_score = scores_df.sort_values(by='mean_test_score', ascending=False)

1. 학습모델 best_estimator_ :  Lasso(alpha=0.01)
2. 학습모델 best_params_ :  {'alpha': 0.01}
3. 학습모델 best_score_ :  3.42564800193987
4. 테스트 데이터 : MSE : 28.43635, RMSE : 5.33257
{'alpha': 0.01}
-3.42564800193987


In [13]:
# grid search linear regression model on the auto insurance dataset
from sklearn.linear_model import Ridge
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV

# define model
model = Ridge()

# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# define search space
param_grid = dict()
param_grid['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
param_grid['alpha'] = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]
param_grid['fit_intercept'] = [True, False]
param_grid['normalize'] = [True, False]

# define search
grid_search = GridSearchCV(model, param_grid=param_grid,          
                          scoring='neg_mean_absolute_error', 
                          n_jobs=-1, cv=cv,
                          refit=True, return_train_score=True
                          )

# execute search
result = grid_search.fit(X_train, y_train)
# summarize result
print('1. Best Score: %s' % -result.best_score_)
print('2. Best best_params_: %s' % result.best_params_)

y_grid_pred = grid_search.predict(X_test)

# MSE, RMSE 평가
MSE = mean_squared_error(y_test, y_grid_pred)
RMSE = np.sqrt(MSE)

print('3. 테스트 데이터 : MSE : {0:.5f}, RMSE : {1:.5f}'.format(MSE, RMSE))
print('4. 테스트 데이터 :', grid_search.best_params_)
scores_df = pd.DataFrame(grid_search.cv_results_)
df_score = scores_df.sort_values(by='mean_test_score', ascending=False)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

1. Best Score: 3.384496503311512
2. Best best_params_: {'alpha': 0.1, 'fit_intercept': True, 'normalize': True, 'solver': 'lsqr'}
3. 테스트 데이터 : MSE : 25.02057, RMSE : 5.00206
4. 테스트 데이터 : {'alpha': 0.1, 'fit_intercept': True, 'normalize': True, 'solver': 'lsqr'}




In [17]:
# score 결과값(ndarray형태로 할당됨) 중 특정 칼럼들만 가져오기 
df_score[['params', 'mean_train_score', 'mean_test_score', 'rank_test_score',
           'split0_test_score', 'split1_test_score', 'split2_test_score',
           'split3_test_score','split4_test_score']]
df_score.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_fit_intercept,param_normalize,param_solver,params,split0_test_score,...,split22_train_score,split23_train_score,split24_train_score,split25_train_score,split26_train_score,split27_train_score,split28_train_score,split29_train_score,mean_train_score,std_train_score
66,0.010663,0.015873,0.001871,0.002793,0.1,True,True,lsqr,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.309078,...,-3.21083,-3.416174,-3.307229,-3.127909,-3.333811,-3.282538,-3.075013,-3.274009,-3.254743,0.087537
67,0.007518,0.006564,0.002719,0.006054,0.1,True,True,sag,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.308466,...,-3.210731,-3.415023,-3.30664,-3.12721,-3.333372,-3.281941,-3.074262,-3.273377,-3.254121,0.087514
65,0.006043,0.009264,0.003665,0.014106,0.1,True,True,cholesky,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.309129,...,-3.210381,-3.415069,-3.306782,-3.127038,-3.333053,-3.281896,-3.074337,-3.273201,-3.254107,0.087527
64,0.004063,0.00697,0.000912,0.001129,0.1,True,True,svd,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.309129,...,-3.210381,-3.415069,-3.306782,-3.127038,-3.333053,-3.281896,-3.074337,-3.273201,-3.254107,0.087527
50,0.007687,0.006634,0.001025,0.001134,0.01,True,True,lsqr,"{'alpha': 0.01, 'fit_intercept': True, 'normal...",-3.385862,...,-3.227975,-3.435131,-3.343107,-3.122246,-3.359028,-3.313788,-3.107692,-3.314375,-3.281871,0.092857


In [19]:
# 최고의 Hyperparameter 찾기 위한 GridsearchCV 사용
cv_split = 5
model_lr6 = ElasticNet()

# define search space
param_grid = dict()
param_grid['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
param_grid['alpha'] = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]
param_grid['fit_intercept'] = [True, False]
param_grid['normalize'] = [True, False]

# define search
grid_search = GridSearchCV(model, param_grid=param_grid,         
                          scoring='neg_mean_absolute_error', 
                          n_jobs=-1, cv=cv_split,
                          refit=True, return_train_score=True
                          )
                        
# GridSearchCV 인자설명
# cv = 하나의 파라미터 쌍으로 모델링할 때 train, test 교차검증을 7번실시하겠다는 뜻
# refit=True : GridSearch한 후 가장 최고로 좋은 파라미터로 학습시켜 놓겠다.
# ㄴ> 이것 때문에 애초에 GridSearchCV 적용한 객체만으로 최적의 파라미터 적용된 모델로드 가능

# GridSearch 하면서 모든 파라미터값들에 대해 학습 수행
grid_search.fit(X_train, y_train)
print("1. 학습모델 best_estimator_ : ", grid_search.best_estimator_)
print("2. 학습모델 best_params_ : ", grid_search.best_params_)
print("3. 학습모델 best_score_ : ", np.sqrt(-grid_search.best_score_))
y_grid_pred = grid_search.predict(X_test)
# MSE, RMSE 평가
MSE = mean_squared_error(y_test, y_grid_pred)
RMSE = np.sqrt(MSE)
print('4. 테스트 데이터 : MSE : {0:.5f}, RMSE : {1:.5f}'.format(MSE, RMSE))

# 각 파라미터값들에 대한 모델 결과값들이 cv_results_ 객체에 할당됨
scores_df = pd.DataFrame(grid_search.cv_results_)
df_score = scores_df.sort_values(by='mean_test_score', ascending=False)
#print(scores_df.columns)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

1. 학습모델 best_estimator_ :  Ridge(alpha=1, normalize=False, solver='svd')
2. 학습모델 best_params_ :  {'alpha': 1, 'fit_intercept': True, 'normalize': False, 'solver': 'svd'}
3. 학습모델 best_score_ :  1.9518131761429747
4. 테스트 데이터 : MSE : 24.12404, RMSE : 4.91162


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Ridge())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alp

In [18]:
# score 결과값(ndarray형태로 할당됨) 중 특정 칼럼들만 가져오기 
df_score[['params','mean_test_score', 'rank_test_score',
           'split0_test_score', 'split1_test_score', 'split2_test_score',
           'split3_test_score','split4_test_score']]

Unnamed: 0,params,mean_test_score,rank_test_score,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score
66,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.384497,1,-3.309078,-4.213636,-3.516328,-3.195430,-3.080083
67,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.384649,2,-3.308466,-4.215599,-3.517201,-3.195479,-3.079809
65,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.384696,3,-3.309129,-4.215425,-3.517139,-3.195997,-3.079253
64,"{'alpha': 0.1, 'fit_intercept': True, 'normali...",-3.384696,4,-3.309129,-4.215425,-3.517139,-3.195997,-3.079253
50,"{'alpha': 0.01, 'fit_intercept': True, 'normal...",-3.429703,5,-3.385862,-4.077027,-3.458305,-3.261254,-3.140480
...,...,...,...,...,...,...,...,...
123,"{'alpha': 100, 'fit_intercept': False, 'normal...",-6.576400,124,-6.922895,-7.810350,-6.813782,-7.140829,-5.588612
112,"{'alpha': 100, 'fit_intercept': True, 'normali...",-6.687787,125,-6.230221,-7.025068,-6.921623,-6.836949,-6.175428
113,"{'alpha': 100, 'fit_intercept': True, 'normali...",-6.687787,125,-6.230221,-7.025068,-6.921623,-6.836949,-6.175428
114,"{'alpha': 100, 'fit_intercept': True, 'normali...",-6.687787,127,-6.230220,-7.025067,-6.921624,-6.836949,-6.175429
