In [1]:
# 경고무시
import warnings
warnings.filterwarnings(action='ignore', category=DeprecationWarning)

In [2]:
# load library
import pandas as pd 
import numpy as np
from sklearn.linear_model import ElasticNet # 엘라스틱넷
import matplotlib.pyplot as plt
%matplotlib inline
# from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.metrics import accuracy_score

# 난수씨드값 지정하기.
np.random.seed(seed=2021)

In [3]:
# load data
감말랭이 = pd.read_csv("C:/ken/elasticnet/감말랭이_new.csv")
감말랭이.head()

Unnamed: 0,date,PC1,PC2,감말랭이
0,2018-01-01T00:00:00Z,-3.719474,0.965108,2.862813
1,2018-01-02T00:00:00Z,-3.741912,0.888377,4.32831
2,2018-01-03T00:00:00Z,-4.074829,0.748572,5.545913
3,2018-01-04T00:00:00Z,-4.325838,0.235772,5.867105
4,2018-01-05T00:00:00Z,-3.772842,0.246325,4.414783


In [4]:
# 날짜 컬럼 인덱스로 지정하기. 
감말랭이 = 감말랭이.set_index('date')

# 데이터셋의 target 변수 스케일링
from sklearn.preprocessing import MinMaxScaler
감말랭이['감말랭이_scaling'] = MinMaxScaler().fit_transform(감말랭이[['감말랭이']])

# 기존 target 변수 삭제
del 감말랭이['감말랭이']

# train, test 데이터셋을 연속적으로 분할하기. 
X_train = 감말랭이.iloc[0:365,:-1]
y_train = 감말랭이.iloc[0:365,-1]
X_test  = 감말랭이.iloc[365:730,:-1]
y_test  = 감말랭이.iloc[365:730,-1]

# 데이터셋 차원 확인하기.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((365, 2), (365,), (365, 2), (365,))

In [9]:
alphas = [0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1]

for a in alphas:
    model = ElasticNet(alpha=a).fit(X_train,y_train)
    score = model.score(X_test,y_test)
    pred_y = model.predict(X_test)
    mse = mean_squared_error(y_test, pred_y)
    
print("Alpha:{0:.4f}, R2:{1:.2f}, MSE:{2:.2f}, RMSE:{3:.2f}".format(a, score, mse, np.sqrt(mse)))

Alpha:1.0000, R2:-0.07, MSE:0.03, RMSE:0.17


In [5]:
# 회귀모델의 alpha값에 따라 RMSE 출력 후 회귀계수값을 DataFrame으로 반환하는 함수
def get_linear_reg_eval(
    model_name, params=None, X_train_n=None, y_test_n=None, verbose=True):
    coeff_df = pd.DataFrame()
    if verbose : print('######', model_name, '######')

    # 알파값마다 for 문을 돌면서 RMSE 계산
    for param in params:

        # 입력된 규제 선형회귀(ElasticNet) 조건에 맞게 객체 생성
        if model_name == 'ElasticNet' : model = ElasticNet(alpha=param, l1_ratio=0.7)

        # 학습 및 평가(cross_val_score:MSE를 리스트 형태로 반환해줌)
        neg_mse_scores = cross_val_score(model, X_train_n, y_test_n,
                                         scoring="neg_mean_squared_error", cv=5)
        avg_rmse = np.mean(np.sqrt(-1 * neg_mse_scores))
        print('alpha {0}일 때 5 폴드 세트의 평균 RMSE:{1:.3f}', format(param, avg_rmse))

        # 모델을 다시 학습해 변수 별 회귀계수 추출(cross_val_score는 evaluation metric만 반환하므로)
        model.fit(x_train, y_test)
        # alpha에 따른 피쳐별 회귀계수를 series로 변환하고 이를 dataframe의 칼럼으로 추가
        coeff = pd.Series(data=model.coef_, index=x_train.columns)
        colname='alpha:' + str(param)
        coeff_df[colname] = coeff
    return coeff_df

In [6]:
# 엘라스틱넷에 사용될 alpha 파라미터의 값들 정의
# get_linear_reg_eval() 함수 호출, l1_ratio=0.7로 고정
alphas = [0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1]
coeff_elastic_df = get_linear_reg_eval(
    'ElasticNet',
    params=alphas,
    X_train_n=X_train,
    y_test_n=y_test)

###### ElasticNet ######


TypeError: format() argument 2 must be str, not numpy.float64