# 회귀 모델
### 오차 3가지(MSE, MAE, R 스코어)로 평가 하는 것이 핵심

In [17]:
from sklearn.datasets import fetch_california_housing
import pandas as pd

housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['target'] = housing.target
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [18]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def evaluate_regression(y_pred, y_true):
    print("MSE:", mean_squared_error(y_pred, y_true))
    print("MAE:", mean_absolute_error(y_pred, y_true))
    print("R²:", r2_score(y_pred, y_true)) # 결정계수 : 1에 가까울 수록 설명력이 높음(파라미터 순서 주의)

In [19]:
from sklearn.model_selection import train_test_split

X = housing.data
y = housing.target

# stratify=y 설정 필요 없음, 같은 y 값으로 분배하는 것이 불가능 하므로
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 

In [20]:
# 선형 회귀 - 단순/빠름
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("=== Linear Regression ===")
evaluate_regression(y_pred, y_test)

=== Linear Regression ===
MSE: 0.5558915986952424
MAE: 0.5332001304956974
R²: 0.33767016589310217


In [21]:
# 릿지 회귀 - 과적합 방지 : 특정 가중치가 전체 결과값에 편향을 만드는 것을 막기 위해, 가중치 크기를 제어(L2 규제)
from sklearn.linear_model import Ridge

model = Ridge(alpha=2) # 가중치를 2로 제한
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("=== Ridge Regression ===")
evaluate_regression(y_pred, y_test)

=== Ridge Regression ===
MSE: 0.5557160248197577
MAE: 0.5332080241497934
R²: 0.3375228808692976


In [22]:
# 라쏘 회귀 - 변수 선택 가능 : 절대값의 합을 제어(L1 규제)
from sklearn.linear_model import Lasso

model = Lasso(alpha=0.1) # 절대값의 합을 0.1로 제한
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("=== Lasso Regression ===")
evaluate_regression(y_pred, y_test)

=== Lasso Regression ===
MSE: 0.6135115198058131
MAE: 0.5816074623949868
R²: -0.018892308191097662
