### One-hot encoding

In [2]:
import matplotlib.pyplot as plt
import pandas as pd

In [6]:
# Load the CSV. Every column except the last is treated as a feature and the
# last column as the regression target.
dataset = pd.read_csv('hour_score1.csv')
# The feature matrix mixes numeric columns with one string column, so the
# resulting NumPy array has object dtype.
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()
X, y

(array([[2.2, 4, 'Home'],
        [4.5, 2, 'Library'],
        [1.6, 5, 'Caffee'],
        [3.6, 3, 'Caffee'],
        [10.0, 0, 'Library'],
        [9.3, 0, 'Home'],
        [6.4, 1, 'Library'],
        [5.7, 2, 'Library'],
        [5.5, 2, 'Library'],
        [7.2, 1, 'Home'],
        [7.4, 1, 'Caffee'],
        [8.3, 1, 'Caffee'],
        [2.6, 5, 'Caffee'],
        [8.7, 3, 'Caffee'],
        [9.1, 3, 'Home'],
        [1.2, 6, 'Library'],
        [3.1, 5, 'Library'],
        [4.8, 4, 'Caffee'],
        [6.9, 3, 'Library'],
        [10.2, 0, 'Caffee']], dtype=object),
 array([ 20,  42,  15,  35,  99,  91,  63,  58,  57,  70,  75,  81,  28,
         89,  90,  10,  32,  50,  71, 100], dtype=int64))

In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the string column at index 2. drop='first' omits the first
# category so the dummy columns are not redundant with each other, and
# remainder='passthrough' appends the untouched columns after the dummies.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(drop='first'), [2])],
    remainder='passthrough',
)
# Encode from `dataset` rather than from X itself so this cell can be re-run
# safely: feeding an already-encoded X back in would one-hot encode column 2
# of the encoded matrix (a numeric feature) instead of the string column.
X = ct.fit_transform(dataset.iloc[:, :-1].values)
X

array([[1.0, 0.0, 2.2, 4],
       [0.0, 1.0, 4.5, 2],
       [0.0, 0.0, 1.6, 5],
       [0.0, 0.0, 3.6, 3],
       [0.0, 1.0, 10.0, 0],
       [1.0, 0.0, 9.3, 0],
       [0.0, 1.0, 6.4, 1],
       [0.0, 1.0, 5.7, 2],
       [0.0, 1.0, 5.5, 2],
       [1.0, 0.0, 7.2, 1],
       [0.0, 0.0, 7.4, 1],
       [0.0, 0.0, 8.3, 1],
       [0.0, 0.0, 2.6, 5],
       [0.0, 0.0, 8.7, 3],
       [1.0, 0.0, 9.1, 3],
       [0.0, 1.0, 1.2, 6],
       [0.0, 1.0, 3.1, 5],
       [0.0, 0.0, 4.8, 4],
       [0.0, 1.0, 6.9, 3],
       [0.0, 0.0, 10.2, 0]], dtype=object)

### 데이터 세트 분리

In [8]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### 학습 (다중 선형 회귀)

In [9]:
from sklearn.linear_model import LinearRegression
# Fit a multiple linear regression on the training split. fit() returns the
# estimator itself, so `reg` is the fitted model.
reg = LinearRegression().fit(X_train, y_train)
reg

LinearRegression()

### 예측 값과 실제 값 비교 (테스트 세트)

In [10]:
# Predicted targets for the held-out test rows.
y_pred = reg.predict(X=X_test)
y_pred

array([ 69.2511661 ,  43.90194574, 102.35499174,  54.23643875])

In [11]:
# Actual targets of the test split, shown for comparison with y_pred.
y_test

array([ 71,  42, 100,  57], dtype=int64)

In [12]:
# Fitted coefficients, one per column of the encoded X: the two one-hot
# dummy columns first, then the two passthrough numeric columns.
reg.coef_

array([-2.11076377, -0.63931016, 10.33449301,  0.54643714])

In [13]:
# Fitted intercept (constant term) of the linear model.
reg.intercept_

-3.056836909082122

### 모델 평가

In [14]:
# R^2 on the training split.
reg.score(X=X_train, y=y_train)

0.998461490995052

In [15]:
# R^2 on the held-out test split.
reg.score(X=X_test, y=y_test)

0.98914211343024

### 다양한 평가 지표 (회귀 모델)

1. MAE (Mean Absolute Error)
2. MSE (Mean Squared Error)
3. RMSE (Root Mean Squared Error)
4. R2 (R-squared): 결정 계수

R2는 1에 가까울수록, 나머지(MAE, MSE, RMSE)는 0에 가까울수록 좋음.

In [16]:
from sklearn.metrics import mean_absolute_error
# MAE: average absolute difference between actual and predicted targets.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

2.192333160201615

In [17]:
from sklearn.metrics import mean_squared_error
# MSE: average squared difference between actual and predicted targets.
mean_squared_error(y_true=y_test, y_pred=y_pred)

4.964768634022726

In [19]:
from sklearn.metrics import mean_squared_error
# RMSE: square root of the MSE, in the same units as the target.
# Computed explicitly instead of passing squared=False, which is deprecated
# in scikit-learn 1.4 and removed in 1.6; this form works on every version.
mean_squared_error(y_test, y_pred) ** 0.5

2.2281760778768644

In [20]:
from sklearn.metrics import r2_score
# R^2 (coefficient of determination) on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.98914211343024