## Multiple Linear Regression (Multivariable Regression)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset; index_col=0 makes the first CSV column the DataFrame index.
df = pd.read_csv(
    filepath_or_buffer='data/advertising_1.csv',
    index_col=0,
)
df.head(n=5)

In [None]:
# Count the missing values in each column.
df.isna().sum()

## Data Visualization

In [None]:
# Scatter plot of Sales against the TV column.
# A title and axis labels are set so the figure is self-explanatory.
plt.scatter(df.TV, df.Sales)
plt.title('Sales-TV')
plt.xlabel('TV')
plt.ylabel('Sales')
plt.show()

In [None]:
# Scatter plot of Sales against the Radio column.
# A title and axis labels are set so the figure is self-explanatory.
plt.scatter(df.Radio, df.Sales)
plt.title('Sales-Radio')
plt.xlabel('Radio')
plt.ylabel('Sales')
plt.show()

In [None]:
# Scatter plot of Sales against the Newspaper column, with title and axis labels.
plt.scatter(x=df['Newspaper'], y=df['Sales'])
plt.xlabel('Newspaper')
plt.ylabel('Sales')
plt.title('Sales-Newspaper')
plt.show()

## Pairplot

In [None]:
# Grid of pairwise plots for the columns of df.
sns.pairplot(data=df)
plt.show()

In [None]:
# One regression panel per feature, each plotted against Sales.
sns.pairplot(
    data=df,
    y_vars='Sales',
    x_vars=['TV', 'Radio', 'Newspaper'],
    kind='reg',
    height=3.5,
)
plt.show()

In [None]:
# Pairwise correlation matrix of the columns (Pearson is the pandas default).
df.corr(method='pearson')

## Model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Feature columns fed to the model. These are the independent (explanatory)
# variables, despite the `dep_cols` name; Sales is the dependent variable.
dep_cols = ['TV', 'Radio', 'Newspaper']
X = df[dep_cols]

# Target vector.
# (df.drop(['Sales'], axis=1) would give the same X if df has no other columns.)
y = df['Sales']

In [None]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

In [None]:
# Preview the first five target values.
y.head(n=5)

In [None]:
# Preview the first five rows of the full DataFrame.
df.head(n=5)

### Train Model และ ประเมิน 

In [None]:
# Fit a linear regression on all rows of X and y (fit() returns the estimator).
model = LinearRegression().fit(X, y)
model  # display the fitted estimator

In [None]:
# R^2 of the model, evaluated on the same data it was fitted on.
model.score(X=X, y=y)

In [None]:
# Predict on the rows used for fitting, then compute R^2 from those predictions.
y_predict = model.predict(X)
r2_score(y_true=y, y_pred=y_predict)

In [None]:
# Mean squared error between the actual Sales and the predictions.
mean_squared_error(y_true=y, y_pred=y_predict)

In [None]:
# Mean absolute error between the actual Sales and the predictions.
mean_absolute_error(y_true=y, y_pred=y_predict)

## Predict ทำนาย

In [None]:
# Show the first five rows again as a reference for choosing input values.
df.head(n=5)

In [None]:
# Predict Sales for a single new observation.
x_input = [[125.1, 9.8, 27.1]]   # TV, Radio, Newspaper
# The model was fitted on a DataFrame, so the inputs are passed with the same
# column names; a bare list makes scikit-learn warn that X has no valid
# feature names.
model.predict(pd.DataFrame(x_input, columns=dep_cols))

## สมการ Model

In [None]:
# Fitted coefficients (slopes) of the linear model.
model.coef_         # coefficients

In [None]:
# Constant term of the fitted linear equation.
model.intercept_    # intercept (b)

In [None]:
# Pair each feature name with its fitted coefficient (slope) in a single list.
[(name, slope) for name, slope in zip(dep_cols, model.coef_)]

In [None]:
# Reproduce model.predict by hand for x_input = [125.1, 9.8, 27.1]:
#   y = b + w_TV*TV + w_Radio*Radio + w_Newspaper*Newspaper
# The coefficients and intercept are read from the fitted model instead of
# being typed in as rounded constants, so the check stays exact and remains
# valid if the model is refitted.
model.intercept_ + sum(w * v for w, v in zip(model.coef_, [125.1, 9.8, 27.1]))

In [None]:
# Predict several cases at once; each row is [TV, Radio, Newspaper].
x_input = [
    [0, 0, 0],
    [0, 0, 10],
    [216.5, 43.8, 27.1],
    [182.0, 78.3, 27.1],
    [180.0, 107.4, 0],
    [1.0, 286.4, 0],
]
# x_input stays a plain list because later cells index into it. For the
# prediction it is wrapped in a DataFrame carrying the fitted column names,
# which avoids scikit-learn's "X does not have valid feature names" warning.
y_pred = model.predict(pd.DataFrame(x_input, columns=dep_cols))
y_pred

In [None]:
import numpy as np

# Convert the input cases to an array and total each row.
x_in = np.asarray(x_input)
np.sum(x_in, axis=1)

In [None]:
# Manual check of one row total, using the values of the third case in x_input.
216.5 + 43.8 + 27.1

In [None]:
# Manual check of one row total, using the values of the fourth case in x_input.
182.0 + 78.3 + 27.1

## สรุป Report

In [None]:
# Print one table row per case: the three inputs followed by the predicted Sales.
print('|   TV  | Radio | Newsp |   Sales ')
for (tv, radio, newsp), sales in zip(x_input, y_pred):
    print(f'| {tv:5} | {radio:5} | {newsp:5} | = {sales:5.2f}')

In [None]:
# Per-case total of the three input values (sum across each row).
x_in_sum = np.sum(x_in, axis=1)

In [None]:
# Summary report using str.format: inputs, their row total, and predicted Sales.
row_template = '| {:5} | {:5} | {:5} | {:5.1f} | = {:5.2f}'
print('|   TV  | Radio | Newsp |  Sum  |  Sales ')
for case, total, sales in zip(x_input, x_in_sum, y_pred):
    print(row_template.format(case[0], case[1], case[2], total, sales))

In [None]:
# Summary report using an f-string: inputs, their row total, and predicted Sales.
print('|   TV  | Radio | Newsp |  Sum  |  Sales ')
for (tv, radio, newsp), total, sales in zip(x_input, x_in_sum, y_pred):
    print(f'| {tv:5} | {radio:5} | {newsp:5} | {total:5.1f} | = {sales:5.2f}')

In [None]:
# Unformatted dump: each input case with its row total and predicted Sales.
for case, total, sales in zip(x_input, x_in_sum, y_pred):
    print(case, total, sales)