# Polynomial Regression

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

### Data

In [None]:
# Alternative kept for reference: load without designating an index column.
# df = pd.read_csv('data/gdp4c.csv')
# Load the GDP table, using the first CSV column as the row index.
df = pd.read_csv('data/gdp4c.csv', index_col=0)
df.head()              # preview the first rows; NaN = Not a Number (missing value)

In [None]:
# Preview the last rows of the table.
df.tail()

In [None]:
# Summary statistics for the columns of df.
df.describe()

In [None]:
df.shape        # dimensions of the data: (rows, columns)

In [None]:
# Check for missing data: count the missing (NaN) entries in each column.
df.isna().sum()

### Data Visualization

In [None]:
# Plot the three GDP columns against the DataFrame index on one matplotlib figure.
sns.set_style('darkgrid')
plt.figure(figsize=(7, 4))
for country in ('THA', 'SGP', 'VNM'):
    # One labelled scatter series per column.
    plt.scatter(df.index, df[country], label=country)
plt.ylabel('GDP')
plt.legend()
plt.show()

In [None]:
# Same comparison as a seaborn scatter plot: one call per column, all on the same axes.
sns.set_style('darkgrid')
for country in ('THA', 'SGP', 'VNM'):
    sns.scatterplot(data=df, x=df.index, y=country, label=country)
plt.ylabel('GDP')
plt.show()

In [None]:
# Set the default font size (a global matplotlib setting, so it also affects later plots).
plt.rcParams['font.size'] = 14
# Scatter plot of the THA column alone against the DataFrame index.
plt.scatter(df.index, df['THA'])
plt.ylabel('GDP')
plt.show()

## Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Preview the first rows of the table again.
df.head()

In [None]:
# Take the DataFrame index as the input variable and look at its first five entries.
# (The next cell rebinds x to a reshaped, single-column version of the same values.)
x = df.index
x[:5]

In [None]:
# Turn the index values into a single-column 2-D array (one row per index entry).
index_values = df.index.to_numpy()
x = index_values.reshape(-1, 1)
x[:5]

In [None]:
# Target variable: the THA column; show its first five values.
y = df['THA']
y.head()

In [None]:
# Create a linear regression model and fit it with x as input and y as target.
model = LinearRegression()
model.fit(x, y)

### ประเมิน (Evaluation)

In [None]:
model.score(x, y)   # evaluate the model: score computed on the same x, y it was fitted to

### Visualize model

In [None]:
# Predictions of the fitted model for every input value it was trained on.
y_predict = model.predict(x)

plt.scatter(x, y, color='b', s=20)                # actual data (marker colour and size)
plt.plot(x, y_predict, linewidth=2, color='r')    # model line; linewidth is a number, not a string

plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Mean squared error between the actual values y and the predictions y_predict.
mean_squared_error(y, y_predict)

In [None]:
# Mean absolute error between the actual values y and the predictions y_predict.
mean_absolute_error(y, y_predict)

In [None]:
# Root mean squared error: square root of the mean squared error.
np.sqrt(mean_squared_error(y, y_predict))

In [None]:
# Compute each metric once, then print the summary.
r2_from_model = model.score(x, y)            # R2 as reported by the estimator
r2_from_metrics = r2_score(y, y_predict)     # R2 computed from the predictions
mse = mean_squared_error(y, y_predict)
rmse = np.sqrt(mse)

print('R2 = {:.5f} '.format(r2_from_model))
print('R2 = {:.5f} '.format(r2_from_metrics))

print('MSE = {:.4e} '.format(mse))
print('RMSE = {:.4e} '.format(rmse))

### ทำนาย (Predict)

In [None]:
# What is the GDP in the year 2020? Ask the model for a single input value.
# Note: this rebinds y_predict to a one-element array of predictions.
y_predict = model.predict([[2020]])
gdp_2020 = y_predict[0]

# Print the prediction with thousands separators, then in scientific notation.
print('{:,.0f} '.format(gdp_2020))
print('{:,.6E} '.format(gdp_2020))

In [None]:
# Years to predict, arranged as one column (one row per year):
#   [[2010], [2015], [2017], [2020]]
x_input = np.array([2010, 2015, 2017, 2020]).reshape(-1, 1)
# Note: this rebinds y_predict to the predictions for these years.
y_predict = model.predict(x_input)
y_predict

In [None]:
# Display the reshaped input array.
x_input

In [None]:
# Print every prediction in scientific notation with four decimals.
for predicted_gdp in y_predict:
    print('{:.4e}'.format(predicted_gdp))

In [None]:
# Print position, input row and prediction side by side.
for row, predicted_gdp in enumerate(y_predict):
    year = x_input[row]   # the input row that produced this prediction
    print(f'{row} {year} {predicted_gdp:.4e}')

In [None]:
# Show the last 8 values of the THA column.
df.THA.tail(8)

## Polynomial Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Take the DataFrame index as the input variable and look at its first five entries.
# (The next cell rebinds x to a reshaped, single-column version of the same values.)
x = df.index
x[:5]

In [None]:
# Turn the index values into a single-column 2-D array (one row per index entry).
index_values = df.index.to_numpy()
x = index_values.reshape(-1, 1)
x[:5]

In [None]:
# Target variable: the THA column.
y = df.THA

In [None]:
# Expand x into polynomial features; change `degree` to try other polynomial orders.
poly_features = PolynomialFeatures(degree=2)  # polynomial degree
# Fit the transformer on x and return the expanded feature matrix.
x_poly = poly_features.fit_transform(x)

In [None]:
# Fit a linear regression on the polynomial features.
# Note: this rebinds `model`, so the earlier model fitted on x alone is no longer reachable.
model = LinearRegression()
model.fit(x_poly, y)

### ประเมิน (Evaluation)

In [None]:
# Evaluate the model: score computed on the same x_poly, y it was fitted to.
model.score(x_poly, y)

### Visualize Model

In [None]:
# Predictions of the polynomial model for every input it was fitted on.
y_poly_pred = model.predict(x_poly)

sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (7, 4)   # default size for figures created from here on

plt.scatter(x, y, color='b', s=12)                  # actual data
plt.plot(x, y_poly_pred, linewidth=2, color='r')    # fitted curve; linewidth is a number, not a string
plt.ylabel('GDP')
plt.title('Thai GDP')

plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Compute each metric once, then print the summary.
r2_from_model = model.score(x_poly, y)         # R2 as reported by the estimator
r2_from_metrics = r2_score(y, y_poly_pred)     # R2 computed from the predictions
mse = mean_squared_error(y, y_poly_pred)
rmse = np.sqrt(mse)

print('R2 = {:.5f} '.format(r2_from_model))
print('R2 = {:.5f} '.format(r2_from_metrics))

print('MSE = {:.4e} '.format(mse))
print('RMSE = {:.4e} '.format(rmse))

### ทำนาย (Predict)

In [None]:
# Years to predict, arranged as one column (one row per year);
# a literal nested list such as [[2010], [2015], [2017], [2020]] works as well.
x_input = np.array([2010, 2015, 2017, 2020]).reshape(-1, 1)

# Reuse the transformer that was already fitted on the training inputs:
# new data is only transformed, never used to re-fit a preprocessing step.
x_input_poly = poly_features.transform(x_input)
# Note: this rebinds y_poly_pred to the predictions for the years above.
y_poly_pred = model.predict(x_input_poly)

# Print every prediction in scientific notation with four decimals.
for val in y_poly_pred:
    print('{:.4e}'.format(val))

In [None]:
# Print position, input row and polynomial prediction side by side.
for row, predicted_gdp in enumerate(y_poly_pred):
    year = x_input[row]   # the input row that produced this prediction
    print(f'{row} {year} = {predicted_gdp:.4e}')

In [None]:
# Show the last 8 values of the THA column.
df.THA.tail(8)

In [None]:
# Side-by-side table of the predictions for each row of x_input.
# NOTE(review): relies on y_predict still holding the predictions made for
# x_input in the Linear Regression section - confirm that cell was run last.
print('    Year     Linear    Polynomial')
print('----------------------------------')
for row, poly_gdp in enumerate(y_poly_pred):
    year = x_input[row]
    linear_gdp = y_predict[row]
    print(f'{row}  {year}  {linear_gdp:.4e}  {poly_gdp:.4e}')

### เปรียบเทียบกับค่า GDP จริง ปี 2020

In [None]:
# Polynomial Regression: percentage difference between gdp_a and gdp_p.
# NOTE(review): gdp_p looks like the polynomial prediction for 2020 copied in by hand
# and gdp_a the actual 2020 figure - confirm both values against their sources.

gdp_a = 5.018e11    # same value as 501.8e9
gdp_p = 4.956e11    # same value as 495.6e9 (the model printed about 4.9563e11)

shortfall = gdp_a - gdp_p
err = shortfall / gdp_a * 100   # difference as a percentage of gdp_a
print(f'Diff: {err:.2f} %')

In [None]:
# Linear: the same percentage difference, now for the straight-line model's value.
# gdp_a is reused from the previous cell.
gdp_p = 3.5042e11   # same value as 350.42e9
shortfall = gdp_a - gdp_p
err = shortfall / gdp_a * 100   # difference as a percentage of gdp_a
print(f'Diff: {err:.2f} %')

## Assignment
- Try higher polynomial degrees (3, 4, ...) and compare the resulting scores.
- Repeat the analysis for VNM.

In [None]:
# Drop every row that contains at least one missing value, modifying df in place.
# axis=0 (rows) is the default, so it does not need to be spelled out.
df.dropna(inplace=True)

In [None]:
# Preview the first rows after the rows with missing values were dropped.
df.head()

In [None]:
# Check for missing data again: count the missing (NaN) entries in each column.
df.isna().sum()

In [None]:
# Dimensions of the data (rows, columns) after dropping rows with missing values.
df.shape