# Lecture 08: Data Processing and Calibration in Machine Learning 
By the end of this lecture, you will be able to:
1. Calibrate the data for machine learning
2. Process the data for regression and classification problems


# 8.1. Import Libraries

---



In [None]:
#@title Import
import numpy  as np
import pandas as pd

import sklearn.preprocessing as pg
import keras.utils           as ku 

# 8.2. Calibration Necessity

---



In [None]:
#@title Load some examples
# Read the California-housing training and testing tables from the
# sample_data folder, in that order.
data_tr, data_te = map(
    pd.read_csv,
    (
        '/content/sample_data/california_housing_train.csv',
        '/content/sample_data/california_housing_test.csv',
    ),
)

# Last expression of the cell: display the first rows of the training table.
data_tr.head()


# 8.3. Data Processing

---



### 8.3.1. Data Processing - Regression Problems 

In [None]:
#@title Load some regression data and separate inputs and outputs
# Read the training and testing tables.
data_tr = pd.read_csv('/content/sample_data/california_housing_train.csv')
data_te = pd.read_csv('/content/sample_data/california_housing_test.csv')

# Inputs are every column except the last one; the output is the last column.
# Slicing with -1: (rather than indexing with -1) keeps the output as a
# one-column DataFrame instead of a Series.
datain_tr, dataou_tr = data_tr.iloc[:, :-1], data_tr.iloc[:, -1:]
datain_te, dataou_te = data_te.iloc[:, :-1], data_te.iloc[:, -1:]

In [None]:
#@title Calibration

# Build one min-max scaler per side (inputs / outputs), each targeting [0, 1]
# and each fitted on the TRAINING data only (fit() returns the scaler itself).
fun_calibration_in = pg.MinMaxScaler(feature_range=(0, 1)).fit(datain_tr)
fun_calibration_ou = pg.MinMaxScaler(feature_range=(0, 1)).fit(dataou_tr)

# Calibrate the training split.
datain_tr_calibrated = fun_calibration_in.transform(datain_tr)
dataou_tr_calibrated = fun_calibration_ou.transform(dataou_tr)

# Calibrate the testing split with the same training-fitted scalers.
datain_te_calibrated = fun_calibration_in.transform(datain_te)
dataou_te_calibrated = fun_calibration_ou.transform(dataou_te)

# Because the scalers were fitted on the training data only, calibrated
# testing values may fall outside [0, 1] (a range violation).

### 8.3.2. Data Processing - Classification Problems 
> **MNIST data:**

> <img src="https://i.ibb.co/4S6xP3m/8-1.png" width="500"/>

[Image link](https://www.researchgate.net/profile/Steven_Young11/publication/306056875/figure/fig1/AS:393921575309346@1470929630835/Example-images-from-the-MNIST-dataset.png)

In [None]:
#@title Load MNIST classification data and split inputs and outputs
# The MNIST sample CSVs have no header row: every line is a sample.
# header=None stops pandas from consuming the first sample of each file as
# column names (which would silently drop one training and one testing row).
data_tr   = pd.read_csv('/content/sample_data/mnist_train_small.csv', header=None)
data_te   = pd.read_csv('/content/sample_data/mnist_test.csv', header=None)

# Column 0 is used as the output (class label); all remaining columns are the
# inputs. Slicing with 0:1 keeps the output as a one-column DataFrame.
datain_tr = data_tr.iloc[:,1:]
dataou_tr = data_tr.iloc[:,0:1]

datain_te = data_te.iloc[:,1:]
dataou_te = data_te.iloc[:,0:1]

# Report (n_samples, n_inputs) for each split.
print("Training inputs shape: \n{}\n".format(datain_tr.values.shape) )
print("Testing  inputs shape: \n{}\n".format(datain_te.values.shape) )

# Transpose the (n_samples, 1) label column so it prints as a single row.
print("Training outputs: \n{}\n".format(dataou_tr.values.transpose()) )
print("Testing  outputs: \n{}\n".format(dataou_te.values.transpose()) )


In [None]:
#@title Calibration
# Min-max scale the inputs to [0, 1]. The scaler is fitted on the training
# inputs only and then applied unchanged to both splits.
fun_calibration_in = pg.MinMaxScaler( feature_range=(0,1) )
fun_calibration_in.fit(datain_tr)

datain_tr_calibrated = fun_calibration_in.transform(datain_tr)
datain_te_calibrated = fun_calibration_in.transform(datain_te)

# The class labels are NOT min-max scaled; they are one-hot encoded instead.
# Use one shared class count for both splits: when num_classes is omitted,
# to_categorical infers it from the largest label in the array it is given,
# so the two encodings could end up with different widths if one split
# happened to lack the highest class.
num_classes = int(max(dataou_tr.values.max(), dataou_te.values.max())) + 1

dataou_tr_categorical = ku.to_categorical(dataou_tr, num_classes=num_classes)
dataou_te_categorical = ku.to_categorical(dataou_te, num_classes=num_classes)

# Show the one-hot matrices and their (n_samples, num_classes) shapes.
print("Training binary outputs: \n{}\n\nShape:\n{}\n".format( dataou_tr_categorical,dataou_tr_categorical.shape ) )
print("Testing  binary outputs: \n{}\n\nShape:\n{}\n".format( dataou_te_categorical,dataou_te_categorical.shape ) )



# Lecture 08: Data Processing and Calibration in Machine Learning
In this lecture, you learned about:
1. How to calibrate the data for machine learning
2. How to process the data for regression and classification problems

***In the next lecture, we will go over a Brief Introduction to Neural Networks***