# Feature Scaling

1. Min-Max Scaling

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Toy dataset: a single 'price' feature running from 10 to 100 in steps of 10.
data = {'price': list(range(10, 101, 10))}
df = pd.DataFrame(data)

# Preview the first five rows.
df.head()

Unnamed: 0,price
0,10
1,20
2,30
3,40
4,50


In [6]:
# Min-max scaling: learn the range of 'price', then rescale it so the
# smallest value maps to 0 and the largest to 1.
# The scaler expects 2-D input, hence the double brackets around 'price'.
scaler = MinMaxScaler().fit(df[['price']])

# Store the rescaled values next to the original column and show the frame.
df['price_scaled'] = scaler.transform(df[['price']])
df


Unnamed: 0,price,price_scaled
0,10,0.0
1,20,0.111111
2,30,0.222222
3,40,0.333333
4,50,0.444444
5,60,0.555556
6,70,0.666667
7,80,0.777778
8,90,0.888889
9,100,1.0


In [7]:
# Standardisation (z-score): centre 'price' on its mean and divide by its
# standard deviation, keeping the result in a separate column.
from sklearn.preprocessing import StandardScaler

# Learn the mean / standard deviation from the 'price' column (2-D input).
scaler = StandardScaler().fit(df[['price']])

# Apply the fitted scaler and display the updated frame.
df['price_standard_scaled'] = scaler.transform(df[['price']])
df

Unnamed: 0,price,price_scaled,price_standard_scaled
0,10,0.0,-1.566699
1,20,0.111111,-1.218544
2,30,0.222222,-0.870388
3,40,0.333333,-0.522233
4,50,0.444444,-0.174078
5,60,0.555556,0.174078
6,70,0.666667,0.522233
7,80,0.777778,0.870388
8,90,0.888889,1.218544
9,100,1.0,1.566699


In [8]:
# Robust scaling: centre 'price' on its median and scale by the
# interquartile range, keeping the result in a separate column.
from sklearn.preprocessing import RobustScaler

# Learn the median / IQR from the 'price' column (2-D input).
scaler = RobustScaler().fit(df[['price']])

# Apply the fitted scaler and display the updated frame.
df['price_robust_scaled'] = scaler.transform(df[['price']])
df

Unnamed: 0,price,price_scaled,price_standard_scaled,price_robust_scaled
0,10,0.0,-1.566699,-1.0
1,20,0.111111,-1.218544,-0.777778
2,30,0.222222,-0.870388,-0.555556
3,40,0.333333,-0.522233,-0.333333
4,50,0.444444,-0.174078,-0.111111
5,60,0.555556,0.174078,0.111111
6,70,0.666667,0.522233,0.333333
7,80,0.777778,0.870388,0.555556
8,90,0.888889,1.218544,0.777778
9,100,1.0,1.566699,1.0


In [9]:
# Max-abs scaling: divide 'price' by its largest absolute value, keeping
# the result in a separate column.
from sklearn.preprocessing import MaxAbsScaler

# Learn the maximum absolute value from the 'price' column (2-D input).
scaler = MaxAbsScaler().fit(df[['price']])

# Apply the fitted scaler and display the updated frame.
df['price_maxabs_scaled'] = scaler.transform(df[['price']])
df

Unnamed: 0,price,price_scaled,price_standard_scaled,price_robust_scaled,price_maxabs_scaled
0,10,0.0,-1.566699,-1.0,0.1
1,20,0.111111,-1.218544,-0.777778,0.2
2,30,0.222222,-0.870388,-0.555556,0.3
3,40,0.333333,-0.522233,-0.333333,0.4
4,50,0.444444,-0.174078,-0.111111,0.5
5,60,0.555556,0.174078,0.111111,0.6
6,70,0.666667,0.522233,0.333333,0.7
7,80,0.777778,0.870388,0.555556,0.8
8,90,0.888889,1.218544,0.777778,0.9
9,100,1.0,1.566699,1.0,1.0


In [10]:
# Log transformation: compress the scale of 'price' with the natural log.
import numpy as np

# Element-wise natural logarithm of 'price', stored as a new column.
df['price_log'] = df['price'].pipe(np.log)
df

Unnamed: 0,price,price_scaled,price_standard_scaled,price_robust_scaled,price_maxabs_scaled,price_log
0,10,0.0,-1.566699,-1.0,0.1,2.302585
1,20,0.111111,-1.218544,-0.777778,0.2,2.995732
2,30,0.222222,-0.870388,-0.555556,0.3,3.401197
3,40,0.333333,-0.522233,-0.333333,0.4,3.688879
4,50,0.444444,-0.174078,-0.111111,0.5,3.912023
5,60,0.555556,0.174078,0.111111,0.6,4.094345
6,70,0.666667,0.522233,0.333333,0.7,4.248495
7,80,0.777778,0.870388,0.555556,0.8,4.382027
8,90,0.888889,1.218544,0.777778,0.9,4.49981
9,100,1.0,1.566699,1.0,1.0,4.60517


In [11]:
# Square-root transformation of 'price', stored as a new column.
# `np` comes from the numpy import in the log-transformation cell.
df['price_sqrt'] = df['price'].pipe(np.sqrt)
df

Unnamed: 0,price,price_scaled,price_standard_scaled,price_robust_scaled,price_maxabs_scaled,price_log,price_sqrt
0,10,0.0,-1.566699,-1.0,0.1,2.302585,3.162278
1,20,0.111111,-1.218544,-0.777778,0.2,2.995732,4.472136
2,30,0.222222,-0.870388,-0.555556,0.3,3.401197,5.477226
3,40,0.333333,-0.522233,-0.333333,0.4,3.688879,6.324555
4,50,0.444444,-0.174078,-0.111111,0.5,3.912023,7.071068
5,60,0.555556,0.174078,0.111111,0.6,4.094345,7.745967
6,70,0.666667,0.522233,0.333333,0.7,4.248495,8.3666
7,80,0.777778,0.870388,0.555556,0.8,4.382027,8.944272
8,90,0.888889,1.218544,0.777778,0.9,4.49981,9.486833
9,100,1.0,1.566699,1.0,1.0,4.60517,10.0


In [12]:
# Exponential transformation: e raised to each 'price', stored as a new column.
# The values grow extremely fast (the last row is already ~2.7e43).
df['price_exp'] = df['price'].pipe(np.exp)
df

Unnamed: 0,price,price_scaled,price_standard_scaled,price_robust_scaled,price_maxabs_scaled,price_log,price_sqrt,price_exp
0,10,0.0,-1.566699,-1.0,0.1,2.302585,3.162278,22026.47
1,20,0.111111,-1.218544,-0.777778,0.2,2.995732,4.472136,485165200.0
2,30,0.222222,-0.870388,-0.555556,0.3,3.401197,5.477226,10686470000000.0
3,40,0.333333,-0.522233,-0.333333,0.4,3.688879,6.324555,2.353853e+17
4,50,0.444444,-0.174078,-0.111111,0.5,3.912023,7.071068,5.184706e+21
5,60,0.555556,0.174078,0.111111,0.6,4.094345,7.745967,1.142007e+26
6,70,0.666667,0.522233,0.333333,0.7,4.248495,8.3666,2.515439e+30
7,80,0.777778,0.870388,0.555556,0.8,4.382027,8.944272,5.5406219999999995e+34
8,90,0.888889,1.218544,0.777778,0.9,4.49981,9.486833,1.220403e+39
9,100,1.0,1.566699,1.0,1.0,4.60517,10.0,2.688117e+43
