# Feature Scaling 

In [1]:
pip install scikit-learn

Collecting scikit-learnNote: you may need to restart the kernel to use updated packages.

  Obtaining dependency information for scikit-learn from https://files.pythonhosted.org/packages/fe/6b/db949ed5ac367987b1f250f070f340b7715d22f0c9c965bdf07de6ca75a3/scikit_learn-1.3.2-cp312-cp312-win_amd64.whl.metadata
  Downloading scikit_learn-1.3.2-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting joblib>=1.1.1 (from scikit-learn)
  Obtaining dependency information for joblib>=1.1.1 from https://files.pythonhosted.org/packages/10/40/d551139c85db202f1f384ba8bcf96aca2f329440a844f924c8a0040b6d02/joblib-1.3.2-py3-none-any.whl.metadata
  Using cached joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Obtaining dependency information for threadpoolctl>=2.0.0 from https://files.pythonhosted.org/packages/81/12/fd4dea011af9d69e1cad05c75f3f7202cdcbeac9b712eea58ca779a72865/threadpoolctl-3.2.0-py3-none-any.whl.metadata
  Downloading threadpoolctl-3.2.0-

## 1. Min-Max Scaling

In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Toy single-column dataset used to demonstrate the scaler.
data = {'Value': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)
print(df.head())

# Min-max scaling: learn the column's range first, then rescale the column
# with the fitted scaler (for this data, 10 maps to 0 and 50 maps to 1).
scaler = MinMaxScaler()
scaler.fit(df[['Value']])
df['Scaled_Value'] = scaler.transform(df[['Value']])
print(df)

   Value
0     10
1     20
2     30
3     40
4     50
   Value  Scaled_Value
0     10          0.00
1     20          0.25
2     30          0.50
3     40          0.75
4     50          1.00


## 2. Standard Scaler or Z-score Normalization

In [3]:
from sklearn.preprocessing import StandardScaler
# Rebuild the sample frame so this section starts from the unscaled column.
data = {'Value': [10, 20, 30, 40, 50]}
df = pd.DataFrame.from_dict(data)
print(df.head())

   Value
0     10
1     20
2     30
3     40
4     50


In [4]:
# Z-score scaling: fit the scaler on the 'Value' column, then transform that
# same column and store the result next to the original values.
scaler = StandardScaler()
df['Scaled_Value'] = scaler.fit(df[['Value']]).transform(df[['Value']])
print(df)

   Value  Scaled_Value
0     10     -1.414214
1     20     -0.707107
2     30      0.000000
3     40      0.707107
4     50      1.414214


## 3. Robust Scaler

In [5]:
from sklearn.preprocessing import RobustScaler
# Same toy column as in the previous sections.
data = {'Value': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data)
print(df.head())

# Robust scaling: fit the scaler on the column, then apply the fitted scaler
# to it (for this data, the middle value 30 maps to 0).
scaler = RobustScaler()
scaler.fit(df[['Value']])
df['Scaled_Value'] = scaler.transform(df[['Value']])
print(df)

   Value
0     10
1     20
2     30
3     40
4     50
   Value  Scaled_Value
0     10          -1.0
1     20          -0.5
2     30           0.0
3     40           0.5
4     50           1.0


## 4. Logarithmic Scaling

In [6]:
import numpy as np
import pandas as pd
# Hard-coded sample values with a large outlier (100000).
data = {'Value': [10000, 20000, 30000, 100000, 50000]}
df = pd.DataFrame(data)

# Log transform: add one column per logarithm base
# (natural log, base 10, base 2), in that order.
for column, log_fn in (('log_value', np.log),
                       ('log_value10', np.log10),
                       ('log_value2', np.log2)):
    df[column] = log_fn(df['Value'])
df.head()



Unnamed: 0,Value,log_value,log_value10,log_value2
0,10000,9.21034,4.0,13.287712
1,20000,9.903488,4.30103,14.287712
2,30000,10.308953,4.477121,14.872675
3,100000,11.512925,5.0,16.60964
4,50000,10.819778,4.69897,15.60964


# Feature Encoding

## 1. One-Hot Encoding

In [8]:
import pandas as pd
# Categorical sample column; 'Red' appears twice.
data = {'Color': ['Red', 'Green', 'Blue', 'Red']}
df = pd.DataFrame(data)
print(df)

# One-hot encoding: replace 'Color' with one indicator column per distinct
# value, named 'Color_<value>'. The original frame is left untouched.
encoded_data = pd.get_dummies(data=df, columns=['Color'])
print(encoded_data)

   Color
0    Red
1  Green
2   Blue
3    Red
   Color_Blue  Color_Green  Color_Red
0       False        False       True
1       False         True      False
2        True        False      False
3       False        False       True


## 2. Label Encoding

In [9]:
from sklearn.preprocessing import LabelEncoder
# Sample data: a categorical column with a repeated label ('Dog').
data = {'Animal': ['Dog', 'Cat', 'Mouse', 'Bird', 'Dog']}
df = pd.DataFrame(data)

# Label encoding: map each distinct animal name to an integer code
# (for this data: Bird=0, Cat=1, Dog=2, Mouse=3).
label_encoder = LabelEncoder()
animal_codes = label_encoder.fit_transform(df['Animal'])
df["Animal_encoded"] = animal_codes
print(df)

  Animal  Animal_encoded
0    Dog               2
1    Cat               1
2  Mouse               3
3   Bird               0
4    Dog               2


## 3. Ordinal Encoding

In [11]:
from sklearn.preprocessing import OrdinalEncoder
# Sample data: sizes with a natural order (Small < Medium < Large).
data = {'Size': ['Small', 'Medium', 'Large', 'Medium']}
df = pd.DataFrame(data)

# Ordinal encoding: the explicit category list fixes the code order, so
# Small -> 0.0, Medium -> 1.0, Large -> 2.0.
ordinal_encoder = OrdinalEncoder(categories=[['Small', 'Medium', 'Large']])
ordinal_encoder.fit(df[['Size']])
df["Size_encoded"] = ordinal_encoder.transform(df[['Size']])
print(df)

     Size  Size_encoded
0   Small           0.0
1  Medium           1.0
2   Large           2.0
3  Medium           1.0
