# Normalizzazione

<img src="https://frenzy86.s3.eu-west-2.amazonaws.com/IFAO/normalization.jpg" width="600">

## Normalizzazione manuale con Pandas

In [None]:
import pandas as pd
# Toy dataset: two integer features on very different scales (A, B)
# and one non-numeric column (C).
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [100, 300, 500],
        'C': ['a', 'b', 'c'],
    }
)

In [None]:
def normalize(df):
    """Min-max scale every column of ``df``.

    Each column is transformed as ``(x - min) / (max - min)``, so its
    smallest value becomes 0 and its largest becomes 1.

    A constant column (``max == min``) would otherwise be divided by zero
    and turn entirely into NaN; its range is treated as 1 instead, so the
    column maps to 0.0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns support subtraction and division
        (e.g. numeric columns).

    Returns
    -------
    pandas.DataFrame
        A scaled copy of ``df``; the input frame is not modified.
    """
    result = df.copy()
    for feature_name in df.columns:
        column = df[feature_name]
        min_value = column.min()
        value_range = column.max() - min_value
        if value_range == 0:
            # Avoid 0/0: with a unit range the column becomes all zeros
            # (missing values stay missing).
            value_range = 1
        result[feature_name] = (column - min_value) / value_range
    return result

In [None]:
# Scale only columns A and B; column C holds strings and is left out.
normalize(df.loc[:, ['A', 'B']])

Unnamed: 0,A,B
0,0.0,0.0
1,0.5,0.5
2,1.0,1.0


## Normalizzazione automatica con scikit-learn

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Sample frame with only the two features A and B.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [100, 300, 500],
    }
)

# Fit the scaler on the frame and scale it in a single step.
scaler = MinMaxScaler()
arr_scaled = scaler.fit_transform(df)

# Show the raw scaled values before wrapping them in a DataFrame.
print(arr_scaled)

# Rebuild a DataFrame that reuses the original column names and index.
df_scaled = pd.DataFrame(data=arr_scaled, index=df.index, columns=df.columns)
df_scaled

[[0.  0. ]
 [0.5 0.5]
 [1.  1. ]]


Unnamed: 0,A,B
0,0.0,0.0
1,0.5,0.5
2,1.0,1.0


## Standardizzazione: sottrarre la media e dividere per la deviazione standard

<img src="https://frenzy86.s3.eu-west-2.amazonaws.com/IFAO/standardization.jpg" width="600">

<img src="https://frenzy86.s3.eu-west-2.amazonaws.com/IFAO/fornlua_stand.png" width="400">

In [None]:
# Rebuild the sample frame in this cell: the scikit-learn cell above rebinds
# `df` to a frame with only A and B, so relying on the earlier three-column
# frame would break on a fresh top-to-bottom run.
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [100, 300, 500],
    'C': list('abc'),
})

# Every column except the last one (C, which holds strings) is standardized.
feature_cols = list(df.columns[:-1])

# z-score per column: subtract the mean, divide by the standard deviation.
# pandas' Series.std() is the sample standard deviation (ddof=1).
# Assigning by column label replaces the int64 columns with float64 ones
# instead of writing floats into integer columns through iloc.
df[feature_cols] = df[feature_cols].apply(lambda x: (x - x.mean()) / x.std(), axis=0)
df

Unnamed: 0,A,B,C
0,-1.0,-1.0,a
1,0.0,0.0,b
2,1.0,1.0,c


In [None]:
import pandas as pd

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()


df = pd.DataFrame({
               'A':[1,2,3],
               'B':[100,300,500],
               'C':list('abc')
             })

# Every column except the last one (C, which holds strings) is standardized.
feature_cols = list(df.columns[:-1])

# Assign by column label so the int64 columns are replaced by float64 ones;
# writing the float result into integer columns through iloc is deprecated
# in recent pandas versions (incompatible-dtype setitem).
# NOTE(review): the ±1.2247 in the output suggests StandardScaler divides by
# the population standard deviation (ddof=0) - confirm against sklearn docs.
df[feature_cols] = scaler.fit_transform(df[feature_cols].to_numpy())
df

Unnamed: 0,A,B,C
0,-1.224745,-1.224745,a
1,0.0,0.0,b
2,1.224745,1.224745,c
