# Feature Scaling

In [None]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Dataset


In [None]:
# Load the breast-cancer dataset via scikit-learn's `load_breast_cancer`.
cancer = load_breast_cancer()

In [None]:
# List the fields exposed by the loaded dataset object.
cancer.keys()

In [None]:
# Print the dataset's `DESCR` field (presumably its textual description).
print(cancer.DESCR)

In [None]:
# Show the names attached to the target classes.
cancer.target_names

In [None]:
# Build the working frame from the first `n_features` feature columns.
# 2 keeps 'mean radius' and 'mean texture'; set it to 4 to keep the next two
# feature columns as well.
n_features = 2

df = pd.DataFrame(cancer.data[:, :n_features],
                  columns=cancer.feature_names[:n_features])

# Append the label column taken from the dataset's `target` array.
df['class'] = cancer.target

# Only the last expression of a notebook cell is rendered, so preview one view
# at a time; swap in df.head() or df.sample(5, random_state=1) when needed.
df.tail()

In [None]:
# Summary statistics of every column, rounded to two decimals for display.
df.describe().round(decimals=2)

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Number of non-null entries per column within each class label.
df.groupby(by='class').count()

## Data visualization

In [None]:
# Scatter plot of the two raw features, coloured and marked by class.
marker_style = dict(alpha=0.9, edgecolor='w', s=80)

plt.figure(figsize=(5.2, 4))
sns.scatterplot(data=df, x='mean radius', y='mean texture',
                hue='class', style='class', **marker_style)
plt.title('Raw Data (Before)')
plt.show()

## X and y

In [None]:
# Feature matrix: every column of df except the label.
X = df.drop(columns='class')

# Preview the first rows (use X.tail() for the last ones).
X.head()

In [None]:
# Target vector: the class label column.
y = df['class']

# Preview the first five labels.
y.head(5)

## Feature Scaling

### 1. Normalization (MinMaxScaler)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Normalization: fit a MinMaxScaler on X, then apply it.
# Rebinds the shared names `sc` / `X_sc`, so whichever scaler cell ran last
# determines what the later cells plot.
sc = MinMaxScaler().fit(X)
X_sc = sc.transform(X)

# Preview the first five scaled rows.
X_sc[:5]

### 2. Standardization (StandardScaler)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardization: fit a StandardScaler on X, then apply it.
# Rebinds the shared names `sc` / `X_sc`, so whichever scaler cell ran last
# determines what the later cells plot.
sc = StandardScaler().fit(X)
X_sc = sc.transform(X)

# Preview the first five scaled rows.
X_sc[:5]

### 3. RobustScaler

In [None]:
from sklearn.preprocessing import RobustScaler

# Robust scaling: fit a RobustScaler on X, then apply it.
# Rebinds the shared names `sc` / `X_sc`, so whichever scaler cell ran last
# determines what the later cells plot.
sc = RobustScaler().fit(X)
X_sc = sc.transform(X)

# Preview the first five scaled rows.
X_sc[:5]

## Visualization

In [None]:
# Wrap the scaled array in a DataFrame that reuses X's column labels and index,
# so this cell stays correct whichever set of features was selected upstream
# (hard-coded names would raise a shape mismatch with any other selection).
dfsc = pd.DataFrame(X_sc, columns=X.columns, index=X.index)

# Re-attach the labels; y shares X's index, so the rows line up.
dfsc['class'] = y

# Summary statistics of the scaled features, rounded to three decimals.
dfsc.describe().round(3)

## KDE Plot

In [None]:
# Side-by-side density curves of the two features: raw values on the left,
# scaled values on the right. Each curve is drawn exactly once per panel.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3.5))

ax1.set_title('Before Scale (Raw data)')
sns.kdeplot(df['mean radius'], fill=True, ax=ax1)
sns.kdeplot(df['mean texture'], fill=True, ax=ax1)

# Columns 0 and 1 of X_sc follow the column order of X
# ('mean radius', then 'mean texture').
ax2.set_title('After Scale')
sns.kdeplot(X_sc[:, 0], fill=True, ax=ax2)
sns.kdeplot(X_sc[:, 1], fill=True, ax=ax2)

plt.show()

## Scatter Plot

In [None]:
# Scatter plot of the two scaled features, coloured and marked by class.
marker_style = dict(alpha=0.9, edgecolor='w', s=80)

plt.figure(figsize=(5.2, 4))
sns.scatterplot(data=dfsc, x='mean radius', y='mean texture',
                hue='class', style='class', **marker_style)
plt.title('After Scaling')
plt.show()

In [None]:
# Density curves for 'mean perimeter' and 'mean area', before and after
# scaling. These columns are only present when the DataFrame was built with
# four features, so the cell checks for them instead of failing with a
# KeyError / IndexError on the default two-feature setup.
extra_features = ['mean perimeter', 'mean area']

if all(name in X.columns for name in extra_features):
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3.5))

    ax1.set_title('Before Scale (Raw data)')
    ax2.set_title('After Scale')

    for name in extra_features:
        # Look the scaled column up by label so it always matches the raw one.
        col = X.columns.get_loc(name)
        sns.kdeplot(df[name], fill=True, ax=ax1)
        sns.kdeplot(X_sc[:, col], fill=True, ax=ax2)

    plt.show()
else:
    print("Skipped: 'mean perimeter' / 'mean area' are not in X; "
          "rebuild df with the first four features and rerun the scaler.")