# Min Max Scaler

In [91]:
import numpy as np
import pandas as pd

class MinMaxScaler:
    def fit(self, X):
        self._min = X.min()
        self._max = X.max()
        
    def transform(self, X):
        return (X - self._min) / (self._max - self._min)
    
    def fit_transform(self,X):
        self.fit(X)
        X = (X - self._min) / (self._max - self._min)
        return X
    
    def inverse_transform(self, X):
        return X * (self._max - self._min) + self._min
        
        

In [4]:
arr = np.array([[14,25,18,39,10],[2000,1500,1000,4000,200]])

df = pd.DataFrame(arr).T
df

Unnamed: 0,0,1
0,14,2000
1,25,1500
2,18,1000
3,39,4000
4,10,200


In [9]:
######################## fit method ########################

In [5]:
minmax = MinMaxScaler()
minmax.fit(df)
minmax.transform(df)

Unnamed: 0,0,1
0,0.137931,0.473684
1,0.517241,0.342105
2,0.275862,0.210526
3,1.0,1.0
4,0.0,0.0


In [10]:
######################## fit_transform method ########################

In [11]:
minmax1 = MinMaxScaler()
minmax1.fit_transform(df)

Unnamed: 0,0,1
0,0.137931,0.473684
1,0.517241,0.342105
2,0.275862,0.210526
3,1.0,1.0
4,0.0,0.0


In [14]:
######################## inverse_transform method ########################

In [15]:
minmax1.inverse_transform(np.array([0.7,0.9]))

0      30.3
1    3620.0
dtype: float64

In [40]:
(df - df.min())/ (df.max() - df.min())

Unnamed: 0,0,1
0,0.137931,0.473684
1,0.517241,0.342105
2,0.275862,0.210526
3,1.0,1.0
4,0.0,0.0


In [30]:
def minmaxscaler(vec):
    min_vec = np.min(vec, axis=1).reshape(vec.shape[0],1)
    max_vec = np.max(vec, axis=1).reshape(vec.shape[0],1)
    return (age-min_vec) / (max_vec-min_vec)

In [31]:
minmaxscaler(age)

array([[0.13793103, 0.51724138, 0.27586207, 1.        , 0.        ],
       [0.47368421, 0.34210526, 0.21052632, 1.        , 0.        ]])

In [25]:
np.min(age, axis=1).reshape(2,1)

array([[ 10],
       [200]])

In [29]:
(age-np.min(age, axis=1).reshape(2,1)) / (np.max(age, axis=1).reshape(2,1)-np.min(age, axis=1).reshape(2,1))

array([[0.13793103, 0.51724138, 0.27586207, 1.        , 0.        ],
       [0.47368421, 0.34210526, 0.21052632, 1.        , 0.        ]])

In [36]:
minmaxscaler(df)

AttributeError: 'Series' object has no attribute 'reshape'

# Standard Scaler

In [16]:
arr = df.values

array([[  14, 2000],
       [  25, 1500],
       [  18, 1000],
       [  39, 4000],
       [  10,  200]])

In [20]:
arr.mean(axis=1)

array([  21.2, 1740. ])

In [22]:
(14-21.2)**2+(25-21.2)**2+(18-21.2)**2+(39-21.2)**2+(10-21.2)**2

518.8

In [27]:
import math
518.8/5

103.75999999999999

In [26]:
arr.var(axis=1)

array([1.0376e+02, 1.6304e+06])

In [28]:
arr.std(axis=1)

array([  10.18626526, 1276.87117596])

In [40]:
np.sum((arr-arr.mean(axis=1).reshape((arr.shape[0],1)))**2, axis=1)/arr.shape[1]

array([1.0376e+02, 1.6304e+06])

In [39]:
arr.shape

(2, 5)

In [45]:
def mean(vec):
    return np.sum(vec, axis=1)/vec.shape[1]

In [65]:
def var(vec):
    return np.sum((vec-mean(vec).reshape((vec.shape[0],1)))**2, axis=1)/vec.shape[1]
var(arr)

array([1.0376e+02, 1.6304e+06])

In [66]:
def std(vec):
    return np.sqrt(var(vec))
std(arr)

array([  10.18626526, 1276.87117596])

In [56]:
class StandardScaler:
    def fit(self, X):
        self._mean = X.mean()
        self._std = X.std()
        
    def transform(self, X):
        return (X - self._mean) / self._std
    
    def fit_transform(self,X):
        self.fit(self, X):
        X = (X - self._mean) / self._std
        return X
    
    def inverse_transform(self, X):
        return X * self._std + self._mean

In [59]:
sc = StandardScaler()
sc.fit_transform(df)

Unnamed: 0,0,1
0,-0.632212,0.182126
1,0.333667,-0.168116
2,-0.280983,-0.518358
3,1.562968,1.583093
4,-0.98344,-1.078745


In [61]:
df.mean()

0      21.2
1    1740.0
dtype: float64

In [64]:
df.sum()/len(df)

0      21.2
1    1740.0
dtype: float64

# Robust Scaler

In [69]:
df.quantile(0.5)

0      18.0
1    1500.0
Name: 0.5, dtype: float64

In [73]:
np.quantile(arr, 0.5, axis=1)

array([  18., 1500.])

In [89]:
class RobustScaler:
    def fit(self, X):
        self._iqr = X.quantile(0.75) - X.quantile(0.25)
        self._median = X.median()
        
    def transform(self, X):
        return (X - self._median) / self._iqr
    
    def fit_transform(self,X):
        self.fit(X)
        X = (X - self._median) / self._iqr
        return X
    
    def inverse_transform(self, X):
        return X * self._iqr + self._median

In [90]:
rsc = RobustScaler()
rsc.fit_transform(df)

Unnamed: 0,0,1
0,-0.363636,0.5
1,0.636364,0.0
2,0.0,-0.5
3,1.909091,2.5
4,-0.727273,-1.3


0      11.0
1    1000.0
dtype: float64