In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from feature_engine.outliers import ArbitraryOutlierCapper

# function to load the titanic dataset

def load_titanic(path='titanic.csv'):
    """Load the Titanic dataset and apply light preprocessing.

    Parameters
    ----------
    path : str or path-like, default 'titanic.csv'
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The data with ``Cabin`` reduced to its first character, ``Pclass``
        cast to object dtype and missing ``Embarked`` values filled with 'C'.
    """
    data = pd.read_csv(path)
    # keep only the first character of the cabin label; the column is cast to
    # str first (missing cabins come out as 'n' in the preview output)
    data['Cabin'] = data['Cabin'].astype(str).str[0]
    # treat passenger class as a categorical (object) variable, not a number
    data['Pclass'] = data['Pclass'].astype('O')
    # assign the result back: fillna(inplace=True) on a column selection is
    # chained assignment, which warns in pandas 2.x and does not update the
    # parent frame under Copy-on-Write
    data['Embarked'] = data['Embarked'].fillna('C')
    return data



In [4]:
# load the preprocessed Titanic data and preview the first rows
data = load_titanic()
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,n,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,n,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,n,S


In [5]:
# largest Age and Fare values before any capping is applied
data['Age'].max(), data['Fare'].max()

(80.0, 512.3292)

In [6]:
# cap only the right tail: Age at 50 and Fare at 200
capper = ArbitraryOutlierCapper(
    max_capping_dict={
        'Age': 50,
        'Fare': 200,
    },
    min_capping_dict=None,
)

# fit on a copy of the data with missing values replaced by 0
capper.fit(data.fillna(0))

In [7]:
# upper caps held by the fitted capper (one entry per capped variable)
capper.right_tail_caps_

{'Age': 50, 'Fare': 200}

In [8]:
# min_capping_dict was None for this capper, so no lower caps are stored
capper.left_tail_caps_

{}

In [10]:
# apply the caps, then confirm the maxima now equal the chosen upper limits
temp = capper.transform(data.fillna(0))
temp['Age'].max(), temp['Fare'].max()

(50.0, 200.0)

#### Minimum capping

In [11]:
# cap only the left tail: Age at 10 and Fare at 100
capper = ArbitraryOutlierCapper(
    max_capping_dict=None,
    min_capping_dict={'Age': 10, 'Fare': 100},
)

# NOTE(review): fillna(0) turns any missing Age/Fare into 0, which the
# minimum cap will then lift to the floor value - confirm this is intended
capper.fit(data.fillna(0))

In [12]:
# variables the fitted capper operates on
capper.variables_

['Age', 'Fare']

In [13]:
# max_capping_dict was None for this capper, so no upper caps are stored
capper.right_tail_caps_

{}

In [14]:
# lower caps held by the fitted capper (one entry per capped variable)
capper.left_tail_caps_

{'Age': 10, 'Fare': 100}

In [17]:
# apply the caps, then confirm the minima now equal the chosen lower limits
temp = capper.transform(data.fillna(0))
temp['Age'].min(), temp['Fare'].min()

(10.0, 100.0)

#### Both ends capping

In [18]:
# cap both tails: Age within [10, 50] and Fare within [100, 200]
capper = ArbitraryOutlierCapper(
    max_capping_dict={'Age': 50, 'Fare': 200},
    min_capping_dict={'Age': 10, 'Fare': 100},
)

# fit on a copy of the data with missing values replaced by 0
capper.fit(data.fillna(0))

In [19]:
# upper caps held by the fitted capper
capper.right_tail_caps_

{'Age': 50, 'Fare': 200}

In [20]:
# lower caps held by the fitted capper
capper.left_tail_caps_

{'Age': 10, 'Fare': 100}

In [21]:
# apply both caps, then check the lower end of each variable
temp = capper.transform(data.fillna(0))
temp['Age'].min(), temp['Fare'].min()


(10.0, 100.0)

In [22]:
# check the upper end of each variable after capping
temp['Age'].max(), temp['Fare'].max()

(50.0, 200.0)