# Target Guided Ordinal Encoding - Utkarsh Gaikwad

In [1]:
import pandas as pd
# Toy dataset: one categorical feature (`city`) and a numeric target (`price`).
df = pd.DataFrame(
    data={
        'city': [
            'New York', 'London', 'Paris',
            'Tokyo', 'New York', 'Paris',
        ],
        'price': [
            200, 150, 300,
            250, 180, 320,
        ],
    }
)

In [2]:
df

Unnamed: 0,city,price
0,New York,200
1,London,150
2,Paris,300
3,Tokyo,250
4,New York,180
5,Paris,320


In [4]:
# Average price per city, flattened into a plain {city: mean price} lookup.
price_by_city = df['price'].groupby(df['city'])
mean_price = price_by_city.mean().to_dict()
mean_price

{'London': 150.0, 'New York': 190.0, 'Paris': 310.0, 'Tokyo': 250.0}

In [5]:
# Replace each city label with the mean price stored for it in `mean_price`;
# the result is written to a new `city_encoded` column on the same frame.
df['city_encoded'] = df['city'].map(mean_price)

In [7]:
df

Unnamed: 0,city,price,city_encoded
0,New York,200,190.0
1,London,150,150.0
2,Paris,300,310.0
3,Tokyo,250,250.0
4,New York,180,190.0
5,Paris,320,310.0


In [8]:
df[['city_encoded','price']]

Unnamed: 0,city_encoded,price
0,190.0,200
1,150.0,150
2,310.0,300
3,250.0,250
4,190.0,180
5,310.0,320


## Internal Assignment

In [9]:
import seaborn as sns

In [10]:
# NOTE: this rebinds `df`; the toy city/price frame built earlier is no longer
# reachable under that name from this cell onwards.
df = sns.load_dataset(name='tips')
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [19]:
import pandas as pd
def target_guided_ordinal_encoder(df: pd.DataFrame, target: str) -> pd.DataFrame:
    """Add a target-mean encoded column for every categorical column of ``df``.

    For each column whose dtype is ``category``, the mean of ``target`` is
    computed per category and written to a new float column named
    ``<column>_encoded``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to encode. It is modified in place (the encoded columns are
        added to it) and the same object is returned.
    target : str
        Name of the numeric column whose per-category mean becomes the code.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with one extra ``*_encoded`` column per categorical
        column. Returned unchanged when there are no categorical columns.

    Raises
    ------
    KeyError
        Propagated from pandas when ``target`` is not a column of ``df`` and
        at least one categorical column has to be encoded.
    """
    # Snapshot the categorical columns up front: the loop adds columns to df.
    cat_cols = list(df.select_dtypes(include='category').columns)
    for col in cat_cols:
        # observed=True: only categories that actually occur need a mean, and
        # being explicit avoids relying on the changing pandas default.
        category_means = df.groupby(by=col, observed=True)[target].mean().to_dict()
        # .map on a categorical Series can yield another categorical; cast to
        # float so the encoded feature is numeric and is not picked up as a
        # categorical column if the encoder is run again on the same frame.
        df[f'{col}_encoded'] = df[col].map(category_means).astype(float)
    # Return only after *all* categorical columns have been processed.
    return df

In [17]:
# Encode the categorical columns of the tips data against `total_bill`.
# The encoder writes its new columns onto `df` and hands that frame back,
# so `df_new` is not an independent copy.
df_new = target_guided_ordinal_encoder(df, target='total_bill')

In [18]:
df_new

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,sex_encoded,smoker_encoded,day_encoded,time_encoded
0,16.99,1.01,Female,No,Sun,Dinner,2,18.056897,19.188278,21.410000,20.797159
1,10.34,1.66,Male,No,Sun,Dinner,3,20.744076,19.188278,21.410000,20.797159
2,21.01,3.50,Male,No,Sun,Dinner,3,20.744076,19.188278,21.410000,20.797159
3,23.68,3.31,Male,No,Sun,Dinner,2,20.744076,19.188278,21.410000,20.797159
4,24.59,3.61,Female,No,Sun,Dinner,4,18.056897,19.188278,21.410000,20.797159
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,20.744076,19.188278,20.441379,20.797159
240,27.18,2.00,Female,Yes,Sat,Dinner,2,18.056897,20.756344,20.441379,20.797159
241,22.67,2.00,Male,Yes,Sat,Dinner,2,20.744076,20.756344,20.441379,20.797159
242,17.82,1.75,Male,No,Sat,Dinner,2,20.744076,19.188278,20.441379,20.797159
