https://medium.com/@annettedolph/5-simple-techniques-for-working-with-categorical-data-in-python-75612fe4e98d

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample data: one column holding five rating labels, one per row.
data = {'Category': ['Very Poor', 'Poor', 'Neutral', 'Good', 'Very Good']}

# Label encoding: fit the encoder on the 'Category' column and store the
# integer code it produces for each row in a new column.
label_encoder = LabelEncoder()
df = pd.DataFrame(data).assign(
    Category_LabelEncoded=lambda frame: label_encoder.fit_transform(frame['Category'])
)

print(df)
#
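# Observe that LabelEncoder knows nothing about the ordinal meaning of the labels:
# it numbers the classes in sorted (alphabetical) order
# (Good=0, Neutral=1, Poor=2, Very Good=3, Very Poor=4),
# so the codes do not follow the Very Poor -> Very Good ranking.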
#

    Category  Category_LabelEncoded
0  Very Poor                      4
1       Poor                      2
2    Neutral                      1
3       Good                      0
4  Very Good                      3


In [2]:
# One-Hot Encoding: one boolean indicator column per distinct category,
# named 'Category_OneHot_<label>'.
df_onehot = pd.get_dummies(df['Category'], prefix='Category_OneHot')

# Concatenate with the original DataFrame.
# Any one-hot columns already present (e.g. from a previous run of this cell)
# are dropped first, so re-executing the cell replaces them instead of
# appending duplicate column names. On a fresh kernel nothing is dropped.
df = pd.concat(
    [df.drop(columns=df_onehot.columns, errors='ignore'), df_onehot],
    axis=1,
)

print(df)

    Category  Category_LabelEncoded  Category_OneHot_Good  \
0  Very Poor                      4                 False   
1       Poor                      2                 False   
2    Neutral                      1                 False   
3       Good                      0                  True   
4  Very Good                      3                 False   

   Category_OneHot_Neutral  Category_OneHot_Poor  Category_OneHot_Very Good  \
0                    False                 False                      False   
1                    False                  True                      False   
2                     True                 False                      False   
3                    False                 False                      False   
4                    False                 False                       True   

   Category_OneHot_Very Poor  
0                       True  
1                      False  
2                      False  
3                      False  
4  

In [4]:
# Ordinal Encoding: rank the labels explicitly from worst (1) to best (5),
# then translate each row's label into its rank.
ordinal_mapping = {
    level: rank
    for rank, level in enumerate(
        ('Very Poor', 'Poor', 'Neutral', 'Good', 'Very Good'), start=1
    )
}
df['Category_OrdinalEncoded'] = df['Category'].map(ordinal_mapping)

print(df)

    Category  Category_LabelEncoded  Category_OneHot_Good  \
0  Very Poor                      4                 False   
1       Poor                      2                 False   
2    Neutral                      1                 False   
3       Good                      0                  True   
4  Very Good                      3                 False   

   Category_OneHot_Neutral  Category_OneHot_Poor  Category_OneHot_Very Good  \
0                    False                 False                      False   
1                    False                  True                      False   
2                     True                 False                      False   
3                    False                 False                      False   
4                    False                 False                       True   

   Category_OneHot_Very Poor  Category_OrdinalEncoded  
0                       True                        1  
1                      False                  

In [5]:
# Frequency Encoding: replace each label with the share of rows that carry it.
category_column = df['Category']

# Relative frequency of every distinct label (normalize=True gives fractions, not counts).
frequency_encoding = category_column.value_counts(normalize=True)

# Look up each row's label in the frequency table.
df['Category_FrequencyEncoded'] = category_column.map(frequency_encoding)

print(df)

    Category  Category_LabelEncoded  Category_OneHot_Good  \
0  Very Poor                      4                 False   
1       Poor                      2                 False   
2    Neutral                      1                 False   
3       Good                      0                  True   
4  Very Good                      3                 False   

   Category_OneHot_Neutral  Category_OneHot_Poor  Category_OneHot_Very Good  \
0                    False                 False                      False   
1                    False                  True                      False   
2                     True                 False                      False   
3                    False                 False                      False   
4                    False                 False                       True   

   Category_OneHot_Very Poor  Category_OrdinalEncoded  \
0                       True                        1   
1                      False                

In [6]:
# Sample Target Variable: one numeric outcome per row of df, in row order.
target = [10, 15, 12, 8, 14]

# Target Encoding: replace each category with the MEAN target of all rows
# that share that category.
# A plain dict(zip(categories, target)) lookup keeps only the last target seen
# for a repeated category and silently truncates if the lengths differ, so the
# targets are aligned to df's index (which raises on a length mismatch) and
# averaged per category instead. The encoded column is therefore float-valued.
target_by_row = pd.Series(target, index=df.index)
category_target_mean = target_by_row.groupby(df['Category']).mean()
df['Category_TargetEncoded'] = df['Category'].map(category_target_mean)

print(df)

    Category  Category_LabelEncoded  Category_OneHot_Good  \
0  Very Poor                      4                 False   
1       Poor                      2                 False   
2    Neutral                      1                 False   
3       Good                      0                  True   
4  Very Good                      3                 False   

   Category_OneHot_Neutral  Category_OneHot_Poor  Category_OneHot_Very Good  \
0                    False                 False                      False   
1                    False                  True                      False   
2                     True                 False                      False   
3                    False                 False                      False   
4                    False                 False                       True   

   Category_OneHot_Very Poor  Category_OrdinalEncoded  \
0                       True                        1   
1                      False                

In [9]:
# Inspect the (category, target) pairs obtained by zipping the 'Category'
# column with the target list. The zip iterator is consumed by the unpacking
# below, so `zzz` is exhausted afterwards.
zzz = zip(df['Category'], target)
print([*zzz])


[('Very Poor', 10), ('Poor', 15), ('Neutral', 12), ('Good', 8), ('Very Good', 14)]
