In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy data set: a single categorical column whose last entry is missing (NaN),
# so the encoding steps below can be checked against missing values.
df = pd.DataFrame(
    {
        'color': ['red', 'green', 'blue', 'red', np.nan],
    }
)
df

Unnamed: 0,color
0,red
1,green
2,blue
3,red
4,


# --- Manual Way ---

The manual approach also lets you specify the categories and their corresponding numerical values explicitly (especially useful for encoding ordinal data, where the order of the codes matters).

Use this method if the data contain NaNs: the mapping simply leaves missing values as NaN instead of failing.

### Forward Mapping

In [3]:
# Build the label -> integer-code lookup. NaN is dropped first so it never gets
# a code of its own; np.unique returns the distinct labels in sorted order, so
# the codes follow alphabetical order of the labels.
class_mapping = {
    color_label: code
    for code, color_label in enumerate(np.unique(df['color'].dropna()))
}
class_mapping

{'blue': 0, 'green': 1, 'red': 2}

In [4]:
# Encode: swap each label for its integer code. Values without an entry in
# class_mapping -- here only the NaN row -- come out as NaN, which is why the
# column is displayed as floats (2.0, 1.0, ...) rather than ints.
# Note: this overwrites df['color'], so re-running the cell on already-encoded
# data would find no matching keys and turn every value into NaN.
df['color'] = df['color'].map(class_mapping)
df

Unnamed: 0,color
0,2.0
1,1.0
2,0.0
3,2.0
4,


### Inverse Mapping

In [5]:
# Flip the lookup (integer code -> label) so the encoding can be undone.
# Pairing values with keys keeps the entries in the same order as class_mapping.
inv_class_mapping = dict(zip(class_mapping.values(), class_mapping.keys()))
inv_class_mapping

{0: 'blue', 1: 'green', 2: 'red'}

In [6]:
# Decode: map the integer codes back to their labels. The codes are floats at
# this point (see the encoded output above), yet they still match the integer
# keys of inv_class_mapping; the NaN row has no key and remains NaN.
df['color'] = df['color'].map(inv_class_mapping)
df

Unnamed: 0,color
0,red
1,green
2,blue
3,red
4,


# --- Label Encoder ---

Suitable for nominal categories. **It does not handle NaN values!** Use the manual mapping above when the data contain NaNs.

In [7]:
from sklearn.preprocessing import LabelEncoder

In [8]:
# Fresh toy data set for the LabelEncoder demo -- same colors as before, but
# without the missing value.
df = pd.DataFrame(
    {
        'color': ['red', 'green', 'blue', 'red'],
    }
)
df

Unnamed: 0,color
0,red
1,green
2,blue
3,red


### Forward Mapping

In [9]:
# Learn the distinct labels and encode the column in one step. The fitted
# encoder is kept around because it is needed later to undo the encoding.
encoder = LabelEncoder()
df['color'] = encoder.fit_transform(df['color'])
df

Unnamed: 0,color
0,2
1,1
2,0
3,2


### Inverse Mapping

In [10]:
# Decode with the same fitted encoder, turning the integer codes back into the
# original string labels (see the output below).
df['color'] = encoder.inverse_transform(df['color'])
df

Unnamed: 0,color
0,red
1,green
2,blue
3,red
