In [2]:
import pandas as pd
from io import StringIO
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [7]:
# Toy dataset with one nominal feature (color), one ordinal feature (size),
# one numeric feature (price) and a string class label.
df = pd.DataFrame(
    data=[
        ['green', 'M', 10.1, 'class1'],
        ['red', 'L', 13.5, 'class2'],
        ['blue', 'XL', 15.3, 'class1'],
    ],
    columns=['color', 'size', 'price', 'classlabel'],
)
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


In [8]:
# Ordinal encoding for the size feature: a larger size gets a larger integer.
size_mapping = {
    'XL': 3,
    'L': 2,
    'M': 1,
}
# Replace the string sizes in df with their integer codes.
df['size'] = df['size'].map(size_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class1
1,red,2,13.5,class2
2,blue,3,15.3,class1


In [24]:
# Reverse lookup: integer code -> original size label.
inv_size_mapping = {v: k for k, v in size_mapping.items()}
# Show the decoded sizes WITHOUT writing them back into df. Assigning the result
# to df['size'] would undo the ordinal encoding, and every later cell that reads
# df['size'] would then see strings instead of integer codes when the notebook
# is run top to bottom.
df.assign(size=df['size'].map(inv_size_mapping))

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class1
1,red,L,13.5,class2
2,blue,XL,15.3,class1


In [9]:
# Give each distinct class label an integer code, following the sorted order
# in which np.unique returns the labels.
class_mapping = dict(
    (label, idx) for idx, label in enumerate(np.unique(df['classlabel']))
)
# Reverse lookup (integer code -> class label) so the encoding can be undone.
inv_class_mapping = dict((idx, label) for label, idx in class_mapping.items())
# Replace the class labels in df with their integer codes.
df['classlabel'] = df['classlabel'].map(class_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,0
1,red,2,13.5,1
2,blue,3,15.3,0


In [39]:
# Fit a LabelEncoder on the string class labels.
# df['classlabel'] already holds the integer codes written by the class_mapping
# cell, so decode it through inv_class_mapping first; otherwise the encoder would
# learn the integers 0/1 as its classes and inverse_transform could never return
# the 'class1'/'class2' names.
class_le = LabelEncoder()
y = class_le.fit_transform(df['classlabel'].map(inv_class_mapping).values)
y

array([0, 1, 0])

In [40]:
# Display the labels decoded from the integer target, but keep `y` itself encoded.
# Reassigning `y` here would make the cell non-idempotent: on a second run `y`
# would already hold labels and inverse_transform would raise on values it has
# never seen as codes.
class_le.inverse_transform(y)

array(['class1', 'class2', 'class1'], dtype=object)

In [60]:
# One-hot encode the nominal color feature (column 0 of the array) and let the
# ColumnTransformer pass the remaining size and price columns through unchanged.
ct = ColumnTransformer(
    transformers=[('one_hot_encoder', OneHotEncoder(), [0])],
    remainder='passthrough',
)
X = ct.fit_transform(df[['color', 'size', 'price']].values)
X

array([[0.0, 1.0, 0.0, 1, 10.1],
       [0.0, 0.0, 1.0, 2, 13.5],
       [1.0, 0.0, 0.0, 3, 15.3]], dtype=object)

In [11]:
# One-hot encode only the nominal 'color' column, dropping the first dummy level
# to avoid a redundant (perfectly collinear) column. Naming the column explicitly
# keeps the result independent of the dtypes that earlier cells happened to leave
# in the other columns.
pd.get_dummies(df, columns=['color'], drop_first=True)

Unnamed: 0,size,price,classlabel,color_green,color_red
0,1,10.1,0,True,False
1,2,13.5,1,False,True
2,3,15.3,0,False,False
