## Implement One-hot encoding

In [60]:
import pandas as pd
# Toy frame: a single categorical column holding five distinct colour labels.
df = pd.DataFrame(
    {
        'color': ["red", "blue", "green", "white", "black"],
    }
)

In [61]:
# One-hot encoding with scikit-learn

from sklearn.preprocessing import OneHotEncoder

# Default-configured encoder
encoder = OneHotEncoder()

# Learn the categories from `df` and encode it in one step
encoded_data = encoder.fit_transform(df)

# Densify the encoder output so the 0/1 matrix can be displayed
encoded_data_array = encoded_data.toarray()
encoded_data_array

array([[0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.]])

In [62]:
# Show the encoder's raw return value (the object .toarray() was called on above).
encoded_data

<5x5 sparse matrix of type '<class 'numpy.float64'>'
	with 5 stored elements in Compressed Sparse Row format>

In [63]:
# Wrap the dense array in a DataFrame whose columns carry the encoder's
# generated feature names.
encoder_df = pd.DataFrame(
    data=encoded_data_array,
    columns=encoder.get_feature_names_out(),
)
encoder_df

Unnamed: 0,color_black,color_blue,color_green,color_red,color_white
0,0.0,0.0,0.0,1.0,0.0
1,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0
4,1.0,0.0,0.0,0.0,0.0


In [64]:
# Output column labels produced by the fitted encoder.
encoder.get_feature_names_out()

array(['color_black', 'color_blue', 'color_green', 'color_red',
       'color_white'], dtype=object)

In [65]:
# Encode a new sample with the already-fitted encoder (transform only, no refit).
# The encoder was fitted on a DataFrame, so the sample is passed as a DataFrame
# with the same column names; a bare nested list carries no feature names and
# makes scikit-learn warn that X does not have valid feature names.
encoder.transform(
    pd.DataFrame([['black']], columns=encoder.feature_names_in_)
).toarray()



array([[1., 0., 0., 0., 0.]])

In [66]:
# One-hot encoding with pandas.
# dtype=int pins the indicator columns to 0/1 integers; without it the result
# dtype depends on the pandas version (boolean True/False from pandas 2.0 on).
encoded_data = pd.get_dummies(df, dtype=int)
encoded_data

Unnamed: 0,color_black,color_blue,color_green,color_red,color_white
0,0,0,0,1,0
1,0,1,0,0,0
2,0,0,1,0,0
3,0,0,0,0,1
4,1,0,0,0,0


In [67]:
# Place the original column and its indicator columns side by side.
pd.concat(
    objs=[df, encoded_data],
    axis="columns",
)

Unnamed: 0,color,color_black,color_blue,color_green,color_red,color_white
0,red,0,0,0,1,0
1,blue,0,1,0,0,0
2,green,0,0,1,0,0
3,white,0,0,0,0,1
4,black,1,0,0,0,0


In [68]:
# Internal Assignment: one-hot encode categorical columns of seaborn's "tips" dataset.

import seaborn as sns
# NOTE: rebinds `df` — from here on it is the tips data, not the colour frame.
df = sns.load_dataset("tips")
# Single-column DataFrame holding 'sex'; not referenced by the cells visible below.
gender = df[['sex']]

In [69]:
# Count missing values per column before encoding.
df.isna().sum()

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64

In [70]:
from sklearn.preprocessing import OneHotEncoder

# New encoder for the tips data: fit on the 'sex' column, then densify the result.
encoder = OneHotEncoder()
sex_encoded = encoder.fit_transform(df[['sex']])
ecoded_data = sex_encoded.toarray()  # (sic) spelling kept: the next cell reads `ecoded_data`

In [71]:
# Build a labelled frame from the dense one-hot array for 'sex'.
# index=df.index: the array has one row per row of `df`, and this frame is later
# concatenated with `df` column-wise (which aligns on index), so reuse df's index
# instead of a fresh 0..n-1 range to guarantee the rows line up.
encoded_df = pd.DataFrame(
    ecoded_data,
    columns=encoder.get_feature_names_out(),
    index=df.index,
)
encoded_df

Unnamed: 0,sex_Female,sex_Male
0,1.0,0.0
1,0.0,1.0
2,0.0,1.0
3,0.0,1.0
4,1.0,0.0
...,...,...
239,0.0,1.0
240,1.0,0.0
241,0.0,1.0
242,0.0,1.0


In [72]:
# Append the one-hot 'sex' columns to the right of the tips data.
new_df = pd.concat(
    objs=[df, encoded_df],
    axis="columns",
)
new_df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,sex_Female,sex_Male
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0
2,21.01,3.5,Male,No,Sun,Dinner,3,0.0,1.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0


In [73]:
# List the distinct values of 'day' before encoding it.
df['day'].unique()

['Sun', 'Sat', 'Thur', 'Fri']
Categories (4, object): ['Thur', 'Fri', 'Sat', 'Sun']

In [74]:
# One-hot encode 'day'. NOTE: this refits the shared `encoder`, replacing whatever
# it was fitted on before.
day_encoder = encoder.fit_transform(df[['day']]).toarray()  # dense array, despite the name
df_day_encoder = pd.DataFrame(
    day_encoder,
    columns=encoder.get_feature_names_out(),
    index=df.index,  # keep rows aligned with the source frame for the concat below
)
# Drop any columns of the same names left by a previous run of this cell, so that
# re-running it replaces them instead of appending duplicates to `new_df`.
new_df = pd.concat(
    [new_df.drop(columns=df_day_encoder.columns, errors="ignore"), df_day_encoder],
    axis=1,
)
new_df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,sex_Female,sex_Male,day_Fri,day_Sat,day_Sun,day_Thur
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,0.0,0.0,1.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,0.0,0.0,1.0,0.0
2,21.01,3.5,Male,No,Sun,Dinner,3,0.0,1.0,0.0,0.0,1.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,0.0,0.0,1.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,0.0,0.0,1.0,0.0


In [75]:
# List the distinct values of 'time' before encoding it.
df['time'].unique()

['Dinner', 'Lunch']
Categories (2, object): ['Lunch', 'Dinner']

In [76]:
# One-hot encode 'time'. NOTE: this refits the shared `encoder`, replacing whatever
# it was fitted on before.
time_encoder = encoder.fit_transform(df[['time']]).toarray()  # dense array, despite the name
df_time_encoder = pd.DataFrame(
    time_encoder,
    columns=encoder.get_feature_names_out(),
    index=df.index,  # keep rows aligned with the source frame for the concat below
)
# Drop any columns of the same names left by a previous run of this cell, so that
# re-running it replaces them instead of appending duplicates to `new_df`.
new_df = pd.concat(
    [new_df.drop(columns=df_time_encoder.columns, errors="ignore"), df_time_encoder],
    axis=1,
)
new_df.head(20)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,sex_Female,sex_Male,day_Fri,day_Sat,day_Sun,day_Thur,time_Dinner,time_Lunch
0,16.99,1.01,Female,No,Sun,Dinner,2,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
1,10.34,1.66,Male,No,Sun,Dinner,3,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
2,21.01,3.5,Male,No,Sun,Dinner,3,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
3,23.68,3.31,Male,No,Sun,Dinner,2,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
4,24.59,3.61,Female,No,Sun,Dinner,4,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
5,25.29,4.71,Male,No,Sun,Dinner,4,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
6,8.77,2.0,Male,No,Sun,Dinner,2,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
7,26.88,3.12,Male,No,Sun,Dinner,4,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
8,15.04,1.96,Male,No,Sun,Dinner,2,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
9,14.78,3.23,Male,No,Sun,Dinner,2,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
