In [1]:
# Label Encoding & One-Hot Encoding
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Toy dataset: one categorical column holding three distinct colours.
data = {'Color': ['Red', 'Blue', 'Green', 'Blue', 'Red']}
df = pd.DataFrame(data)

# Label encoding: learn the categories first, then map every row to its
# integer code and store the result next to the original column.
# NOTE(review): scikit-learn documents LabelEncoder as meant for target
# labels; confirm whether OrdinalEncoder is preferred for a feature column.
label_encoder = LabelEncoder().fit(df['Color'])
df['Color_Label'] = label_encoder.transform(df['Color'])

# One-hot encoding: build one indicator column per colour (named
# Color_<value>) and attach them to the frame by index.
df_onehot = pd.get_dummies(df['Color'], prefix='Color')
df = df.join(df_onehot)

# Display the original column together with both encodings.
df


Unnamed: 0,Color,Color_Label,Color_Blue,Color_Green,Color_Red
0,Red,2,False,False,True
1,Blue,0,True,False,False
2,Green,1,False,True,False
3,Blue,0,True,False,False
4,Red,2,False,False,True


In [3]:
# Handling Missing Data - Imputation
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

# Toy dataset: six ages, two of which are missing (NaN).
data = {'Age': [25, np.nan, 30, 28, np.nan, 35]}
df = pd.DataFrame(data)

# Select the column once as a one-column DataFrame (double brackets);
# this is the form handed to every imputer below.
age_frame = df[['Age']]

# Mean imputation: missing entries are filled with the column mean.
mean_imputer = SimpleImputer(strategy='mean')
df['Age_Mean'] = mean_imputer.fit(age_frame).transform(age_frame)

# Median imputation: missing entries are filled with the column median.
median_imputer = SimpleImputer(strategy='median')
df['Age_Median'] = median_imputer.fit(age_frame).transform(age_frame)

# Most-frequent imputation: missing entries are filled with the mode.
# Every observed age in this sample occurs exactly once, so the fill value
# is whichever tied value the imputer picks.
mode_imputer = SimpleImputer(strategy='most_frequent')
df['Age_Mode'] = mode_imputer.fit(age_frame).transform(age_frame)

# Display the raw column next to the three imputed variants.
df


Unnamed: 0,Age,Age_Mean,Age_Median,Age_Mode
0,25.0,25.0,25.0,25.0
1,,29.5,29.0,25.0
2,30.0,30.0,30.0,30.0
3,28.0,28.0,28.0,28.0
4,,29.5,29.0,25.0
5,35.0,35.0,35.0,35.0


In [5]:
# Feature Scaling - Standardization & Normalization
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Toy dataset: five exam marks.
data = {'Marks': [45, 60, 75, 90, 30]}
df = pd.DataFrame(data)

# Select the column once as a one-column DataFrame (double brackets);
# both scalers are fitted on, and applied to, this same selection.
marks_frame = df[['Marks']]

# Fit each scaler on the raw marks before any derived column is added.
standard_scaler = StandardScaler().fit(marks_frame)
minmax_scaler = MinMaxScaler().fit(marks_frame)

# Standard scaling: centre the marks and divide by their spread.
df['Marks_Standard'] = standard_scaler.transform(marks_frame)

# Min-max scaling: rescale the marks so the smallest maps to 0 and the
# largest to 1.
df['Marks_MinMax'] = minmax_scaler.transform(marks_frame)

# Display the raw marks next to both scaled versions.
df


Unnamed: 0,Marks,Marks_Standard,Marks_MinMax
0,45,-0.707107,0.25
1,60,0.0,0.5
2,75,0.707107,0.75
3,90,1.414214,1.0
4,30,-1.414214,0.0
