<a href="https://colab.research.google.com/github/zahid-bracu/feature-engineering/blob/main/Part_5_1_Column_Transformation_With_out_Column_Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, LabelEncoder

# Create sample data.
# A seeded generator is used so that Restart & Run All rebuilds the exact same
# synthetic frame; change SEED to draw a different sample.
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)

data = {
    'age': rng.integers(18, 70, size=N_SAMPLES),  # Random integer ages from 18 to 69 (upper bound is exclusive)
    'gender': rng.choice(['Male', 'Female'], size=N_SAMPLES),  # Random genders
    'fever': np.round(rng.uniform(97, 104, size=N_SAMPLES), 1),  # Random temperatures between 97 and 104, one decimal
    'cough': rng.choice(['Mild', 'Strong'], size=N_SAMPLES),  # Random cough severity
    'city': rng.choice(['Dhaka', 'Mymensingh', 'Chittagong'], size=N_SAMPLES),  # Random city from a fixed list
    'has_covid': rng.choice([True, False], size=N_SAMPLES)  # Random covid status
}
# Convert to DataFrame
df = pd.DataFrame(data)
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,55,Female,102.7,Strong,Dhaka,False
1,64,Female,98.8,Mild,Dhaka,False
2,22,Female,98.9,Mild,Chittagong,False
3,27,Male,100.6,Strong,Mymensingh,False
4,31,Female,102.2,Strong,Chittagong,True
...,...,...,...,...,...,...
95,60,Female,97.7,Mild,Chittagong,True
96,25,Male,98.3,Strong,Mymensingh,False
97,67,Female,101.1,Strong,Dhaka,True
98,65,Female,99.1,Strong,Dhaka,False


In [None]:
# Standardise the numeric features (age, fever) with StandardScaler.
# The scaled values overwrite the original columns of `df`.
numeric_columns = ['age', 'fever']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_columns])
df[numeric_columns] = scaled_values
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,0.777654,Female,0.885949,Strong,Dhaka,False
1,1.356552,Female,-1.026171,Mild,Dhaka,False
2,-1.344974,Female,-0.977143,Mild,Chittagong,False
3,-1.023364,Male,-0.143654,Strong,Mymensingh,False
4,-0.766076,Female,0.640806,Strong,Chittagong,True
...,...,...,...,...,...,...
95,1.099264,Female,-1.565487,Mild,Chittagong,True
96,-1.152008,Male,-1.271315,Strong,Mymensingh,False
97,1.549519,Female,0.101489,Strong,Dhaka,True
98,1.420874,Female,-0.879085,Strong,Dhaka,False


In [None]:
# One-Hot Encoding for Gender (City is handled the same way in its own cell).
# drop='first' removes the first category, so only the remaining indicator
# column(s) are produced.
ohe_gender = OneHotEncoder(drop='first')
# The encoder output is converted to a dense array before building the frame.
gender_encoded = ohe_gender.fit_transform(df[['gender']]).toarray()
df_gender = pd.DataFrame(
    gender_encoded,
    columns=ohe_gender.get_feature_names_out(['gender']),
    # Reuse df's index: the encoded frame is later joined with
    # pd.concat(axis=1), which aligns rows on index labels, not on position.
    index=df.index,
)
df_gender

Unnamed: 0,gender_Male
0,0.0
1,0.0
2,0.0
3,1.0
4,0.0
...,...
95,0.0
96,1.0
97,0.0
98,0.0


In [None]:
# One-Hot Encoding for City.
# drop='first' removes the first category, so the remaining categories each
# get one indicator column.
ohe_city = OneHotEncoder(drop='first')
# The encoder output is converted to a dense array before building the frame.
city_encoded = ohe_city.fit_transform(df[['city']]).toarray()
df_city = pd.DataFrame(
    city_encoded,
    columns=ohe_city.get_feature_names_out(['city']),
    # Reuse df's index: the encoded frame is later joined with
    # pd.concat(axis=1), which aligns rows on index labels, not on position.
    index=df.index,
)
df_city

Unnamed: 0,city_Dhaka,city_Mymensingh
0,1.0,0.0
1,1.0,0.0
2,0.0,0.0
3,0.0,1.0
4,0.0,0.0
...,...,...
95,0.0,0.0
96,0.0,1.0
97,1.0,0.0
98,1.0,0.0


In [None]:
# Ordinal Encoding for Cough.
# The category order is given explicitly, so 'Mild' maps to 0 and 'Strong' to 1.
cough_levels = ['Mild', 'Strong']
ordinal_encoder = OrdinalEncoder(categories=[cough_levels])
cough_codes = ordinal_encoder.fit_transform(df[['cough']])
df['cough'] = cough_codes
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,0.777654,Female,0.885949,1.0,Dhaka,False
1,1.356552,Female,-1.026171,0.0,Dhaka,False
2,-1.344974,Female,-0.977143,0.0,Chittagong,False
3,-1.023364,Male,-0.143654,1.0,Mymensingh,False
4,-0.766076,Female,0.640806,1.0,Chittagong,True
...,...,...,...,...,...,...
95,1.099264,Female,-1.565487,0.0,Chittagong,True
96,-1.152008,Male,-1.271315,1.0,Mymensingh,False
97,1.549519,Female,0.101489,1.0,Dhaka,True
98,1.420874,Female,-0.879085,1.0,Dhaka,False


In [None]:
# Label Encoding for Has_covid: the boolean column is replaced by integer codes.
label_encoder = LabelEncoder()
covid_codes = label_encoder.fit_transform(df['has_covid'])
df['has_covid'] = covid_codes
df

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,0.777654,Female,0.885949,1.0,Dhaka,0
1,1.356552,Female,-1.026171,0.0,Dhaka,0
2,-1.344974,Female,-0.977143,0.0,Chittagong,0
3,-1.023364,Male,-0.143654,1.0,Mymensingh,0
4,-0.766076,Female,0.640806,1.0,Chittagong,1
...,...,...,...,...,...,...
95,1.099264,Female,-1.565487,0.0,Chittagong,1
96,-1.152008,Male,-1.271315,1.0,Mymensingh,0
97,1.549519,Female,0.101489,1.0,Dhaka,1
98,1.420874,Female,-0.879085,1.0,Dhaka,0


In [None]:
# Replace the raw categorical columns with their one-hot encoded versions.
# Built as a single non-mutating expression (no inplace=True) so the cell can be
# re-run safely: any encoded columns left over from a previous run are dropped
# before the fresh ones are appended, and already-removed raw columns are ignored.
encoded_frames = [df_gender, df_city]
stale_columns = ['gender', 'city'] + [
    column for frame in encoded_frames for column in frame.columns
]
df = pd.concat(
    [df.drop(columns=stale_columns, errors='ignore'), *encoded_frames],
    axis=1,  # join side by side; rows are aligned on index labels
)
df

Unnamed: 0,age,fever,cough,has_covid,gender_Male,city_Dhaka,city_Mymensingh
0,0.777654,0.885949,1.0,0,0.0,1.0,0.0
1,1.356552,-1.026171,0.0,0,0.0,1.0,0.0
2,-1.344974,-0.977143,0.0,0,0.0,0.0,0.0
3,-1.023364,-0.143654,1.0,0,1.0,0.0,1.0
4,-0.766076,0.640806,1.0,1,0.0,0.0,0.0
...,...,...,...,...,...,...,...
95,1.099264,-1.565487,0.0,1,0.0,0.0,0.0
96,-1.152008,-1.271315,1.0,0,1.0,0.0,1.0
97,1.549519,0.101489,1.0,1,0.0,1.0,0.0
98,1.420874,-0.879085,1.0,0,0.0,1.0,0.0
