In [40]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [41]:
# Load the toy COVID dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('covid_toy.csv')

In [42]:
# Preview 10 randomly chosen rows. No random_state is passed, so the rows shown
# differ on every run.
df.sample(10)

Unnamed: 0,age,gender,fever,cough,city,has_covid
24,13,Female,100.0,Strong,Kolkata,No
27,33,Female,102.0,Strong,Delhi,No
72,83,Female,101.0,Mild,Kolkata,No
74,34,Female,104.0,Strong,Delhi,No
16,69,Female,103.0,Mild,Kolkata,Yes
88,5,Female,100.0,Mild,Kolkata,No
32,34,Female,101.0,Strong,Delhi,Yes
15,70,Male,103.0,Strong,Kolkata,Yes
28,16,Male,104.0,Mild,Kolkata,No
99,10,Female,98.0,Strong,Kolkata,Yes


In [43]:
from sklearn.model_selection import train_test_split

# train_test_split returns (X_train, X_test, y_train, y_test) in that order.
# Unpack the targets in that same order so y_train lines up with X_train
# (80% of rows) and y_test with X_test (20% of rows).
# random_state pins the shuffle so the split is reproducible across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['has_covid']),  # features: every column except the target
    df['has_covid'],                 # target
    test_size=0.2,
    random_state=42,
)

# Without column transformer
`age` and `fever` are numerical features. `fever` has missing values, so we fill them with `SimpleImputer`.
`gender` and `city` are nominal categorical features, so we apply `OneHotEncoder`.
`cough` is an ordinal feature, so we apply `OrdinalEncoder`.

# Simple Imputer

In [44]:
# Fill missing fever readings with SimpleImputer (default strategy: column mean).
si = SimpleImputer()
# Learn the fill value from the training split only ...
X_train_fever = si.fit_transform(X_train[['fever']])
# ... and reuse it on the test split. Refitting here would leak test-set
# statistics and impute the two splits with different values.
X_test_fever = si.transform(X_test[['fever']])

X_train_fever.shape

(80, 1)

# Ordinal Encoding

In [49]:
# Encode cough severity as an ordered integer: 'Mild' -> 0, 'Strong' -> 1.
oe = OrdinalEncoder(categories=[['Mild', 'Strong']], dtype=np.int32)
X_train_cough = oe.fit_transform(X_train[['cough']])
# Reuse the encoder fitted on the training split; the test split is only transformed.
X_test_cough = oe.transform(X_test[['cough']])

X_train_cough.shape

(80, 1)

# One hot encoding

In [50]:
# One-hot encode the nominal columns; drop='first' removes one dummy per
# feature to avoid perfectly collinear columns.
# NOTE: `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed
# in 1.4; switch the keyword when upgrading.
ohe = OneHotEncoder(drop='first', sparse=False, dtype=np.int32)
X_train_gender_city = ohe.fit_transform(X_train[['gender', 'city']])
# Only transform the test split. Refitting would derive the categories from the
# test rows, so the number and order of dummy columns could differ from training.
X_test_gender_city = ohe.transform(X_test[['gender', 'city']])

X_train_gender_city.shape

(80, 4)

# age train test split

In [51]:
# Columns that go through the imputer/encoders; whatever remains after
# dropping them is carried over unchanged as a NumPy array.
already_encoded = ['gender', 'fever', 'cough', 'city']
X_train_age = X_train.drop(columns=already_encoded).to_numpy()
X_test_age = X_test.drop(columns=already_encoded).to_numpy()

X_train_age.shape

(80, 1)

# Concatenating

In [53]:
# Stitch the per-column pieces back together column-wise.
# Column order: age, fever, cough, then the one-hot gender/city dummies.
X_train_transformed = np.concatenate(
    (X_train_age, X_train_fever, X_train_cough, X_train_gender_city), axis=1
)
# The test matrix must be built from the X_test_* pieces; using the X_train_*
# arrays here would just duplicate the training matrix.
X_test_transformed = np.concatenate(
    (X_test_age, X_test_fever, X_test_cough, X_test_gender_city), axis=1
)

X_train_transformed.shape

(80, 7)

# With column transformer

In [62]:
from sklearn.compose import ColumnTransformer

# Each step is (name, estimator, columns it applies to).
column_steps = [
    # fill missing fever values
    ('tnf1', SimpleImputer(), ['fever']),
    # ordered encoding of cough severity
    ('tnf2', OrdinalEncoder(categories=[['Mild', 'Strong']], dtype=np.int32), ['cough']),
    # one-hot encoding of the nominal columns
    ('tnf3', OneHotEncoder(sparse=False, drop='first', dtype=np.int32), ['gender', 'city']),
]

# remainder='passthrough' keeps the columns not named in any step instead of
# dropping them.
transformer = ColumnTransformer(transformers=column_steps, remainder='passthrough')



In [63]:
# Fit every step of `transformer` on the training split and transform it in one call.
# NOTE(review): this rebinds X_train to the transformed output, so re-running this
# cell without first re-running the split cell feeds already-transformed data back in.
X_train=transformer.fit_transform(X_train)

In [64]:
# Apply the already-fitted `transformer` to the test split (transform only, no refit).
# NOTE(review): this rebinds X_test to the transformed output; the cell is not
# safe to re-run on its own.
X_test=transformer.transform(X_test)