In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("shraddha4ever20/covid-19-patient-symptoms-and-diagnosis-dataset")

print("Path to dataset files:", path)

Downloading to C:\Users\Vinay kumar\.cache\kagglehub\datasets\shraddha4ever20\covid-19-patient-symptoms-and-diagnosis-dataset\1.archive...


100%|█████████████████████████████████████████████████████████████████████████████| 5.17k/5.17k [00:00<00:00, 2.56MB/s]

Extracting files...
Path to dataset files: C:\Users\Vinay kumar\.cache\kagglehub\datasets\shraddha4ever20\covid-19-patient-symptoms-and-diagnosis-dataset\versions\1





In [1]:
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

In [3]:
df = pd.read_csv('covid_dataset.csv')

df.head()

Unnamed: 0,Age,Gender,Fever,Cough,City,Has_Covid
0,56,Male,102.4,Mild,Mumbai,No
1,19,Female,101.6,Strong,Mumbai,No
2,76,Female,102.0,Mild,Kolkata,No
3,65,Male,99.7,Mild,Bangalore,No
4,25,Female,102.1,Mild,Delhi,Yes


In [5]:
from sklearn.model_selection import train_test_split

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['Has_Covid']),  # Features
    df['Has_Covid'],                 # Target variable
    test_size=0.2                    # 20% test data
)

In [8]:
X_train

Unnamed: 0,Age,Gender,Fever,Cough,City
965,33,Female,98.5,Strong,Mumbai
849,24,Female,98.5,Mild,Mumbai
883,73,Female,101.9,Mild,Mumbai
991,47,Male,99.1,Mild,Bangalore
645,85,Female,98.4,Mild,Mumbai
...,...,...,...,...,...
662,8,Male,99.8,Mild,Delhi
634,30,Female,98.4,Mild,Bangalore
825,40,Female,98.8,Strong,Delhi
771,30,Male,99.2,Strong,Mumbai


# Aam Zindagi

In [7]:
# Fit on training data
si = SimpleImputer()
X_train_fever = si.fit_transform(X_train[['Fever']])

# Transform test data using the same fitted imputer
X_test_fever = si.transform(X_test[['Fever']])

In [13]:
X_train_fever

array([[ 98.5],
       [ 98.5],
       [101.9],
       [ 99.1],
       [ 98.4],
       [101.3],
       [ 98.4],
       [ 98.8],
       [ 99.7],
       [100.3],
       [100.4],
       [ 99.7],
       [102.4],
       [ 99.1],
       [ 98.1],
       [ 98.4],
       [102.8],
       [100.7],
       [101.3],
       [ 98.7],
       [ 99.1],
       [100.1],
       [101.6],
       [102.9],
       [100.7],
       [101.3],
       [101.9],
       [102.2],
       [102.8],
       [ 99.3],
       [ 99.2],
       [ 98.3],
       [ 98.6],
       [ 98. ],
       [101.5],
       [100.1],
       [ 98.1],
       [100.6],
       [101.8],
       [102.1],
       [101.6],
       [100.6],
       [ 98.2],
       [ 99.3],
       [ 98.1],
       [ 99.4],
       [ 98.1],
       [102.7],
       [100.8],
       [100.4],
       [ 99.3],
       [100.3],
       [101.1],
       [101.6],
       [103. ],
       [100.6],
       [ 98. ],
       [100.2],
       [100.3],
       [102.2],
       [100.1],
       [100.8],
       [

In [9]:
X_train_fever.shape

(800, 1)

In [10]:
# OrdinalEncoding -> cough
oe = OrdinalEncoder(categories=[['Mild', 'Strong']])
X_train_cough = oe.fit_transform(X_train[['Cough']])

# also the test data
X_test_cough = oe.fit_transform(X_test[['Cough']])

X_train_cough.shape


(800, 1)

In [12]:
# OneHotEncoding -> gender, city
ohe = OneHotEncoder(drop='first', sparse_output=False)
X_train_gender_city = ohe.fit_transform(X_train[['Gender', 'City']])

# also the test data
X_test_gender_city = ohe.fit_transform(X_test[['Gender', 'City']])

X_train_gender_city.shape


(800, 4)

In [15]:
# Extracting Age
X_train_age = X_train.drop(columns=['Gender', 'Fever', 'Cough', 'City']).values

# also the test data
X_test_age = X_test.drop(columns=['Gender', 'Fever', 'Cough', 'City']).values

X_train_age.shape

(800, 1)

In [16]:
X_train_transformed = np.concatenate((X_train_age, X_train_fever, X_train_gender_city, X_train_cough), axis=1)

# also the test data
X_test_transformed = np.concatenate((X_test_age, X_test_fever, X_test_gender_city, X_test_cough), axis=1)

X_train_transformed.shape

(800, 7)

# Mentos Zindagi

In [17]:
from sklearn.compose import ColumnTransformer

In [21]:
transformer=ColumnTransformer(transformers=[
    ('tnf1',SimpleImputer(),['Fever']),
    ('tnf2',OrdinalEncoder(categories=[['Mild','Strong']]),['Cough']),
    ('tnf3',OrdinalEncoder(),['Gender','City'])
],remainder='passthrough')
    

In [22]:
transformer.fit_transform(X_train)

array([[ 98.5,   1. ,   0. ,   3. ,  33. ],
       [ 98.5,   0. ,   0. ,   3. ,  24. ],
       [101.9,   0. ,   0. ,   3. ,  73. ],
       ...,
       [ 98.8,   1. ,   0. ,   1. ,  40. ],
       [ 99.2,   1. ,   1. ,   3. ,  30. ],
       [ 99.1,   1. ,   0. ,   0. ,  42. ]], shape=(800, 5))

In [23]:
transformer.fit_transform(X_train).shape

(800, 5)

In [25]:
transformer.transform(X_test)

array([[102.1,   0. ,   0. ,   1. ,  25. ],
       [102.8,   1. ,   0. ,   0. ,  37. ],
       [102.5,   0. ,   0. ,   2. ,  58. ],
       [100.5,   1. ,   1. ,   3. ,  79. ],
       [101.1,   0. ,   1. ,   1. ,  85. ],
       [102.6,   0. ,   1. ,   2. ,  24. ],
       [102.8,   1. ,   1. ,   1. ,   7. ],
       [ 98.2,   0. ,   1. ,   1. ,  11. ],
       [ 98.3,   0. ,   1. ,   0. ,  67. ],
       [100.6,   0. ,   1. ,   1. ,  19. ],
       [101.8,   1. ,   1. ,   1. ,  43. ],
       [ 99.2,   1. ,   0. ,   3. ,  23. ],
       [102.3,   1. ,   0. ,   1. ,  25. ],
       [ 99. ,   1. ,   0. ,   1. ,  60. ],
       [ 99.7,   0. ,   1. ,   0. ,  65. ],
       [101.4,   1. ,   1. ,   1. ,  25. ],
       [100.3,   1. ,   0. ,   0. ,  18. ],
       [ 99.6,   1. ,   1. ,   2. ,  12. ],
       [100.3,   0. ,   0. ,   0. ,  77. ],
       [101.6,   1. ,   1. ,   1. ,  60. ],
       [ 98.2,   0. ,   0. ,   0. ,  24. ],
       [101.7,   1. ,   0. ,   3. ,  31. ],
       [ 99.8,   1. ,   0. ,   1

In [26]:
transformer.transform(X_test).shape

(200, 5)