# importing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, fbeta_score

#loading dataset

In [None]:
a=pd.read_csv("/content/corona_tested_006.csv")

In [None]:
a.nunique()

# Data preprocessing

In [None]:
b=a.copy()

In [None]:
b.shape

In [None]:
b.head(2)

In [None]:
b.Corona.value_counts()

In [None]:
b.Sex.value_counts()

In [None]:
b.loc[b["Corona"]=="n"]

In [None]:
b.isnull().sum()

In [None]:
b.dropna(inplace=True)

In [None]:
b=b.replace("None",np.nan,regex=True)

In [None]:
b.isnull().sum()

In [None]:
b["Sex"]=b["Sex"].fillna(b["Sex"].mode()[0])
b["Age_60_above"]=b["Age_60_above"].fillna(b["Age_60_above"].mode()[0])
b["Headache"]=b["Headache"].fillna(b["Headache"].mode()[0])
b["Shortness_of_breath"]=b["Shortness_of_breath"].fillna(b["Shortness_of_breath"].mode()[0])
b["Sore_throat"]=b["Sore_throat"].fillna(b["Sore_throat"].mode()[0])
b["Cough_symptoms"]=b["Cough_symptoms"].fillna(b["Cough_symptoms"].mode()[0])
b["Fever"]=b["Fever"].fillna(b["Fever"].mode()[0])

In [None]:
b.isnull().sum()

In [None]:
b.info()

In [None]:
b["Test_date"]=pd.to_datetime(b["Test_date"])

In [None]:
b["year"]=b["Test_date"].dt.year

In [None]:
b["month"]=b["Test_date"].dt.month
b["day"]=b["Test_date"].dt.day

In [None]:
b.head()

In [None]:
# Drop the identifier, the two demographic columns and the raw date column
# (year/month/day were already derived from Test_date above).
# Each label is listed exactly once; the original repeated "Age_60_above".
b = b.drop(columns=["Ind_ID", "Sex", "Age_60_above", "Test_date"])

In [None]:
b.head(5)

# EDA

In [None]:
b.Corona.value_counts().plot(kind="bar")

In [None]:
b.Corona.value_counts()/len(b["Corona"])*100

In [None]:
b.month.value_counts()

In [None]:
b.month.value_counts().plot(kind="bar")

In [None]:
c=b.copy()

In [None]:
for i, predictor in enumerate(c):
    plt.figure(i)
    sns.countplot(data=c, x=predictor, hue='Corona')

In [None]:
c.head(2)

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
le=LabelEncoder()
c["Corona"]=le.fit_transform(c["Corona"])

In [None]:
c.Corona.value_counts()

In [None]:
d=pd.get_dummies(c,drop_first=True)
d

In [None]:
d.shape

In [None]:
x=d.drop("Corona",axis=1)
y=d["Corona"]

In [None]:
(x.shape),(y.shape)

In [None]:
# Hold out 20% of the rows for testing.
# train_test_split shuffles the rows, so without a seed every run produces a
# different split and different metrics. random_state=0 makes the split
# reproducible and matches the seed given to LogisticRegression in this
# notebook.
# NOTE(review): consider stratify=y as well if the Corona classes are
# imbalanced -- confirm every class has at least two rows first.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

In [None]:
scale=StandardScaler()

In [None]:
x_train=scale.fit_transform(x_train)
x_train

In [None]:
x_test=scale.transform(x_test)
x_test

In [None]:
lgr=LogisticRegression(random_state=0)

In [None]:
lgr.fit(x_train,y_train)

In [None]:
lgrp=lgr.predict(x_test)

In [None]:
lgrp

# confusion matrix

In [None]:
cunfuse_matrix=confusion_matrix(y_test,lgrp)

In [None]:
cunfuse_matrix

# Accuracy

In [None]:
print(round(accuracy_score(y_test, lgrp), 2))

# Recall

In [None]:
print(round(recall_score(y_test, lgrp, average='micro'), 2))

# precision score

In [None]:
print(round(precision_score(y_test, lgrp, average='micro'), 2))

#fbeta_score

In [None]:
print(round(fbeta_score(y_test, lgrp,beta=1,average='micro'), 2))

# f1 score

In [None]:
from sklearn.metrics import f1_score
print(round(f1_score(y_test, lgrp,average='micro'), 2))