<a href="https://colab.research.google.com/github/weasel-codes/covid-patient-recovery/blob/main/Covid19_Patient_Recovery_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Relevant imports

In [65]:
import numpy as np
import matplotlib.pyplot as plot
import pandas as pd

# Importing Dataset

In [66]:
# Load the raw patient records; 'PatientInfo.csv' is read relative to the
# notebook's working directory.
dataset = pd.read_csv('PatientInfo.csv')
# Keep only the columns used by the rest of the notebook.
# The selector must be a list: a set is not a valid column indexer in recent
# pandas releases, and a set carries no defined column order.
dataset = dataset[['sex', 'age', 'infection_case', 'confirmed_date', 'released_date', 'deceased_date', 'state']]
print("No. of Nan values per column : ")
# Last expression of the cell: per-column count of missing values.
dataset.isnull().sum()

No. of Nan values per column : 


age               1380
confirmed_date       3
sex               1122
deceased_date     5099
state                0
infection_case     919
released_date     3578
dtype: int64

In [67]:
print("Shape of dataset before removing Nan from any column : ", dataset.shape)

# Rows without a confirmation date cannot yield a duration, so drop them.
dataset = dataset.dropna(subset=['confirmed_date'], how='all')
# Exclude rows whose state is 'isolated'.
dataset = dataset[dataset.state != 'isolated']
# Keep a row only if at least one of the two end dates is present.
dataset = dataset.dropna(subset=['deceased_date', 'released_date'], how='all')
# Forward-fill the remaining gaps in the categorical columns.
# assign() builds a new frame, which avoids the chained
# `dataset[col].fillna(..., inplace=True)` pattern (it operates on a column
# selection and is not guaranteed to write back into `dataset`) and the
# deprecated `method=` argument of fillna.
dataset = dataset.assign(
    sex=dataset['sex'].ffill(),
    age=dataset['age'].ffill(),
    infection_case=dataset['infection_case'].ffill(),
)
print("No. of Nan values per column after removing Nans : ")
print(dataset.isnull().sum())
print("\n\nSample Dataset : \n", dataset.iloc[0])

Shape of dataset before removing Nan from any column :  (5165, 7)
No. of Nan values per column after removing Nans : 
age                  0
confirmed_date       0
sex                  0
deceased_date     1580
state                0
infection_case       0
released_date       64
dtype: int64


Sample Dataset : 
 age                           50s
confirmed_date         2020-01-23
sex                          male
deceased_date                 NaN
state                    released
infection_case    overseas inflow
released_date          2020-02-05
Name: 0, dtype: object


In [68]:
# Pull each column of interest out of the frame as its own NumPy array.
# The order of the names on the left matches the order of the column labels.
age, sex, state, confirm, deceased, infection, release = (
    np.array(dataset[column_name])
    for column_name in (
        'age',
        'sex',
        'state',
        'confirmed_date',
        'deceased_date',
        'infection_case',
        'released_date',
    )
)

In [69]:
# Assemble the per-column arrays into one 2-D array, one row per patient.
# Column order: age, sex, infection, confirm, release, deceased, state.
data = np.stack(
    [age, sex, infection, confirm, release, deceased, state],
    axis=1,
)
print(data)

[['50s' 'male' 'overseas inflow' ... '2020-02-05' nan 'released']
 ['30s' 'male' 'overseas inflow' ... '2020-03-02' nan 'released']
 ['50s' 'male' 'contact with patient' ... '2020-02-19' nan 'released']
 ...
 ['30s' 'female' 'Itaewon Clubs' ... '2020-06-12' nan 'released']
 ['30s' 'female' 'overseas inflow' ... '2020-06-13' nan 'released']
 ['30s' 'female' 'overseas inflow' ... '2020-06-24' nan 'released']]


In [70]:
# Convert the age labels in column 0 (e.g. '50s') to numbers by removing the
# 's' and parsing the remainder.
# Going through str() keeps the cell re-runnable: once the ages have been
# converted they are no longer strings, and calling .replace on them directly
# would raise AttributeError on a second execution.
for i in range(len(data)):
  data[i,0] = pd.to_numeric(str(data[i,0]).replace('s',''))
print(data)

[[50 'male' 'overseas inflow' ... '2020-02-05' nan 'released']
 [30 'male' 'overseas inflow' ... '2020-03-02' nan 'released']
 [50 'male' 'contact with patient' ... '2020-02-19' nan 'released']
 ...
 [30 'female' 'Itaewon Clubs' ... '2020-06-12' nan 'released']
 [30 'female' 'overseas inflow' ... '2020-06-13' nan 'released']
 [30 'female' 'overseas inflow' ... '2020-06-24' nan 'released']]


# Generate Array out of Dataset

# Create a new column for the number of days

In [71]:
# Show the shape before and after appending one zero-filled column on the
# right; that column is the slot for the day count.
print(data.shape)
new_column = np.zeros((data.shape[0], 1))
data = np.concatenate((data, new_column), axis=1)
print(data.shape)

(1646, 7)
(1646, 8)


# Update the number of days
Column layout of each row of `data` (indices 0–7):
[age, sex, infection, confirm, release, deceased, state, days]

In [73]:
# Fill column 7 with the number of days between the confirmation date
# (column 3) and the end date of each row, then print the row.
for i in range(len(data)):
  # The release date (column 4) is the end date for rows whose state is
  # 'released' and for rows with no deceased date; otherwise the deceased
  # date (column 5) is used.
  use_release_date = data[i,6] == 'released' or pd.isna(data[i,5])
  end_col = 4 if use_release_date else 5
  elapsed = np.datetime64(data[i,end_col]) - np.datetime64(data[i,3])
  # Dividing by a one-day timedelta turns the difference into a day count.
  data[i,7] = elapsed / np.timedelta64(1, 'D')
  print(data[i])

[50 'male' 'overseas inflow' '2020-01-23' '2020-02-05' nan 'released' 13.0]
[30 'male' 'overseas inflow' '2020-01-30' '2020-03-02' nan 'released' 32.0]
[50 'male' 'contact with patient' '2020-01-30' '2020-02-19' nan 'released'
 20.0]
[20 'male' 'overseas inflow' '2020-01-30' '2020-02-15' nan 'released' 16.0]
[20 'female' 'contact with patient' '2020-01-31' '2020-02-24' nan
 'released' 24.0]
[50 'female' 'contact with patient' '2020-01-31' '2020-02-19' nan
 'released' 19.0]
[20 'male' 'contact with patient' '2020-01-31' '2020-02-10' nan 'released'
 10.0]
[20 'male' 'overseas inflow' '2020-02-02' '2020-02-24' nan 'released' 22.0]
[30 'male' 'overseas inflow' '2020-02-05' '2020-02-21' nan 'released' 16.0]
[60 'female' 'contact with patient' '2020-02-05' '2020-02-29' nan
 'released' 24.0]
[50 'female' 'overseas inflow' '2020-02-06' '2020-02-29' nan 'released'
 23.0]
[20 'male' 'overseas inflow' '2020-02-07' '2020-02-27' nan 'released' 20.0]
[60 'female' 'contact with patient' '2020-02-16' 