# Project: Covid-19 Analysis

## 1. The Objective

The goal is to predict confirmed COVID-19 cases and deaths from historical data.

In [None]:
#hide
from fastbook import *
from kaggle import api
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from fastai.tabular.all import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from dtreeviz.trees import *
from IPython.display import Image, display_svg, SVG

## Get the Data

The data was downloaded from Kaggle and includes the following files:

In [None]:
# Project root: `Path()` with no arguments refers to the current working directory.
path = Path()
# Show the files that were downloaded into ./data.
data_dir = path/'data'
data_dir.ls()

(#10) [Path('data/vaccination-metadata.csv'),Path('data/time_series_covid_19_confirmed_US.csv'),Path('data/time_series_covid_19_recovered.csv'),Path('data/time_series_covid_19_deaths_US.csv'),Path('data/covid_19_data.csv'),Path('data/owid-covid-data.csv'),Path('data/vaccination-data.csv'),Path('data/time_series_covid_19_deaths.csv'),Path('data/WHO-COVID-19-global-data.csv'),Path('data/time_series_covid_19_confirmed.csv')]

Here we focus on three datasets:
* `time_series_covid_19_recovered.csv`
* `time_series_covid_19_deaths.csv`
* `time_series_covid_19_confirmed.csv`

Note: the exploration below starts from `owid-covid-data.csv`, which is not one of these three files.

## Explore the Data

In [None]:
# Read `owid-covid-data.csv` into `df`. `low_memory=False` makes pandas parse the
# file in one pass rather than in chunks, which avoids mixed-dtype column inference.
owid_csv = path/'data/owid-covid-data.csv'
df = pd.read_csv(owid_csv, low_memory=False)
# (rows, columns)
df.shape

(181953, 67)

In [None]:
# Peek at the last five rows (5 is the default for `tail`, spelled out here).
df.tail(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,total_tests,new_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,new_people_vaccinated_smoothed,new_people_vaccinated_smoothed_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
181948,ZWE,Africa,Zimbabwe,2022-04-22,247488.0,105.0,35.857,5468.0,1.0,0.857,16398.436,6.957,2.376,362.307,0.066,0.057,0.84,,,,,,,,,,,,,1690.0,0.112,0.0212,47.1,tests performed,10198039.0,5822326.0,3630122.0,745591.0,15020.0,10637.0,67.57,38.58,24.05,4.94,705.0,6635.0,0.044,,15092171.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,,,,
181949,ZWE,Africa,Zimbabwe,2022-04-23,247524.0,36.0,41.0,5468.0,0.0,0.857,16400.821,2.385,2.717,362.307,0.0,0.057,0.85,,,,,,,,,,,,,1710.0,0.113,0.024,41.7,tests performed,10213714.0,5828236.0,3636597.0,748881.0,15675.0,10972.0,67.68,38.62,24.1,4.96,727.0,5752.0,0.038,,15092171.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,,,,
181950,ZWE,Africa,Zimbabwe,2022-04-24,247524.0,0.0,35.143,5468.0,0.0,0.714,16400.821,0.0,2.329,362.307,0.0,0.047,,,,,,,,,,2236758.0,,148.207,,1730.0,0.115,0.0203,49.2,tests performed,,,,,,11072.0,,,,,734.0,5538.0,0.037,,15092171.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,,,,
181951,ZWE,Africa,Zimbabwe,2022-04-25,247598.0,74.0,43.0,5468.0,0.0,0.571,16405.725,4.903,2.849,362.307,0.0,0.038,,,,,,,,,,,,,,,,,,,10231251.0,5835677.0,3641341.0,754233.0,,11300.0,67.79,38.67,24.13,5.0,749.0,5370.0,0.036,,15092171.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,,,,
181952,ZWE,Africa,Zimbabwe,2022-04-26,247667.0,69.0,52.857,5469.0,1.0,0.714,16410.296,4.572,3.502,362.373,0.066,0.047,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,15092171.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,,,,


In [None]:
# List every column name in the loaded frame.
df.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
       'new_vaccinations', 'new_vaccinations_smoothed',
       't

In [None]:
# TODO: next step (this cell is still an empty placeholder)

## Prepare the Data

## Model Selection

## Fine-Tune the Model

## Conclusion