In [None]:
import pandas as pd
import numpy as np
import json
import os

## Methods

In [None]:
def drop_null_vals(df,axis='both'):
    '''
    Drop rows and/or columns that consist entirely of NaN values.

    Only rows/columns in which *every* entry is NaN are removed
    (``how='all'``); partially filled rows/columns are kept.

    Parameters
    ----------
    df : DataFrame
        DataFrame to clean.

    axis : str
        Which all-NaN entries to drop:
        'rows'    -> drop all-NaN rows,
        'columns' -> drop all-NaN columns,
        'both'    -> drop all-NaN rows, then all-NaN columns (default).

    Returns
    -------
    DataFrame
        The result of ``DataFrame.dropna``; it is called without
        ``inplace``, so `df` itself is left untouched.

    Raises
    ------
    TypeError
        If `df` is not a DataFrame or `axis` is not a str.
    ValueError
        If `axis` is not one of 'rows', 'columns' or 'both'.
    '''
    # Explicit raises instead of assert: asserts vanish under ``python -O``.
    if not isinstance(df, pd.DataFrame):
        raise TypeError(
            "df must be a pandas DataFrame, got %s" % type(df).__name__)
    if not isinstance(axis, str):
        raise TypeError(
            "axis must be a str, got %s" % type(axis).__name__)

    if axis == 'rows':
        return df.dropna(how='all', axis=0)
    if axis == 'columns':
        return df.dropna(how='all', axis=1)
    if axis == 'both':
        return df.dropna(how='all', axis=0).dropna(how='all', axis=1)

    # Previously an unknown value fell through and returned None silently.
    raise ValueError(
        "axis must be 'rows', 'columns' or 'both', got %r" % (axis,))

## Data

### 1. covid_19_data.csv

Sno - Serial number<br/>
ObservationDate - Date of the observation in MM/DD/YYYY<br/>
Province/State - Province or state of the observation (Could be empty when missing)<br/>
Country/Region - Country of observation<br/>
Last Update - Time in UTC at which the row is updated for the given province or country. (Not standardised and so please clean before using it)<br/>
Confirmed - Cumulative number of confirmed cases till that date<br/>
Deaths - Cumulative number of deaths till that date<br/>
Recovered - Cumulative number of recovered cases till that date

### 2. COVID19_open_line_list.csv and COVID19_line_list_data.csv

Individual level data information
<br/>
ID<br/>
age<br/>
sex<br/>
city<br/>
province<br/>
country<br/>
wuhan(0)_not_wuhan(1)<br/>
latitude<br/>
longitude<br/>
geo_resolution<br/>
date_onset_symptoms<br/>
date_admission_hospital<br/>
date_confirmation<br/>
symptoms<br/>
lives_in_Wuhan<br/>
travel_history_dates<br/>
travel_history_location<br/>
reported_market_exposure<br/>
additional_information<br/>
chronic_disease_binary<br/>
chronic_disease<br/>
source<br/>
sequence_available<br/>
outcome<br/>
date_death_or_discharge<br/>
notes_for_discussion<br/>
location<br/>
admin3<br/>
admin2<br/>
admin1<br/>
country_new<br/>
admin_id<br/>
data_moderator_initials<br/>
Unnamed: 33<br/>
Unnamed: 34<br/>
Unnamed: 35<br/>
Unnamed: 36<br/>
Unnamed: 37<br/>
Unnamed: 38<br/>
Unnamed: 39<br/>
Unnamed: 40<br/>
Unnamed: 41<br/>
Unnamed: 42<br/>
Unnamed: 43<br/>
Unnamed: 44

### Region wise data

In [None]:
# Load the region-level case counts, then strip all-NaN rows and columns.
region_wise_data = pd.read_csv('data/covid_19_data.csv')
region_wise_data = drop_null_vals(region_wise_data, axis="both")
region_wise_data

### Individual information

In [None]:
# Load the open line list, then strip all-NaN rows and columns.
open_line_list = pd.read_csv('data/COVID19_open_line_list.csv')
open_line_list = drop_null_vals(open_line_list, axis='both')
open_line_list

In [None]:
# Load the line list, then strip all-NaN rows and columns.
line_list = pd.read_csv('data/COVID19_line_list_data.csv')
line_list = drop_null_vals(line_list, 'both')
line_list

### Time series data (Johns Hopkins)

In [None]:
# Load the three time-series tables first ...
time_series_confirmed = pd.read_csv('data/time_series_covid_19_confirmed.csv')
time_series_deaths = pd.read_csv('data/time_series_covid_19_deaths.csv')
time_series_recovered = pd.read_csv('data/time_series_covid_19_recovered.csv')

# ... then clean each one with drop_null_vals (default axis).
time_series_confirmed = drop_null_vals(time_series_confirmed)
time_series_deaths = drop_null_vals(time_series_deaths)
time_series_recovered = drop_null_vals(time_series_recovered)