In [1]:
import pandas as pd
import numpy as np
import json
import os
import requests
from datetime import datetime

In [2]:
# Meteostat API key used by the URL builders below.
# Prefer supplying it through the METEOSTAT_API_KEY environment variable so the
# secret does not have to live in the notebook; the literal is kept only as a
# backward-compatible fallback and should be rotated/removed before sharing.
API_KEY = os.environ.get('METEOSTAT_API_KEY','VJga0fAn')

## Methods

In [3]:
def drop_null_vals(df,axis='both',subset=None):
    '''
    Drops rows and/or columns
    that contain only NaN
    values from a given
    data frame.

    Rows holding NaN in any
    of the subset columns are
    removed first, whatever
    the value of axis.

    Parameters
    ----------
    df : DataFrame
        DataFrame to be
        cleaned.

    axis : str
        Drops all-NaN rows
        if axis=rows,
        all-NaN columns if
        axis=columns, and both
        if axis=both. Any other
        value skips this step.

    subset : list of str, optional
        For all columns in
        subset, remove the
        NaN rows. Defaults to
        no columns.

    Returns
    -------
    DataFrame
        The cleaned frame; df
        itself is not modified.
    '''
    # None stands in for "no columns" so the default is not a shared mutable list.
    if subset is None:
        subset = []

    assert(isinstance(df,pd.DataFrame))
    assert(isinstance(axis,str))
    assert(isinstance(subset,list))
    # all() is required here: asserting a bare generator expression always
    # passes because the generator object itself is truthy.
    assert(all(isinstance(col,str) for col in subset))

    # With an empty subset this drops nothing and simply returns a new frame.
    df = df.dropna(subset=subset)

    if(axis=='rows'):
        df = df.dropna(how='all',axis=0)
    elif(axis=='columns'):
        df = df.dropna(how='all',axis=1)
    elif(axis=='both'):
        df = df.dropna(how='all',axis=0).dropna(how='all',axis=1)

    return df

def getTemperatureData(latitude,longitude):
    '''
    Returns daily weather data
    for the station nearest to
    the latitude and longitude
    entered.

    The date range is taken from
    the module-level START_DATE
    and END_DATE, which must be
    defined before this function
    is called.

    Parameters
    ----------
    latitude : float
        Latitude of region
        for fetching the
        weather data

    longitude : float
        Longitude of region
        for fetching weather
        data

    Returns
    -------
    DataFrame
        Built from the 'data'
        field of the JSON
        response.

    Raises
    ------
    requests.HTTPError
        If the API answers with
        an error status code.
    '''
    assert(isinstance(latitude,float))
    assert(isinstance(longitude,float))

    station = getNearbyStation(latitude,longitude)
    url = createDailyURL(station,START_DATE,END_DATE)
    # Bound the wait so an unresponsive server cannot hang the notebook forever.
    response = requests.get(url,timeout=30)
    # Fail loudly on 4xx/5xx instead of hitting a KeyError on the payload below.
    response.raise_for_status()

    return pd.DataFrame(response.json()['data'])

def getNearbyStation(latitude,longitude):
    '''
    Given the latitude and
    longitude of an area,
    returns the id of the
    nearest station to it.

    Parameters
    ----------
    latitude : float
        Latitude of region
        for fetching the
        weather data

    longitude : float
        Longitude of region
        for fetching weather
        data

    Returns
    -------
    The 'id' field of the first
    entry in the response's
    'data' list.

    Raises
    ------
    requests.HTTPError
        If the API answers with
        an error status code.
    IndexError
        If the API returns no
        station for the location.
    '''
    assert(isinstance(latitude,float))
    assert(isinstance(longitude,float))

    url = createStationURL(latitude,longitude)
    # Bound the wait so an unresponsive server cannot hang the notebook forever.
    response = requests.get(url,timeout=30)
    # Fail loudly on 4xx/5xx instead of hitting a KeyError on the payload below.
    response.raise_for_status()

    stations = response.json()['data']
    if not stations:
        # Same exception type that indexing the empty list raised before,
        # now with a message that says which location had no station.
        raise IndexError('No station found near lat='+str(latitude)+', lon='+str(longitude))

    return stations[0]['id']

def createStationURL(latitude,longitude):
    '''
    Builds the URL of the
    "nearby stations" endpoint
    for a coordinate pair,
    limited to one result and
    authenticated with API_KEY.

    Parameters
    ----------
    latitude : float
        Latitude of region
        for fetching the
        weather data

    longitude : float
        Longitude of region
        for fetching weather
        data

    Returns
    -------
    str
        The request URL.
    '''
    for coordinate in (latitude,longitude):
        assert(isinstance(coordinate,float))

    # Pieces are listed in the order they appear in the final URL.
    url_parts = (
        'https://api.meteostat.net/v1/stations/nearby?lat=',
        str(latitude),
        '&lon=',
        str(longitude),
        '&limit=1&key=',
        API_KEY,
    )

    return ''.join(url_parts)

def createDailyURL(station_id,start_date,end_date):
    '''
    Builds the URL of the
    daily history endpoint
    for a station and a
    date range, authenticated
    with API_KEY.

    Parameters
    ----------
    station_id : str
        Station id of the
        region for which
        data is to be fetched

    start_date : str
        Date starting from which
        data is to be fetched.

    end_date : str
        Date ending at which
        data is to be fetched

    Returns
    -------
    str
        The request URL.
    '''
    for text_arg in (station_id,start_date,end_date):
        assert(isinstance(text_arg,str))

    # Pieces are listed in the order they appear in the final URL.
    url_parts = (
        'https://api.meteostat.net/v1/history/daily?station=',
        station_id,
        '&start=',
        start_date,
        '&end=',
        end_date,
        '&key=',
        API_KEY,
    )

    return ''.join(url_parts)

## Data

### 1. covid_19_data.csv

SNo - Serial number<br/>
ObservationDate - Date of the observation in MM/DD/YYYY<br/>
Province/State - Province or state of the observation (Could be empty when missing)<br/>
Country/Region - Country of observation<br/>
Last Update - Time in UTC at which the row is updated for the given province or country. (Not standardised and so please clean before using it)<br/>
Confirmed - Cumulative number of confirmed cases till that date<br/>
Deaths - Cumulative number of deaths till that date<br/>
Recovered - Cumulative number of recovered cases till that date

### 2. COVID19_open_line_list.csv and COVID19_line_list_data.csv

Individual level data information
<br/><br/>

ID<br/>
age<br/>
sex<br/>
city<br/>
province<br/>
country<br/>
wuhan(0)_not_wuhan(1)<br/>
latitude<br/>
longitude<br/>
geo_resolution<br/>
date_onset_symptoms<br/>
date_admission_hospital<br/>
date_confirmation<br/>
symptoms<br/>
lives_in_Wuhan<br/>
travel_history_dates<br/>
travel_history_location<br/>
reported_market_exposure<br/>
additional_information<br/>
chronic_disease_binary<br/>
chronic_disease<br/>
source<br/>
sequence_available<br/>
outcome<br/>
date_death_or_discharge<br/>
notes_for_discussion<br/>
location<br/>
admin3<br/>
admin2<br/>
admin1<br/>
country_new<br/>
admin_id<br/>
data_moderator_initials<br/>

### Region wise data

In [4]:
# Load the region-wise case counts and discard rows and columns that are entirely NaN.
region_wise_data = drop_null_vals(pd.read_csv('data/covid_19_data.csv'),axis="both")
# Bare expression so the notebook renders the frame.
region_wise_data

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
6157,6158,03/16/2020,Cayman Islands,UK,2020-03-16T14:53:04,1.0,1.0,0.0
6158,6159,03/16/2020,Gibraltar,UK,2020-03-14T16:33:03,1.0,0.0,1.0
6159,6160,03/16/2020,Diamond Princess cruise ship,Australia,2020-03-14T02:33:04,0.0,0.0,0.0
6160,6161,03/16/2020,West Virginia,US,2020-03-10T02:33:04,0.0,0.0,0.0


### Individual information

In [5]:
# Load the open line list (one row per case) and discard rows and columns that are entirely NaN.
open_line_list = drop_null_vals(pd.read_csv('data/COVID19_open_line_list.csv'),axis='both')
# Bare expression so the notebook renders the frame.
open_line_list

Unnamed: 0,ID,age,sex,city,province,country,wuhan(0)_not_wuhan(1),latitude,longitude,geo_resolution,...,outcome,date_death_or_discharge,notes_for_discussion,location,admin3,admin2,admin1,country_new,admin_id,data_moderator_initials
0,1.0,30,male,"Chaohu City, Hefei City",Anhui,China,1.0,31.646960,117.716600,admin3,...,,,,,Chaohu City,Hefei City,Anhui,China,340181,
1,2.0,47,male,"Baohe District, Hefei City",Anhui,China,1.0,31.778630,117.331900,admin3,...,,,,,Baohe District,Hefei City,Anhui,China,340111,
2,3.0,49,male,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,...,,,,High-Tech Zone,Shushan District,Hefei City,Anhui,China,340104,
3,4.0,47,female,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,...,,,,High-Tech Zone,Shushan District,Hefei City,Anhui,China,340104,
4,5.0,50,female,"Feidong County, Hefei City",Anhui,China,1.0,32.001230,117.568100,admin3,...,,,,,Feidong County,Hefei City,Anhui,China,340122,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13169,13475.0,60-69,male,Nagoya City,Aichi Prefecture,Japan,1.0,,,,...,,,,,,,,,,
13170,13476.0,40-49,female,,Osaka,Japan,1.0,,,,...,,,,,,,,,,
13171,13477.0,0-10,female,,Osaka,Japan,1.0,,,,...,,,,,,,,,,
13172,13478.0,,female,Amsterdam,,Netherlands,1.0,,,,...,,,,,,,,,,


In [6]:
# Load the line list and discard rows and columns that are entirely NaN
# ('both' is passed positionally as the axis argument).
line_list = drop_null_vals(pd.read_csv('data/COVID19_line_list_data.csv'),'both')
# Bare expression so the notebook renders the frame.
line_list

Unnamed: 0,id,case_in_country,reporting date,summary,location,country,gender,age,symptom_onset,If_onset_approximated,hosp_visit_date,exposure_start,exposure_end,visiting Wuhan,from Wuhan,death,recovered,symptom,source,link
0,1,,1/20/2020,First confirmed imported COVID-19 pneumonia pa...,"Shenzhen, Guangdong",China,male,66.0,01/03/20,0.0,01/11/20,12/29/2019,01/04/20,1,0.0,0,0,,Shenzhen Municipal Health Commission,http://wjw.sz.gov.cn/wzx/202001/t20200120_1898...
1,2,,1/20/2020,First confirmed imported COVID-19 pneumonia pa...,Shanghai,China,female,56.0,1/15/2020,0.0,1/15/2020,,01/12/20,0,1.0,0,0,,Official Weibo of Shanghai Municipal Health Co...,https://www.weibo.com/2372649470/IqogQhgfa?fro...
2,3,,1/21/2020,First confirmed imported cases in Zhejiang: pa...,Zhejiang,China,male,46.0,01/04/20,0.0,1/17/2020,,01/03/20,0,1.0,0,0,,Health Commission of Zhejiang Province,http://www.zjwjw.gov.cn/art/2020/1/21/art_1202...
3,4,,1/21/2020,new confirmed imported COVID-19 pneumonia in T...,Tianjin,China,female,60.0,,,1/19/2020,,,1,0.0,0,0,,人民日报官方微博,https://m.weibo.cn/status/4463235401268457?
4,5,,1/21/2020,new confirmed imported COVID-19 pneumonia in T...,Tianjin,China,male,58.0,,,1/14/2020,,,0,0.0,0,0,,人民日报官方微博,https://m.weibo.cn/status/4463235401268457?
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1080,1081,2.0,2/25/2020,"new COVID-19 patient confirmed in Austria: 24,...",Innsbruck,Austria,,24.0,,,,,,0,0.0,0,0,,Jakarta Post,https://www.thejakartapost.com/news/2020/02/25...
1081,1082,1.0,2/24/2020,new COVID-19 patient confirmed in Afghanistan:...,Afghanistan,Afghanistan,,35.0,,,,,,0,0.0,0,0,,Fox News,https://www.foxnews.com/health/afghanistan-fir...
1082,1083,1.0,2/26/2020,new COVID-19 patient confirmed in Algeria: mal...,Algeria,Algeria,male,,,,,,2/17/2020,0,0.0,0,0,,South China Morning Post,https://www.scmp.com/news/china/society/articl...
1083,1084,1.0,2/25/2020,new COVID-19 patient confirmed in Croatia: mal...,Croatia,Croatia,male,,,,,2/19/2020,2/21/2020,0,0.0,0,0,,Reuters,https://www.reuters.com/article/us-croatia-cor...


### Time series data (Johns Hopkins)

In [7]:
# Load the three time-series tables (confirmed, deaths, recovered).
# drop_null_vals defaults to axis='both', so all-NaN rows and columns are removed from each.
time_series_confirmed = drop_null_vals(pd.read_csv('data/time_series_covid_19_confirmed.csv'))
time_series_deaths = drop_null_vals(pd.read_csv('data/time_series_covid_19_deaths.csv'))
time_series_recovered = drop_null_vals(pd.read_csv('data/time_series_covid_19_recovered.csv'))


In [8]:
# Keep only the confirmed-series rows that have both a latitude and a longitude,
# since the coordinates are needed for the station lookup.
series_confirmed_coord = drop_null_vals(time_series_confirmed,subset=['Lat','Long'])
# Bare expression so the notebook renders the frame.
series_confirmed_coord

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20
0,,Thailand,15.0000,101.0000,2,3,5,7,8,8,...,47,48,50,50,50,53,59,70,75,82
1,,Japan,36.0000,138.0000,2,1,2,2,4,4,...,360,420,461,502,511,581,639,639,701,773
2,,Singapore,1.2833,103.8333,0,1,3,3,4,5,...,117,130,138,150,150,160,178,178,200,212
3,,Nepal,28.1667,84.2500,0,0,0,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,,Malaysia,2.5000,112.5000,0,0,0,3,4,4,...,50,83,93,99,117,129,149,149,197,238
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,,Saint Lucia,13.9094,-60.9789,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
438,,Saint Vincent and the Grenadines,12.9843,-61.2872,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
439,,Suriname,3.9193,-56.0278,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
440,"Virgin Islands, U.S.",US,18.3358,-64.8963,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


### Temperature and humidity data

In [9]:
# One entry per row of series_confirmed_coord: list() over the 2-D .values array
# yields a list of NumPy arrays, each holding that row's [Lat, Long].
lat_long_list = list(series_confirmed_coord[['Lat','Long']].values)

https://api.meteostat.net/v1/stations/nearby?lat=1.283&lon=103.83&limit=1&key=XXXXXXXX

https://api.meteostat.net/v1/history/daily?station=10637&start=2017-01-01&end=2017-12-31&key=XXXXXXXX


In [10]:
# Date range for the weather requests, taken from the time-series column labels.
# In the frame displayed above, column 4 is the first date column (after
# Province/State, Country/Region, Lat and Long) and the last column is the latest date.
# Labels are parsed as M/D/YY and stored as ISO 'YYYY-MM-DD' strings.
START_DATE = datetime.strptime(series_confirmed_coord.columns[4], '%m/%d/%y').date().isoformat()
END_DATE = datetime.strptime(series_confirmed_coord.columns[-1], '%m/%d/%y').date().isoformat()