In [5]:
#Fetching data from Ireland COVID-19 API provided by data.gov.ie

import pandas as pd
import numpy as np
import urllib.request
import os
import csv
import requests
import json
import datetime

### This dataframe provides the confirmed COVID-19 cases to date in Ireland, along with population data, county by county
#### Covers 26 counties up to the latest update date

In [6]:
def get_data(csv):
    """Load the per-county confirmed-case table and tidy it up.

    Parameters
    ----------
    csv : str or file-like
        Anything ``pandas.read_csv`` accepts (URL, path or open buffer).

    Returns
    -------
    pandas.DataFrame
        The source table without the identifier, grid-coordinate, deaths and
        recovered columns; with the census, case-count, case-rate and
        timestamp columns renamed; with ``Date`` holding ``datetime.date``
        values; and with every missing value replaced by 0.
    """
    unwanted_columns = [
        'ORIGID', 'IGEasting', 'IGNorthing', 'UniqueGeographicIdentifier',
        'ConfirmedCovidDeaths', 'ConfirmedCovidRecovered', 'x', 'y', 'FID',
    ]
    friendly_names = {
        'PopulationCensus16': 'Population',
        'ConfirmedCovidCases': 'ConfirmedCases',
        'TimeStampDate': 'Date',
        'PopulationProportionCovidCases': 'Case by Population',
    }

    counties = (
        pd.read_csv(csv)
        .drop(columns=unwanted_columns)
        .rename(columns=friendly_names)
    )
    # Parse the timestamp, then keep only its calendar-date part.
    counties['Date'] = pd.to_datetime(counties['Date']).dt.date
    # Missing values become 0 only after the date conversion, as before.
    return counties.fillna(0)
    

In [7]:
# Link to the dataset holding population information and confirmed cases per county in Ireland.
# Stored under its own name: assigning it to `csv` would overwrite the `csv` module
# imported at the top of the notebook.
counties_csv_url = 'http://opendata-geohive.hub.arcgis.com/datasets/4779c505c43c40da9101ce53f34bb923_0.csv?outSR={"latestWkid":3857,"wkid":102100}'
Ireland_counties_df = get_data(counties_csv_url)
Ireland_counties_df

Unnamed: 0,CountyName,Population,Lat,Long,ConfirmedCases,Case by Population,Date
0,Carlow,56932,52.7168,-6.8367,176,309.140729,2020-07-12
1,Cavan,76176,53.9878,-7.2937,865,1135.52825,2020-07-12
2,Clare,118817,52.8917,-8.9889,369,310.56162,2020-07-12
3,Cork,542868,51.9517,-8.6372,1548,285.152192,2020-07-12
4,Donegal,159192,54.8989,-7.96,468,293.984622,2020-07-12
5,Dublin,1347359,53.3605,-6.292,12312,913.787639,2020-07-12
6,Galway,258058,53.3705,-8.7362,489,189.492285,2020-07-12
7,Kerry,147707,52.1689,-9.565,314,212.583019,2020-07-12
8,Kildare,222504,53.238,-6.7837,1514,680.437206,2020-07-12
9,Kilkenny,99232,52.5816,-7.2175,356,358.75524,2020-07-12


In [8]:
#Ireland_counties_df.to_csv('C:\Users\apurv\Data Science in Python\Project\AllDumps\IrelandConfirmedCases.csv', index = False)

### This dataframe provides all of the attributes surrounding the COVID-19 case data.
### It includes figures such as the number of critical ICU cases and hospitalised cases by age group
#### Provides all of the statistical data to date, gender-related data, and the routes by which COVID-19 spread (close contact, travel abroad, community transmission)

##### Ireland_Overall = pd.read_csv('http://opendata-geohive.hub.arcgis.com/datasets/d8eb52d56273413b84b0187a4e9117be_0.csv?outSR={"latestWkid":3857,"wkid":102100}')


In [9]:
def Ireland_Overall(url):
    """Read the national COVID-19 statistics table and split it into themed views.

    Parameters
    ----------
    url : str or file-like
        Anything ``pandas.read_csv`` accepts (URL, path or open buffer).

    Returns
    -------
    tuple of pandas.DataFrame
        ``(full, case_stats, hospital, gender, spread)``:

        * ``full`` - every source column, ``Date`` as ``datetime.date``
          values and missing values replaced by 0;
        * ``case_stats`` - ``Date`` plus the daily and cumulative confirmed,
          deaths and recovered columns under shorter names;
        * ``hospital`` - the columns left after removing the listed ones,
          with the hospitalisation columns renamed;
        * ``gender`` - ``Date``, ``Male`` and ``Female``;
        * ``spread`` - ``Date``, ``CommunityTransmission``, ``CloseContact``
          and ``TravelAbroad``.
    """
    national = pd.read_csv(url)
    national['Date'] = pd.to_datetime(national['Date']).dt.date
    national = national.fillna(0)

    # Statistical view: confirmed / deaths / recovered, daily and cumulative.
    case_columns = {
        'ConfirmedCovidCases': 'Confirmed',
        'TotalConfirmedCovidCases': 'TotalConfirmed',
        'ConfirmedCovidDeaths': 'Deaths',
        'TotalCovidDeaths': 'TotalDeaths',
        'ConfirmedCovidRecovered': 'Recovered',
        'TotalCovidRecovered': 'TotalRecovered',
    }
    case_stats = (
        national
        .filter(items=['Date', *case_columns], axis=1)
        .rename(columns=case_columns)
    )

    # Hospital view: defined by exclusion, so any column not listed here is kept.
    not_hospital = [
        'X', 'Y',
        'ConfirmedCovidCases', 'TotalConfirmedCovidCases',
        'ConfirmedCovidDeaths', 'TotalCovidDeaths',
        'ConfirmedCovidRecovered', 'TotalCovidRecovered',
        'StatisticsProfileDate', 'CovidCasesConfirmed',
        'HealthcareWorkersCovidCases', 'ClustersNotified',
        'Male', 'Female', 'Unknown',
        'Aged1', 'Aged1to4', 'Aged5to14', 'Aged15to24', 'Aged25to34',
        'Aged35to44', 'Aged45to54', 'Aged55to64', 'Aged65up', 'Median_Age',
        'CommunityTransmission', 'CloseContact', 'TravelAbroad',
        'UnderInvestigation', 'FID',
    ]
    # The target labels mix 'Aged5-14' and 'Aged35to44' spellings; they are
    # kept exactly as they were so existing column names stay stable.
    hospital_names = {
        'HospitalisedCovidCases': 'HospitalisedCases',
        'RequiringICUCovidCases': 'Critical_ICUCases',
        'HospitalisedAged5': 'Aged5',
        'HospitalisedAged5to14': 'Aged5-14',
        'HospitalisedAged15to24': 'Aged15-24',
        'HospitalisedAged25to34': 'Aged25-34',
        'HospitalisedAged35to44': 'Aged35to44',
        'HospitalisedAged45to54': 'Aged45to54',
        'HospitalisedAged55to64': 'Aged55to64',
        'HospitalisedAged65up': 'Aged65up',
    }
    hospital = national.drop(columns=not_hospital).rename(columns=hospital_names)

    # Gender view: number of males / females affected.
    gender = national.filter(items=['Date', 'Male', 'Female'], axis=1)

    # Spread view: how the infection was transmitted.
    spread = national.filter(
        items=['Date', 'CommunityTransmission', 'CloseContact', 'TravelAbroad'],
        axis=1,
    )

    return national, case_stats, hospital, gender, spread
    

In [10]:
# Source CSV for the national statistics table.
link = 'http://opendata-geohive.hub.arcgis.com/datasets/d8eb52d56273413b84b0187a4e9117be_0.csv?outSR={"latestWkid":3857,"wkid":102100}'
# Unpack in the order Ireland_Overall returns them: full table, case statistics,
# hospital data, gender split, spread categories.
Entire_Ireland, Ireland_All_Stats, Ireland_Hospital_Data, Ireland_Gender, Ireland_Spread = Ireland_Overall(link) 

In [11]:
# Display the full national table (all source columns) returned by Ireland_Overall.
Entire_Ireland

Unnamed: 0,X,Y,Date,ConfirmedCovidCases,TotalConfirmedCovidCases,ConfirmedCovidDeaths,TotalCovidDeaths,ConfirmedCovidRecovered,TotalCovidRecovered,StatisticsProfileDate,...,Aged35to44,Aged45to54,Aged55to64,Aged65up,Median_Age,CommunityTransmission,CloseContact,TravelAbroad,UnderInvestigation,FID
0,-856335.869598,7.036492e+06,2020-02-29,1,1,0,0,0,0,2020/02/27 00:00:00+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9167
1,-856335.869598,7.036492e+06,2020-03-03,1,2,0,0,0,0,2020/03/01 00:00:00+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9168
2,-856335.869598,7.036492e+06,2020-03-04,4,6,0,0,0,0,2020/03/02 00:00:00+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9169
3,-856335.869598,7.036492e+06,2020-03-05,7,13,0,0,0,0,2020/03/03 00:00:00+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9170
4,-856335.869598,7.036492e+06,2020-03-06,5,18,0,0,0,0,2020/03/04 00:00:00+00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9171
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,-856335.869598,7.036492e+06,2020-07-10,25,25589,1,1744,21929,21929,2020/07/08 00:00:00+00,...,4500.0,4580.0,3260.0,6471.0,48.0,33.0,65.0,2.0,14762.0,9297
131,-856335.869598,7.036492e+06,2020-07-11,23,25611,2,1746,21929,21929,2020/07/09 00:00:00+00,...,4505.0,4582.0,3260.0,6472.0,48.0,33.0,65.0,2.0,14762.0,9298
132,-856335.869598,7.036492e+06,2020-07-12,17,25628,0,1746,21929,21929,2020/07/10 00:00:00+00,...,4510.0,4585.0,3260.0,6474.0,48.0,33.0,65.0,2.0,14762.0,9299
133,-856335.869598,7.036492e+06,2020-07-13,11,25638,0,1746,21929,21929,2020/07/11 00:00:00+00,...,4512.0,4586.0,3261.0,6475.0,48.0,33.0,65.0,2.0,14762.0,9300


In [12]:
# Save the full national table without the row index.
# NOTE(review): this is an absolute, machine-specific Windows path, while most other exports in
# this notebook use relative file names - confirm the intended output location.
Entire_Ireland.to_csv(r'C:\Users\apurv\Data Science in Python\Project\AllDumps\Ireland_TimeSeries.csv', index = False)

In [13]:
# Display the daily and cumulative confirmed / deaths / recovered figures.
Ireland_All_Stats

Unnamed: 0,Date,Confirmed,TotalConfirmed,Deaths,TotalDeaths,Recovered,TotalRecovered
0,2020-02-29,1,1,0,0,0,0
1,2020-03-03,1,2,0,0,0,0
2,2020-03-04,4,6,0,0,0,0
3,2020-03-05,7,13,0,0,0,0
4,2020-03-06,5,18,0,0,0,0
...,...,...,...,...,...,...,...
130,2020-07-10,25,25589,1,1744,21929,21929
131,2020-07-11,23,25611,2,1746,21929,21929
132,2020-07-12,17,25628,0,1746,21929,21929
133,2020-07-13,11,25638,0,1746,21929,21929


In [14]:
# Save the case-statistics subset to the current working directory, without the row index.
Ireland_All_Stats.to_csv('IrelandCaseTypes.csv', index = False)

In [15]:
# Display the hospitalisation subset (hospitalised / ICU totals and hospitalised cases by age band).
Ireland_Hospital_Data

Unnamed: 0,Date,HospitalisedCases,Critical_ICUCases,Aged5,Aged5-14,Aged15-24,Aged25-34,Aged35to44,Aged45to54,Aged55to64,Aged65up
0,2020-02-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-03-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2020-03-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-03-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2020-03-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
130,2020-07-10,3328.0,436.0,23.0,17.0,74.0,197.0,271.0,447.0,495.0,1802.0
131,2020-07-11,3330.0,436.0,23.0,17.0,74.0,197.0,273.0,447.0,495.0,1802.0
132,2020-07-12,3330.0,436.0,23.0,17.0,74.0,197.0,273.0,447.0,495.0,1802.0
133,2020-07-13,3330.0,436.0,23.0,17.0,74.0,197.0,273.0,447.0,495.0,1802.0


In [16]:
# Save the hospitalisation subset to the current working directory, without the row index.
Ireland_Hospital_Data.to_csv('IrelandHospitalData.csv', index = False)

In [17]:
# Display the Date / Male / Female subset.
Ireland_Gender

Unnamed: 0,Date,Male,Female
0,2020-02-29,0.0,0.0
1,2020-03-03,0.0,0.0
2,2020-03-04,0.0,0.0
3,2020-03-05,0.0,0.0
4,2020-03-06,0.0,0.0
...,...,...,...
130,2020-07-10,10930.0,14604.0
131,2020-07-11,10939.0,14619.0
132,2020-07-12,10947.0,14633.0
133,2020-07-13,10954.0,14643.0


In [18]:
# Save the gender subset to the current working directory, without the row index.
Ireland_Gender.to_csv('IrelandGenderWise.csv', index = False)

In [19]:
# Display the transmission-route subset (community transmission, close contact, travel abroad).
Ireland_Spread

Unnamed: 0,Date,CommunityTransmission,CloseContact,TravelAbroad
0,2020-02-29,0.0,0.0,0.0
1,2020-03-03,0.0,0.0,0.0
2,2020-03-04,0.0,0.0,0.0
3,2020-03-05,0.0,0.0,0.0
4,2020-03-06,0.0,0.0,0.0
...,...,...,...,...
130,2020-07-10,33.0,65.0,2.0
131,2020-07-11,33.0,65.0,2.0
132,2020-07-12,33.0,65.0,2.0
133,2020-07-13,33.0,65.0,2.0


In [20]:
# Save the transmission-route subset to the current working directory, without the row index.
Ireland_Spread.to_csv('IrelandSpreadCategory.csv', index = False)

### This dataframe consists of time-series data for all 26 counties in Ireland (the number of cases recorded)

##### AllCounties_timeseries = pd.read_csv('http://opendata-geohive.hub.arcgis.com/datasets/d9be85b30d7748b5b7c09450b8aede63_0.csv?outSR={"latestWkid":3857,"wkid":102100}')
#### Gives Confirmed cases starting from February until the latest update

In [21]:
def IrelandCases(url):
    """Load the per-county case history, keeping county, date and confirmed count.

    Parameters
    ----------
    url : str or file-like
        Anything ``pandas.read_csv`` accepts (URL, path or open buffer).

    Returns
    -------
    pandas.DataFrame
        The source table without the identifier, geometry, population, deaths
        and recovered columns; ``CountyName``, ``TimeStamp`` and
        ``ConfirmedCovidCases`` are renamed to ``County``, ``Date`` and
        ``Confirmed``, and ``Date`` holds ``datetime.date`` values.
    """
    discarded = [
        'OBJECTID', 'ORIGID', 'IGEasting', 'PopulationCensus16', 'Lat', 'Long',
        'IGNorthing', 'UGI', 'PopulationProportionCovidCases',
        'ConfirmedCovidDeaths', 'ConfirmedCovidRecovered',
        'Shape__Area', 'Shape__Length',
    ]
    short_names = {
        'CountyName': 'County',
        'TimeStamp': 'Date',
        'ConfirmedCovidCases': 'Confirmed',
    }

    history = pd.read_csv(url).drop(columns=discarded)
    # Parse the timestamp, then keep only its calendar-date part.
    history['TimeStamp'] = pd.to_datetime(history['TimeStamp']).dt.date
    return history.rename(columns=short_names)

In [22]:
# Fetch the county-level time series; IrelandCases returns County, Date and Confirmed columns.
IrelandCounties = IrelandCases('http://opendata-geohive.hub.arcgis.com/datasets/d9be85b30d7748b5b7c09450b8aede63_0.csv?outSR={"latestWkid":3857,"wkid":102100}')
IrelandCounties

Unnamed: 0,County,Date,Confirmed
0,Carlow,2020-02-27,0
1,Cavan,2020-02-27,0
2,Clare,2020-02-27,0
3,Cork,2020-02-27,0
4,Donegal,2020-02-27,0
...,...,...,...
3505,Tipperary,2020-07-12,543
3506,Waterford,2020-07-12,165
3507,Westmeath,2020-07-12,676
3508,Wexford,2020-07-12,221


In [23]:
def Time_Series_Data(df):
    """Pivot long-format county records into a date-by-county table of confirmed cases.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format records with ``County``, ``Date`` and ``Confirmed`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per unique date and one column per unique county, both in
        order of first appearance in ``df``.  Each cell holds the sum of
        ``Confirmed`` over the rows for that county and date; combinations
        with no rows stay NaN.  The frame has object dtype, so integer counts
        are not converted to floats when gaps exist.
    """
    dates = df['Date'].unique()        # row labels, in order of first appearance
    counties = df['County'].unique()   # column labels, in order of first appearance

    # A single aggregation pass yields the total per (date, county) pair, replacing
    # the nested scan over every county, every date and every output column.
    totals = df.groupby(['Date', 'County'], sort=False)['Confirmed'].sum()

    # Start from an all-NaN, object-dtype grid already in its final orientation.
    table = pd.DataFrame(index=dates, columns=counties)
    for (date, county), total in totals.items():
        # Write with one label-based lookup.  Chained indexing
        # (frame[col][row] = value) is not guaranteed to write to the frame and
        # does nothing when pandas Copy-on-Write is enabled.
        table.at[date, county] = total

    return table

In [24]:
# Reshape the long county records into a table with one row per date and one column per county.
Confirmed_Ireland = Time_Series_Data(IrelandCounties)

In [25]:
# Display the date-by-county table of confirmed cases.
Confirmed_Ireland

Unnamed: 0,Carlow,Cavan,Clare,Cork,Donegal,Dublin,Galway,Kerry,Kildare,Kilkenny,...,Meath,Monaghan,Offaly,Roscommon,Sligo,Tipperary,Waterford,Westmeath,Wexford,Wicklow
2020-02-27,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-03-01,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-03-02,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-03-03,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-03-04,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-07-08,175,865,368,1545,468,12275,486,310,1503,356,...,812,539,486,346,147,543,165,676,218,700
2020-07-09,176,865,368,1545,468,12287,490,311,1506,356,...,812,539,486,346,148,543,165,676,218,702
2020-07-10,176,866,369,1545,468,12294,489,311,1513,356,...,812,539,486,346,148,543,165,676,221,705
2020-07-11,176,865,369,1546,468,12308,489,311,1513,356,...,812,539,486,346,148,543,165,676,221,707


In [26]:
# Date = AllCounties_timeseries['TimeStamp'].unique()
# CountyName = AllCounties_timeseries['CountyName'].unique()

In [27]:
# Counties_Timeseries = pd.DataFrame(index = CountyName, columns = (Date))
# Counties_Timeseries

In [28]:
# Save the date-by-county table.
# NOTE(review): the dates are the index of Confirmed_Ireland, so index = False leaves them out of
# the file and only the county columns are written - confirm this is intended for a time series.
# NOTE(review): this is an absolute, machine-specific Windows path - confirm the intended output location.
Confirmed_Ireland.to_csv(r'C:\Users\apurv\Data Science in Python\Project\AllDumps\CountyWise_TimeSeries.csv', index = False)