# Prepare data

In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *  #data types
from pyspark.sql import functions as F   #functions
from pyspark.sql.functions import udf, col
from pyspark.sql import Row   # createDataFrame
from pyspark import SQLContext

# import xml.etree.ElementTree as ET
# import csv

import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt

import ipywidgets as widgets
from ipywidgets import interact, interact_manual


spark=SparkSession.builder.appName('Canada COVID19 Data').getOrCreate()

In [2]:
casefn=r'data/COVID19/Covid19Canada-master/cases.csv'
casedf=spark.read.csv(casefn,header=True,inferSchema=True)
casedf.printSchema()
mortfn=r'data/COVID19/Covid19Canada-master/mortality.csv'
mortdf=spark.read.csv(mortfn,header=True,inferSchema=True)
recofn=r'data/COVID19/Covid19Canada-master/recovered_cumulative.csv'
recodf=spark.read.csv(recofn,header=True,inferSchema=True)
testfn=r'data/COVID19/Covid19Canada-master/testing_cumulative.csv'
testdf=spark.read.csv(testfn,header=True,inferSchema=True)

root
 |-- case_id: integer (nullable = true)
 |-- provincial_case_id: integer (nullable = true)
 |-- age: string (nullable = true)
 |-- sex: string (nullable = true)
 |-- health_region: string (nullable = true)
 |-- province: string (nullable = true)
 |-- country: string (nullable = true)
 |-- date_report: string (nullable = true)
 |-- report_week: string (nullable = true)
 |-- travel_yn: string (nullable = true)
 |-- travel_history_country: string (nullable = true)
 |-- locally_acquired: string (nullable = true)
 |-- case_source: string (nullable = true)
 |-- additional_info: string (nullable = true)
 |-- additional_source: string (nullable = true)
 |-- method_note: string (nullable = true)



In [3]:
# casedf.show()

In [4]:
#mortdf.printSchema()
func =  udf (lambda x: datetime.strptime(x, '%d-%m-%Y'), DateType())
mortdf = mortdf.withColumn('Death_Date', func(col('date_death_report')))
casedf = casedf.withColumn('case_Date', func(col('date_report')))
recodf = recodf.withColumn('date_recovered', func(col('date_recovered')))
testdf = testdf.withColumn('date_testing', func(col('date_testing')))
#mortdf.printSchema()

In [5]:
recodf.printSchema()

root
 |-- date_recovered: date (nullable = true)
 |-- province: string (nullable = true)
 |-- cumulative_recovered: string (nullable = true)



In [6]:
recodf.select('province').distinct()

DataFrame[province: string]

In [7]:
testdf.printSchema()

root
 |-- date_testing: date (nullable = true)
 |-- province: string (nullable = true)
 |-- cumulative_testing: string (nullable = true)
 |-- testing_info: string (nullable = true)



In [8]:
recoverpd=recodf.toPandas()
recoverpd=recoverpd.replace('NA','0')
recoverpd=recoverpd.astype({"cumulative_recovered":"int32"})
recover_table=pd.pivot_table(recoverpd, values='cumulative_recovered',index='date_recovered', columns='province')
#recover_table.index=pd.to_datetime(recover_table.index)
recover_table['CA']=recover_table.sum(axis=1)
recover_table=recover_table.sort_values(['CA','date_recovered'])
toYuanPei_recover_table=recover_table[['CA','Ontario','Quebec','BC']]

toYuanPei_recover_table.to_csv('data/COVID19/toYuanPei/recover_table.csv')

In [9]:
testingpd=testdf.toPandas()
testingpd=testingpd.replace('NA','0')
testingpd=testingpd.astype({"cumulative_testing":"int32"})
testing_table=pd.pivot_table(testingpd, values='cumulative_testing',index='date_testing', columns='province')
#testing_table.index=pd.to_datetime(testing_table.index)
testing_table['CA']=testing_table.sum(axis=1)
testing_table=testing_table.sort_values(['CA','date_testing'])
toYuanPei_testing_table=testing_table[['CA','Ontario','Quebec','BC']]

toYuanPei_testing_table.to_csv('data/COVID19/toYuanPei/testing_table.csv')

In [10]:
deathtable=mortdf.crosstab('Death_Date','province')
deathtable=deathtable.toPandas()
deathtable=deathtable.sort_values('Death_Date_province')

cumsum_deathtable=deathtable.set_index('Death_Date_province').cumsum()
cumsum_deathtable['CA']=cumsum_deathtable.sum(axis=1)

toYuanPei_death_table=cumsum_deathtable[['CA','Ontario','Quebec','BC']]

#toYuanPei_death_table.head(5)

#mortdf.select('health_region').distinct().show(5) # Toronto  Vancouver Coastal Montréal,Ottawa 

mortdftable=mortdf.crosstab('Death_Date','health_region')
health_region_table=mortdftable.select(['Death_Date_health_region','Toronto','Montréal','Vancouver Coastal','Ottawa'])

health_region_table1=health_region_table.toPandas()
health_region_table1=health_region_table1.rename(columns={"Death_Date_health_region":"Death_Date"})
health_region_table1=health_region_table1.sort_values('Death_Date')
health_region_table1=health_region_table1.cumsum()
health_region_table1.columns


toYuanPei_death_table['Toronto']=np.array(health_region_table1['Toronto'])
toYuanPei_death_table['Montréal']=np.array(health_region_table1['Montréal'])
toYuanPei_death_table['Vancouver']=np.array(health_region_table1['Vancouver Coastal'])
toYuanPei_death_table['Ottawa']=np.array(health_region_table1['Ottawa'])

toYuanPei_death_table.to_csv('data/COVID19/toYuanPei/death_table.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

In [11]:
casetable=casedf.crosstab('case_Date','province')
casetable=casetable.toPandas()
casetable=casetable.sort_values('case_Date_province')

cumsum_casetable=casetable.set_index('case_Date_province').cumsum()
cumsum_casetable['CA']=cumsum_casetable.sum(axis=1)

toYuanPei_case_table=cumsum_casetable[['CA','Ontario','Quebec','BC']]

casedftable=casedf.crosstab('case_Date','health_region')
health_region_table=casedftable.select(['case_Date_health_region','Toronto','Montréal','Vancouver Coastal','Ottawa'])

health_region_table1=health_region_table.toPandas()
health_region_table1=health_region_table1.rename(columns={"case_Date_health_region":"case_Date"})
health_region_table1=health_region_table1.sort_values('case_Date')
health_region_table1=health_region_table1.cumsum()
health_region_table1.columns


toYuanPei_case_table['Toronto']=np.array(health_region_table1['Toronto'])
toYuanPei_case_table['Montréal']=np.array(health_region_table1['Montréal'])
toYuanPei_case_table['Vancouver']=np.array(health_region_table1['Vancouver Coastal'])
toYuanPei_case_table['Ottawa']=np.array(health_region_table1['Ottawa'])


toYuanPei_case_table.to_csv('data/COVID19/toYuanPei/case_table.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

In [12]:
rawdat=casedf.toPandas()
cols=rawdat.columns
LL=casedf.count()
LL
func =  udf (lambda x: datetime.strptime(x, '%d-%m-%Y'), DateType())
# interact with specification of arguments
@interact
def show_rawdata(column=cols, x=(1,LL,1)):
    return rawdat.iloc[x:x+10]

interactive(children=(Dropdown(description='column', options=('case_id', 'provincial_case_id', 'age', 'sex', '…

In [13]:
# data source: https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1310076601
indvidualCasescsvfile=r'data/COVID19/13100766-eng/13100766.csv'

In [14]:
indcases=spark.read.csv(indvidualCasescsvfile,header=True,inferSchema=True)

In [15]:
indcases.printSchema()

root
 |-- REF_DATE: integer (nullable = true)
 |-- GEO: string (nullable = true)
 |-- DGUID: string (nullable = true)
 |-- Case identifier number: integer (nullable = true)
 |-- Case information: string (nullable = true)
 |-- UOM: string (nullable = true)
 |-- UOM_ID: integer (nullable = true)
 |-- SCALAR_FACTOR: string (nullable = true)
 |-- SCALAR_ID: integer (nullable = true)
 |-- VECTOR: string (nullable = true)
 |-- COORDINATE: string (nullable = true)
 |-- VALUE: integer (nullable = true)
 |-- STATUS: string (nullable = true)
 |-- SYMBOL: string (nullable = true)
 |-- TERMINATED: string (nullable = true)
 |-- DECIMALS: integer (nullable = true)



In [16]:
indcases.select("Case information").distinct().show(20, truncate=False)

+------------------------------------+
|Case information                    |
+------------------------------------+
|Transmission                        |
|Death                               |
|Date case was last updated - month  |
|Hospitalization, previous status    |
|Death, previous status              |
|Age group                           |
|Episode date - month                |
|Date case was last updated - day    |
|Episode date - day                  |
|Hospitalization                     |
|Intensive care unit                 |
|Gender                              |
|Intensive care unit, previous status|
+------------------------------------+



In [17]:
indcases.select("COORDINATE").distinct().show()

+----------+
|COORDINATE|
+----------+
|    1.12.8|
|    1.15.8|
|   1.16.12|
|    1.19.4|
|    1.37.5|
|    1.63.8|
|    1.67.1|
|   1.93.11|
|  1.111.11|
|   1.121.2|
|   1.125.7|
|   1.176.2|
|   1.224.2|
|  1.237.11|
|   1.240.9|
|   1.250.9|
|  1.258.13|
|   1.270.7|
|  1.358.11|
|   1.375.9|
+----------+
only showing top 20 rows



In [18]:
indcases.select("REF_DATE").distinct().show()


+--------+
|REF_DATE|
+--------+
|    2020|
+--------+



In [19]:
#casetable=indcases.crosstab("Case identifier number","Case information") \
#                  .toPandas() 

In [20]:
# casetable = casetable.drop(["Death, previous status", \
#                             "Hospitalization, previous status", \
#                             "Intensive care unit, previous status", \
#                             "Date case was last updated - day", \
#                             "Date case was last updated - month"], axis = 1)

In [21]:
# casetable

In [22]:
casepandas=indcases.toPandas()

In [23]:
casetable1=pd.pivot_table(casepandas, 
                          values='VALUE',
                          index=["Case identifier number"],
                          columns=["Case information"], 
                          aggfunc=np.sum)

In [24]:
casetable1 = casetable1.drop(["Death, previous status", \
                              "Hospitalization, previous status", \
                              "Intensive care unit, previous status", \
                              "Date case was last updated - day", \
                              "Date case was last updated - month"], axis = 1)

In [25]:
casetable1.columns

Index(['Age group', 'Death', 'Episode date - day', 'Episode date - month',
       'Gender', 'Hospitalization', 'Intensive care unit', 'Transmission'],
      dtype='object', name='Case information')

In [26]:
casetable1["Age group"].unique()

array([ 5.,  4.,  2.,  3.,  1.,  6.,  7.,  8., 99.])

In [27]:
casetable1["Gender"].unique()

array([1., 2., 9., 3.])

In [28]:
casetable1["Hospitalization"].unique()

array([1., 9., 2.])

In [29]:
casetable1["Intensive care unit"].unique()

array([2., 9., 1.])

In [30]:
casetable1["Transmission"].unique()

array([1., 2., 3.])

In [31]:
casetable1['Episode date - month'].unique()

array([ 1.,  2.,  3., 99.,  4.,  5.])

In [32]:
casetable1['Episode date - day'].unique()

array([21., 22., 24., 27., 15.,  9., 14.,  5., 18., 25., 26., 28., 20.,
       23., 29., 99.,  1.,  2.,  4.,  8.,  3.,  7.,  6., 11., 10., 12.,
       13., 16., 17., 19., 30., 31.])

In [33]:
casetable1=casetable1.rename(columns={"Episode date - month":"Month", \
                                      "Episode date - day":"Day", \
                                     "Intensive care unit":"ICU",
                                     "Age group":"AgeGroup"})

In [34]:
casetable1.head()

Case information,AgeGroup,Death,Day,Month,Gender,Hospitalization,ICU,Transmission
Case identifier number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,5.0,2.0,21.0,1.0,1.0,1.0,2.0,1.0
2,4.0,2.0,22.0,1.0,1.0,1.0,9.0,1.0
3,2.0,2.0,24.0,1.0,2.0,9.0,2.0,1.0
4,5.0,2.0,27.0,1.0,2.0,1.0,1.0,1.0
5,3.0,2.0,15.0,1.0,1.0,2.0,9.0,1.0


In [35]:
casetable2=casetable1.copy()

In [36]:
casetable2=casetable2[casetable2.Month<13]
casetable2=casetable2[casetable2.Day<32]

In [37]:
casetable2.head()

Case information,AgeGroup,Death,Day,Month,Gender,Hospitalization,ICU,Transmission
Case identifier number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,5.0,2.0,21.0,1.0,1.0,1.0,2.0,1.0
2,4.0,2.0,22.0,1.0,1.0,1.0,9.0,1.0
3,2.0,2.0,24.0,1.0,2.0,9.0,2.0,1.0
4,5.0,2.0,27.0,1.0,2.0,1.0,1.0,1.0
5,3.0,2.0,15.0,1.0,1.0,2.0,9.0,1.0


In [38]:
from datetime import date
casetable2["Date"]=casetable2.apply(lambda row: date(2020,int(row.Month),int(row.Day)), axis=1)

In [39]:
table3=casetable2.sort_values('Date')

In [40]:
table3=table3.drop(["Day","Month"],axis=1)

In [41]:
table3.head()

Case information,AgeGroup,Death,Gender,Hospitalization,ICU,Transmission,Date
Case identifier number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5,3.0,2.0,1.0,2.0,9.0,1.0,2020-01-15
20785,2.0,9.0,1.0,9.0,9.0,2.0,2020-01-19
1,5.0,2.0,1.0,1.0,2.0,1.0,2020-01-21
2,4.0,2.0,1.0,1.0,9.0,1.0,2020-01-22
423,5.0,2.0,2.0,2.0,9.0,1.0,2020-01-22


In [42]:
table4=spark.createDataFrame(table3)

In [43]:
table4.show(5)

+--------+-----+------+---------------+---+------------+----------+
|AgeGroup|Death|Gender|Hospitalization|ICU|Transmission|      Date|
+--------+-----+------+---------------+---+------------+----------+
|     3.0|  2.0|   1.0|            2.0|9.0|         1.0|2020-01-15|
|     2.0|  9.0|   1.0|            9.0|9.0|         2.0|2020-01-19|
|     5.0|  2.0|   1.0|            1.0|2.0|         1.0|2020-01-21|
|     4.0|  2.0|   1.0|            1.0|9.0|         1.0|2020-01-22|
|     5.0|  2.0|   2.0|            2.0|9.0|         1.0|2020-01-22|
+--------+-----+------+---------------+---+------------+----------+
only showing top 5 rows



In [44]:
table_AgeGroup=table4.crosstab('Date',"AgeGroup").toPandas().sort_values('Date_AgeGroup')

table_AgeGroup=table_AgeGroup.set_index("Date_AgeGroup").cumsum()

table_AgeGroup.tail()

Unnamed: 0_level_0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,99.0
Date_AgeGroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-05-01,1344,3509,4118,4675,4863,3616,2402,4879,382
2020-05-02,1359,3534,4142,4705,4882,3636,2419,4893,382
2020-05-03,1367,3550,4155,4721,4903,3651,2434,4909,382
2020-05-04,1369,3560,4167,4732,4921,3656,2446,4924,382
2020-05-05,1369,3565,4171,4733,4927,3657,2447,4931,382


Age group codes: 1 = 0 to 19 years, 2 = 20 to 29 years, 3 = 30 to 39 years, 4 = 40 to 49 years, 5 = 50 to 59 years, 6 = 60 to 69 years, 7 = 70 to 79 years, 8 = 80 years or older, 99 = Not stated. These values are corrected as the Public Health Agency of Canada (PHAC) receives new information.

In [45]:
table_Death=table4.crosstab('Date',"Death").toPandas().sort_values('Date_Death')

table_Death=table_Death.set_index("Date_Death").cumsum()
table_Death.tail()

Unnamed: 0_level_0,1.0,2.0,9.0
Date_Death,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-05-01,1678,13151,14959
2020-05-02,1678,13158,15116
2020-05-03,1678,13166,15228
2020-05-04,1678,13172,15307
2020-05-05,1678,13172,15332


Death: 1 = Yes, 2 = No, 9 = Not stated.

In [46]:
table_Gender=table4.crosstab('Date',"Gender").toPandas().sort_values('Date_Gender')

table_Gender=table_Gender.set_index("Date_Gender").cumsum()
table_Gender.tail()

Unnamed: 0_level_0,1.0,2.0,3.0,9.0
Date_Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-05-01,13222,16365,11,190
2020-05-02,13300,16450,11,191
2020-05-03,13349,16520,11,192
2020-05-04,13386,16568,11,192
2020-05-05,13397,16581,11,193


Gender codes: 1 = Male, 2 = Female, 3 = Non-binary, 9 = Not stated. These values are corrected as the Public Health Agency of Canada (PHAC) receives new information. It should be noted that the French form uses the term ‘sex’ contrary to the English form that uses the term ‘gender’. In the context of this table, the term gender is also used in French and the cases that have reported ‘other’ for the sex have been categorized as a ‘non-binary’ gender.

In [47]:
table_Hospitalization=table4.crosstab('Date',"Hospitalization").toPandas().sort_values('Date_Hospitalization')

table_Hospitalization=table_Hospitalization.set_index("Date_Hospitalization").cumsum()
table_Hospitalization.tail()

Unnamed: 0_level_0,1.0,2.0,9.0
Date_Hospitalization,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-05-01,3191,15984,10613
2020-05-02,3200,16031,10721
2020-05-03,3210,16063,10799
2020-05-04,3212,16087,10858
2020-05-05,3212,16094,10876


Hospitalization: 1 = Yes, 2 = No, 9 = Not Stated.

In [48]:
table_ICU=table4.crosstab('Date',"ICU").toPandas().sort_values('Date_ICU')

table_ICU=table_ICU.set_index("Date_ICU").cumsum()
table_ICU.tail()

Unnamed: 0_level_0,1.0,2.0,9.0
Date_ICU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-05-01,753,7232,21803
2020-05-02,753,7249,21950
2020-05-03,755,7268,22049
2020-05-04,755,7284,22118
2020-05-05,755,7289,22138


Patient was admitted to the intensive care unit: 1 = Yes, 2 = No, 9 = Not Stated.

In [49]:
table_Transmission=table4.crosstab('Date',"Transmission").toPandas().sort_values('Date_Transmission')

table_Transmission=table_Transmission.set_index("Date_Transmission").cumsum()
table_Transmission['cases'] =table_Transmission['1.0']+table_Transmission['2.0']+table_Transmission['3.0']
table_Transmission.tail()

Unnamed: 0_level_0,1.0,2.0,3.0,cases
Date_Transmission,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-05-01,4454,20418,4916,29788
2020-05-02,4456,20452,5044,29952
2020-05-03,4456,20476,5140,30072
2020-05-04,4456,20485,5216,30157
2020-05-05,4456,20487,5239,30182


Transmission codes: 1= Travel exposure – cases that had contact with a travel-related case or had travelled outside of Canada in the 14 days prior to illness onset. 2 = Community exposure – cases that had no known contact with a travel-related case and had not travelled outside of Canada in the 14 days prior to illness onset. 3 = Pending – confirmation on exposure setting is pending. These values are corrected as the Public Health Agency of Canada (PHAC) receives new information.

In [50]:
# create a dataframe to store ratios
ratios = pd.DataFrame({'date':[],
                       'Age(0-19)':[],
                       'Age(20-9)':[],
                       'Age(30-9)':[],
                       'Age(40-9)':[],
                       'Age(50-9)':[],
                       'Age(60-9)':[],
                       'Age(70-9)':[],
                       'Age(80-)':[],
                       'Age(unkown)':[],
                       'Gender(Male)':[],
                       'Gender(Femail)':[],
                       'Gender(Non-binary)':[],
                       'Gender(Not stated)':[],
                       'Transmission(Travel)':[],
                       'Transmission(Community)':[],
                       'Transmission(Pending)':[],                       
                       'Hospitalization(Yes)':[],
                       'Hospitalization(Nes)':[],
                       'Hospitalization(Not Stated)':[], 
                       'ICU(Yes)':[],
                       'ICU(No)':[],
                       'ICU(Not Stated)':[],                       
                       'Death(Yes)':[], 
                       'Death(No)':[], 
                       'Death(Not stated)':[]})

In [51]:
ratios['date']=table_Transmission.index

In [52]:
for col1,col2 in zip(ratios.columns[1:10],table_AgeGroup.columns):
    print((col1,col2))
    ratios[col1] = np.around(np.array(table_AgeGroup[col2])/np.array(table_Transmission['cases'])*100,decimals=1)
#table_AgeGroup.columns

('Age(0-19)', '1.0')
('Age(20-9)', '2.0')
('Age(30-9)', '3.0')
('Age(40-9)', '4.0')
('Age(50-9)', '5.0')
('Age(60-9)', '6.0')
('Age(70-9)', '7.0')
('Age(80-)', '8.0')
('Age(unkown)', '99.0')


In [53]:
for col1,col2 in zip(ratios.columns[10:14],table_Gender.columns):
    print((col1,col2))
    ratios[col1] = np.around(np.array(table_Gender[col2])/np.array(table_Transmission['cases'])*100,decimals=1)

('Gender(Male)', '1.0')
('Gender(Femail)', '2.0')
('Gender(Non-binary)', '3.0')
('Gender(Not stated)', '9.0')


In [54]:
for col1,col2 in zip(ratios.columns[14:17],table_Transmission.columns):
    print((col1,col2))
    ratios[col1] = np.around(np.array(table_Transmission[col2])/np.array(table_Transmission['cases'])*100,decimals=1)

('Transmission(Travel)', '1.0')
('Transmission(Community)', '2.0')
('Transmission(Pending)', '3.0')


In [55]:
for col1,col2 in zip(ratios.columns[17:20],table_Hospitalization.columns):
    print((col1,col2))
    ratios[col1] = np.around(np.array(table_Hospitalization[col2])/np.array(table_Transmission['cases'])*100,decimals=1)

('Hospitalization(Yes)', '1.0')
('Hospitalization(Nes)', '2.0')
('Hospitalization(Not Stated)', '9.0')


In [56]:
for col1,col2 in zip(ratios.columns[20:23],table_ICU.columns):
    print((col1,col2))
    ratios[col1] = np.around(np.array(table_ICU[col2])/np.array(table_Transmission['cases'])*100,decimals=1)

('ICU(Yes)', '1.0')
('ICU(No)', '2.0')
('ICU(Not Stated)', '9.0')


In [57]:
for col1,col2 in zip(ratios.columns[23:26],table_Death.columns):
    print((col1,col2))
    ratios[col1] = np.around(np.array(table_Death[col2])/np.array(table_Transmission['cases'])*100,decimals=1)

('Death(Yes)', '1.0')
('Death(No)', '2.0')
('Death(Not stated)', '9.0')


In [58]:
ratios.to_csv('data/COVID19/toYuanPei/Canada_COVID19_ratios_based_on_Confirmed_details.csv')