In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
%matplotlib inline

import os
import glob

# Part I: Process influenza cases data 

## 1 Combine all CSV data

In [2]:
# Work from the folder that holds the raw influenza CSV files.
# The target is a relative path, so an unguarded chdir only succeeds once per
# kernel: a second run would look for ./Influenza/Since2005 *inside* itself and
# raise FileNotFoundError. Skip the call when we are already in that folder.
_cwd_tail = os.path.normpath(os.getcwd()).split(os.sep)[-2:]
if _cwd_tail != ["Influenza", "Since2005"]:
    os.chdir("./Influenza/Since2005")

In [4]:
# Stack every raw CSV in the current working directory into one file.
extension = 'csv'
output_name = "combined_data.csv"

# The output is written into the same folder that is globbed, so it must be
# excluded from the inputs; otherwise a re-run would read the previous result
# back in and duplicate its rows. Sorting makes the row order reproducible,
# because glob returns names in an unspecified order.
all_filenames = sorted(
    name for name in glob.glob('*.{}'.format(extension)) if name != output_name
)
if not all_filenames:
    raise FileNotFoundError(
        "No input .{} files found in {}".format(extension, os.getcwd())
    )

# combine all files in the list
# header=2: row 2 (0-indexed) of each file is used as the header; rows above it are skipped
combined_csv = pd.concat([pd.read_csv(f, header = 2) for f in all_filenames])

# export to csv (index=False: the per-file row index is not written)
combined_csv.to_csv(output_name, index=False, encoding='utf-8-sig')

# reference: https://www.freecodecamp.org/news/how-to-combine-multiple-csv-files-with-8-lines-of-code-265183e0854/

## 2 Calculate the influenza cases by country and by year

In [8]:
# Read the stacked file back from disk; its first row supplies the column names.
df = pd.read_csv("combined_data.csv")

In [9]:
# Show the column labels of the combined frame
df.columns

Index(['Country', 'WHOREGION', 'FLUREGION', 'Year', 'Week', 'SDATE', 'EDATE',
       'SPEC_RECEIVED_NB', 'SPEC_PROCESSED_NB', 'AH1', 'AH1N12009', 'AH3',
       'AH5', 'ANOTSUBTYPED', 'INF_A', 'BYAMAGATA', 'BVICTORIA',
       'BNOTDETERMINED', 'INF_B', 'ALL_INF', 'ALL_INF2', 'TITLE'],
      dtype='object')

In [10]:
# Keep only the identifying columns and the case-count columns, then preview.
keep_cols = ['Country', 'FLUREGION', 'Year', 'AH1N12009', 'INF_A', 'INF_B', 'ALL_INF']
df = df[keep_cols]
df.head()

Unnamed: 0,Country,FLUREGION,Year,AH1N12009,INF_A,INF_B,ALL_INF
0,China,Eastern Asia,2005,,7.0,2.0,9.0
1,China,Eastern Asia,2005,,12.0,6.0,18.0
2,China,Eastern Asia,2005,,16.0,2.0,18.0
3,China,Eastern Asia,2005,,20.0,8.0,28.0
4,China,Eastern Asia,2005,,23.0,10.0,33.0


In [11]:
# Show the dtype of each remaining column
df.dtypes

Country       object
FLUREGION     object
Year           int64
AH1N12009    float64
INF_A        float64
INF_B        float64
ALL_INF      float64
dtype: object

In [20]:
# Sum ALL_INF over every (Country, Year) pair.

df_orig = df.copy()  # keep a copy of df as it is before aggregating
df_allinf = (
    df.groupby(['Country', 'Year'])['ALL_INF']
      .sum()
      .reset_index()  # turn the (Country, Year) index back into ordinary columns
)


In [21]:
# Preview the first rows of the per-country, per-year totals
df_allinf.head()

Unnamed: 0,Country,Year,ALL_INF
0,China,2005,8239.0
1,China,2006,7059.0
2,China,2007,9924.0
3,China,2008,8021.0
4,China,2009,103940.0


In [22]:
# Reshape to one row per country with one `Influenza<year>` column per year.
# The wide table is built directly from `df` instead of pivoting `df_allinf`
# in place: the previous version overwrote its own input, so running the cell
# a second time raised KeyError because the 'Year' column no longer existed.
# groupby(...).sum().unstack('Year') yields the same table as pivoting the
# long-format totals, and the cell can now be re-run safely.
df_allinf = (
    df.groupby(['Country', 'Year'])['ALL_INF']
      .sum()
      .unstack('Year')            # years become columns, countries stay as rows
      .add_prefix('Influenza')    # 2005 -> 'Influenza2005', ...
      .reset_index()              # 'Country' back to a regular column for the merges
)
df_allinf

Year,Country,Influenza2005,Influenza2006,Influenza2007,Influenza2008,Influenza2009,Influenza2010,Influenza2011,Influenza2012,Influenza2013,Influenza2014,Influenza2015,Influenza2016,Influenza2017,Influenza2018,Influenza2019
0,China,8239.0,7059.0,9924.0,8021.0,103940.0,44166.0,21515.0,44371.0,33004.0,71665.0,70222.0,87677.0,99072.0,80297.0,122757.0
1,France,1436.0,800.0,1082.0,1860.0,27033.0,2003.0,6649.0,7762.0,21288.0,11852.0,29021.0,26721.0,15657.0,21610.0,25405.0
2,Germany,1164.0,650.0,1319.0,656.0,3367.0,189.0,1771.0,716.0,1799.0,418.0,1770.0,1510.0,1478.0,2334.0,1215.0
3,Iran (Islamic Republic of),22.0,77.0,112.0,73.0,4698.0,567.0,2002.0,593.0,415.0,659.0,3448.0,1506.0,628.0,1430.0,7387.0
4,Italy,878.0,165.0,347.0,210.0,10249.0,305.0,2856.0,1243.0,1589.0,808.0,3618.0,3231.0,2796.0,4382.0,6361.0
5,Japan,6556.0,4631.0,6304.0,4386.0,35376.0,6032.0,10552.0,7168.0,6556.0,9556.0,4499.0,9174.0,10096.0,8862.0,9525.0
6,Republic of Korea,1372.0,1323.0,2755.0,6116.0,8369.0,3523.0,1211.0,3550.0,1773.0,2011.0,1626.0,1752.0,1304.0,1952.0,1702.0
7,Singapore,0.0,0.0,326.0,1525.0,6322.0,3499.0,1129.0,982.0,757.0,998.0,778.0,1176.0,998.0,1106.0,1154.0
8,United States of America,24042.0,17587.0,23596.0,41109.0,173882.0,15253.0,47282.0,52079.0,63341.0,88511.0,83112.0,114753.0,214333.0,267611.0,267384.0


# Part II: Merge with CountryNodes csv

In [25]:
# Folder holding the CountryNodes files. The export cells further down build
# their file names as `path + <file name>`, so `path` must end with a separator.
# The folder can be overridden through the MIGRATION_DIR environment variable so
# the notebook also runs on other machines; the original location stays the default.
# os.path.join(<dir>, "") appends a trailing separator only when one is missing.
path = os.path.join(
    os.environ.get("MIGRATION_DIR", "C:/Users/Winnie/Documents/NeilJohnson/Migration/"),
    "",
)
df_cn = pd.read_csv(path + 'CountryNodes_IncreaseRate.csv')
df_cn.head(3)

Unnamed: 0,Country,Region
0,Australia,Australia New Zealand
1,New Zealand,Australia New Zealand
2,Anguilla,Caribbean


In [27]:
# Left join: every row of df_cn is kept and the yearly influenza totals are
# attached where the 'Country' values match.
# NOTE(review): fillna(0) runs on the whole merged frame, so a missing value in
# any column (not only the Influenza* ones) becomes 0 — confirm this is intended.
df_merge = df_cn.merge(
    df_allinf,
    how='left',
    left_on='Country',
    right_on='Country',
).fillna(0)
df_merge

Unnamed: 0,Country,Region,Influenza2005,Influenza2006,Influenza2007,Influenza2008,Influenza2009,Influenza2010,Influenza2011,Influenza2012,Influenza2013,Influenza2014,Influenza2015,Influenza2016,Influenza2017,Influenza2018,Influenza2019
0,Australia,Australia New Zealand,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,New Zealand,Australia New Zealand,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Anguilla,Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Antigua and Barbuda,Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Aruba,Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bahamas,Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Barbados,Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,British Virgin Islands,Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Bonaire, Sint Eustatius and Saba",Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Cayman Islands,Caribbean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# Show the dtype of each column in the merged frame
df_merge.dtypes

Country           object
Region            object
Influenza2005    float64
Influenza2006    float64
Influenza2007    float64
Influenza2008    float64
Influenza2009    float64
Influenza2010    float64
Influenza2011    float64
Influenza2012    float64
Influenza2013    float64
Influenza2014    float64
Influenza2015    float64
Influenza2016    float64
Influenza2017    float64
Influenza2018    float64
Influenza2019    float64
dtype: object

## Export CSV files

In [31]:
# all countries: write the left-joined table without the row index
all_countries_file = path + 'CountryNodes_InfluenzaData.csv'
df_merge.to_csv(all_countries_file, index=False)

In [35]:
# only selective countries: a right join keeps every row of df_allinf, i.e.
# only the countries that have influenza totals
df_merge_selective = df_cn.merge(
    df_allinf,
    how='right',
    left_on='Country',
    right_on='Country',
).fillna(0)
selective_file = path + 'CountryNodes_InfluenzaSelectiveCountries.csv'
df_merge_selective.to_csv(selective_file, index=False)

In [None]:
# only selective countries edgelist csv
