In [1]:
import pandas as pd
import plotly.express as px
import joblib
import shap
import numpy as np
import matplotlib.pyplot as plt

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


# Create map data to show mortality geographically over years

In [2]:
# Load the per-country records and keep only the rows from 2003 onwards.
top_countries_valid_death = pd.read_csv('../dash_app/' + 'country_focus' + '.csv')
top_countries_valid_death = top_countries_valid_death[top_countries_valid_death['Year'] >= 2003]

# Keep just the columns the map needs. The rename is done on the returned copy
# (no `inplace=True`): renaming in place on a frame derived from a selection is
# what raised pandas' "A value is trying to be set on a copy of a slice" warning.
table1 = top_countries_valid_death[['ISO', 'Year', 'Total Deaths']].rename(columns={'ISO': 'CODE'})

# Get unique country codes
unique_codes = table1['CODE'].unique()

# Years 2003 through 2022 inclusive (the upper bound of range() is exclusive)
all_years = list(range(2003, 2023))

# Every (country code, year) pair, so that each country gets a row for every year
all_combinations = [(code, year) for code in unique_codes for year in all_years]

dummy_df = pd.DataFrame(all_combinations, columns=['CODE', 'Year'])

# Left-merge onto the full grid: pairs with no record in table1 come back as NaN
# and are then filled with 0.
result_df = pd.merge(dummy_df, table1, on=['CODE', 'Year'], how='left')
result_df = result_df.fillna(0)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [3]:

# Reshape the long (CODE, Year, Total Deaths) table into one row per country
# code with one column per year, then replace any empty cell with 0.
wide_df = (
    result_df
    .pivot_table(values='Total Deaths', index='CODE', columns='Year')
    .reset_index()
    .fillna(0)
)

In [4]:
def get_map_data(file_name, start_year=2003, end_year=2021, aggfunc='mean'):
    """Build a wide country-by-year table of 'Total Deaths' for the map.

    Reads ``'../dash_app/' + file_name + '.csv'``, which must contain the
    columns 'ISO', 'Year' and 'Total Deaths'. Rows with ``Year < start_year``
    are dropped. Every (country code, year) pair in
    ``start_year..end_year`` (inclusive) is then materialised, with 0 for pairs
    that have no record, and the result is pivoted to one row per code and one
    column per year.

    Parameters
    ----------
    file_name : str
        CSV base name (no directory, no extension) under ``../dash_app/``.
    start_year : int, default 2003
        First year kept from the input and first year column produced.
    end_year : int, default 2021
        Last year column produced (inclusive). Records after this year are
        dropped from the wide table but still appear in the second return value.
        NOTE(review): the original comment said "2003 to 2022" while the code
        generated 2003-2021; the default keeps the existing 2021 output. Pass
        ``end_year=2022`` if 2022 was intended.
    aggfunc : str or callable, default 'mean'
        Aggregation applied by ``pivot_table`` when a code/year pair has several
        rows. 'mean' is the pandas default and therefore the previous behaviour.
        NOTE(review): for a "Total Deaths" column 'sum' may be what is wanted -
        confirm with the data owner.

    Returns
    -------
    (wide_df, top_countries_valid_death)
        ``wide_df`` has a 'CODE' column followed by one column per year.
        ``top_countries_valid_death`` is the year-filtered input frame with its
        original column names (including 'ISO').
    """
    records = pd.read_csv('../dash_app/' + file_name + '.csv')
    top_countries_valid_death = records[records['Year'] >= start_year]

    # Rename on the returned copy instead of `inplace=True`: the in-place rename
    # on a frame derived from a selection triggered SettingWithCopyWarning.
    table1 = top_countries_valid_death[['ISO', 'Year', 'Total Deaths']].rename(
        columns={'ISO': 'CODE'}
    )

    unique_codes = table1['CODE'].unique()

    # range() excludes its upper bound, hence the + 1 to make end_year inclusive.
    all_years = range(start_year, end_year + 1)

    # Full grid of (code, year) pairs so that every country has every year.
    all_combinations = [(code, year) for code in unique_codes for year in all_years]
    dummy_df = pd.DataFrame(all_combinations, columns=['CODE', 'Year'])

    # Left-merge onto the grid; pairs without a record become NaN, then 0.
    result_df = pd.merge(dummy_df, table1, on=['CODE', 'Year'], how='left')
    result_df = result_df.fillna(0)

    wide_df = result_df.pivot_table(
        index='CODE', columns='Year', values='Total Deaths', aggfunc=aggfunc
    ).reset_index()
    wide_df = wide_df.fillna(0)

    return wide_df, top_countries_valid_death

In [5]:
# Build the wide country-by-year table for the 'country_focus' dataset, keep the
# year-filtered long-format records next to it, and preview the first rows.
map_data, top_countries_valid_death = get_map_data(file_name='country_focus')
map_data.head()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Year,CODE,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,ALB,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,AUT,345.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,BEL,1175.0,0.0,0.0,940.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,410.0,0.0,0.0,0.0,238.666667,1687.0,0.0
3,BGR,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,DEU,9355.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0


In [6]:
#map_data.to_csv('../dash_app/table1.csv', index=None)