In [1]:
import pandas as pd
import re
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import geopandas as gpd
import plotly.subplots as sp
import plotly.graph_objects as go



In [2]:
# Load the raw natural-disasters table from disk.
disasters_df = pd.read_csv("./input_data/natural-disasters.csv")

# Case-insensitive pattern: keep any column whose name mentions one of these keywords.
keywords = ['country', 'year', 'earthquake']
regex_pattern = re.compile('|'.join(keywords), flags=re.IGNORECASE)

# Labels in 'Country name' that denote regions / income groups / the world
# rather than individual countries; rows carrying them are dropped below.
words_to_delete = ['Africa', 'Asia', 'Europe', 'European Union (27)', 'High-income countries', 'Lower-middle-income countries', 'Low-income countries', 'North America', 'Oceania', 'South America', 'Upper-middle-income countries' , 'World' ]

# Column subset first, then build both row conditions against that same frame.
earthquake_columns = disasters_df.filter(regex=regex_pattern, axis=1)
is_recent = earthquake_columns["Year"] >= 2010
is_aggregate = earthquake_columns['Country name'].isin(words_to_delete)

# Keep recent, country-level rows and expose the name column as 'Country'.
earthquake_data = (
    earthquake_columns
    .loc[is_recent & ~is_aggregate]
    .rename(columns={'Country name': 'Country'})
)

earthquake_data

Unnamed: 0,Country,Year,Number of deaths from earthquakes,Number of people injured from earthquakes,Number of people affected by earthquakes,Number of people left homeless from earthquakes,Number of total people affected by earthquakes,Reconstruction costs from earthquakes,Insured damages against earthquakes,Total economic damages from earthquakes,Death rates from earthquakes,Injury rates from earthquakes,"Number of people affected by earthquakes per 100,000",Homelessness rate from earthquakes,"Total number of people affected by earthquakes per 100,000",Total economic damages from earthquakes as a share of GDP
34,Afghanistan,2010,11.0,70.0,0.0,1000.0,1070.0,0.0,0.0,0.0,0.039021,0.248318,0.000000,3.547399,3.795716,0.0
35,Afghanistan,2011,,,,,,,,,,,,,,
36,Afghanistan,2012,73.0,2.0,0.0,235.0,237.0,0.0,0.0,0.0,0.239608,0.006565,0.000000,0.771339,0.777904,0.0
37,Afghanistan,2013,18.0,141.0,3390.0,0.0,3531.0,0.0,0.0,0.0,0.057068,0.447034,10.747842,0.000000,11.194876,0.0
38,Afghanistan,2014,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7076,Zimbabwe,2017,,,,,,,,,,,,,,
7077,Zimbabwe,2019,,,,,,,,,,,,,,
7078,Zimbabwe,2021,,,,,,,,,,,,,,
7079,Zimbabwe,2022,,,,,,,,,,,,,,


In [3]:
# Total earthquake deaths per country across all retained years.
# sum() skips missing values, so a country with only NaN entries totals 0.0.
deaths_by_country = earthquake_data.groupby('Country')['Number of deaths from earthquakes']
total_deaths_per_country = deaths_by_country.sum().reset_index()

total_deaths_per_country

Unnamed: 0,Country,Number of deaths from earthquakes
0,Afghanistan,1286.0
1,Albania,51.0
2,Algeria,6.0
3,Angola,0.0
4,Anguilla,0.0
...,...,...
200,Vietnam,0.0
201,Wallis and Futuna,0.0
202,Yemen,0.0
203,Zambia,0.0


In [4]:
# World map of total earthquake deaths per country.
# Countries with at least one death are shaded on a red scale; countries whose
# total is 0 are drawn as a separate flat-gray layer.
#
# The previous approach tried to recolor zero-death countries with
# update_traces(selector=dict(type='choropleth', color=0)), but a selector is
# compared against trace-level properties and choropleth traces have no
# top-level `color` property, so it matched nothing and the gray styling was
# never applied.
deaths_col = 'Number of deaths from earthquakes'
has_deaths = total_deaths_per_country[deaths_col] > 0
countries_with_deaths = total_deaths_per_country[has_deaths]
countries_without_deaths = total_deaths_per_country[~has_deaths]

fig = px.choropleth(
    countries_with_deaths,
    locations='Country',
    locationmode='country names',
    color=deaths_col,
    hover_name='Country',
    title='Choropleth Map of Total Deaths from Earthquakes by Country',
    color_continuous_scale='reds',  # You can choose a different color scale
    range_color=(1, total_deaths_per_country[deaths_col].max()),  # Start color scale from 1
    projection='natural earth',  # Choose the map projection
)

# Zero-death countries: constant gray fill, thin white borders, no colorbar.
fig.add_trace(
    go.Choropleth(
        locations=countries_without_deaths['Country'],
        locationmode='country names',
        z=[0] * len(countries_without_deaths),
        zmin=0,
        zmax=1,
        colorscale=[[0, 'gray'], [1, 'gray']],
        showscale=False,
        marker_line_color='white',
        marker_line_width=0.5,
        hovertext=countries_without_deaths['Country'],
        hoverinfo='text',
    )
)

fig.update_geos(
    center=dict(lon=0),
    projection_rotation=dict(lon=0),
)

fig.update_layout(
    autosize=False,
    width=1000,  # Set the width of the figure
    height=600,  # Set the height of the figure
    margin=dict(l=0, r=0, b=0, t=50),
    coloraxis_colorbar=dict(title='Total Number of Deaths'),
)

fig.show()

In [5]:
# Bar chart of the 10 countries with the highest total earthquake deaths.
deaths_col = 'Number of deaths from earthquakes'

# Sum deaths per country, then keep the 10 largest totals in descending order.
top_countries_deaths = (
    earthquake_data.groupby('Country')[deaths_col].sum().reset_index()
    .sort_values(by=deaths_col, ascending=False)
    .head(10)
)

# One explicit color per country. Set1 alone has only 9 colors, so zipping it
# against 10 countries silently left the last country out of the mapping;
# extending it with Set2 keeps the first 9 colors unchanged and covers the 10th.
bar_palette = px.colors.qualitative.Set1 + px.colors.qualitative.Set2
country_color_mapping = dict(zip(top_countries_deaths['Country'], bar_palette))

# Create a bar chart
fig_deaths = px.bar(
    top_countries_deaths,
    x='Country',
    y=deaths_col,
    color='Country',  # Use color to differentiate countries
    color_discrete_map=country_color_mapping,  # Use consistent colors
    title='Top 10 Countries with the Most Deaths from Earthquakes',
    labels={'Number of deaths from earthquakes': 'Total Deaths'},
)

fig_deaths.update_layout(
    xaxis_title='Country',
    yaxis_title='Total Deaths',
    legend_title='Country',
)

fig_deaths.show()



In [6]:
# Read the world-countries shapefile (presumably a Natural Earth admin-0 layer,
# judging by the file name -- confirm against the data source).
countries_path = Path("./input_data/world_countries/ne_10m_admin_0_countries_lakes.shp")
countries = gpd.read_file(countries_path)

# Keep only the attributes used later and rename NAME so it lines up with the
# 'Country' column of the earthquake table.
kept_columns = ["NAME", "POP_EST", "POP_YEAR", "GDP_MD", "GDP_YEAR", "ECONOMY", "INCOME_GRP", "geometry"]
countries_df = countries[kept_columns].rename(columns={'NAME': 'Country'})

countries_df

Unnamed: 0,Country,POP_EST,POP_YEAR,GDP_MD,GDP_YEAR,ECONOMY,INCOME_GRP,geometry
0,Indonesia,270625568.0,2019,1119190,2019,4. Emerging region: MIKT,4. Lower middle income,"MULTIPOLYGON (((117.70361 4.16341, 117.70361 4..."
1,Malaysia,31949777.0,2019,364681,2019,6. Developing region,3. Upper middle income,"MULTIPOLYGON (((117.70361 4.16341, 117.69711 4..."
2,Chile,18952038.0,2019,282318,2019,5. Emerging region: G20,3. Upper middle income,"MULTIPOLYGON (((-69.51009 -17.50659, -69.50611..."
3,Bolivia,11513100.0,2019,40895,2019,5. Emerging region: G20,4. Lower middle income,"POLYGON ((-69.51009 -17.50659, -69.51009 -17.5..."
4,Peru,32510453.0,2019,226848,2019,5. Emerging region: G20,3. Upper middle income,"MULTIPOLYGON (((-69.51009 -17.50659, -69.63832..."
...,...,...,...,...,...,...,...,...
253,Macao,640445.0,2019,53859,2019,6. Developing region,2. High income: nonOECD,"MULTIPOLYGON (((113.55860 22.16303, 113.56943 ..."
254,Ashmore and Cartier Is.,0.0,2019,0,2019,7. Least developed region,5. Low income,"POLYGON ((123.59702 -12.42832, 123.59775 -12.4..."
255,Bajo Nuevo Bank,0.0,2019,0,2019,7. Least developed region,5. Low income,"POLYGON ((-79.98929 15.79495, -79.98782 15.796..."
256,Serranilla Bank,0.0,2019,0,2012,7. Least developed region,5. Low income,"POLYGON ((-78.63707 15.86209, -78.64041 15.864..."


In [7]:
# Attach country attributes (population, GDP, economy class, geometry) to every
# earthquake row. The inner join keeps only countries whose name is spelled
# identically in both tables.
# NOTE(review): names that differ between the two sources are dropped silently -- verify coverage.
earthquake_data2 = pd.merge(earthquake_data, countries_df, how='inner', on='Country')

earthquake_data2

Unnamed: 0,Country,Year,Number of deaths from earthquakes,Number of people injured from earthquakes,Number of people affected by earthquakes,Number of people left homeless from earthquakes,Number of total people affected by earthquakes,Reconstruction costs from earthquakes,Insured damages against earthquakes,Total economic damages from earthquakes,...,Homelessness rate from earthquakes,"Total number of people affected by earthquakes per 100,000",Total economic damages from earthquakes as a share of GDP,POP_EST,POP_YEAR,GDP_MD,GDP_YEAR,ECONOMY,INCOME_GRP,geometry
0,Afghanistan,2010,11.0,70.0,0.0,1000.0,1070.0,0.0,0.0,0.0,...,3.547399,3.795716,0.0,38041754.0,2019,19291,2019,7. Least developed region,5. Low income,"POLYGON ((74.54235 37.02167, 74.54742 37.01567..."
1,Afghanistan,2011,,,,,,,,,...,,,,38041754.0,2019,19291,2019,7. Least developed region,5. Low income,"POLYGON ((74.54235 37.02167, 74.54742 37.01567..."
2,Afghanistan,2012,73.0,2.0,0.0,235.0,237.0,0.0,0.0,0.0,...,0.771339,0.777904,0.0,38041754.0,2019,19291,2019,7. Least developed region,5. Low income,"POLYGON ((74.54235 37.02167, 74.54742 37.01567..."
3,Afghanistan,2013,18.0,141.0,3390.0,0.0,3531.0,0.0,0.0,0.0,...,0.000000,11.194876,0.0,38041754.0,2019,19291,2019,7. Least developed region,5. Low income,"POLYGON ((74.54235 37.02167, 74.54742 37.01567..."
4,Afghanistan,2014,,,,,,,,,...,,,,38041754.0,2019,19291,2019,7. Least developed region,5. Low income,"POLYGON ((74.54235 37.02167, 74.54742 37.01567..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1479,Zimbabwe,2017,,,,,,,,,...,,,,14645468.0,2019,21440,2019,5. Emerging region: G20,5. Low income,"POLYGON ((25.25978 -17.79411, 25.26671 -17.800..."
1480,Zimbabwe,2019,,,,,,,,,...,,,,14645468.0,2019,21440,2019,5. Emerging region: G20,5. Low income,"POLYGON ((25.25978 -17.79411, 25.26671 -17.800..."
1481,Zimbabwe,2021,,,,,,,,,...,,,,14645468.0,2019,21440,2019,5. Emerging region: G20,5. Low income,"POLYGON ((25.25978 -17.79411, 25.26671 -17.800..."
1482,Zimbabwe,2022,,,,,,,,,...,,,,14645468.0,2019,21440,2019,5. Emerging region: G20,5. Low income,"POLYGON ((25.25978 -17.79411, 25.26671 -17.800..."


In [8]:
# List the distinct ECONOMY labels present after the merge.
economy_labels = earthquake_data2['ECONOMY'].unique()
print(economy_labels)

['7. Least developed region' '6. Developing region'
 '5. Emerging region: G20' '2. Developed region: nonG7'
 '3. Emerging region: BRIC' '1. Developed region: G7'
 '4. Emerging region: MIKT']


In [9]:
# Collapse the seven numbered ECONOMY classes into four plain labels
# (the numeric prefix and the G7/nonG7/BRIC/MIKT/G20 qualifiers are dropped).
economy_label_map = {
    '1. Developed region: G7': 'Developed region',
    '2. Developed region: nonG7': 'Developed region',
    '3. Emerging region: BRIC': 'Emerging region',
    '4. Emerging region: MIKT': 'Emerging region',
    '5. Emerging region: G20': 'Emerging region',
    '6. Developing region': 'Developing region',
    '7. Least developed region': 'Least developed region',
}
earthquake_data2['ECONOMY'] = earthquake_data2['ECONOMY'].replace(economy_label_map)

# Combined "<country> - <economy>" label, used as a legend key in later plots.
country_part = earthquake_data2['Country']
economy_part = earthquake_data2['ECONOMY']
earthquake_data2['Country+Economy'] = country_part + ' - ' + economy_part


In [10]:
# Scatter plot relating total earthquake deaths to average economic damage
# (as a share of GDP) for the 10 countries with the most deaths.

# Per country: total deaths, mean damage share, and the combined
# "<country> - <economy>" label (taken from the first row of each group).
grouped_data = earthquake_data2.groupby('Country').agg({
    'Number of deaths from earthquakes': 'sum',
    'Total economic damages from earthquakes as a share of GDP': 'mean',
    'Country+Economy': 'first',  # Choose the first economy value (assuming it's constant for a country)
}).reset_index()

# Select the top 10 countries based on the highest number of deaths
top_10_countries = grouped_data.nlargest(10, 'Number of deaths from earthquakes')

# Drop countries whose average economic damage share is exactly 0
top_10_countries_filtered = top_10_countries[top_10_countries['Total economic damages from earthquakes as a share of GDP'] != 0]

# One explicit color per "<country> - <economy>" label. Set1 alone has only 9
# colors, so when all 10 countries survive the filter the last one was silently
# left out of the mapping; extending it with Set2 keeps the first 9 colors
# unchanged and covers the 10th.
scatter_palette = px.colors.qualitative.Set1 + px.colors.qualitative.Set2
economic_status_color_mapping = dict(zip(top_10_countries_filtered['Country+Economy'], scatter_palette))

# Same mapping under the second name this cell has always defined.
country_color_mapping = dict(economic_status_color_mapping)

# Create a scatter plot for the top 10 countries with consistent colors
fig_relation = px.scatter(
    top_10_countries_filtered,
    x='Number of deaths from earthquakes',
    y='Total economic damages from earthquakes as a share of GDP',
    color='Country+Economy',  # One color per country, legend entry also shows its economy class
    color_discrete_map=economic_status_color_mapping,  # Use the explicit label -> color mapping
    title='Relation between Deaths and Economic Damages in Top 10 Countries for 2010-2023',
)


# Set the axis titles
fig_relation.update_layout(
    xaxis_title='Number of Deaths from Earthquakes',
    yaxis_title='Average Economic Damages as a Share of GDP'
)

fig_relation.show()

In [11]:
# Three side-by-side bar charts for the 10 countries with the most earthquake
# deaths: total deaths, total people affected, and deaths / people affected.
deaths_col = 'Number of deaths from earthquakes'
affected_col = 'Number of total people affected by earthquakes'
ratio_col = 'Death to Affected Ratio'

# Per-country totals of deaths and of people affected.
grouped_data2 = (
    earthquake_data2
    .groupby('Country')[[deaths_col, affected_col]]
    .sum()
    .reset_index()
)

# Ten countries with the highest death totals.
top_10_countries = grouped_data2.nlargest(10, deaths_col)

# Deaths per affected person; pandas division yields inf/NaN where the
# affected total is 0.
top_10_countries[ratio_col] = top_10_countries[deaths_col] / top_10_countries[affected_col]

# Fail early if any column to be plotted is absent.
value_vars = [deaths_col, affected_col, ratio_col]
missing_var = next((name for name in value_vars if name not in top_10_countries.columns), None)
if missing_var is not None:
    raise ValueError(f"'{missing_var}' is not present in the DataFrame.")

# One subplot per metric, titled with the metric's column name.
fig_combined = sp.make_subplots(rows=1, cols=3, subplot_titles=value_vars)

for col_idx, metric in enumerate(value_vars, start=1):
    metric_bars = go.Bar(
        x=top_10_countries['Country'],
        y=top_10_countries[metric],
        name=metric,
    )
    fig_combined.add_trace(metric_bars, row=1, col=col_idx)

# Overall title; the legend is hidden because the subplot titles already name each metric.
fig_combined.update_layout(
    title_text='Top 10 Countries: People Affected, Deaths, and Death to Affected Ratio from Earthquakes',
    showlegend=False,  # Hide legend to avoid duplicate legends
)