In [96]:
import pandas as pd
import re
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt



In [97]:
# Load the raw natural-disasters table.
disasters_df = pd.read_csv("./input_data/natural-disasters.csv")

# Columns to keep: any column whose name contains one of these keywords
# (matched case-insensitively), i.e. the country/year identifiers plus every
# earthquake metric.
keywords = ['country', 'year', 'earthquake']
regex_pattern = re.compile('|'.join(keywords), flags=re.IGNORECASE)

# Entries of 'Country name' that are aggregates (continents, income groups,
# the world total) rather than individual countries; they are excluded so
# they are not counted alongside the countries they contain.
words_to_delete = ['Africa', 'Asia', 'Europe', 'European Union (27)', 'High-income countries', 'Lower-middle-income countries', 'Low-income countries', 'North America', 'Oceania', 'South America', 'Upper-middle-income countries' , 'World' ]

earthquake_columns = disasters_df.filter(regex=regex_pattern, axis=1)

# Row filters: only years from 2010 onwards, and no aggregate entries.
is_recent = earthquake_columns["Year"] >= 2010
is_aggregate = earthquake_columns['Country name'].isin(words_to_delete)

# Build the final frame in a single non-mutating chain: no intermediate
# reassignment of `earthquake_data` and no in-place rename on a filtered
# subset, so re-running this cell always starts from `disasters_df`.
earthquake_data = (
    earthquake_columns
    .loc[is_recent & ~is_aggregate]
    .rename(columns={'Country name': 'Country'})
)

earthquake_data

Unnamed: 0,Country,Year,Number of deaths from earthquakes,Number of people injured from earthquakes,Number of people affected by earthquakes,Number of people left homeless from earthquakes,Number of total people affected by earthquakes,Reconstruction costs from earthquakes,Insured damages against earthquakes,Total economic damages from earthquakes,Death rates from earthquakes,Injury rates from earthquakes,"Number of people affected by earthquakes per 100,000",Homelessness rate from earthquakes,"Total number of people affected by earthquakes per 100,000",Total economic damages from earthquakes as a share of GDP
34,Afghanistan,2010,11.0,70.0,0.0,1000.0,1070.0,0.0,0.0,0.0,0.039021,0.248318,0.000000,3.547399,3.795716,0.0
35,Afghanistan,2011,,,,,,,,,,,,,,
36,Afghanistan,2012,73.0,2.0,0.0,235.0,237.0,0.0,0.0,0.0,0.239608,0.006565,0.000000,0.771339,0.777904,0.0
37,Afghanistan,2013,18.0,141.0,3390.0,0.0,3531.0,0.0,0.0,0.0,0.057068,0.447034,10.747842,0.000000,11.194876,0.0
38,Afghanistan,2014,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7076,Zimbabwe,2017,,,,,,,,,,,,,,
7077,Zimbabwe,2019,,,,,,,,,,,,,,
7078,Zimbabwe,2021,,,,,,,,,,,,,,
7079,Zimbabwe,2022,,,,,,,,,,,,,,


In [98]:
# Total earthquake deaths per country: the per-year death counts of each
# country are added together, giving one row per country.
# NOTE: pandas' sum skips missing values, so a country whose yearly figures
# are all missing ends up with 0.0 here rather than a missing value.
deaths_by_country = earthquake_data.groupby('Country')['Number of deaths from earthquakes']
total_deaths_per_country = deaths_by_country.sum().reset_index()

total_deaths_per_country

Unnamed: 0,Country,Number of deaths from earthquakes
0,Afghanistan,1286.0
1,Albania,51.0
2,Algeria,6.0
3,Angola,0.0
4,Anguilla,0.0
...,...,...
200,Vietnam,0.0
201,Wallis and Futuna,0.0
202,Yemen,0.0
203,Zambia,0.0


In [100]:
# World map of total earthquake deaths per country.
#
# Countries with at least one death are coloured on a continuous red scale;
# countries with a total of 0 are drawn in flat gray by a second trace.
# The previous attempt to recolour the zero-death countries used
# `update_traces(..., selector=dict(type='choropleth', color=0))`, which
# matched no trace (a choropleth trace has no top-level `color` property),
# so those countries silently received the lowest colour of the red scale.
deaths_column = 'Number of deaths from earthquakes'

has_deaths = total_deaths_per_country[deaths_column] > 0
countries_with_deaths = total_deaths_per_country[has_deaths]
countries_without_deaths = total_deaths_per_country[~has_deaths]

fig = px.choropleth(
    countries_with_deaths,
    locations='Country',
    locationmode='country names',
    color=deaths_column,
    hover_name='Country',
    title='Choropleth Map of Total Deaths from Earthquakes by Country',
    color_continuous_scale='reds',  # You can choose a different color scale
    range_color=(1, total_deaths_per_country[deaths_column].max()),  # Start color scale from 1
    projection='natural earth',  # Choose the map projection
)

# Zero-death countries: a separate trace with a constant gray colour scale
# and thin white borders, kept out of the shared colour bar.
fig.add_choropleth(
    locations=countries_without_deaths['Country'],
    locationmode='country names',
    z=[0] * len(countries_without_deaths),
    zmin=0,
    zmax=1,
    colorscale=[[0, 'gray'], [1, 'gray']],
    showscale=False,
    marker=dict(line=dict(color='white', width=0.5)),
    hovertemplate='<b>%{location}</b><br>Number of deaths from earthquakes=0<extra></extra>',
)

# Centre the map on the prime meridian.
fig.update_geos(
    center=dict(lon=0),
    projection_rotation=dict(lon=0),
)

fig.update_layout(
    autosize=False,
    width=1000,  # Set the width of the figure
    height=600,  # Set the height of the figure
    margin=dict(l=0, r=0, b=0, t=50),
    coloraxis_colorbar=dict(title='Total Number of Deaths'),
)

fig.show()

In [94]:
deaths_column = 'Number of deaths from earthquakes'

# Total deaths per country, reduced to the ten countries with the highest
# totals (sorted from most to fewest deaths).
top_countries_deaths = (
    earthquake_data
    .groupby('Country')[deaths_column]
    .sum()
    .reset_index()
    .sort_values(by=deaths_column, ascending=False)
    .head(10)
)

# Fixed colour per country so several charts can share the same colours.
# The palette must have at least as many colours as charted countries:
# the previously used Set1 palette has only 9 entries, so zipping it with
# 10 countries silently left the last country without a mapped colour.
# D3 provides 10 colours; the modulo keeps the mapping complete even if the
# number of countries is later increased beyond the palette length.
palette = px.colors.qualitative.D3
country_color_mapping = {
    country: palette[position % len(palette)]
    for position, country in enumerate(top_countries_deaths['Country'])
}

# Bar chart: one bar per country, height = total deaths.
fig_deaths = px.bar(
    top_countries_deaths,
    x='Country',
    y=deaths_column,
    color='Country',  # Use color to differentiate countries
    color_discrete_map=country_color_mapping,  # Use consistent colors
    title='Top 10 Countries with the Most Deaths from Earthquakes',
    labels={deaths_column: 'Total Deaths'},
)

fig_deaths.update_layout(
    xaxis_title='Country',
    yaxis_title='Total Deaths',
    legend_title='Country',
)

fig_deaths.show()

# Scatter plot: total deaths vs. average economic damages (share of GDP)
# for the top-10 countries by deaths.
#
# `top_10_countries` was not defined anywhere in the notebook, so this cell
# raised a NameError on a fresh kernel. It is built here from the top-10
# table plus each country's mean damages-as-share-of-GDP.
# NOTE(review): the aggregation (sum of deaths, mean of damages share) is
# reconstructed from the chart title and axis labels -- confirm it matches
# the originally intended data.
damages_column = 'Total economic damages from earthquakes as a share of GDP'

average_damages = (
    earthquake_data
    .groupby('Country')[damages_column]
    .mean()
    .reset_index()
)

# Left merge keeps exactly the top-10 countries, in their existing order.
top_10_countries = top_countries_deaths.merge(average_damages, on='Country', how='left')

fig_relation = px.scatter(
    top_10_countries,
    x='Number of deaths from earthquakes',
    y=damages_column,
    color='Country',  # Use color to differentiate countries
    color_discrete_map=country_color_mapping,  # Use consistent colors
    title='Relation between Deaths and Average Economic Damages in Top 10 Countries',
)

fig_relation.update_layout(
    xaxis_title='Total Deaths',
    yaxis_title='Average Economic Damages as a Share of GDP',
)

fig_relation.show()