In [212]:
import pandas as pd 
# Load the merged earthquake catalogue; the path is relative to the notebook.
earthquakes = pd.read_csv(
    "earthquake_merged2.csv",
)

In [213]:
# Restrict the catalogue to events whose "Year" is later than 1999.
earthquakes = earthquakes.loc[earthquakes["Year"].gt(1999)]

In [214]:
# Number of catalogue rows per country, most frequent first.
country_counts = (
    earthquakes
    .groupby("Country Name")
    .size()
    .reset_index(name="Count")
    .sort_values(by="Count", ascending=False)
)
country_counts

Unnamed: 0,Country Name,Count
17,CHINA,155
46,INDONESIA,140
47,IRAN,86
49,JAPAN,80
103,UNITED STATES,58
...,...,...
38,GULF OF MEXICO,1
51,KAZAKHSTAN,1
50,JORDAN,1
86,SLOVENIA,1


In [215]:
# Mean magnitude per country, as a two-column frame ready to join.
avg_magnitude = (
    earthquakes.groupby("Country Name")["Mag"]
    .mean()
    .reset_index(name="Average Magnitude")
)

# Attach the mean magnitude to the per-country counts (left join keeps every
# row of country_counts). Any "Average Magnitude" column left behind by an
# earlier run of this cell is dropped first: without that, re-running the cell
# would merge a second copy and pandas would rename the pair to
# "Average Magnitude_x" / "Average Magnitude_y".
country_counts = country_counts.drop(
    columns="Average Magnitude", errors="ignore"
).merge(avg_magnitude, on="Country Name", how="left")

In [216]:
file_path = "historicalCPI.xlsx"

# December CPI used when the sheet has no December value for 2024 yet.
# NOTE(review): 315.664 is taken from the original cell; confirm it against the
# published figure before relying on the adjusted totals.
CPI_DEC_2024 = 315.664

# Skip the first 10 rows of the sheet. The first row that remains after that
# carries the real column names (including "Year" and "Dec"), so promote it to
# the header and drop it from the data.
cpi_data = pd.read_excel(file_path, skiprows=10)
cpi_data.columns = cpi_data.iloc[0]
df = cpi_data[1:].reset_index(drop=True)

# Only the December reading of each year is used for the inflation adjustment.
# .copy() makes this an independent frame so the assignments below are safe.
df = df[["Year", "Dec"]].copy()
df.columns = ["Year", "CPI"]

# Give both columns real numeric dtypes instead of leaving them as generic
# objects; this avoids the pandas downcasting FutureWarning and keeps the later
# merge/division on numeric data. Non-numeric CPI cells become NaN.
df["Year"] = df["Year"].astype(int)
df["CPI"] = pd.to_numeric(df["CPI"], errors="coerce")

# Only 2000 and later.
df = df[df["Year"] >= 2000].copy()

# Fill in the missing 2024 value. The fallback is used only where the cell is
# empty; an existing December 2024 reading is left untouched (the previous
# `fillna(0) + 315.664` would have added the constant on top of it).
is_2024 = df["Year"] == 2024
df.loc[is_2024, "CPI"] = df.loc[is_2024, "CPI"].fillna(CPI_DEC_2024)



Workbook contains no default style, apply openpyxl's default


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



In [217]:
# Summarise the catalogue per (country, year): number of rows, summed damage,
# mean magnitude, mean focal depth, and the count of non-null "Tsu" values.
bubble_data = (
    earthquakes
    .groupby(["Country Name", "Year"])
    .agg(
        Earthquake_Count=("Country Name", "count"),
        Total_Damage_Mil=("Total Damage ($Mil)", "sum"),
        Average_Magnitude=("Mag", "mean"),
        Focal_Depth=("Focal Depth (km)", "mean"),
        tsunami=("Tsu", "count"),
    )
    .reset_index()
)

# Keep only country-years with a positive damage total, then round the mean
# magnitude to two decimals for display.
bubble_data = bubble_data.loc[bubble_data["Total_Damage_Mil"].gt(0)]
bubble_data = bubble_data.assign(
    Average_Magnitude=bubble_data["Average_Magnitude"].round(2)
)

In [218]:
# Convert each country-year damage total to 2024 dollars using the CPI table.

# Numeric view of the CPI table. Values read from the spreadsheet can be held
# as generic objects; dividing by such a column yields an object result, and
# the later .fillna(0) then depends on pandas' deprecated silent downcasting.
# Coercing here keeps every step below on float data.
cpi_by_year = df.assign(
    Year=pd.to_numeric(df["Year"]),
    CPI=pd.to_numeric(df["CPI"], errors="coerce"),
)

# Left join: every country-year row is kept, with that year's CPI attached.
adjusted_bubble_data = bubble_data.merge(cpi_by_year, on="Year", how="left")

# Reference CPI (2024) that all other years are scaled to.
current_cpi = cpi_by_year.loc[cpi_by_year["Year"] == 2024, "CPI"].iloc[0]

# damage_2024 = damage * (CPI_2024 / CPI_year), rounded to two decimals.
# NOTE(review): rows whose year has no CPI value end up with an adjusted
# damage of 0 (via fillna), exactly as before; confirm that is intended.
adjusted_bubble_data["Inflation_Adjusted_Damage_Mil"] = (
    (adjusted_bubble_data["Total_Damage_Mil"] * (current_cpi / adjusted_bubble_data["CPI"]))
    .fillna(0)
    .round(2)
)

# Filter out Hawaii from the dataset.
adjusted_bubble_data = adjusted_bubble_data[adjusted_bubble_data["Country Name"] != "HAWAII"]

# View data for JAPAN to verify.
adjusted_bubble_data[adjusted_bubble_data["Country Name"] == "JAPAN"]





Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



Unnamed: 0,Country Name,Year,Earthquake_Count,Total_Damage_Mil,Average_Magnitude,Focal_Depth,tsunami,CPI,Inflation_Adjusted_Damage_Mil
81,JAPAN,2000,5,150.0,6.6,33.4,3,174.0,272.12
82,JAPAN,2001,1,500.0,6.8,50.0,0,176.7,893.22
83,JAPAN,2003,6,734.0,6.45,40.833333,2,184.3,1257.18
84,JAPAN,2004,8,28000.0,6.52,18.5,3,190.3,46445.57
85,JAPAN,2007,4,12500.0,6.35,73.5,2,210.036,18786.3
86,JAPAN,2011,8,220136.6,7.39,26.75,5,225.672,307921.23
87,JAPAN,2014,3,2.0,6.33,36.0,1,234.812,2.69
88,JAPAN,2016,4,20100.0,6.52,8.75,2,241.432,26280.06
89,JAPAN,2018,4,9000.0,5.8,16.25,1,251.233,11308.13
90,JAPAN,2021,2,8250.0,7.05,46.0,1,278.802,9340.78


In [235]:
import plotly.express as px

# Collapse the country-year rows to one row per country for the treemap.
# NOTE(review): Average_Magnitude / Average_Focal_Depth here are plain means of
# the yearly means, i.e. not weighted by how many earthquakes each year had.
damage_data = adjusted_bubble_data.groupby("Country Name").agg(
    Total_Earthquake_Count=("Earthquake_Count", "sum"),
    Total_Damages_Mil=("Inflation_Adjusted_Damage_Mil", "sum"),
    Average_Magnitude=("Average_Magnitude", "mean"),
    Average_Focal_Depth=("Focal_Depth", "mean"),
    Total_Tsunamis=("tsunami", "sum"),
).reset_index()

# Treemap: box size = earthquake count, colour = inflation-adjusted damages.
# NOTE(review): "Occurences" in the title is misspelt ("Occurrences"); the
# string is left unchanged here.
fig = px.treemap(
    damage_data,
    path=["Country Name"],
    values="Total_Earthquake_Count",
    color="Total_Damages_Mil",
    title="Total Earthquake Occurences and Economic Damage Across Countries (2000–2024)",
    hover_data={
        "Total_Earthquake_Count": True,
        "Average_Magnitude": True,
        "Total_Damages_Mil": True,
        "Average_Focal_Depth": True,
        "Total_Tsunamis": True,
    },
    color_continuous_scale="Viridis",
)

# Build the hover payload in the order of the trace's own labels rather than in
# damage_data's row order, so each box is guaranteed to show its own country's
# numbers regardless of how plotly orders the nodes internally.
hover_columns = ["Average_Magnitude", "Total_Damages_Mil", "Average_Focal_Depth", "Total_Tsunamis"]
treemap_labels = list(fig.data[0].labels)
hover_values = (
    damage_data.set_index("Country Name")
    .loc[treemap_labels, hover_columns]
    .to_numpy()
)

# Custom hover text; customdata indices follow hover_columns above.
fig.update_traces(
    customdata=hover_values,
    hovertemplate="<b>%{label}</b><br>Earthquake Count: %{value}<br>Total Damage in Dollars: $%{customdata[1]:,.2f}M<br>"
                  "Average Magnitude: %{customdata[0]:.2f}<br>Average Focal Depth: %{customdata[2]:.2f} km<br>"
                  "Tsunamis: %{customdata[3]:.0f}<extra></extra>"
)

# Colour bar title and tick format.
fig.update_layout(
    coloraxis_colorbar=dict(
        title="Economic Damage ($Mil)",
        tickformat=".2f"
    )
)

# Footnote stating that damages are expressed in 2024 dollars.
fig.add_annotation(
    xref="paper", yref="paper",
    x=1.0, y=-.05,
    text="*total damage ($M) is adjusted to 2024 dollar value",
    showarrow=False,
    font=dict(size=8),
    align="left"
)

# Show the treemap
fig.show()


This treemap summarizes earthquake occurrences and economic impacts by country from 2000 to 2024, with damages adjusted to 2024 dollars. Box size represents the number of earthquakes, while color intensity reflects total damages, which are influenced by factors such as frequency, magnitude, and economic vulnerability. Japan leads with about $423B in damages, driven by high-magnitude events and costly impacts on infrastructure, the economy, and personal assets. China and Indonesia also face significant damages due to frequent seismic activity. Smaller nations, like Haiti and Samoa, show that even a few earthquakes can have devastating localized effects. The data underscores the importance of tailored disaster preparedness in high-risk regions.

In [234]:
import plotly.express as px

# Large qualitative palette built by concatenating three plotly palettes, so
# that many countries can be coloured before the sequence repeats.
qualitative_palette = (
    px.colors.qualitative.Plotly
    + px.colors.qualitative.D3
    + px.colors.qualitative.Set3
)

# One row per (year, country) with the columns the chart and hover text need.
stacked_damage_data = adjusted_bubble_data.groupby(["Year", "Country Name"]).agg(
    Total_Damage_Mil=("Inflation_Adjusted_Damage_Mil", "sum"),
    Average_Magnitude=("Average_Magnitude", "mean"),
    Tsunami_Count=("tsunami", "sum"),
    Earthquake_Count=("Earthquake_Count", "sum"),
    Average_Focal_Depth=("Focal_Depth", "mean"),
).reset_index()

# Each country's share (in percent) of that year's total damage. Computed once;
# the original cell repeated this statement twice.
yearly_total = stacked_damage_data.groupby("Year")["Total_Damage_Mil"].transform("sum")
stacked_damage_data["Percent_Total_Damage_Year"] = (
    stacked_damage_data["Total_Damage_Mil"] / yearly_total * 100
)

# Columns exposed to the hover template, in customdata index order:
# [0] country, [1] avg magnitude, [2] tsunami count, [3] earthquake count,
# [4] avg focal depth, [5] percent of the year's total damage.
hover_columns = [
    "Country Name", "Average_Magnitude", "Tsunami_Count", "Earthquake_Count",
    "Average_Focal_Depth", "Percent_Total_Damage_Year",
]

# Stacked bar chart: one trace per country. Passing custom_data here lets
# plotly attach the matching rows to each trace itself, instead of re-filtering
# the frame by trace name afterwards.
fig = px.bar(
    stacked_damage_data,
    x="Year",
    y="Total_Damage_Mil",
    color="Country Name",
    custom_data=hover_columns,
    title="Earthquake Economic Damage Over Time by Country (2000–2024)",
    labels={
        "Total_Damage_Mil": "Economic Damages ($Mil)",
        "Year": "Year",
        "Average_Magnitude": "Average Magnitude",
        "Tsunami_Count": "Tsunami Count",
        "Earthquake_Count": "Earthquake Count",
        "Average_Focal_Depth": "Focal Depth (km)"
    },
    color_discrete_sequence=qualitative_palette
)

# Thin black outline on every bar segment plus the custom hover text.
fig.update_traces(
    marker_line_width=0.5,
    marker_line_color="black",
    hovertemplate="<b>Country:</b> %{customdata[0]}<br>"
                  "<b>Year:</b> %{x}<br>"
                  "<b>Earthquake Count:</b> %{customdata[3]:.0f}<br>"
                  "<b>Total Damage:</b> $%{y:,.2f}M<br>"
                  "<b>Percentage of Yearly Total:</b> %{customdata[5]:.2f}%<br>"
                  "<b>Average Magnitude:</b> %{customdata[1]:.2f}<br>"
                  "<b>Average Focal Depth:</b> %{customdata[4]:.2f}<br>"
                  "<b>Tsunami Count:</b> %{customdata[2]:.0f}<extra></extra>"
)

# Axis/legend titles and stacking mode.
fig.update_layout(
    xaxis_title="Year",
    yaxis_title="Economic Damage ($Mil)",
    legend_title="Country",
    barmode="stack",
    showlegend=True
)

# Footnote stating that damages are expressed in 2024 dollars.
fig.add_annotation(
    xref="paper", yref="paper",
    x=1.0, y=-.28,
    text="*total damage ($M) is adjusted to 2024 dollar value",
    showarrow=False,
    font=dict(size=8),
    align="left"
)

fig.show()


This graph shows earthquake damages by country from 2000 to 2024, adjusted to 2024 dollars. Japan dominates because of the 2011 Great East Japan Earthquake, the costliest natural disaster in history, which caused over $300 billion in damages, a devastating tsunami, and the Fukushima nuclear disaster (Daniell, James, 2011). Countries like Indonesia and China also show significant impacts from recurring earthquakes. The graph underscores the global economic toll of earthquakes, where both large-scale disasters and more localized events, such as those in Haiti and Italy, collectively contribute to substantial losses over time.

Economic damage includes factors such as 

distribution of magnitude and focal depth