In [62]:
import pandas as pd 
# Read the earthquake CSV into the `earthquakes` frame that the later cells
# filter and aggregate.
EARTHQUAKE_CSV = 'earthquake_merged2.csv'
earthquakes = pd.read_csv(EARTHQUAKE_CSV)

In [63]:
# Keep only the rows whose Year is greater than 1999.
after_1999 = earthquakes["Year"] > 1999
earthquakes = earthquakes.loc[after_1999]

In [64]:
# Number of earthquake rows per country, largest count first.
country_counts = (
    earthquakes
    .groupby("Country Name")
    .size()
    .reset_index(name="Count")
    .sort_values(by="Count", ascending=False)
)

In [65]:
# Mean magnitude ("Mag") per country.
avg_magnitude = (
    earthquakes
    .groupby("Country Name")["Mag"]
    .mean()
    .reset_index(name="Average Magnitude")
)

# Attach the average magnitude to the per-country counts.
# Any "Average Magnitude" column left over from a previous run of this cell is
# dropped first; otherwise re-running the cell would merge the column a second
# time and produce "Average Magnitude_x" / "Average Magnitude_y".
country_counts = (
    country_counts
    .drop(columns="Average Magnitude", errors="ignore")
    .merge(avg_magnitude, on="Country Name", how="left")
)

In [66]:
file_path = "historicalCPI.xlsx"

# CPI value used for 2024 when the workbook has no December figure for it.
# NOTE(review): hard-coded by the notebook author -- confirm it against the
# published BLS figure before relying on the adjusted dollar amounts.
CPI_2024_FALLBACK = 315.664

# The first 10 rows of the sheet are skipped; the real header is then the
# first remaining data row, so it is promoted to column labels by hand.
cpi_data = pd.read_excel(file_path, skiprows=10)
cpi_data.columns = cpi_data.iloc[0]

# Drop the promoted header row and keep only the year and its December CPI,
# which is the value used for the inflation adjustment.
df = cpi_data.iloc[1:][["Year", "Dec"]].reset_index(drop=True)
df.columns = ["Year", "CPI"]

# Because the header row was read as data, both columns arrive as `object`.
# Converting them to real numbers keeps later arithmetic and merges numeric and
# avoids pandas' "Downcasting object dtype arrays on .fillna" FutureWarning.
df = df.apply(pd.to_numeric, errors="coerce")
df = df.dropna(subset=["Year"]).astype({"Year": int})

# Only 2000 and later are needed.
df = df[df["Year"] >= 2000].copy()

# Fill the 2024 CPI only when it is actually missing. (Adding the constant to
# `fillna(0)` would inflate a real value once the workbook gets updated.)
is_2024 = df["Year"] == 2024
df.loc[is_2024, "CPI"] = df.loc[is_2024, "CPI"].fillna(CPI_2024_FALLBACK)



Workbook contains no default style, apply openpyxl's default


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



In [67]:
# One row per (country, year) with:
#   Earthquake_Count  - number of rows in the group
#   Total_Damage_Mil  - sum of "Total Damage ($Mil)"
#   Average_Magnitude - mean of "Mag", rounded to 2 decimals below
#   Focal_Depth       - mean of "Focal Depth (km)"
#   tsunami           - number of non-null "Tsu" entries
per_country_year = earthquakes.groupby(["Country Name", "Year"])
bubble_data = (
    per_country_year
    .agg(
        Earthquake_Count=("Country Name", "count"),
        Total_Damage_Mil=("Total Damage ($Mil)", "sum"),
        Average_Magnitude=("Mag", "mean"),
        Focal_Depth=("Focal Depth (km)", "mean"),
        tsunami=("Tsu", "count"),
    )
    .reset_index()
    # Keep only the groups that report a positive damage total.
    .loc[lambda frame: frame["Total_Damage_Mil"] > 0]
    .assign(Average_Magnitude=lambda frame: frame["Average_Magnitude"].round(2))
)

In [68]:
# Work on a numeric copy of the CPI lookup. If Year/CPI are still `object`
# dtype, the division below yields an object column and `.fillna` emits the
# "Downcasting object dtype arrays" FutureWarning.
cpi_by_year = df.assign(
    Year=pd.to_numeric(df["Year"], errors="coerce"),
    CPI=pd.to_numeric(df["CPI"], errors="coerce"),
)

# Attach each year's CPI to the per-country, per-year aggregates.
adjusted_bubble_data = bubble_data.merge(cpi_by_year, on="Year", how="left")

# 2024 is the reference year that all damages are expressed in.
cpi_2024 = cpi_by_year.loc[cpi_by_year["Year"] == 2024, "CPI"]
if cpi_2024.empty or pd.isna(cpi_2024.iloc[0]):
    raise ValueError("The CPI table has no usable 2024 value; cannot adjust damages for inflation.")
current_cpi = cpi_2024.iloc[0]

# Inflation-adjusted damage = nominal damage * (CPI_2024 / CPI_year).
# Rows whose year has no CPI end up as 0 (not NaN), then values are rounded
# to 2 decimals.
adjusted_bubble_data["Inflation_Adjusted_Damage_Mil"] = (
    (adjusted_bubble_data["Total_Damage_Mil"] * (current_cpi / adjusted_bubble_data["CPI"]))
    .fillna(0)
    .round(2)
)

# Filter out Hawaii from the dataset.
adjusted_bubble_data = adjusted_bubble_data[
    adjusted_bubble_data["Country Name"] != "HAWAII"
].copy()

# Collapse to one row per country: earthquake counts, adjusted damages,
# magnitude, focal depth and tsunami counts.
# NOTE(review): Average_Magnitude and Average_Focal_Depth are unweighted means
# of the yearly means, not means over individual earthquakes.
damage_data = adjusted_bubble_data.groupby("Country Name").agg(
    Total_Earthquake_Count=("Earthquake_Count", "sum"),
    Total_Damages_Mil=("Inflation_Adjusted_Damage_Mil", "sum"),
    Average_Magnitude=("Average_Magnitude", "mean"),
    Average_Focal_Depth=("Focal_Depth", "mean"),
    Total_Tsunamis=("tsunami", "sum"),
).reset_index()


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



In [69]:
import plotly.express as px
import numpy as np

# Columns exposed to the hover template. The position in this list is the
# index used as %{customdata[i]} in the template below.
TREEMAP_HOVER_COLUMNS = [
    "Average_Magnitude",    # customdata[0]
    "Total_Damages_Mil",    # customdata[1]
    "Average_Focal_Depth",  # customdata[2]
    "Total_Tsunamis",       # customdata[3]
]

# Log-transform the colour variable; +1 avoids log10(0).
damage_data["Log_Total_Damages_Mil"] = np.log10(damage_data["Total_Damages_Mil"] + 1)

# Treemap: tile size = earthquake count, tile colour = log10 of adjusted damages.
# `custom_data` lets plotly carry the hover columns alongside each tile, so the
# values stay attached to the right country. Overwriting `customdata` with
# `damage_data[...].values` afterwards only works while the frame's row order
# happens to match plotly's internal node order.
fig = px.treemap(
    damage_data,
    path=["Country Name"],
    values="Total_Earthquake_Count",
    color="Log_Total_Damages_Mil",
    title="Global Earthquake Counts and Economic Damages by Country (2000–2024)",
    custom_data=TREEMAP_HOVER_COLUMNS,
    color_continuous_scale="reds",
)

# Custom hover text; damages are shown on the original (non-log) scale.
fig.update_traces(
    hovertemplate="<b>%{label}</b><br>Earthquake Count: %{value}<br>Total Damage in Dollars: $%{customdata[1]:,.2f}M<br>"
                  "Average Magnitude: %{customdata[0]:.2f}<br>Average Focal Depth: %{customdata[2]:.2f} km<br>"
                  "Tsunamis: %{customdata[3]:.0f}<extra></extra>"
)

# Label the colour bar with dollar amounts instead of raw log10 values.
fig.update_layout(
    coloraxis_colorbar=dict(
        title="Economic Damage ($M) (Log Scale)",
        tickvals=[0, 1, 2, 3, 4, 5],
        ticktext=["1", "10", "100", "1K", "10K", "100K"],
    )
)

# Footnote explaining the log scaling and the inflation adjustment.
fig.add_annotation(
    xref="paper", yref="paper",
    x=1.0, y=-.05,
    text="*Economic damages use a Log10 scale to reduce skew and dollar values were adjusted to 2024 inflation (CPI from U.S. Bureau of Labor Statistics)",
    showarrow=False,
    font=dict(size=8),
    align="left"
)

fig.show()


This treemap visualizes global earthquake counts and economic damages (2000–2024), with damages adjusted for inflation to 2024 dollar values using the U.S. Bureau of Labor Statistics CPI. It includes 47 countries with complete damage data; the damage figures cover both direct and indirect costs, such as infrastructure repairs and healthcare expenses. Box size represents earthquake counts, while color intensity encodes economic damages on a logarithmic (log10) scale, which addresses the skew caused by Japan’s $449B in damages. Hovering over a box reveals detailed metrics: damage amount, earthquake count, average magnitude, average focal depth, and tsunami count. The visualization highlights trends, like Japan’s dominance due to frequent high-magnitude events and the 2011 earthquake, while smaller nations like Haiti demonstrate how localized events can have outsized impacts. This treemap underscores the importance of disaster preparedness and the uneven economic toll of earthquakes worldwide.

In [70]:
# `go` is imported here because no other visible cell imports it; without this
# the cell raises NameError on a fresh kernel.
import plotly.graph_objects as go

# Country shown when the chart first renders.
DEFAULT_COUNTRY = "JAPAN"


def _damage_title(country):
    """Return the chart title for the given selected country."""
    return f"Earthquake Economic Damage in {country} Compared to Global Totals by Year"


# Per (year, country) totals taken from the inflation-adjusted data.
stacked_damage_data = adjusted_bubble_data.groupby(["Year", "Country Name"]).agg(
    Total_Damage_Mil=("Inflation_Adjusted_Damage_Mil", "sum"),
    Average_Magnitude=("Average_Magnitude", "mean"),
    Tsunami_Count=("tsunami", "sum"),
    Earthquake_Count=("Earthquake_Count", "sum"),
    Average_Focal_Depth=("Focal_Depth", "mean")
).reset_index()

# Totals across all countries for each year (the grey background bars).
total_damage_per_year = stacked_damage_data.groupby("Year").agg(
    Total_Damage_Mil=("Total_Damage_Mil", "sum"),
    Total_Earthquake_Count=("Earthquake_Count", "sum")
).reset_index()

all_years = total_damage_per_year["Year"].unique()
all_countries = stacked_damage_data["Country Name"].unique()
country_list = list(all_countries)

# Give every country a row for every year; combinations that have no data are
# filled with zeros. Reindexing on the full country x year grid does this in
# one step instead of a nested Python loop.
full_grid = pd.MultiIndex.from_product(
    [country_list, all_years], names=["Country Name", "Year"]
)
complete_data_df = (
    stacked_damage_data
    .set_index(["Country Name", "Year"])
    .reindex(full_grid, fill_value=0)
    .reset_index()
)[list(stacked_damage_data.columns)]

# Fall back to the first country if the default one is not in the data, rather
# than failing with an IndexError when the dropdown is built.
default_country = DEFAULT_COUNTRY if DEFAULT_COUNTRY in country_list else country_list[0]
default_index = country_list.index(default_country)

fig = go.Figure()

# Trace 0: global total per year, always visible.
fig.add_trace(
    go.Bar(
        x=total_damage_per_year["Year"],
        y=total_damage_per_year["Total_Damage_Mil"],
        name="Total Damage",
        marker_color="lightgray",
        customdata=total_damage_per_year[["Total_Earthquake_Count"]].values,
        hovertemplate="<b>Year:</b> %{x}<br>"
                      "<b>Total Damage:</b> $%{y:,.2f}M<br>"
                      "<b>Total Earthquake Count:</b> %{customdata[0]:.0f}<extra></extra>"
    )
)

# Traces 1..N: one bar series per country, in `country_list` order; only the
# default country starts visible.
for country in country_list:
    country_data = complete_data_df[complete_data_df["Country Name"] == country]
    fig.add_trace(
        go.Bar(
            x=country_data["Year"],
            y=country_data["Total_Damage_Mil"],
            name=country,
            # Fixed red so the selected country looks the same whichever one
            # is chosen, as the accompanying write-up describes; without an
            # explicit colour each trace takes a different default colour.
            marker=dict(color="red", opacity=0.8, line=dict(width=0.5, color="black")),
            visible=bool(country == default_country),
            customdata=country_data[[
                "Country Name",
                "Average_Magnitude",
                "Tsunami_Count",
                "Earthquake_Count",
                "Average_Focal_Depth"
            ]].values,
            hovertemplate="<b>Country:</b> %{customdata[0]}<br>"
                          "<b>Year:</b> %{x}<br>"
                          "<b>Earthquake Count:</b> %{customdata[3]:.0f}<br>"
                          "<b>Total Damage:</b> $%{y:,.2f}M<br>"
                          "<b>Average Magnitude:</b> %{customdata[1]:.2f}<br>"
                          "<b>Average Focal Depth:</b> %{customdata[4]:.2f}<br>"
                          "<b>Tsunami Count:</b> %{customdata[2]:.0f}<extra></extra>"
        )
    )

# Dropdown: each button shows the total trace plus exactly one country trace
# and updates the title accordingly.
dropdown_buttons = [
    {
        "label": country,
        "method": "update",
        "args": [
            {"visible": [True] + [other == country for other in country_list]},
            {"title": _damage_title(country)},
        ],
    }
    for country in country_list
]

fig.update_layout(
    updatemenus=[
        {
            "buttons": dropdown_buttons,
            "direction": "down",
            "showactive": True,
            "x": 0.1,
            "xanchor": "left",
            "y": 1.15,
            "yanchor": "top",
            "active": default_index,
        }
    ],
    xaxis_title="Year",
    yaxis_title="Economic Damage ($Mil)",
    barmode="overlay",  # country bars are drawn on top of the total bars
    showlegend=False,
    title=_damage_title(default_country),
)

fig.add_annotation(
    xref="paper", yref="paper",
    x=1.0, y=-0.2,
    text="*Total damage ($M) is adjusted to 2024 dollar value",
    showarrow=False,
    font=dict(size=10),
    align="left",
)

fig.show()


This overlaid bar chart shows global earthquake damages (2000–2024) adjusted to 2024 dollar values, highlighting country-specific impacts. Total global damages are shown in gray, with the selected country's damages drawn on top in red; a dropdown menu switches between countries. Hovering provides details, including economic damages, earthquake counts, average magnitude, focal depth, and tsunami occurrences. Japan stands out due to the 2011 Great East Japan Earthquake, which caused $300 billion in damages, a tsunami, and the Fukushima nuclear disaster (Daniell, 2011), making it the costliest natural disaster in history. The chart also emphasizes other major events, like China's 2008 Sichuan earthquake, while revealing global patterns of vulnerability and resilience.