In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pycountry
from pandas import NA, DataFrame, Index, options, read_csv, to_datetime
from plotly.express import bar, density_map, line, scatter_geo
from statsmodels.tsa.filters.hp_filter import hpfilter



In [None]:
options.display.max_rows = None
options.display.max_columns = None
options.display.max_colwidth = None
options.display.max_seq_items = None

In [None]:
data = read_csv(
    "data/region_08.csv",
)
data.head()

In [None]:
data.shape

In [None]:
data.columns



In [None]:
data.dtypes



In [None]:
data.info()

In [None]:
def separate_variables(df: DataFrame) -> tuple[Index, Index]:
    """Split a frame's column labels into a numeric and a categorical group.

    Args:
        df (DataFrame): Frame whose columns are classified by dtype.

    Returns:
        tuple[Index, Index]: Labels of the ``int64``/``float64`` columns first,
            then labels of the ``object``/``bool`` columns. Columns of any
            other dtype end up in neither index.
    """
    numeric_dtypes = ["int64", "float64"]
    categorical_dtypes = ["object", "bool"]

    return (
        df.select_dtypes(include=numeric_dtypes).columns,
        df.select_dtypes(include=categorical_dtypes).columns,
    )



In [None]:
numeric_cols, categorical_cols = separate_variables(data)
print("Numeric:", len(numeric_cols))  # noqa: T201
print("Categorical:", len(categorical_cols))  # noqa: T201



## Cardinality Analysis

In [None]:
def analyze_categorical_cardinality(df: DataFrame, input_cols: Index) -> None:
    """Print the missing-value and unique-value percentages of the given columns.

    Nothing in the computation is specific to categorical data, so the function
    can be called with any set of column labels.

    Args:
        df (DataFrame): Input dataframe.
        input_cols (Index): Labels of the columns to summarise.

    Returns:
        None: The summary is printed, sorted by ``Unique_Percentage`` in
            descending order, and not returned.
    """
    selected = df[input_cols]

    summary_df = DataFrame(
        {
            # Share of all rows in which the column is null.
            "Missing_Percentage": selected.isna().sum() / len(df) * 100,
            # Distinct non-null values relative to the number of non-null
            # values; NaN for a column that has no non-null values at all.
            "Unique_Percentage": selected.nunique() / selected.count() * 100,
        },
    ).sort_values("Unique_Percentage", ascending=False)

    print("=== Categorical Variables Cardinality Analysis ===\n")  # noqa: T201
    print(summary_df)  # noqa: T201



### Categorical Cols

In [None]:
analyze_categorical_cardinality(data, categorical_cols)

### Numeric Cols

In [None]:
analyze_categorical_cardinality(data, numeric_cols)

## Duplicate Analysis

In [None]:
any(data.duplicated(subset=["eventid"]))



## Fixing Missing Dates
- Back Fill if approx date is not valid. (As data is ordered by date.)
- Fill with approx date if valid.

In [None]:
# Exact date assembled from the year/month/day parts; rows whose parts do not
# form a valid calendar date are coerced to NaT.
exact_dates = to_datetime(
    data[["iyear", "imonth", "iday"]].rename(columns={"iyear": "year", "imonth": "month", "iday": "day"}),
    errors="coerce",
)

# The free-text approximate date is tried against three formats; anything that
# matches none of them stays NaT.
approx_dates1 = to_datetime(data["approxdate"], errors="coerce", format="%m/%d/%Y")
approx_dates2 = to_datetime(data["approxdate"], errors="coerce", format="%B %d, %Y")
approx_dates3 = to_datetime(data["approxdate"], errors="coerce", format="%Y-%m-%d %H:%M:%S")
approx_dates = approx_dates1.fillna(approx_dates2).fillna(approx_dates3)

# Only *missing* exact dates are repaired: first with the approximate date when
# it parsed, otherwise by back-filling from the next dated row (the rows are
# ordered by date). Assigning a filtered, back-filled subset straight back to
# the column would instead blank out valid exact dates on every row that also
# carries an approximate date, because the assignment aligns on the index.
data["event_date"] = exact_dates.fillna(approx_dates).bfill()

data = data.drop(["iyear", "imonth", "iday"], axis=1)

data["year"] = data["event_date"].dt.year

## Events by Region

In [None]:
data["region"].unique()

In [None]:
data["region_txt"].unique()

In [None]:
# Lookup from pycountry's country name to its alpha-3 code.
countries = {entry.name: entry.alpha_3 for entry in pycountry.countries}

# Code for every country name in the data; names pycountry does not know get a
# placeholder, and two of them are then set by hand.
codes = {name: countries.get(name, "Unknown code") for name in data["country_txt"].unique()}
codes.update({"West Germany (FRG)": "DEU", "Vatican City": "VAT"})

data["codes"] = data["country_txt"].apply(lambda name: codes[name])

*This data is only for Western Europe.*

# Attacks in Western Europe

## Total Attacks

In [None]:
# Put the events in chronological order.
data = data.sort_values("event_date")

# Daily time series: events per day, with event-free days inserted as zero so
# the index is a continuous daily range.
daily_counts = (
    data.groupby("event_date")
    .agg(count=("eventid", "count"))
    .asfreq("D", fill_value=0)
)

# Month-end ("ME") and year-end ("YE") totals of the daily counts.
monthly_counts = daily_counts["count"].resample("ME").sum()
yearly_counts = daily_counts["count"].resample("YE").sum()

# Hodrick-Prescott filter on the yearly totals to expose the long-run trend.
sw_cycle, sw_trend = hpfilter(yearly_counts, lamb=100)

# Trace order matters: the "visible" masks of the view buttons below index into
# it as [daily, monthly, yearly, trend]. Only the yearly view starts visible.
fig = go.Figure(
    data=[
        go.Scatter(
            x=daily_counts.index,
            y=daily_counts["count"],
            mode="lines",
            name="Daily Count",
            visible=False,
        ),
        go.Scatter(
            x=monthly_counts.index,
            y=monthly_counts,
            mode="lines",
            name="Monthly Count",
            visible=False,
        ),
        go.Scatter(
            x=yearly_counts.index,
            y=yearly_counts,
            mode="lines",
            name="Yearly Count",
            visible=True,
        ),
        go.Scatter(
            x=sw_trend.index,
            y=sw_trend,
            mode="lines",
            name="Trend",
            visible=True,
            line={"color": "#c7947c"},
        ),
    ],
)

# One button per view: (label, trace visibility mask, figure title).
view_buttons = [
    {
        "label": label,
        "method": "update",
        "args": [{"visible": visibility}, {"title": view_title}],
    }
    for label, visibility, view_title in (
        ("Yearly", [False, False, True, True], "Yearly Event Counts"),
        ("Monthly", [False, True, False, False], "Monthly Event Counts"),
        ("Daily", [True, False, False, False], "Daily Event Counts"),
    )
]

# Quick-zoom buttons shown above the range slider.
range_buttons = [
    {"count": 1, "label": "1m", "step": "month", "stepmode": "backward"},
    {"count": 6, "label": "6m", "step": "month", "stepmode": "backward"},
    {"count": 1, "label": "YTD", "step": "year", "stepmode": "todate"},
    {"count": 1, "label": "1y", "step": "year", "stepmode": "backward"},
    {"step": "all"},
]

fig.update_layout(
    title="Number of attacks over the years",
    updatemenus=[
        {
            "active": 0,
            "buttons": view_buttons,
            "x": 1.0,
            "xanchor": "right",
            "y": 1.1,
            "yanchor": "bottom",
        },
    ],
    xaxis={
        "title": "Date",
        "rangeslider": {"visible": True},
        "rangeselector": {"buttons": range_buttons},
        "type": "date",
    },
    yaxis={"title": "Event Count"},
)

fig.show()



The number of events peaked in 1979.  
No data or no attacks in the year 1993.  
The trend shows a rise in recent years.  

## Attacks by Country

### Total incidents

In [None]:
total_counts = data.groupby("country_txt").size().reset_index(name="counts")
total_counts = total_counts.sort_values(by="counts")


In [None]:
fig = bar(
    total_counts,
    x="country_txt",
    y="counts",
    labels={"country_txt": "Countries", "counts": "Incidents"},
    title="Most affected Countries",
)
fig.show()


Most attacks have occurred in the UK over the years.

### Attacks Every Year (Animated)

In [None]:
yearly_counts = data.groupby(["year", "country", "country_txt", "codes"]).size().reset_index(name="count")
yearly_counts["total_py"] = yearly_counts.groupby("year")["count"].transform("sum")
yearly_counts["proportion"] = yearly_counts["count"] / yearly_counts["total_py"]


In [None]:
yearly_counts.head()

In [None]:
fig = scatter_geo(
    yearly_counts,
    locations="codes",
    hover_name="country_txt",
    size="proportion",
    animation_frame="year",
    projection="natural earth",
    title="Incidents by year",
    size_max=30,
)

# fig.update_geos(fitbounds="locations")

fig.update_geos(
    visible=True,
    resolution=110,
    scope="europe",
    showcountries=True,
)

fig.show()

*West Germany faced the most attacks in 1970. Since then the yearly counts have mostly been dominated by the UK.
Spain saw a significant increase in 1975, followed by Italy in 1976/77/78. There was a massive number of attacks in France in 1996. 63% of attacks in 2009 were in Greece.*

### Incident Locations

#### Attacks at different coordinates.

In [None]:
def _country_markers(events: DataFrame, country: str) -> go.Scattergeo:
    """Return the marker trace for the rows of ``events`` that belong to ``country``."""
    df_country = events[events["country_txt"] == country]
    return go.Scattergeo(
        lon=df_country["longitude"],
        lat=df_country["latitude"],
        text=df_country["event_date"],
        mode="markers",
        marker={"size": 8, "opacity": 0.7},
        name=country,
    )


# Rows whose date is still missing have no year. Dropping those NaN entries
# avoids an empty "nan" frame, and casting to int keeps the slider labels as
# "1970" even when the column had to become float to hold NaN.
years = sorted(int(year) for year in data["year"].dropna().unique())
countries = sorted(data["country_txt"].unique())

initial_year = years[0]
df_initial = data[data["year"] == initial_year]

# One trace per country, initially showing the first year.
fig = go.Figure(data=[_country_markers(df_initial, country) for country in countries])

# One animation frame per year. Each frame lists its traces in the same country
# order as the figure, so frame trace i updates figure trace i.
frames = [
    go.Frame(
        data=[_country_markers(data[data["year"] == year], country) for country in countries],
        name=str(year),
    )
    for year in years
]
fig.frames = frames

# Slider with one step per year; each step animates to the frame of that name.
slider_steps = [
    {
        "method": "animate",
        "args": [[str(year)], {"frame": {"duration": 500, "redraw": True}, "mode": "immediate"}],
        "label": str(year),
    }
    for year in years
]

sliders = [
    {
        "active": 0,
        "currentvalue": {"prefix": "Year: "},
        "pad": {"t": 50},
        "steps": slider_steps,
    },
]

# Dropdown for filtering by country: "All" shows every country trace, any other
# entry shows only the trace of the selected country.
show_all_button = {
    "args": [{"visible": [True] * len(countries)}],
    "label": "All",
    "method": "update",
}
country_buttons = [
    {
        "args": [{"visible": [other == country for other in countries]}],
        "label": country,
        "method": "update",
    }
    for country in countries
]
updatemenus = [
    {
        "buttons": [show_all_button, *country_buttons],
        "direction": "down",
        "pad": {"r": 10, "t": 10},
        "showactive": True,
        "x": 0.1,
        "xanchor": "left",
        "y": 1.15,
        "yanchor": "top",
    },
]

# Geo map settings plus the slider and dropdown built above.
fig.update_layout(
    title="Terrorist Events in Western Europe",
    geo={
        "scope": "europe",
        "projection_type": "natural earth",
    },
    sliders=sliders,
    updatemenus=updatemenus,
)

fig.show()



#### Attack Densities

In [None]:
# Create a density heatmap
fig = density_map(data, lat="latitude", lon="longitude",
                        radius=10,  # Adjust for density scaling
                        center={"lat": data["latitude"].mean(), "lon": data["longitude"].mean()},
                        zoom=2)

fig.update_layout(coloraxis_showscale=False, title="Attack densities across Europe")

fig.show()

*Most attacks in the UK are concentrated in Northern Ireland. Many attacks in France occurred in Corsica and Paris. Most big cities in Germany, such as Frankfurt, have been epicenters of attacks. Capital cities seem to be the target in the majority of cases.*

# Casualties and Consequences

## Across Western Europe

In [None]:
total_killings = data.groupby(["year"])[["nkill", "nkillter", "nwound", "nwoundte"]].sum()

In [None]:
# Column of ``total_killings`` -> legend label, in plotting order.
casualty_labels = {
    "nkill": "Total Killed",
    "nkillter": "Terrorists Killed",
    "nwound": "Total Wounded",
    "nwoundte": "Terrorists Wounded",
}

fig = go.Figure()
for column, label in casualty_labels.items():
    fig.add_trace(go.Scatter(
        x=total_killings.index, y=total_killings[column],
        mode="lines+markers",
        name=label,
    ))

# The axes previously carried the placeholder texts "X Axis Title" and
# "Y Axis Title"; the x values are the years the totals are grouped by.
fig.update_layout(
    title="Casualities/Consequences each year",
    xaxis={"title": {"text": "Year"}},
    yaxis={"title": {"text": "Number of People"}},
)

fig.show()

- The number of wounded peaks in 2004 at over 1853. That year saw 196 fatalities in total: far fewer than the wounded, but still among the highest yearly fatality figures.
- Most fatal attacks between 1970 and 1989.
- 16 Terrorists killed recently in 2017.

## By Countries

In [None]:
country_killings = data.groupby(["country_txt", "codes"])[["nkill", "nkillter", "nwound", "nwoundte"]].sum().reset_index(1)

In [None]:
unique_countries = data[["country_txt", "codes"]].drop_duplicates()
unique_years = data["year"].unique()

# Every observed (country, code) pair crossed with every observed year.
all_combs = pd.MultiIndex.from_tuples(
    [
        (country_txt, code, year)
        for country_txt, code in unique_countries.itertuples(index=False, name=None)
        for year in unique_years
    ],
    names=["country_txt", "codes", "year"],
)

# Casualty totals for the combinations that actually occur in the data.
casualty_columns = ["nkill", "nkillter", "nwound", "nwoundte"]
grouped = data.groupby(["country_txt", "codes", "year"])[casualty_columns].sum().reset_index()

# Expand to the full grid; combinations without any event get zeros.
df_indexed = grouped.set_index(["country_txt", "codes", "year"])
df_reindexed = df_indexed.reindex(all_combs, fill_value=0)

In [None]:
complete_data=df_reindexed.reset_index()

In [None]:
# "Germany" and "West Germany (FRG)" share the code DEU, so the full
# country x year grid holds two rows per year for that code. Keep the West
# Germany rows up to 1990 and the Germany rows from 1991 onwards.
is_deu = complete_data["codes"] == "DEU"
germany_before_1991 = (
    is_deu
    & (complete_data["country_txt"] == "Germany")
    & (complete_data["year"] < 1991)
)
west_germany_after_1990 = (
    is_deu
    & (complete_data["country_txt"] == "West Germany (FRG)")
    & (complete_data["year"] > 1990)
)
complete_data = complete_data[~(germany_before_1991 | west_germany_after_1990)]

In [None]:
df = complete_data
years = sorted(df["year"].unique())

# Casualty column -> label shown in the dropdown and on the colour bar.
metrics = {
    "nkill": "Total Killed",
    "nkillter": "Total Terrorists Killed",
    "nwound": "Total Wounded",
    "nwoundte": "Total Terrorists Wounded",
}

default_metric = "nkill"


def _metric_choropleth(year_rows, metric, label):
    """Return the choropleth trace of ``metric`` for the rows of a single year."""
    return go.Choropleth(
        locations=year_rows["codes"],
        z=year_rows[metric],
        text=year_rows["country_txt"],
        colorscale="Reds",
        marker_line_color="darkgrey",
        colorbar={"title": label},
    )


# The figure starts on the default metric in the first year.
df_init = df[df["year"] == years[0]]
fig = go.Figure(data=[_metric_choropleth(df_init, default_metric, metrics[default_metric])])

# One frame per (metric, year) pair, named "<metric>_<year>".
frames = [
    go.Frame(
        data=[_metric_choropleth(df[df["year"] == year], metric, label)],
        name=f"{metric}_{year}",
    )
    for metric, label in metrics.items()
    for year in years
]
fig.frames = frames

# NOTE: every slider step targets the frame of ``default_metric``, so moving
# the slider shows that metric regardless of the dropdown selection.
slider_steps = [
    {
        "method": "animate",
        "label": str(year),
        "args": [
            [f"{default_metric}_{year}"],
            {
                "frame": {"duration": 500, "redraw": True},
                "mode": "immediate",
                "transition": {"duration": 300},
            },
        ],
    }
    for year in years
]
sliders = [
    {
        "active": 0,
        "currentvalue": {"prefix": "Year: "},
        "pad": {"t": 50},
        "steps": slider_steps,
    },
]

# Each dropdown entry jumps to the chosen metric's frame for the first year.
dropdown_buttons = [
    {
        "label": label,
        "method": "animate",
        "args": [
            [f"{metric}_{years[0]}"],
            {
                "frame": {"duration": 0, "redraw": True},
                "mode": "immediate",
                "transition": {"duration": 0},
            },
        ],
    }
    for metric, label in metrics.items()
]
updatemenus = [
    {
        "buttons": dropdown_buttons,
        "direction": "down",
        "pad": {"r": 10, "t": 10},
        "showactive": True,
        "x": 0.1,
        "xanchor": "left",
        "y": 1.15,
        "yanchor": "top",
    },
]

fig.update_layout(
    title_text="Casualities and consequences by Year",
    geo={"showframe": False, "projection_type": "equirectangular", "scope": "europe"},
    sliders=sliders,
    updatemenus=updatemenus,
)

fig.show()


*The 2004 peak corresponds to the attacks in Spain, and the 2016 peak to the attacks in France.*

# Attack Information

## Incident Criterias

In [None]:
# -9 is treated as a missing-value sentinel here (presumably the codebook's
# "unknown" - confirm). mask() blanks it to NaN and keeps the column numeric,
# whereas replacing it with pandas.NA mixes NA objects into a numeric column.
data["doubtterr"] = data["doubtterr"].mask(data["doubtterr"] == -9)


In [None]:
crit_counts = data.groupby("year")[["crit1", "crit2", "crit3", "doubtterr"]].sum().reset_index()
crit_counts.head()


In [None]:
fig = go.Figure()

legend_labels = {
    "crit1": "Political, Economic,<br>Religious, or Social Goal",
    "crit2": "Intention to Coerce,Intimidate<br>or Publicize to Larger Audience",
    "crit3": "Outside International<br>Humanitarian Law",
    "doubtterr": "Doubt Terrorism Proper?",
}

for col in ["crit1", "crit2", "crit3", "doubtterr"]:
    fig.add_trace(go.Scatter(x=crit_counts["year"], y=crit_counts[col], mode="lines+markers", name=legend_labels[col]))

fig.update_layout(
    title="Different inclusion criterias over the years",
    xaxis_title="Year",
    yaxis_title="Count",
    xaxis={"showspikes": True},
    hovermode="x unified",
    legend={
        "yanchor": "top",
        "y": 0.99,
        "xanchor": "right",
        "x": 0.99,
        "bgcolor": "rgba(0,0,0,0)",
    },
)


fig.show()


*Most incidents meet all three criteria.*

### Categorization of the incident other than terrorism

In [None]:
alt_counts = data[data["doubtterr"] == 1].groupby(["year", "alternative_txt"]).size().reset_index(name="count")


In [None]:
fig = line(
    alt_counts,
    x="year",
    y="count",
    color="alternative_txt",
    symbol="alternative_txt",
    labels={"alternative_txt": "Alternative Designation"},
)
fig.update_traces(hovertemplate="%{y}")
fig.update_layout(xaxis={"showspikes": True}, hovermode="x unified")

fig.show()


*Among the events whose terrorism status is in doubt, most have been classified as Insurgency/Guerilla Action, while there was a sudden rise in Other Crime Type in the early 2000s.*

## Attack Types

In [None]:
df = data
attack_types_df = pd.melt(
    df,
    id_vars=["eventid", "year", "country_txt", "latitude", "longitude", "nkill", "nkillter", "nwound", "nwoundte"],
    value_vars=["attacktype1_txt", "attacktype2_txt", "attacktype3_txt"],
    var_name="attack_column",
    value_name="attack_type",
)

attack_types_df = attack_types_df.dropna(subset=["attack_type"])

In [None]:
figures = {}

# 1. Attack Types Distribution Over Years
yearly_distribution = attack_types_df.groupby(["year", "attack_type"]).size().reset_index(name="count")
figures["yearly_trend"] = px.line(
    yearly_distribution,
    x="year",
    y="count",
    color="attack_type",
    title="Attack Types Distribution Over Years",
    labels={"count": "Number of Attacks", "attack_type": "Attack Type"},
    template="plotly_white",
)

# 2. Geographic Distribution (Bubble Map)
geo_distribution = attack_types_df.groupby(["latitude", "longitude", "country_txt", "attack_type"]).size().reset_index(name="count")
figures["geo_distribution"] = px.scatter_geo(
    geo_distribution,
    lat="latitude",
    lon="longitude",
    color="attack_type",
    size="count",
    hover_name="country_txt",
    title="Geographic Distribution of Attack Types",
    template="plotly_white",
    size_max=30,
)

figures["geo_distribution"].update_layout(
    geo={
        "scope": "europe",
    },
)

# 3. Attack Types by Country (Top 10 countries)
country_distribution = attack_types_df.groupby(["country_txt", "attack_type"]).size().reset_index(name="count")
top_countries = country_distribution.groupby("country_txt")["count"].sum().nlargest(10).index
country_distribution_filtered = country_distribution[country_distribution["country_txt"].isin(top_countries)]

figures["country_distribution"] = px.bar(
    country_distribution_filtered,
    x="country_txt",
    y="count",
    color="attack_type",
    title="Attack Types Distribution by Country (Top 10)",
    labels={"count": "Number of Attacks", "attack_type": "Attack Type"},
    template="plotly_white",
    barmode="stack",
)

for fig in figures.values():
    fig.show()


*Most attacks in the past have been Bombings/Explosions; however, Facility/Infrastructure Attacks have been on the rise in recent times. A large number of assassinations have been recorded in the United Kingdom.*

In [None]:
unique_attacktypes = df["attacktype1_txt"].unique()
unique_years = df["year"].unique()

multi_index = pd.MultiIndex.from_product(
    [unique_attacktypes, unique_years],
    names=["attacktype1_txt", "year"],
)

In [None]:
# 3. Attack Types by Year
yearly_distribution = df.groupby(["attacktype1_txt", "year"]).size().reset_index(name="count")
yearly_distribution.head()

In [None]:
complete_yearly_distribution = (
    yearly_distribution
    .set_index(["attacktype1_txt", "year"])
    .reindex(multi_index, fill_value=0)
    .reset_index()
)

In [None]:
complete_yearly_distribution.loc[ (complete_yearly_distribution["attacktype1_txt"] == "Assassination")]

In [None]:
yearly_distribution.loc[ (yearly_distribution["attacktype1_txt"] == "Assassination")]

In [None]:
from plotly.colors import qualitative

colors = qualitative.Dark24

In [None]:
fig = go.Figure()
for idx, atype in enumerate(unique_attacktypes):
    # Take x and y from the same rows, ordered by year, so the two can never
    # get out of step and the stacked lines always run left to right.
    atype_rows = complete_yearly_distribution.loc[
        complete_yearly_distribution["attacktype1_txt"] == atype
    ].sort_values("year")
    fig.add_trace(go.Scatter(
        x=atype_rows["year"],
        y=atype_rows["count"],
        mode="lines",
        # Cycle through the palette if there are more types than colours.
        line={"width": 0.5, "color": colors[idx % len(colors)]},
        stackgroup="one",
        name=atype,
    ))

fig.update_layout(
    title="Attacks per year by attack type (stacked)",
    xaxis_title="Year",
    yaxis_title="Number of Attacks",
)

fig.show()

### Were they successful?

In [None]:
attack_success = data.groupby(["year", "country_txt"]).agg({"success":"sum", "eventid": "count"}).reset_index()
years = sorted(data["year"].unique())
countries = sorted(data["country_txt"].unique())

In [None]:
attack_success["successp"] = attack_success["success"] / attack_success["eventid"]

In [None]:

# initial_year = years[0]

# # Create the figure and add one trace per country (for the initial year)
# fig = go.Figure()

# for country in countries:
#     attack_success_curr = attack_success[attack_success["country_txt"]==country]
#     fig.add_trace(
#         go.Scatter(
#             x=attack_success_curr["year"],
#             y=attack_success_curr["successp"],
#             mode="lines+markers",
#             name=country,
#         ),
#     )

# fig.show()



In [None]:
success_prop = data.groupby(["year","success", "country_txt"]).size().reset_index(name="counts")

In [None]:
# Yearly totals per outcome, collapsing the per-country breakdown. (The empty
# placeholder figure that used to be created here was never used: the next
# cell builds its own figure.)
initial_data = success_prop.groupby(["year","success"])["counts"].sum().reset_index()

In [None]:
# Stacked area plot; groupnorm="percent" rescales every year to sum to 100.
fig = px.area(
    initial_data,
    x="year",
    y="counts",
    color="success",
    color_discrete_sequence=["lightblue", "steelblue"],
    groupnorm="percent",
    title="Share of successful and unsuccessful attacks per year",
)

fig.update_layout(
    xaxis_title="years",
    legend_title="Success",
    showlegend=True,
    yaxis={
        "title": "relative proportion",
        "type": "linear",
        # Full percent scale; starting the range at 1 clipped the bottom of
        # the chart.
        "range": [0, 100],
        "ticksuffix": "%",
    },
)

fig.show()


*Most attacks have been successful throughout the history however there is a significant uptick in unsuccessful attacks in the past decade.*

## Attack vs Casualities

### Total

In [None]:
att_cas_stats = attack_types_df.groupby(["year","attack_type"]).agg({
    "eventid": "count",
    "nkill": "sum",
     "nkillter": "sum",
     "nwound": "sum",
     "nwoundte": "sum",
})

att_cas_stats["nkillc"] = att_cas_stats["nkill"] - att_cas_stats["nkillter"]
att_cas_stats["nwoundc"] = att_cas_stats["nwound"] - att_cas_stats["nwoundte"]

att_cas_stats=att_cas_stats.reset_index()

In [None]:
yearly_vs = att_cas_stats.groupby("attack_type")[["eventid", "nkillc"	,"nwoundc","nwoundte"	,"nkillter"]].sum().reset_index()

In [None]:
# Settings shared by both bubble charts: one bubble per attack type, sized by
# the number of events of that type.
bubble_options = {
    "size": "eventid",
    "color": "attack_type",
    "hover_name": "attack_type",
    "log_x": False,
    "size_max": 150,
}

# Civilians vs. terrorists killed.
fig1 = px.scatter(yearly_vs, x="nkillc", y="nkillter", **bubble_options)
fig1.update_layout(
    title="Total Attack type vs Casualities",
    xaxis_title="Civilians Killed",
    yaxis_title="Terrorists Killed",
)

# Civilians vs. terrorists wounded.
fig2 = px.scatter(yearly_vs, x="nwoundc", y="nwoundte", **bubble_options)
fig2.update_layout(
    title="Total Attack type vs Wounds",
    xaxis_title="Civilians Wounded",
    yaxis_title="Terrorists Wounded",
)

for figure in (fig1, fig2):
    figure.show()

- A significant number of terrorists have been killed in Bombings and Armed Assaults.
- However, Bombings have caused far more civilian deaths in proportion.
- Assassinations have caused far more civilian deaths than terrorist deaths and have been the most gruesome attack type.
- Facility/Infrastructure attacks, though large in number, have had far fewer casualties and consequences.

### Per Year

In [None]:
# animation_frame selects the rows of each frame (the year); animation_group
# must name the column that identifies the *same bubble* across frames, which
# is the attack type, not the year.
fig1 = px.scatter(att_cas_stats, x="nkillc", y="nkillter", animation_frame="year", animation_group="attack_type",
           size="eventid", color="attack_type", hover_name="attack_type",
           log_x=False, size_max=150)

fig1.update_layout(
    title="Attack type vs Casualities per year",
    xaxis_title="Civilians Killed",
    yaxis_title="Terrorists Killed",
)


fig2 = px.scatter(att_cas_stats, x="nwoundc", y="nwoundte", animation_frame="year", animation_group="attack_type",
           size="eventid", color="attack_type", hover_name="attack_type",
           log_x=False, size_max=150)

# This chart plots the wounded columns, so its axes are labelled accordingly
# (they previously repeated the "Killed" labels of the first chart).
fig2.update_layout(
    title="Attack type vs Wounds per year",
    xaxis_title="Civilians Wounded",
    yaxis_title="Terrorists Wounded",
)

fig1.show()
fig2.show()

# Weapons

In [None]:
def reshape_weapons_data(
    df: pd.DataFrame,
    name1: str,
    name2: str,
    number: int,
    suff: str = "_txt",
) -> pd.DataFrame:
    """Unpivot numbered (type, subtype) column pairs into one row per pair.

    For every input row and every ``i`` in ``1..number`` the columns
    ``f"{name1}{i}{suff}"`` and ``f"{name2}{i}{suff}"`` are read. A pair whose
    type value is null is skipped; otherwise one output row is produced.

    Args:
        df: Frame holding the numbered columns. All column labels must be
            strings.
        name1: Prefix of the type columns; also the name of the output column
            that receives the type value.
        name2: Prefix of the subtype columns; also the name of the output
            column that receives the subtype value (``None`` when null).
        number: Count of numbered column pairs to read.
        suff: Suffix that follows the number in the column labels.

    Returns:
        A new frame with a fresh integer index. Its columns are every input
        column that starts with neither prefix (in input order), followed by
        ``name1`` and ``name2``. Rows keep the input order, and within one
        input row the order of ``i``. The columns are present even when no
        row qualifies.

    Raises:
        KeyError: If one of the numbered columns is missing from ``df``.
    """
    # Columns copied unchanged into every output row.
    passthrough = [col for col in df.columns if not col.startswith((name1, name2))]

    records = []
    # to_dict("records") hands out each row with its per-column value types,
    # unlike iterrows(), which squeezes a row into one Series and thereby
    # upcasts mixed numeric columns (e.g. integer ids become floats).
    for row in df.to_dict("records"):
        carried = {col: row[col] for col in passthrough}
        for i in range(1, number + 1):
            primary = row[f"{name1}{i}{suff}"]
            if pd.isna(primary):
                continue
            secondary = row[f"{name2}{i}{suff}"]
            records.append(
                {
                    **carried,
                    name1: primary,
                    name2: secondary if pd.notna(secondary) else None,
                },
            )

    # Passing the columns explicitly keeps the schema when there are no records.
    return pd.DataFrame(records, columns=[*passthrough, name1, name2])

reshaped_df = reshape_weapons_data(data[["eventid", "weaptype1_txt",  "weapsubtype1_txt", "weaptype2_txt",    "weapsubtype2_txt", "weaptype3_txt",    "weapsubtype3_txt", "weaptype4_txt",    "weapsubtype4_txt"]], "weaptype", "weapsubtype", 4)


In [None]:
# Number of records per (weapon type, weapon subtype) pair.
grouped_df = reshaped_df.groupby(["weaptype", "weapsubtype"]).size().reset_index(name="count")

# Horizontal bars: one bar per weapon type, one coloured segment per subtype.
fig = px.bar(
    grouped_df,
    x="count",
    y="weaptype",
    color="weapsubtype",
    orientation="h",
    title="Weapon Types by Subtype (Stacked Horizontal Bar Chart)",
    labels={"count": "Count", "weaptype": "Weapon Type"},
    color_discrete_sequence=px.colors.qualitative.Pastel,
    custom_data=["weapsubtype", "count"],
)
fig.update_layout(
    barmode="stack",
    legend_title="Weapon Subtype",
    height=500,
    width=800,
    xaxis={"title": "Count", "showgrid": True},
    yaxis={"title": "Weapon Type", "categoryorder": "total ascending"},
    hovermode="y unified",
)

# Hover text shows "<subtype>: <count>" for every segment.
fig.update_traces(
    hovertemplate="%{customdata[0]}: %{customdata[1]}<extra></extra>",
)

# Write each bar's total just past its right end.
totals = grouped_df.groupby("weaptype")["count"].sum()
for weapon_type, total in totals.items():
    fig.add_annotation(
        x=total,
        y=weapon_type,
        text=str(total),
        showarrow=False,
        xshift=10,
    )
fig.show()

- Explosives rank as the most commonly used weapon type, aligning with the high frequency of bombings and explosions.
- Firearms follow at a considerable distance behind explosives, with handguns being the leading subtype, accounting for over 30% of firearm-related incidents.
- Arson and fire-related attacks also constitute a notable portion of the data.

# Target

In [None]:
reshaped_df = reshape_weapons_data(data[["eventid", "targtype1", "targsubtype1", "targtype2", "targsubtype2", "targtype3",    "targsubtype3"]], "targtype", "targsubtype",3, "")
reshaped_df = reshaped_df.sort_values(["targtype", "targsubtype"])
reshaped_df["targtype"] = reshaped_df["targtype"].fillna(0).astype(int).astype(str)
reshaped_df["targsubtype"] = reshaped_df["targsubtype"].fillna(0).astype(int).astype(str)


In [None]:
grouped_df = reshaped_df.groupby(["targtype", "targsubtype"]).size().reset_index(name="count")


In [None]:
fig = px.density_heatmap(reshaped_df, x="targtype", y="targsubtype")
fig.show()

# Perpetrator

In [None]:
perp_data = data.rename(columns={"gname": "gname1", "gsubname": "gsubname1"})
reshaped_df = reshape_weapons_data(perp_data[["gname1","gsubname1","gname2","gsubname2","gname3","gsubname3"]], "gname", "gsubname",3, "")
grouped_df1 = reshaped_df.groupby(["gname"]).size().reset_index(name="count")
reshaped_df = reshaped_df.fillna("Unknown")

In [None]:
grouped_df1 = grouped_df1[grouped_df1["gname"] != "Unknown"]
top_10 = grouped_df1.sort_values("count", ascending=False).head(10)["gname"]

In [None]:
grouped_df = reshaped_df[reshaped_df["gname"].isin(top_10)].groupby(["gname","gsubname"]).size().reset_index(name="count")
grouped_df["gname"].unique()

In [None]:
# Horizontal bars, one per perpetrator group. ``grouped_df`` holds one row per
# (group, subgroup) pair, so a bar is made of same-coloured stacked segments.
fig = px.bar(
    grouped_df,
    x="count",
    y="gname",
    orientation="h",
    title="Top Perpetrators",
    labels={"count": "Count", "gname": "Perpetrator"},
)
fig.update_layout(
    xaxis={"title": "Count", "showgrid": True},
    yaxis={"title": "Perpetrator", "categoryorder": "total ascending"},
    hovermode="y unified",
    showlegend=False,
)

# Write each group's total just past the right end of its bar.
totals = grouped_df.groupby("gname")["count"].sum()
for group_name, total in totals.items():
    fig.add_annotation(
        x=total,
        y=group_name,
        text=str(total),
        showarrow=False,
        xshift=10,
    )
fig.show()

*Other than Unknown perpetrators, the Irish Republican Army is the top perpetrator, which explains the number of attacks in the United Kingdom and their concentration around Northern Ireland.*