In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Columns that must be populated for a row to be usable in this analysis.
key_cols = ["Injury Severity", "Route Type", "Report Number"]

# Read the crash CSV, convert empty / whitespace-only cells to NaN so they
# count as missing, then discard rows lacking any key column and renumber.
df = (
    pd.read_csv("Crash_Reporting_-_Drivers_Data.csv", low_memory=False)
      .replace(r'^\s*$', np.nan, regex=True)
      .dropna(subset=key_cols)
      .reset_index(drop=True)
)

# Simplify Injury Severity
def simplify_severity(x):
    """Collapse a raw injury-severity label into a coarse bucket.

    The value is stringified and upper-cased, then scanned for keywords in a
    fixed priority order; the first keyword found decides the bucket.

    Parameters
    ----------
    x : any
        Raw severity value; missing values (per ``pd.isna``) are accepted.

    Returns
    -------
    str or float
        One of "Fatal", "Serious", "Minor/Possible", "No Injury", or NaN when
        the value is missing or contains none of the known keywords.
    """
    if pd.isna(x):
        return np.nan

    label = str(x).upper()
    # Order matters: earlier entries take precedence over later ones.
    keyword_buckets = (
        ("FATAL", "Fatal"),
        ("SERIOUS", "Serious"),
        ("MINOR", "Minor/Possible"),
        ("POSSIBLE", "Minor/Possible"),
        ("NO APPARENT", "No Injury"),
    )
    for keyword, bucket in keyword_buckets:
        if keyword in label:
            return bucket
    return np.nan

# Add the derived analysis columns in one step:
#   severity    - coarse injury bucket from simplify_severity
#   severe_flag - True when the bucket is "Fatal" or "Serious"
#   route_type  - route type as text, title-cased and whitespace-trimmed
df = df.assign(
    severity=df["Injury Severity"].map(simplify_severity),
    severe_flag=lambda frame: frame["severity"].isin(["Fatal", "Serious"]),
    route_type=df["Route Type"].astype(str).str.title().str.strip(),
)

# Create Road Group Category
# Labels are matched against the title-cased route type, which is why the
# US routes are spelled "Us (State)" here.
highway_types = ["Interstate (State)","Us (State)","Maryland (State)", "Maryland (State) Route","Ramp"]
# Both the short and the "... Route" spellings are listed for each kind of
# road. "County" was the one short form missing (its "Municipality" and
# "Maryland (State)" counterparts were present), which sent those rows to
# "Other" and dropped them from the local-road figures.
# NOTE(review): assumes the data contains a bare "County" label - confirm
# with df["route_type"].value_counts().
local_types = ["County Route","County","Municipality Route","Municipality", "Private Route","Other Public Roadway"]

def group_roads(route):
    """Map a cleaned route-type label to its road group.

    Parameters
    ----------
    route : str
        Route type label, expected to be title-cased and stripped already.

    Returns
    -------
    str
        "Highway / Freeway" if the label is in ``highway_types``,
        "Local / Residential" if it is in ``local_types``, otherwise "Other".
    """
    if route in highway_types:
        return "Highway / Freeway"
    elif route in local_types:
        return "Local / Residential"
    else:
        return "Other"

# Tag every row with its road group.
df["road_group"] = df["route_type"].apply(group_roads)

# The input is the drivers-level file (see the CSV filename), so presumably a
# crash involving several drivers appears on several rows that share one
# "Report Number". Counting rows would then count drivers, not crashes.
# Collapse to one row per crash first; a crash is severe if ANY of its rows
# is flagged severe. If report numbers are already unique this is a no-op.
crash_level = (
    df.groupby(["road_group", "route_type", "Report Number"])["severe_flag"]
      .any()
      .reset_index()
)

# Per route type: number of distinct crashes and how many of them were severe.
summary = (
    crash_level.groupby(["road_group", "route_type"])
      .agg(total_crashes=("Report Number", "nunique"),
           severe_crashes=("severe_flag", "sum"))
      .reset_index()
)
# Calculate the fraction of crashes that are severe
summary["severe_rate"] = summary["severe_crashes"] / summary["total_crashes"]
# Keep only route types with at least 50 crashes so the rates are stable
summary = summary[summary["total_crashes"] >= 50]
# Split the summary into the two road groups that get plotted
highway_data = summary[summary["road_group"] == "Highway / Freeway"]
local_data = summary[summary["road_group"] == "Local / Residential"]




In [None]:
# Figure 1: severe injury rate per highway/freeway route type, highest first.
highway_by_rate = highway_data.sort_values("severe_rate", ascending=False)

plt.figure(figsize=(8, 5))
# barplot draws on the current figure and hands back its Axes for labelling.
ax = sns.barplot(data=highway_by_rate, x="severe_rate", y="route_type", color="red")
ax.set(
    xlabel="Severe Injury Rate",
    ylabel="Highway Types",
    title="Severe Injury Rate - Highways/Freeways",
)
plt.show()


In [None]:
# Figure 2: total number of crashes per highway/freeway route type, largest first.
highway_by_count = highway_data.sort_values("total_crashes", ascending=False)

plt.figure(figsize=(8, 5))
# barplot draws on the current figure and hands back its Axes for labelling.
ax = sns.barplot(data=highway_by_count, x="total_crashes", y="route_type", color="blue")
ax.set(
    xlabel="Total Crashes",
    ylabel="Highway Types",
    title="Total Crashes - Highways/Freeways",
)
plt.show()

In [None]:
# Figure 3: severe injury rate per local/residential route type, highest first.
local_by_rate = local_data.sort_values("severe_rate", ascending=False)

plt.figure(figsize=(8, 5))
# barplot draws on the current figure and hands back its Axes for labelling.
ax = sns.barplot(data=local_by_rate, x="severe_rate", y="route_type", color="red")
ax.set(
    xlabel="Severe Injury Rate",
    ylabel="Local Road Types",
    title="Severe Injury Rate - Local/Residential Roads",
)
plt.show()

In [None]:
# Figure 4: total number of crashes per local/residential route type, largest first.
local_by_count = local_data.sort_values("total_crashes", ascending=False)

plt.figure(figsize=(8, 5))
# barplot draws on the current figure and hands back its Axes for labelling.
ax = sns.barplot(data=local_by_count, x="total_crashes", y="route_type", color="blue")
ax.set(
    xlabel="Total Crashes",
    ylabel="Local Road Types",
    title="Total Crashes - Local/Residential Roads",
)
plt.show()