In [None]:
import pandas as pd
import numpy as np

# Build one month-end timestamp for every month from Jan 2018 through Dec 2023.
# The end bound is a full date on purpose: a partial string such as "2023-12"
# is read as 2023-12-01, which falls before the December month-end and would
# silently drop the final month. An explicit MonthEnd offset is used instead of
# the "M" alias, which newer pandas releases deprecate.
dates = pd.date_range(start="2018-01-01", end="2023-12-31", freq=pd.offsets.MonthEnd())

# Generate random unemployment rates with a Covid spike (2020-2021).
# Seeding the global NumPy generator makes the synthetic series reproducible.
np.random.seed(42)
unemployment_rate = np.random.uniform(4, 6, len(dates))  # base rate 4%-6%

# Add the Covid spike: every month whose year is 2020 or 2021 gets an extra
# 2-6 percentage points, drawn in one vectorized call instead of a row loop.
covid_mask = dates.year.isin([2020, 2021])
unemployment_rate[covid_mask] += np.random.uniform(2, 6, int(covid_mask.sum()))

# Assemble the frame, rounding rates to two decimals
df = pd.DataFrame({
    "Date": dates,
    "Unemployment_Rate": np.round(unemployment_rate, 2)
})

# Persist to CSV (without the index) and preview the first rows
df.to_csv("unemployment_data.csv", index=False)
df.head()


In [None]:
# Read the previously saved CSV back into a DataFrame
df = pd.read_csv("unemployment_data.csv")

# Count nulls per column and report them
null_counts = df.isna().sum()
print("Missing Values:\n", null_counts)

# Convert the Date column to pandas datetimes so date comparisons and
# the .dt accessor work in later cells
parsed_dates = pd.to_datetime(df["Date"])
df["Date"] = parsed_dates

df.head()


In [None]:
print("Data Summary:")
print(df.describe())

print("\nDate Range:", df["Date"].min(), "to", df["Date"].max())

# Average unemployment before, during and after Covid.
# The window is March 2020 through December 2021 inclusive. Boundaries are
# full dates and the window is half-open [start, end): a partial string like
# "2021-12" is coerced to 2021-12-01, which would push a month-end row such as
# 2021-12-31 out of the Covid window and into the post-Covid bucket.
covid_start = pd.Timestamp("2020-03-01")
covid_end_exclusive = pd.Timestamp("2022-01-01")

is_pre = df["Date"] < covid_start
is_during = (df["Date"] >= covid_start) & (df["Date"] < covid_end_exclusive)
is_post = df["Date"] >= covid_end_exclusive

# NOTE(review): the data-generation cell adds the spike to every month of 2020,
# so Jan-Feb 2020 land in the pre-Covid bucket while carrying the spike.
# Confirm which window is intended.
pre_covid = df.loc[is_pre, "Unemployment_Rate"].mean()
during_covid = df.loc[is_during, "Unemployment_Rate"].mean()
post_covid = df.loc[is_post, "Unemployment_Rate"].mean()

print(f"\nAverage Pre-Covid: {pre_covid:.2f}%")
print(f"Average During Covid: {during_covid:.2f}%")
print(f"Average Post-Covid: {post_covid:.2f}%")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Line chart of the monthly rate with the Covid window shaded.
# Uses an explicit Figure/Axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x="Date", y="Unemployment_Rate", marker="o", ax=ax)

# Shade March 2020 through the end of December 2021. The right edge is a full
# date: "2021-12" alone resolves to 2021-12-01 and would leave a month-end
# December 2021 observation outside the highlighted band.
ax.axvspan(pd.to_datetime("2020-03-01"), pd.to_datetime("2021-12-31"),
           color="red", alpha=0.2, label="Covid-19 Period")

ax.set_title("Unemployment Rate Trends (2018-2023)")
ax.set_xlabel("Year")
ax.set_ylabel("Unemployment Rate (%)")
ax.legend()
plt.show()


In [None]:
# Derive calendar year and month from Date and store them as new columns on df
date_parts = df["Date"].dt
df["Year"] = date_parts.year
df["Month"] = date_parts.month

# One box per calendar month, pooling that month's rates across all years
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x="Month", y="Unemployment_Rate", ax=ax)
ax.set_title("Seasonal Trends in Unemployment Rates")
ax.set_xlabel("Month")
ax.set_ylabel("Unemployment Rate (%)")
plt.show()


In [None]:
# Summary of findings. Insight 3 is derived from the computed period averages
# rather than hard-coded, so it cannot contradict the numbers printed earlier.
print("📌 Insights:")
print("1. There is a clear spike in unemployment during 2020-2021 due to Covid-19.")
print("2. Average unemployment increased from {:.2f}% pre-Covid to {:.2f}% during Covid.".format(pre_covid, during_covid))

# Compare at the two-decimal precision used for display, so the wording
# matches the figures shown to the reader.
pre_avg, during_avg, post_avg = (round(float(v), 2) for v in (pre_covid, during_covid, post_covid))
if post_avg >= during_avg:
    print("3. Post-Covid rates ({:.2f}%) show no recovery yet relative to the Covid period ({:.2f}%).".format(post_avg, during_avg))
elif post_avg > pre_avg:
    print("3. Post-Covid rates ({:.2f}%) show partial recovery but not fully back to pre-Covid levels ({:.2f}%).".format(post_avg, pre_avg))
else:
    print("3. Post-Covid rates ({:.2f}%) have returned to pre-Covid levels ({:.2f}%) or below.".format(post_avg, pre_avg))

# Insight 4 is a qualitative reading of the monthly boxplot; it is not computed here.
print("4. Minor seasonal variation seen, possibly due to hiring cycles in certain months.")
