In [None]:
import pandas as pd

# ---------------------------------------------------------------
# Load the raw CSV and normalise it into the frame `df` that the
# rest of the notebook works on: stripped column names, a parsed
# 'Date' column, short metric names, numeric metric dtypes, and
# no blank or duplicated rows.
# ---------------------------------------------------------------

# NOTE(review): absolute, machine-specific path -- point this at your own copy
# of the dataset (ideally a path relative to the notebook).
DATA_PATH = r'C:\Users\DELL-2025\Desktop\internship\Task_2\Unemployment in India.csv'

df = pd.read_csv(DATA_PATH)

# Strip whitespace around the header names so the lookups by name below match.
df.columns = df.columns.str.strip()

print("Columns:", df.columns)

# Rows in which every field is missing carry no information; drop them before
# parsing so they do not show up as all-NaN records in the later summaries.
df = df.dropna(how='all')

# Find the date column case-insensitively. An exact "date" header wins over a
# header that merely contains the word, and a missing column fails loudly.
date_candidates = [col for col in df.columns if 'date' in col.lower()]
if not date_candidates:
    raise KeyError(f"No column containing 'date' found in: {list(df.columns)}")
date_col = next(
    (col for col in date_candidates if col.lower() == 'date'),
    date_candidates[0],
)

# Values are stripped before parsing; the expected layout is day-month-year.
df[date_col] = pd.to_datetime(
    df[date_col].astype(str).str.strip(),
    format='%d-%m-%Y',
)

# Later cells address the date column as 'Date', so normalise its name here
# together with the shorter metric names.
df = df.rename(columns={
    date_col: 'Date',
    'Estimated Unemployment Rate (%)': 'UnemploymentRate',
    'Estimated Employed': 'Employed',
    'Estimated Labour Participation Rate (%)': 'LabourParticipationRate'
})
date_col = 'Date'

# Force the metrics to numeric; unparseable entries become NaN instead of raising.
for metric in ('UnemploymentRate', 'Employed', 'LabourParticipationRate'):
    df[metric] = pd.to_numeric(df[metric], errors='coerce')

df = df.drop_duplicates()

# Remaining missing values per column, as a quick data-quality check.
print(df.isnull().sum())


In [None]:


# ---------------------------------------------------------------
# Descriptive statistics for the cleaned frame `df`.
# ---------------------------------------------------------------

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print(df.describe())

print("Average Unemployment:", df['UnemploymentRate'].mean())
print("Highest Unemployment:", df['UnemploymentRate'].max())
print("Lowest Unemployment:", df['UnemploymentRate'].min())

# Full record (row) holding the single highest / lowest observed rate.
region_max = df.loc[df['UnemploymentRate'].idxmax()]
print("Region with highest unemployment:", region_max['Region'])

region_min = df.loc[df['UnemploymentRate'].idxmin()]
print("Region with lowest unemployment:", region_min['Region'])

# Mean rate per calendar month (year-month period), per region and per area.
monthly_avg = df.groupby(df['Date'].dt.to_period('M'))['UnemploymentRate'].mean()
print(monthly_avg)

region_avg = df.groupby('Region')['UnemploymentRate'].mean()
print(region_avg)

area_avg = df.groupby('Area')['UnemploymentRate'].mean()
print(area_avg)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# ---------------------------------------------------------------
# Overview charts: trend over time, per-region and per-area means,
# and a month x year heatmap of the mean unemployment rate.
# ---------------------------------------------------------------

# 1. Trend over time.
# The frame holds one row per region/area/date, so plotting the raw rows draws
# a single line that jumps back in time at every region boundary. Average the
# rate per date and sort by date to obtain one readable trend line.
date_trend = df.groupby('Date')['UnemploymentRate'].mean().sort_index()

plt.figure(figsize=(12,5))
plt.plot(date_trend.index, date_trend.values)
plt.title("Unemployment Trend Over Time")
plt.xlabel("Date")
plt.ylabel("Unemployment Rate (%)")
plt.grid(True)
plt.show()

# 2. Mean rate per region, highest first.
region_avg = df.groupby('Region')['UnemploymentRate'].mean().sort_values(ascending=False)

plt.figure(figsize=(14,6))
region_avg.plot(kind='bar', color='skyblue')
plt.title("Average Unemployment Rate by Region")
plt.xlabel("Region")
plt.ylabel("Unemployment Rate (%)")
plt.show()

# 3. Mean rate per area.
area_avg = df.groupby('Area')['UnemploymentRate'].mean()

plt.figure(figsize=(6,4))
area_avg.plot(kind='bar', color=['orange', 'green'])
plt.title("Rural vs Urban Unemployment")
plt.xlabel("Area")
plt.ylabel("Unemployment Rate (%)")
plt.show()

# 4. Month x year heatmap.
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month_name()

monthly_pivot = df.pivot_table(values="UnemploymentRate", index="Month", columns="Year", aggfunc="mean")

# pivot_table sorts the month names alphabetically (April, August, ...).
# Re-order the rows into calendar order, keeping only months that are present.
calendar_months = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
monthly_pivot = monthly_pivot.reindex(
    [month for month in calendar_months if month in monthly_pivot.index]
)

plt.figure(figsize=(8,6))
sns.heatmap(monthly_pivot, annot=True, cmap="Reds")
plt.title("Monthly Unemployment Heatmap")
plt.show()


In [None]:
# ---------------------------------------------------------------
# Covid impact tables: compare mean unemployment before and from
# the cut-off date onward, overall, per region and per area.
# ---------------------------------------------------------------

# Rows dated before the cut-off form the "before" sample; rows on or after it
# form the "during" sample.
covid_start = '2020-03-01'
before_covid = df[df['Date'] < covid_start]
during_covid = df[df['Date'] >= covid_start]

avg_before, avg_during = (
    sample['UnemploymentRate'].mean() for sample in (before_covid, during_covid)
)

print("Average Before Covid:", avg_before)
print("Average During Covid:", avg_during)

# Mean rate for each month of 2020.
rows_2020 = df[df['Date'].dt.year == 2020]
unemp_2020 = rows_2020.groupby(rows_2020['Date'].dt.to_period('M'))['UnemploymentRate'].mean()
print("\n2020 Month-wise unemployment:")
print(unemp_2020)

# Per-region change (during minus before), largest increase first.
region_rate_during = during_covid.groupby('Region')['UnemploymentRate'].mean()
region_rate_before = before_covid.groupby('Region')['UnemploymentRate'].mean()
region_impact = region_rate_during.sub(region_rate_before).sort_values(ascending=False)
print("\nRegion-wise Covid impact:")
print(region_impact)

# Per-area change (during minus before).
area_rate_during = during_covid.groupby('Area')['UnemploymentRate'].mean()
area_rate_before = before_covid.groupby('Area')['UnemploymentRate'].mean()
area_impact = area_rate_during.sub(area_rate_before)
print("\nRural vs Urban Covid impact:")
print(area_impact)


In [None]:
# ---------------------------------------------------------------
# Seasonal patterns: mean unemployment rate by month name, by
# year, by year-month period, and by area x month name.
# ---------------------------------------------------------------
df['Month'] = df['Date'].dt.month_name()
df['Year'] = df['Date'].dt.year

# Grouping by month *name* sorts the result alphabetically (April, August, ...),
# which hides the seasonal pattern. Sort the rows by month number first and
# group with sort=False so groups appear in calendar order.
with_month_num = df.assign(MonthNum=df['Date'].dt.month)

monthly_pattern = (
    with_month_num
    .sort_values('MonthNum', kind='stable')
    .groupby('Month', sort=False)['UnemploymentRate']
    .mean()
)
print("\nMonth-wise Unemployment Pattern:")
print(monthly_pattern)

yearly_pattern = df.groupby('Year')['UnemploymentRate'].mean()
print("\nYear-wise Average Unemployment:")
print(yearly_pattern)

# One value per year-month period.
monthly_year_pattern = df.groupby(df['Date'].dt.to_period('M'))['UnemploymentRate'].mean()
print("\nMonth-Year Trend:")
print(monthly_year_pattern)

# Same calendar ordering as above, within each area.
area_season = (
    with_month_num
    .sort_values(['Area', 'MonthNum'])
    .groupby(['Area', 'Month'], sort=False)['UnemploymentRate']
    .mean()
)
print("\nRural vs Urban Month-wise Pattern:")
print(area_season)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# ===============================
# FINAL INSIGHTS: DATA PREPARATION
# ===============================
# Rows dated before the cut-off form the "before" sample; rows on or after it
# form the "during" sample.
covid_start = '2020-03-01'
before_covid = df[df['Date'] < covid_start]
during_covid = df[df['Date'] >= covid_start]

avg_before = before_covid['UnemploymentRate'].mean()
avg_during = during_covid['UnemploymentRate'].mean()


def _mean_rate_change(group_col):
    """Return the during-minus-before mean unemployment rate per value of group_col."""
    rate_during = during_covid.groupby(group_col)['UnemploymentRate'].mean()
    rate_before = before_covid.groupby(group_col)['UnemploymentRate'].mean()
    return rate_during - rate_before


def _show_before_vs_during(heights, title, ylabel):
    """Draw and show a two-bar chart comparing a before value with a during value."""
    plt.figure(figsize=(6,4))
    plt.bar(["Before Covid", "During Covid"], heights)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.show()


def _show_impact_bars(impact, figsize, title):
    """Draw and show a bar chart of per-group rate changes."""
    plt.figure(figsize=figsize)
    impact.plot(kind='bar')
    plt.title(title)
    plt.ylabel("Increase in Unemployment (%)")
    plt.xlabel(impact.index.name)
    plt.show()


area_impact = _mean_rate_change('Area')
region_impact = _mean_rate_change('Region').sort_values(ascending=False)

# ===============================
# 1. Before vs During Covid Bar Chart
# ===============================
_show_before_vs_during(
    [avg_before, avg_during],
    "Unemployment Before vs During Covid",
    "Unemployment Rate (%)",
)

# ===============================
# 2. Urban vs Rural Covid Impact
# ===============================
_show_impact_bars(
    area_impact,
    (6,4),
    "Urban vs Rural: Covid Impact on Unemployment",
)

# ===============================
# 3. State-wise Covid Impact (ten largest increases)
# ===============================
_show_impact_bars(
    region_impact.head(10),
    (14,6),
    "Top 10 States Most Affected by Covid (Unemployment Increase)",
)

# ===============================
# 4. Monthly Spike Due to Covid
# ===============================
rows_2020 = df[df['Date'].dt.year == 2020]
unemp_2020 = rows_2020.groupby(rows_2020['Date'].dt.to_period('M'))['UnemploymentRate'].mean()

# Periods are converted to strings so they are plotted as plain category labels.
month_labels = unemp_2020.index.astype(str)

plt.figure(figsize=(10,4))
plt.plot(month_labels, unemp_2020.values)
plt.title("2020 Monthly Unemployment Trend (Covid Spike Visible)")
plt.xlabel("Month (2020)")
plt.ylabel("Unemployment Rate (%)")
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

# ===============================
# 5. Labour Participation Comparison
# ===============================
avg_lpr_before = before_covid['LabourParticipationRate'].mean()
avg_lpr_during = during_covid['LabourParticipationRate'].mean()

_show_before_vs_during(
    [avg_lpr_before, avg_lpr_during],
    "Labour Participation Rate Before vs During Covid",
    "Labour Participation (%)",
)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import os

# -------------------------------
# SAVE THE FINAL-INSIGHT GRAPHS TO DISK
# -------------------------------
# Relies on values computed in an earlier cell: avg_before, avg_during,
# area_impact, region_impact, unemp_2020, avg_lpr_before and avg_lpr_during.
folder_name = "unemployment_graphs"

# exist_ok=True makes this safe to re-run without a separate existence check.
os.makedirs(folder_name, exist_ok=True)


def _save_and_close(filename):
    """Write the current figure into `folder_name` as `filename`, then close it.

    bbox_inches="tight" keeps rotated tick labels from being clipped at the
    edge of the saved image; closing the figure stops it from being displayed
    and releases its memory.
    """
    plt.savefig(os.path.join(folder_name, filename), bbox_inches="tight")
    plt.close()


# -------------------------------
# 1. Before vs During Covid
# -------------------------------
plt.figure(figsize=(6,4))
plt.bar(["Before Covid", "During Covid"], [avg_before, avg_during])
plt.title("Unemployment Before vs During Covid")
plt.ylabel("Unemployment Rate (%)")
_save_and_close("before_vs_during_covid.png")

# -------------------------------
# 2. Urban vs Rural Covid Impact
# -------------------------------
plt.figure(figsize=(6,4))
area_impact.plot(kind='bar')
plt.title("Urban vs Rural: Covid Impact on Unemployment")
plt.ylabel("Increase in Unemployment (%)")
plt.xlabel("Area")
_save_and_close("urban_vs_rural_covid_impact.png")

# -------------------------------
# 3. State-wise Covid Impact (ten largest increases)
# -------------------------------
plt.figure(figsize=(14,6))
region_impact.head(10).plot(kind='bar')
plt.title("Top 10 States Most Affected by Covid")
plt.ylabel("Increase in Unemployment (%)")
plt.xlabel("Region")
_save_and_close("state_wise_covid_impact.png")

# -------------------------------
# 4. Monthly Spike Due to Covid
# -------------------------------
plt.figure(figsize=(10,4))
plt.plot(unemp_2020.index.astype(str), unemp_2020.values)
# The dash is written as an escape so the title cannot be corrupted by a
# wrong file encoding (the previous literal was mojibake).
plt.title("Monthly Unemployment Trend \u2013 Covid Spike")
plt.xlabel("Month (2020)")
plt.ylabel("Unemployment Rate (%)")
plt.xticks(rotation=45)
plt.grid(True)
_save_and_close("monthly_spike_covid.png")

# -------------------------------
# 5. Labour Participation Rate Before vs During
# -------------------------------
plt.figure(figsize=(6,4))
plt.bar(["Before Covid", "During Covid"], [avg_lpr_before, avg_lpr_during])
plt.title("Labour Participation Rate Before vs During Covid")
plt.ylabel("Labour Participation (%)")
_save_and_close("labour_participation_comparison.png")

print("All graphs saved successfully in folder:", folder_name)
