# Importing required libraries

In [1]:
import numpy as np, pandas as pd, plotly.express as px, plotly.graph_objects as go
from plotly.subplots import make_subplots as mks

# Loading the datasets

In [2]:
data = pd.read_csv("Unemployment in India.csv")
data_rate = pd.read_csv("Unemployment_Rate_upto_11_2020.csv")

# Data Description

## Dataset Details:

#### - The dataset provides insights into the unemployment scenario across different Indian states:

1. **States**: The states within India.

2. **Date**: The date when the unemployment rate was recorded.

3. **Measuring Frequency**: The frequency at which measurements were taken (Monthly).

4. **Estimated Unemployment Rate (%)**: The percentage of *individuals unemployed* in each state of India.

5. **Estimated Employed Individuals**: The count of people *currently employed*.

6. **Estimated Labour Participation Rate (%)**: The *proportion of the working population* (age group: 16-64 years) participating in the labor force, either *employed* or *actively seeking employment*.

7. **Area**: The area to which the population belongs to, which are of two types in the dataset which are **Rural** and **Urban**.

8. **Region**: The Region tells about the section of the country to which the data is belonging to, which are of 5  types in this dataset **South, Northeast, East, West, North**

In [3]:
data.head(10)

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Area
0,Andhra Pradesh,31-05-2019,Monthly,3.65,11999139.0,43.24,Rural
1,Andhra Pradesh,30-06-2019,Monthly,3.05,11755881.0,42.05,Rural
2,Andhra Pradesh,31-07-2019,Monthly,3.75,12086707.0,43.5,Rural
3,Andhra Pradesh,31-08-2019,Monthly,3.32,12285693.0,43.97,Rural
4,Andhra Pradesh,30-09-2019,Monthly,5.17,12256762.0,44.68,Rural
5,Andhra Pradesh,31-10-2019,Monthly,3.52,12017412.0,43.01,Rural
6,Andhra Pradesh,30-11-2019,Monthly,4.12,11397681.0,41.0,Rural
7,Andhra Pradesh,31-12-2019,Monthly,4.38,12528395.0,45.14,Rural
8,Andhra Pradesh,31-01-2020,Monthly,4.84,12016676.0,43.46,Rural
9,Andhra Pradesh,29-02-2020,Monthly,5.91,11723617.0,42.83,Rural


In [4]:
data.shape

(768, 7)

In [5]:
data_rate.head(10)

Unnamed: 0,Region,Date,Frequency,Estimated Unemployment Rate (%),Estimated Employed,Estimated Labour Participation Rate (%),Region.1,longitude,latitude
0,Andhra Pradesh,31-01-2020,M,5.48,16635535,41.02,South,15.9129,79.74
1,Andhra Pradesh,29-02-2020,M,5.83,16545652,40.9,South,15.9129,79.74
2,Andhra Pradesh,31-03-2020,M,5.79,15881197,39.18,South,15.9129,79.74
3,Andhra Pradesh,30-04-2020,M,20.51,11336911,33.1,South,15.9129,79.74
4,Andhra Pradesh,31-05-2020,M,17.43,12988845,36.46,South,15.9129,79.74
5,Andhra Pradesh,30-06-2020,M,3.31,19805400,47.41,South,15.9129,79.74
6,Andhra Pradesh,31-07-2020,M,8.34,15431615,38.91,South,15.9129,79.74
7,Andhra Pradesh,31-08-2020,M,6.96,15251776,37.83,South,15.9129,79.74
8,Andhra Pradesh,30-09-2020,M,6.4,15220312,37.47,South,15.9129,79.74
9,Andhra Pradesh,31-10-2020,M,6.59,15157557,37.34,South,15.9129,79.74


In [6]:
data_rate.shape

(267, 9)

In [7]:
data_rate[" Date"].unique()

array([' 31-01-2020', ' 29-02-2020', ' 31-03-2020', ' 30-04-2020',
       ' 31-05-2020', ' 30-06-2020', ' 31-07-2020', ' 31-08-2020',
       ' 30-09-2020', ' 31-10-2020'], dtype=object)

# Data Preprocessing

## Handling missing values

In [8]:
data.isnull().sum()

Region                                      28
 Date                                       28
 Frequency                                  28
 Estimated Unemployment Rate (%)            28
 Estimated Employed                         28
 Estimated Labour Participation Rate (%)    28
Area                                        28
dtype: int64

In [9]:
data_rate.isnull().sum()

Region                                      0
 Date                                       0
 Frequency                                  0
 Estimated Unemployment Rate (%)            0
 Estimated Employed                         0
 Estimated Labour Participation Rate (%)    0
Region.1                                    0
longitude                                   0
latitude                                    0
dtype: int64

In [10]:
data.dropna(inplace=True)

In [11]:
data.isnull().sum()

Region                                      0
 Date                                       0
 Frequency                                  0
 Estimated Unemployment Rate (%)            0
 Estimated Employed                         0
 Estimated Labour Participation Rate (%)    0
Area                                        0
dtype: int64

## Renaming the columns
#### To make it easier for usage

In [12]:
data.drop(columns=" Frequency", inplace=True)
data.rename(columns={"Region":"State", " Date": "Date", " Estimated Unemployment Rate (%)": "Unemp_Rate (%)", " Estimated Employed": "Employed (%)", " Estimated Labour Participation Rate (%)": "Labour_Participation_Rate (%)"}, inplace=True)
data

Unnamed: 0,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%),Area
0,Andhra Pradesh,31-05-2019,3.65,11999139.0,43.24,Rural
1,Andhra Pradesh,30-06-2019,3.05,11755881.0,42.05,Rural
2,Andhra Pradesh,31-07-2019,3.75,12086707.0,43.50,Rural
3,Andhra Pradesh,31-08-2019,3.32,12285693.0,43.97,Rural
4,Andhra Pradesh,30-09-2019,5.17,12256762.0,44.68,Rural
...,...,...,...,...,...,...
749,West Bengal,29-02-2020,7.55,10871168.0,44.09,Urban
750,West Bengal,31-03-2020,6.67,10806105.0,43.34,Urban
751,West Bengal,30-04-2020,15.63,9299466.0,41.20,Urban
752,West Bengal,31-05-2020,15.22,9240903.0,40.67,Urban


In [13]:
data_rate.drop(columns={" Frequency", "longitude", "latitude"}, inplace=True)
data_rate.rename(columns={'Region': "State", ' Date':"Date", ' Frequency': "Frequency", ' Estimated Unemployment Rate (%)': "Unemp_Rate (%)",
       ' Estimated Employed': "Employed (%)", ' Estimated Labour Participation Rate (%)': "Labour_Participation_Rate (%)",
       'Region.1': "Region"},inplace=True)
data_rate

Unnamed: 0,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%),Region
0,Andhra Pradesh,31-01-2020,5.48,16635535,41.02,South
1,Andhra Pradesh,29-02-2020,5.83,16545652,40.90,South
2,Andhra Pradesh,31-03-2020,5.79,15881197,39.18,South
3,Andhra Pradesh,30-04-2020,20.51,11336911,33.10,South
4,Andhra Pradesh,31-05-2020,17.43,12988845,36.46,South
...,...,...,...,...,...,...
262,West Bengal,30-06-2020,7.29,30726310,40.39,East
263,West Bengal,31-07-2020,6.83,35372506,46.17,East
264,West Bengal,31-08-2020,14.87,33298644,47.48,East
265,West Bengal,30-09-2020,9.35,35707239,47.73,East


In [14]:
data_rate.columns

Index(['State', 'Date', 'Unemp_Rate (%)', 'Employed (%)',
       'Labour_Participation_Rate (%)', 'Region'],
      dtype='object')

In [15]:
data.columns

Index(['State', 'Date', 'Unemp_Rate (%)', 'Employed (%)',
       'Labour_Participation_Rate (%)', 'Area'],
      dtype='object')

## Removing any present duplicate values

In [16]:
data.duplicated().sum()

0

In [17]:
data_rate.duplicated().sum()

0

## Feature Engineering

In [18]:
data["Date"] = pd.to_datetime(data["Date"])
data["Year"] = data["Date"].dt.year
# data["Month"] = data["Date"].dt.month.apply(lambda x: cd.month_abbr[x])
data["Date"] = pd.to_datetime(data["Date"]).dt.strftime("%Y-%m")
data["Date"].reset_index(drop=True)
data["Year"].astype("object")

  data["Date"] = pd.to_datetime(data["Date"])


0      2019
1      2019
2      2019
3      2019
4      2019
       ... 
749    2020
750    2020
751    2020
752    2020
753    2020
Name: Year, Length: 740, dtype: object

In [19]:
data["Date"].unique()

array(['2019-05', '2019-06', '2019-07', '2019-08', '2019-09', '2019-10',
       '2019-11', '2019-12', '2020-01', '2020-02', '2020-03', '2020-04',
       '2020-05', '2020-06'], dtype=object)

In [20]:
data.reset_index(inplace=True, drop=True)
data

Unnamed: 0,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%),Area,Year
0,Andhra Pradesh,2019-05,3.65,11999139.0,43.24,Rural,2019
1,Andhra Pradesh,2019-06,3.05,11755881.0,42.05,Rural,2019
2,Andhra Pradesh,2019-07,3.75,12086707.0,43.50,Rural,2019
3,Andhra Pradesh,2019-08,3.32,12285693.0,43.97,Rural,2019
4,Andhra Pradesh,2019-09,5.17,12256762.0,44.68,Rural,2019
...,...,...,...,...,...,...,...
735,West Bengal,2020-02,7.55,10871168.0,44.09,Urban,2020
736,West Bengal,2020-03,6.67,10806105.0,43.34,Urban,2020
737,West Bengal,2020-04,15.63,9299466.0,41.20,Urban,2020
738,West Bengal,2020-05,15.22,9240903.0,40.67,Urban,2020


In [21]:
data_rate["Date"] = pd.to_datetime(data_rate["Date"])
data_rate["Year"] = data_rate["Date"].dt.year
data_rate["Date"] = pd.to_datetime(data_rate["Date"]).dt.strftime("%Y-%m")
data_rate["Date"].reset_index(drop=True)

  data_rate["Date"] = pd.to_datetime(data_rate["Date"])


0      2020-01
1      2020-02
2      2020-03
3      2020-04
4      2020-05
        ...   
262    2020-06
263    2020-07
264    2020-08
265    2020-09
266    2020-10
Name: Date, Length: 267, dtype: object

In [22]:
employed_2020 = data["Employed (%)"].where(data["Year"] == 2020).dropna()
employed_2019 = data["Employed (%)"].where(data["Year"] == 2019).dropna()
employed_2019 = round((employed_2019 / 512600000.0) * 100, 3).reset_index().drop(columns="index")
employed_2020 = round((employed_2020 / 495900000.0) * 100, 3).reset_index().drop(columns="index")
employed_comb = pd.concat(objs=[employed_2019, employed_2020], ignore_index= True)
data["Employed (%)"] = employed_comb
# data["Employed (%)"] = round(data["Employed (%)"] / 1000000.0, 3)
data

Unnamed: 0,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%),Area,Year
0,Andhra Pradesh,2019-05,3.65,2.341,43.24,Rural,2019
1,Andhra Pradesh,2019-06,3.05,2.293,42.05,Rural,2019
2,Andhra Pradesh,2019-07,3.75,2.358,43.50,Rural,2019
3,Andhra Pradesh,2019-08,3.32,2.397,43.97,Rural,2019
4,Andhra Pradesh,2019-09,5.17,2.391,44.68,Rural,2019
...,...,...,...,...,...,...,...
735,West Bengal,2020-02,7.55,2.192,44.09,Urban,2020
736,West Bengal,2020-03,6.67,2.179,43.34,Urban,2020
737,West Bengal,2020-04,15.63,1.875,41.20,Urban,2020
738,West Bengal,2020-05,15.22,1.863,40.67,Urban,2020


In [23]:
employed_2020 = data_rate["Employed (%)"].where(data_rate["Year"] == 2020).dropna()
employed_2019 = data_rate["Employed (%)"].where(data_rate["Year"] == 2019).dropna()
employed_2019 = round((employed_2019 / 485510000.0) * 100, 3).reset_index().drop(columns="index")
employed_2020 = round((employed_2020 / 447180000.0) * 100, 3).reset_index().drop(columns="index")
employed_comb = pd.concat(objs=[employed_2019, employed_2020], ignore_index= True)
data_rate["Employed (%)"] = employed_comb
# data_rate["Employed (%)"] = round(data_rate["Employed (%)"] / 1000000, 3)
data_rate.drop(columns="Year", inplace=True)
data_rate

Unnamed: 0,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%),Region
0,Andhra Pradesh,2020-01,5.48,3.720,41.02,South
1,Andhra Pradesh,2020-02,5.83,3.700,40.90,South
2,Andhra Pradesh,2020-03,5.79,3.551,39.18,South
3,Andhra Pradesh,2020-04,20.51,2.535,33.10,South
4,Andhra Pradesh,2020-05,17.43,2.905,36.46,South
...,...,...,...,...,...,...
262,West Bengal,2020-06,7.29,6.871,40.39,East
263,West Bengal,2020-07,6.83,7.910,46.17,East
264,West Bengal,2020-08,14.87,7.446,47.48,East
265,West Bengal,2020-09,9.35,7.985,47.73,East


## Removal of outliers

In [24]:
def removing_outliers(data):
    sorted(data)
    Q1,Q3 = data.quantile([0.25,0.75])
    IQR = Q3 - Q1
    lr = Q1 - (1.5*IQR)
    ur = Q3 + (1.5*IQR)
    return lr,ur
    

In [25]:
data.head()

Unnamed: 0,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%),Area,Year
0,Andhra Pradesh,2019-05,3.65,2.341,43.24,Rural,2019
1,Andhra Pradesh,2019-06,3.05,2.293,42.05,Rural,2019
2,Andhra Pradesh,2019-07,3.75,2.358,43.5,Rural,2019
3,Andhra Pradesh,2019-08,3.32,2.397,43.97,Rural,2019
4,Andhra Pradesh,2019-09,5.17,2.391,44.68,Rural,2019


In [26]:
print("=="*54, "DATA", "=="*54)
px.box(x=data["Unemp_Rate (%)"], title= "Unemployment Rate").show()
px.box(x=data["Employed (%)"], title= "Employment").show()
px.box(x=data["Labour_Participation_Rate (%)"], title= "Labour Participation Rate").show()
print("=="*53, "DATA RATE", "=="*53)
px.box(x=data_rate["Unemp_Rate (%)"], title= "Unemployment Rate").show()
px.box(x=data_rate["Employed (%)"], title= "Employment").show()
px.box(x=data_rate["Labour_Participation_Rate (%)"], title= "Labour Participation Rate").show()





In [27]:
data_cols = ["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]
for i in data_cols:
    l,u = removing_outliers(data[i])
    data[i] = np.where(data[i]>u,u,data[i])
    data[i] = np.where(data[i]<l,l,data[i])
    
    l,u = removing_outliers(data_rate[i])
    data_rate[i] = np.where(data_rate[i]>u,u,data_rate[i])
    data_rate[i] = np.where(data_rate[i]<l,l,data_rate[i])

In [28]:
print("=="*54, "DATA", "=="*54)
px.box(x=data["Unemp_Rate (%)"], title= "Unemployment Rate").show()
px.box(x=data["Employed (%)"], title= "Employment").show()
px.box(x=data["Labour_Participation_Rate (%)"], title= "Labour Participation Rate").show()
print("=="*53, "DATA RATE", "=="*53)
px.box(x=data_rate["Unemp_Rate (%)"], title= "Unemployment Rate").show()
px.box(x=data_rate["Employed (%)"], title= "Employment").show()
px.box(x=data_rate["Labour_Participation_Rate (%)"], title= "Labour Participation Rate").show()





# Visualisation and Analysis

### Unemployment in India DATASET

In [29]:
data_area_count = data["Area"].value_counts().reset_index()
px.bar(data_area_count, x="Area", y="count", color="Area", labels={"Area":"Area", "count": "Count"}, title="Area-wise frequency", width=1000).show()

In [30]:
px.pie(data["Area"], width=800, title="Percentage of Rural and Urban distribution in total", names="Area")

In [31]:
data_state_count = data["State"].value_counts().reset_index()
px.bar(data_state_count,x="State", y = "count",color="State", labels={"States": "State", "count":"Frequency"}, title= "Frequency of each state").show()
# data_state_count

In [32]:
data_state_unemploymentRate = data[["Date", "State", "Unemp_Rate (%)"]].reset_index(drop=True)
data_state_unemploymentRate_Date_mean = data[["Unemp_Rate (%)"]].groupby(by=[data["Date"], data["State"]]).mean().reset_index()
fig = px.bar(data_state_unemploymentRate_Date_mean, x="Date", y="Unemp_Rate (%)", color="State", height=700, range_y=[0,35], animation_frame="State", title="State-wise Unemployment Rates (in %) [May-2019 -> June-2020]")
fig.show()
# data_state_unemploymentRate_Date_mean

In [33]:
data_state_unemploymentRate_mean = data[["Unemp_Rate (%)"]].groupby(data["State"]).mean().reset_index()
data_state_EmploymentRate_mean = data[["Employed (%)"]].groupby(data["State"]).mean().reset_index()
data_state_Labour_Participation_Rate_mean = data[["Labour_Participation_Rate (%)"]].groupby(data["State"]).mean().reset_index()
fig_state = go.Figure()

fig_state.add_trace(go.Bar(x=data_state_unemploymentRate_mean["State"], y=data_state_unemploymentRate_mean["Unemp_Rate (%)"], name="Unemployment Rate (%)"))
fig_state.add_trace(go.Bar(x=data_state_EmploymentRate_mean["State"], y=data_state_EmploymentRate_mean["Employed (%)"], name="Employment Rate (%)"))
fig_state.add_trace(go.Bar(x=data_state_Labour_Participation_Rate_mean["State"], y=data_state_Labour_Participation_Rate_mean["Labour_Participation_Rate (%)"], name="Labour Participation Rate (%)"))

fig_state.update_layout(
    title="Comparision of State-wise Unemployment, Employment and Labour Participation Rates (in %)",
    xaxis_title = "State",
    yaxis_title = "Rates (in %)"
).show()

In [34]:
data_month_unemploymentRate_allStates = data.groupby(by=[data["Date"], data["State"]])["Unemp_Rate (%)"].mean().reset_index().sort_values(by="Date",ascending=False).sort_index()
data_month_employmentRate_allStates = data.groupby(by=[data["Date"], data["State"]])["Employed (%)"].mean().reset_index().sort_values(by="Date",ascending=False).sort_index()
data_month_LabourParticipationRate_allStates = data.groupby(by=[data["Date"], data["State"]])["Labour_Participation_Rate (%)"].mean().reset_index().sort_values(by="Date",ascending=False).sort_index()
fig = px.bar(data_month_unemploymentRate_allStates,x="State", y="Unemp_Rate (%)", color="State",labels={"Unemp_Rate (%)": "Unemployment Rate (in %)"}, hover_data=["Date"], title="Month-wise mean Unemployment Rates of all States(in %)",animation_frame="Date", range_y=(0,55),height=700)
fig.show()
fig1 = px.bar(data_month_employmentRate_allStates,x="State", y="Employed (%)", color="State",labels={"Employed (%)": "Indivisuals Employed (in %)"}, hover_data=["Date"], title="Month-wise mean Employment Rates of all States(in %)",animation_frame="Date", range_y=(0,6),height=700)
fig1.show()
fig2 = px.bar(data_month_LabourParticipationRate_allStates,x="State", y="Labour_Participation_Rate (%)", color="State", hover_data=["Date"], title="Month-wise mean Labour Participation Rates of all States(in %)",animation_frame="Date", range_y=(0,100),height=700)
fig2.show()

In [35]:
data_employment_combined_year = data[["Year", "Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].reset_index(drop=True)
data_employment_combined_year_mean = data[["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].groupby(data["Year"]).mean().reset_index()
data_employment_combined_year_mean

Unnamed: 0,Year,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%)
0,2019,9.38914,1.331407,43.358038
1,2020,13.72696,1.404819,40.549468


In [36]:
data_employment_combined_Date = data[["Date", "Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].reset_index(drop=True)
data_employment_combined_Date_mean = data[["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].groupby(data["Date"]).mean().reset_index()
data_employment_combined_Date

Unnamed: 0,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%)
0,2019-05,3.65,2.341,43.24
1,2019-06,3.05,2.293,42.05
2,2019-07,3.75,2.358,43.50
3,2019-08,3.32,2.397,43.97
4,2019-09,5.17,2.391,44.68
...,...,...,...,...
735,2020-02,7.55,2.192,44.09
736,2020-03,6.67,2.179,43.34
737,2020-04,15.63,1.875,41.20
738,2020-05,15.22,1.863,40.67


In [37]:
px.scatter_3d(data_employment_combined_Date,x="Unemp_Rate (%)", y="Employed (%)", z="Labour_Participation_Rate (%)", color="Date", height=800, width=1700, labels={"Unemp_Rate (%)": "Unemployment Rate (in %)", "Employed":"Employed indivisuals (in %)"}, title="Date-wise Unemployment, Employment and Labour Participation Rates (in %)").show()
px.scatter_3d(data_employment_combined_Date_mean,x="Unemp_Rate (%)", y="Employed (%)", z="Labour_Participation_Rate (%)", color="Date", height=800, width=1700, labels={"Unemp_Rate (%)": "Unemployment Rate (in %)", "Employed":"Employed indivisuals (in %)"}, title="Date-wise mean Unemployment, Employment and Labour Participation Rates (in %)").show()

In [38]:
px.scatter_3d(data_employment_combined_year,x="Unemp_Rate (%)", y="Employed (%)", z="Labour_Participation_Rate (%)", color="Year", height=800, width=1700, labels={"Unemp_Rate (%)": "Unemployment Rate (in %)", "Employed":"Employed indivisuals (in %)"}, title="Year-wise Unemployment, Employment and Labour Participation Rates (in %)").show()
px.scatter_3d(data_employment_combined_year_mean,x="Unemp_Rate (%)", y="Employed (%)", z="Labour_Participation_Rate (%)",color= "Year",height=800, width=1700, labels={"Unemp_Rate (%)": "Unemployment Rate (in %)", "Employed":"Employed indivisuals (in %)"}, title="Year-wise mean Unemployment, Employment and Labour Participation Rates (in %)").show()

In [39]:
fig = px.line(data_employment_combined_Date_mean, x="Date", y=["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"], labels={"Unemp_Rate (%)": "Unemployment Rate (in %)", "Employed": "Indivisuals Employed"}, title= "Date-wise mean Comparision of the Employment, Unemployment and Labour participation Rates ", markers=True, height=900)
# fig.show()
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="Rate (in %)",
    legend_title_text="Rate Type"
).show()

### Unemployment_Rate_upto_11_2020 DATASET

In [40]:
print(data_rate["State"].unique(), "\n\n", data_rate["Date"].unique(), "\n\n", data_rate["Region"].unique())

['Andhra Pradesh' 'Assam' 'Bihar' 'Chhattisgarh' 'Delhi' 'Goa' 'Gujarat'
 'Haryana' 'Himachal Pradesh' 'Jammu & Kashmir' 'Jharkhand' 'Karnataka'
 'Kerala' 'Madhya Pradesh' 'Maharashtra' 'Meghalaya' 'Odisha' 'Puducherry'
 'Punjab' 'Rajasthan' 'Sikkim' 'Tamil Nadu' 'Telangana' 'Tripura'
 'Uttar Pradesh' 'Uttarakhand' 'West Bengal'] 

 ['2020-01' '2020-02' '2020-03' '2020-04' '2020-05' '2020-06' '2020-07'
 '2020-08' '2020-09' '2020-10'] 

 ['South' 'Northeast' 'East' 'West' 'North']


In [41]:
data_rate

Unnamed: 0,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%),Region
0,Andhra Pradesh,2020-01,5.48,3.720,41.02,South
1,Andhra Pradesh,2020-02,5.83,3.700,40.90,South
2,Andhra Pradesh,2020-03,5.79,3.551,39.18,South
3,Andhra Pradesh,2020-04,20.51,2.535,33.10,South
4,Andhra Pradesh,2020-05,17.43,2.905,36.46,South
...,...,...,...,...,...,...
262,West Bengal,2020-06,7.29,6.871,40.39,East
263,West Bengal,2020-07,6.83,7.910,46.17,East
264,West Bengal,2020-08,14.87,7.446,47.48,East
265,West Bengal,2020-09,9.35,7.985,47.73,East


In [42]:
px.pie(data_rate, names="Region", height=700,title="Frequency of Region")

In [43]:
datarate_state_region_unemp_mean = data_rate.groupby(["Region", "State"])["Unemp_Rate (%)"].mean().reset_index()
px.sunburst(datarate_state_region_unemp_mean, path=["Region", "State"], height=700, title="Distribution of States on the basis of Region with each state's Unemployment Rates", values="Unemp_Rate (%)", labels={"Unemp_Rate (%)": "Unemployment Rate (%)"}).show()

In [44]:
datarate_region_combined_mean = data_rate.groupby("Region")[["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].mean().reset_index()
fig_region = go.Figure()
fig_region.add_trace(go.Bar(x=datarate_region_combined_mean["Region"], y=datarate_region_combined_mean["Unemp_Rate (%)"], name="Unemployment Rate (%)"))
fig_region.add_trace(go.Bar(x=datarate_region_combined_mean["Region"], y=datarate_region_combined_mean["Employed (%)"], name="Employment Rate (%)"))
fig_region.add_trace(go.Bar(x=datarate_region_combined_mean["Region"], y=datarate_region_combined_mean["Labour_Participation_Rate (%)"], name="Labour Participation Rate (%)"))

fig_region.update_layout(
    title = "Region-wise Unemployment, Employment and Labour Participation Rates (in %)",
    xaxis_title = "Month",
    yaxis_title = "Rates (in %)"
).show()

In [45]:
datarate_region_combined = data_rate[["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].reset_index().drop(columns="index")
px.line(datarate_region_combined, y=["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"], title="Unemployment, Employment and Labour Participation Rates (in %)").show()

In [46]:
datarate_month_combined_mean = data_rate.groupby("Date")[["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].mean().reset_index()
fig_month = go.Figure()
fig_month.add_trace(go.Bar(x=datarate_month_combined_mean["Date"], y=datarate_month_combined_mean["Unemp_Rate (%)"], name="Unemployment Rate (%)", marker_color="#071633"))
fig_month.add_trace(go.Bar(x=datarate_month_combined_mean["Date"], y=datarate_month_combined_mean["Employed (%)"], name="Employed (%)", marker_color="#FF6347"))
fig_month.add_trace(go.Bar(x=datarate_month_combined_mean["Date"], y=datarate_month_combined_mean["Labour_Participation_Rate (%)"], name="Labour Participation Rate (%)", marker_color="#2E8B57"))

fig_month.update_layout(
    title="Date-wise Unemployment, Employment and Labour Participation Rates (in %)",
    xaxis_title="Date",
    yaxis_title="Rate (in %)",
    barmode='group'
).show()


In [47]:
datarate_state_month_unemp_mean = data_rate.groupby(["Date", "State"])["Unemp_Rate (%)"].mean().reset_index()
datarate_state_month_emp_mean = data_rate.groupby(["Date", "State"])["Employed (%)"].mean().reset_index()
datarate_state_month_labour_mean = data_rate.groupby(["Date", "State"])["Labour_Participation_Rate (%)"].mean().reset_index()
px.bar(datarate_state_month_unemp_mean, x="Date", y="Unemp_Rate (%)", animation_frame="State", color="Date", title="State & Date wise Unemployment Rates (in %)", range_y=(0,60)).show()
px.bar(datarate_state_month_emp_mean, x="Date", y="Employed (%)", animation_frame="State", color="Date", title="State & Date wise Employment Rates (in %)", range_y=(0,60)).show()
px.bar(datarate_state_month_labour_mean, x="Date", y="Labour_Participation_Rate (%)", animation_frame="State", color="Date", title="State & Date wise Labour Participation Rates (in %)", range_y=(0,60)).show()

In [48]:
datarate_all_combined = data_rate.groupby(["Region", "State", "Date"])[["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]].mean().reset_index()
px.sunburst(datarate_all_combined, path=["Date", "Region", "State"], names= "Region", hover_data=["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"], height=1000, title="Date, Region and State-wise Unemployment, Employment and Labour articipation Rates (in %)", labels={"Unemp_Rate (%)": "Unemployement Rate (%)", "Employed (%)": "Employement Rate (%)", "Labour_Participation_Rate (%)": "Labour Participation Rate (%)"}, ).show()
datarate_all_combined

Unnamed: 0,Region,State,Date,Unemp_Rate (%),Employed (%),Labour_Participation_Rate (%)
0,East,Bihar,2020-01,10.61,5.903,37.72
1,East,Bihar,2020-02,10.29,5.877,37.35
2,East,Bihar,2020-03,15.43,5.751,38.69
3,East,Bihar,2020-04,34.62,3.588,38.17
4,East,Bihar,2020-05,34.62,3.641,38.16
...,...,...,...,...,...,...
262,West,Maharashtra,2020-06,9.23,8.880,42.71
263,West,Maharashtra,2020-07,3.89,9.168,41.57
264,West,Maharashtra,2020-08,6.23,9.160,42.49
265,West,Maharashtra,2020-09,4.55,9.402,42.75


In [49]:
df = datarate_all_combined
fig = go.Figure()
colors = {
    "Unemp_Rate (%)": "#071633",
    "Employed (%)": "#FF6347",
    "Labour_Participation_Rate (%)": "#2E8B57"
}

for rate in ["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]:
    for state in df["State"].unique():
        state_df = df[df["State"] == state]
        fig.add_trace(go.Bar(
            x=state_df["Date"],
            y=state_df[rate],
            name=f"{state}-{rate}",
            marker_color=colors[rate],
        ))

fig.update_layout(
    title="Unemployment, Employment and Labour Participation Rates by State and Date (Compressed)",
    xaxis_title="Date",
    yaxis_title="Rate (%)",
    barmode='group',
    width = 10000
).show()

In [50]:
df = datarate_all_combined

colors = {
    "Unemp_Rate (%)": "#071633",
    "Employed (%)": "#FF6347",
    "Labour_Participation_Rate (%)": "#2E8B57"
}

custom_labels = {
    "Unemp_Rate (%)": "Unemployment Rate (%)",
    "Employed (%)": "Employment Rate (%)",
    "Labour_Participation_Rate (%)": "Labour Participation Rate (%)"
}

fig_combined = go.Figure()
states = df["State"].unique()
fig_combined = mks(rows=9, cols=3, subplot_titles=states)

for idx, state in enumerate(states):
    row = (idx // 3) + 1
    col = (idx % 3) + 1
    state_df = df[df["State"] == state]
    for rate in ["Unemp_Rate (%)", "Employed (%)", "Labour_Participation_Rate (%)"]:
        fig_combined.add_trace(go.Bar(
            x=state_df["Date"],
            y=state_df[rate],
            name=custom_labels[rate],
            marker_color=colors[rate],
            showlegend=idx == 0
        ), row=row, col=col)

    fig_combined.update_xaxes(title_text="Date", row=row, col=col)
    fig_combined.update_yaxes(title_text="Rate (in %)", row=row, col=col)

fig_combined.update_layout(
    title="Unemployment, Employment and Labour Participation Rates by State and Date",
    xaxis_title="Date",
    yaxis_title="Rate (%)",
    barmode='group',
    height = 4000,
    width = 1800,
    legend_title="Rate Type"
).show()