# Economic - Unemployment

In [1]:
# Import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides pandas' SettingWithCopyWarning and
# FutureWarning messages; consider narrowing it to specific categories.
warnings.simplefilter('ignore')

In [2]:
# Read the unemployment CSV into a DataFrame
UNEMPLOYMENT_CSV_PATH = "/dataset/economic/Unemployment.csv"
UNEMP = pd.read_csv(UNEMPLOYMENT_CSV_PATH)

In [3]:
# Preview the top five rows of the raw frame
UNEMP.head(n=5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Aruba,ABW,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Africa Eastern and Southern,AFE,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.801597,7.896982,7.898076,7.768859,7.533566,7.44631,7.406224,7.438247,7.479567,7.422605,7.316977,7.278188,7.236416,7.048158,6.874314,6.818582,6.765904,6.745309,6.855173,7.269673,7.145734,6.949656,6.841767,6.807348,6.896303,7.042617,7.145377,7.110553,7.308025,7.734146,8.065361,7.555503,7.59107
2,Afghanistan,AFG,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.946,7.94,7.961,7.98,7.967,7.941,7.92,7.941,7.953,7.955,7.958,7.939,7.922,7.914,7.914,7.918,7.908,7.92,7.914,7.914,7.916,7.909,7.919,7.915,9.011,10.1,11.184,11.206,11.224,11.71,11.934,14.1,14.386
3,Africa Western and Central,AFW,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.158535,4.277885,4.376871,4.399825,4.390342,4.339947,4.323778,4.343833,4.554083,4.610096,4.541772,4.361402,4.245707,4.158426,4.144384,3.998829,3.967087,3.990419,4.035142,4.029018,4.001803,4.018677,3.71367,3.891935,4.189575,4.127635,4.203684,4.224509,4.247435,4.69756,4.520787,3.726039,3.379122
4,Angola,AGO,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,16.468,16.621,17.074,17.105,16.711,16.013,15.928,16.148,16.392,16.502,16.539,16.342,16.37,16.267,16.268,16.121,16.08,16.17,16.392,16.598,16.77,16.557,16.487,16.401,16.491,16.58,16.617,16.599,16.497,16.676,15.799,14.693,14.62


In [4]:
# Shorten the "Country Name" header to "country"
UNEMP = UNEMP.rename({"Country Name": "country"}, axis="columns")

In [5]:
# Discard the code/indicator metadata columns; only the country and year columns are used below
UNEMP = UNEMP.drop(['Country Code', 'Indicator Name', 'Indicator Code'], axis='columns')

In [6]:
# Only keep OECD countries (names must match the 'country' column exactly)
# NOTE(review): the Slovak Republic is an OECD member but is not listed here —
# confirm the omission is intentional.
countries_to_keep = [
    'Australia', 'Austria', 'Belgium', 'Canada', 'Chile', 'Colombia',
    'Costa Rica', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France',
    'Germany', 'Greece', 'Hungary', 'Ireland', 'Iceland', 'Israel', 'Italy',
    'Korea, Rep.', 'Japan', 'Latvia', 'Lithuania', 'Luxembourg', 'Mexico',
    'Netherlands', 'New Zealand', 'Norway', 'Poland', 'Portugal', 'Slovenia',
    'Spain', 'Sweden', 'Switzerland', 'Turkiye', 'United Kingdom',
    'United States',
]
# Take an explicit copy: later cells assign to UNEMP['country'], and doing that on a
# boolean-mask selection triggers pandas' SettingWithCopyWarning.
UNEMP = UNEMP[UNEMP['country'].isin(countries_to_keep)].copy()

In [7]:
# Use the label "Turkey" in place of the source spelling "Turkiye"
UNEMP['country'] = UNEMP['country'].replace({'Turkiye': 'Turkey'})

In [8]:
# Use the label "Korea" in place of the source spelling "Korea, Rep."
UNEMP['country'] = UNEMP['country'].replace({'Korea, Rep.': 'Korea'})

In [9]:
# Drop the year columns 1960 through 1988 (column labels are strings)
UNEMP = UNEMP.drop(columns=[str(year) for year in range(1960, 1989)])

In [10]:
# Count the missing cells in each row (one row per country)
UNEMP.isna().sum(axis='columns')

13     2
14     2
17     2
35     2
37     2
39     2
45     2
48     2
54     2
55     2
58     2
70     2
71     2
75     2
77     2
81     2
89     2
101    2
111    2
114    2
115    2
116    2
119    2
126    2
143    2
144    2
145    2
154    2
176    2
177    2
180    2
190    2
194    2
222    2
223    2
244    2
251    2
dtype: int64

In [11]:
# Fill the 1989 and 1990 columns with each country's 1991 value
for backfill_year in ('1989', '1990'):
    UNEMP[backfill_year] = UNEMP['1991']

In [13]:
# Melt the dataframe: one row per (country, year) with the rate in 'value'
UNEMP_melted = pd.melt(UNEMP, id_vars=['country'], var_name='year', value_name='value')
# Cast year to int BEFORE sorting so rows are ordered numerically rather than by
# string comparison; the percentage-change step relies on chronological order
# within each country.
UNEMP_melted['year'] = UNEMP_melted['year'].astype(int)
UNEMP_melted = UNEMP_melted.sort_values(by=['country', 'year']).reset_index(drop=True)

In [14]:
# Year-over-year change of 'value' within each country, expressed in percent
UNEMP_melted['unemployment_percentage_change'] = (
    UNEMP_melted.groupby('country')['value'].pct_change().mul(100)
)

In [15]:
# Remove rows before 1990. Filtering with >= (rather than excluding only 1989)
# keeps this correct even if earlier year columns are ever retained upstream.
UNEMP_melted = UNEMP_melted[UNEMP_melted['year'] >= 1990].reset_index(drop=True)

In [16]:
# Drop the raw 'value' column, leaving only the percentage-change series
UNEMP_melted = UNEMP_melted.drop('value', axis='columns')

In [17]:
# Count the missing values in each column
UNEMP_melted.isna().sum()

country                           0
year                              0
unemployment_percentage_change    0
dtype: int64

In [18]:
# Count the rows that exactly repeat an earlier row across all columns
UNEMP_melted.duplicated(keep='first').sum()

0

In [19]:
# Print the frame's structure: index range, column names, non-null counts, dtypes and memory usage
UNEMP_melted.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 3 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country                         1258 non-null   object 
 1   year                            1258 non-null   int64  
 2   unemployment_percentage_change  1258 non-null   float64
dtypes: float64(1), int64(1), object(1)
memory usage: 29.6+ KB


In [20]:
# View descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
# NOTE(review): the recorded output shows a maximum change of 1050% — worth checking
# which country/year produces it before using this series downstream.
UNEMP_melted.describe()

Unnamed: 0,year,unemployment_percentage_change
count,1258.0,1258.0
mean,2006.5,1.956103
std,9.81461,35.660649
min,1990.0,-37.153757
25%,1998.0,-9.283523
50%,2006.5,-1.535673
75%,2015.0,6.269845
max,2023.0,1050.0
