# Merging Industry Employment with LAUS

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
from os import *
import sys
sys.path.append("../")

In [2]:
# Change directory
def change_dir(newpath):
    get_path = sys.path[0].split("\\")      
    del get_path[-1]                                                
    get_path.append(newpath)                    
    path = "\\".join(get_path)              
    os.chdir(path)

## Metro level

In [3]:
# Retreive industry MSA file
change_dir('output')
industry = pd.read_csv("industries_msa_s1.csv")
industry = industry.rename(columns={'value':'industry_employment'})
industry = industry.drop(['Unnamed: 0','series_id'],axis=1)
industry

Unnamed: 0,year,period,industry_employment,state_code,area_code,supersector_code,industry_code
0,2015,M01,45.8,1,11500,0,0
1,2015,M02,46.0,1,11500,0,0
2,2015,M03,46.1,1,11500,0,0
3,2015,M04,46.4,1,11500,0,0
4,2015,M05,46.6,1,11500,0,0
...,...,...,...,...,...,...,...
950119,2021,M02,7.0,56,16940,90,90930000
950120,2021,M03,7.0,56,16940,90,90930000
950121,2021,M04,7.0,56,16940,90,90930000
950122,2021,M05,7.0,56,16940,90,90930000


In [4]:
# Get unemployment reates and clean up database
unemp_rate = pd.read_csv("unemployment_rate.csv")
unemp_rate = unemp_rate[unemp_rate['year'] > 2014]
unemp_rate.drop(unemp_rate.columns.difference(['year', 'period','unemployment_rate','area_code','srd_code']),1,inplace=True)
unemp_rate = unemp_rate.rename(columns={'srd_code':'state_code'})
unemp_rate

Unnamed: 0,year,period,unemployment_rate,area_code,state_code
325,2015,M01,7.6,11500,1
326,2015,M02,7.2,11500,1
327,2015,M03,6.8,11500,1
328,2015,M04,6.3,11500,1
329,2015,M05,6.9,11500,1
...,...,...,...,...,...
174768,2021,M02,7.1,45104,53
174769,2021,M03,6.9,45104,53
174770,2021,M04,6.4,45104,53
174771,2021,M05,5.4,45104,53


In [5]:
# Merging unemployment counts with the industry dataset
m1 = industry.merge(unemp_rate, left_on=['area_code','year','period','state_code'],right_on=['area_code','year','period','state_code'])
m1

Unnamed: 0,year,period,industry_employment,state_code,area_code,supersector_code,industry_code,unemployment_rate
0,2015,M01,45.8,1,11500,0,0,7.6
1,2015,M01,33.5,1,11500,5,5000000,7.6
2,2015,M01,6.7,1,11500,6,6000000,7.6
3,2015,M01,39.1,1,11500,7,7000000,7.6
4,2015,M01,26.8,1,11500,8,8000000,7.6
...,...,...,...,...,...,...,...,...
913411,2021,M06,1.6,56,16940,80,80000000,5.0
913412,2021,M06,13.7,56,16940,90,90000000,5.0
913413,2021,M06,2.8,56,16940,90,90910000,5.0
913414,2021,M06,3.9,56,16940,90,90920000,5.0


In [6]:
# Get unemployment counts and clean up database
unemp = pd.read_csv("unemployment.csv")
unemp = unemp[unemp['year'] > 2014]
unemp.drop(unemp.columns.difference(['year', 'period','area_code','srd_code','unemployment']),1,inplace=True)
unemp = unemp.rename(columns={'srd_code':'state_code'})
unemp

Unnamed: 0,year,period,unemployment,area_code,state_code
325,2015,M01,3506.0,11500,1
326,2015,M02,3344.0,11500,1
327,2015,M03,3183.0,11500,1
328,2015,M04,2940.0,11500,1
329,2015,M05,3201.0,11500,1
...,...,...,...,...,...
174768,2021,M02,30392.0,45104,53
174769,2021,M03,30394.0,45104,53
174770,2021,M04,27784.0,45104,53
174771,2021,M05,23593.0,45104,53


In [7]:
# Merging unemployment counts with the industry dataset
m2 = m1.merge(unemp, left_on=['area_code','year','period','state_code'],right_on=['area_code','year','period','state_code'])
m2

Unnamed: 0,year,period,industry_employment,state_code,area_code,supersector_code,industry_code,unemployment_rate,unemployment
0,2015,M01,45.8,1,11500,0,0,7.6,3506.0
1,2015,M01,33.5,1,11500,5,5000000,7.6,3506.0
2,2015,M01,6.7,1,11500,6,6000000,7.6,3506.0
3,2015,M01,39.1,1,11500,7,7000000,7.6,3506.0
4,2015,M01,26.8,1,11500,8,8000000,7.6,3506.0
...,...,...,...,...,...,...,...,...,...
913411,2021,M06,1.6,56,16940,80,80000000,5.0,2567.0
913412,2021,M06,13.7,56,16940,90,90000000,5.0,2567.0
913413,2021,M06,2.8,56,16940,90,90910000,5.0,2567.0
913414,2021,M06,3.9,56,16940,90,90920000,5.0,2567.0


In [8]:
# Get employment counts and clean up database
emp = pd.read_csv("employment.csv")
emp = emp[emp['year']>2014]
emp.drop(emp.columns.difference(['year', 'period','employment','area_code','srd_code']),1,inplace=True)
emp = emp.rename(columns={'srd_code':'state_code'})
emp

Unnamed: 0,year,period,employment,area_code,state_code
325,2015,M01,42824.0,11500,1
326,2015,M02,43276.0,11500,1
327,2015,M03,43378.0,11500,1
328,2015,M04,43509.0,11500,1
329,2015,M05,43209.0,11500,1
...,...,...,...,...,...
174768,2021,M02,399477.0,45104,53
174769,2021,M03,407609.0,45104,53
174770,2021,M04,409716.0,45104,53
174771,2021,M05,415423.0,45104,53


In [9]:
# Merging employment counts with the industry dataset
m3 = m2.merge(emp, left_on=['area_code','year','period','state_code'],right_on=['area_code','year','period','state_code'])
m3

Unnamed: 0,year,period,industry_employment,state_code,area_code,supersector_code,industry_code,unemployment_rate,unemployment,employment
0,2015,M01,45.8,1,11500,0,0,7.6,3506.0,42824.0
1,2015,M01,33.5,1,11500,5,5000000,7.6,3506.0,42824.0
2,2015,M01,6.7,1,11500,6,6000000,7.6,3506.0,42824.0
3,2015,M01,39.1,1,11500,7,7000000,7.6,3506.0,42824.0
4,2015,M01,26.8,1,11500,8,8000000,7.6,3506.0,42824.0
...,...,...,...,...,...,...,...,...,...,...
913411,2021,M06,1.6,56,16940,80,80000000,5.0,2567.0,48663.0
913412,2021,M06,13.7,56,16940,90,90000000,5.0,2567.0,48663.0
913413,2021,M06,2.8,56,16940,90,90910000,5.0,2567.0,48663.0
913414,2021,M06,3.9,56,16940,90,90920000,5.0,2567.0,48663.0


In [10]:
# Get labor force and clean up database
labor_force = pd.read_csv("labor_force.csv")
labor_force = labor_force[labor_force['year']>2014]
labor_force.drop(labor_force.columns.difference(['year', 'period','labor_force','area_code','srd_code']),1,inplace=True)
labor_force = labor_force.rename(columns={'srd_code':'state_code'})
labor_force

Unnamed: 0,year,period,labor_force,area_code,state_code
325,2015,M01,46330.0,11500,1
326,2015,M02,46620.0,11500,1
327,2015,M03,46561.0,11500,1
328,2015,M04,46449.0,11500,1
329,2015,M05,46410.0,11500,1
...,...,...,...,...,...
174768,2021,M02,429869.0,45104,53
174769,2021,M03,438003.0,45104,53
174770,2021,M04,437500.0,45104,53
174771,2021,M05,439016.0,45104,53


In [11]:
# Merging labor force with the industry dataset
m4 = m3.merge(labor_force, left_on=['area_code','year','period','state_code'],right_on=['area_code','year','period','state_code'])
m4

Unnamed: 0,year,period,industry_employment,state_code,area_code,supersector_code,industry_code,unemployment_rate,unemployment,employment,labor_force
0,2015,M01,45.8,1,11500,0,0,7.6,3506.0,42824.0,46330.0
1,2015,M01,33.5,1,11500,5,5000000,7.6,3506.0,42824.0,46330.0
2,2015,M01,6.7,1,11500,6,6000000,7.6,3506.0,42824.0,46330.0
3,2015,M01,39.1,1,11500,7,7000000,7.6,3506.0,42824.0,46330.0
4,2015,M01,26.8,1,11500,8,8000000,7.6,3506.0,42824.0,46330.0
...,...,...,...,...,...,...,...,...,...,...,...
913411,2021,M06,1.6,56,16940,80,80000000,5.0,2567.0,48663.0,51230.0
913412,2021,M06,13.7,56,16940,90,90000000,5.0,2567.0,48663.0,51230.0
913413,2021,M06,2.8,56,16940,90,90910000,5.0,2567.0,48663.0,51230.0
913414,2021,M06,3.9,56,16940,90,90920000,5.0,2567.0,48663.0,51230.0


In [12]:
m4.to_csv("merged_industry_LAUS.csv",index=False)

## State level

In [60]:
# Retreive industry state file
change_dir('output')
industry = pd.read_csv("industries_states_s1.csv")
industry = industry.rename(columns={'value':'industry_employment'})
industry = industry.drop(['Unnamed: 0','series_id'],axis=1)
industry

Unnamed: 0,year,period,industry_employment,state_code,area_code,supersector_code,industry_code
0,2015,M01,1932.5,1,0,0,0
1,2015,M02,1945.1,1,0,0,0
2,2015,M03,1953.7,1,0,0,0
3,2015,M04,1969.1,1,0,0,0
4,2015,M05,1978.2,1,0,0,0
...,...,...,...,...,...,...,...
277111,2021,M02,22.1,56,0,90,90932000
277112,2021,M03,22.1,56,0,90,90932000
277113,2021,M04,22.1,56,0,90,90932000
277114,2021,M05,22.3,56,0,90,90932000


In [61]:
# Get LAUS dataset 
laus_state = pd.read_csv("LAUS_state.csv")
laus_state = laus_state[laus_state['year'] > 2014]
laus_state.drop(laus_state.columns.difference(['year', 'period','value','srd_code','measure_code']),1,inplace=True)
laus_state = laus_state.rename(columns={'srd_code':'state_code'})
laus_state

Unnamed: 0,year,period,value,measure_code,state_code
507,2015,M01,6.6,3,1
508,2015,M02,6.3,3,1
509,2015,M03,6.0,3,1
510,2015,M04,5.5,3,1
511,2015,M05,6.0,3,1
...,...,...,...,...,...
210982,2021,M02,456315.0,9,56
210983,2021,M03,456556.0,9,56
210984,2021,M04,456813.0,9,56
210985,2021,M05,457104.0,9,56


In [62]:
# Get area code as the index and industries as columns
laus_pivot = laus_state.pivot_table(index=['state_code','year','period'],columns='measure_code',values='value',aggfunc='mean')
laus_pivot = laus_pivot.rename(columns={3:'unemployment_rate',4:'unemployment',5:'employment',6:'labor_force',7:'employment_population_ratio',8:'labor_force_participation_rate'})
laus_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,measure_code,unemployment_rate,unemployment,employment,labor_force,employment_population_ratio,labor_force_participation_rate,9
state_code,year,period,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,2015,M01,6.6,142161.0,2006508.0,2148669.0,52.9,56.7,3792717.0
1,2015,M02,6.3,136656.0,2025374.0,2162030.0,53.4,57.0,3793390.0
1,2015,M03,6.0,129207.0,2041579.0,2170786.0,53.8,57.2,3794218.0
1,2015,M04,5.5,119248.0,2050976.0,2170224.0,54.0,57.2,3795273.0
1,2015,M05,6.0,129594.0,2040588.0,2170182.0,53.7,57.2,3796534.0
...,...,...,...,...,...,...,...,...,...
56,2021,M02,6.5,19312.0,276040.0,295352.0,60.5,64.7,456315.0
56,2021,M03,5.9,17284.0,277285.0,294569.0,60.7,64.5,456556.0
56,2021,M04,5.7,16723.0,277017.0,293740.0,60.6,64.3,456813.0
56,2021,M05,5.4,16021.0,278699.0,294720.0,61.0,64.5,457104.0


In [63]:
# Merging laus state data with the industry dataset
merged = industry.merge(laus_pivot, left_on=['year','period','state_code'],right_on=['year','period','state_code'])
merged

Unnamed: 0,year,period,industry_employment,state_code,area_code,supersector_code,industry_code,unemployment_rate,unemployment,employment,labor_force,employment_population_ratio,labor_force_participation_rate,9
0,2015,M01,1932.5,1,0,0,0,6.6,142161.0,2006508.0,2148669.0,52.9,56.7,3792717.0
1,2015,M01,1553.7,1,0,5,5000000,6.6,142161.0,2006508.0,2148669.0,52.9,56.7,3792717.0
2,2015,M01,345.6,1,0,6,6000000,6.6,142161.0,2006508.0,2148669.0,52.9,56.7,3792717.0
3,2015,M01,1586.9,1,0,7,7000000,6.6,142161.0,2006508.0,2148669.0,52.9,56.7,3792717.0
4,2015,M01,1208.1,1,0,8,8000000,6.6,142161.0,2006508.0,2148669.0,52.9,56.7,3792717.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
277111,2021,M06,8.4,56,0,90,90910000,5.6,16825.0,282615.0,299440.0,61.8,65.5,457382.0
277112,2021,M06,12.4,56,0,90,90920000,5.6,16825.0,282615.0,299440.0,61.8,65.5,457382.0
277113,2021,M06,8.9,56,0,90,90922000,5.6,16825.0,282615.0,299440.0,61.8,65.5,457382.0
277114,2021,M06,46.0,56,0,90,90930000,5.6,16825.0,282615.0,299440.0,61.8,65.5,457382.0


In [64]:
merged.to_csv("merged_industry_LAUS_state.csv")