*** 
# Veronica's Capstone Scratch Pad
***

### Imports

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error, explained_variance_score
import statsmodels.api as sm
from statsmodels.tsa.api import Holt
from datetime import datetime

# no yelling in the library
import warnings
warnings.filterwarnings("ignore")

# visualization settings
plt.rc('figure', figsize=(13, 7))
plt.rc('font', size=16)

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and newer releases no longer accept the old names, so use whichever spelling
# this installation actually ships instead of hard-coding one.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither style sheet is bundled; fall back to seaborn's own dark-grid theme.
    sns.set_style('darkgrid')

sns.set_palette('tab20')

# Acquire

In [2]:
# Acquire the job-market extract that carries the education breakdown.
# The path is relative, so it is resolved against the current working directory.
df_education = pd.read_csv('labor_market_data.csv')  

In [26]:
# Acquire the job-market extract that carries the race breakdown.
# The path is relative, so it is resolved against the current working directory.
df_race = pd.read_csv('LaborMarketWRace.csv')

In [25]:
# Acquire the job-market extract that carries the age breakdown.
# The path is relative, so it is resolved against the current working directory.
df_age = pd.read_csv('LaborMarketWAge.csv')

# Prepare

In [27]:
#Check out data with education
df_education.head()

Unnamed: 0,periodicity,periodicity_label,seasonadj,seasonadj_label,geo_level,geo_level_label,geography,geography_label,ind_level,industry,...,job_loss,EarnBeg,Payroll,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sEarnBeg,sPayroll
0,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,359.0,2161.0,,1,1,1,1,1,1,5
1,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,242.0,2514.0,,1,1,1,1,1,1,5
2,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,194.0,2770.0,,1,1,1,1,1,1,5
3,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,102.0,3531.0,,1,1,1,1,1,1,5
4,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,193.0,1529.0,,1,1,1,1,1,1,5


In [4]:
df_education.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20580 entries, 0 to 20579
Data columns (total 43 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   periodicity              20580 non-null  object 
 1   periodicity_label.value  20580 non-null  object 
 2   seasonadj                20580 non-null  object 
 3   seasonadj_label.value    20580 non-null  object 
 4   geo_level                20580 non-null  object 
 5   geo_level_label.value    20580 non-null  object 
 6   geography                20580 non-null  int64  
 7   geography_label.value    20580 non-null  object 
 8   ind_level                20580 non-null  int64  
 9   industry                 20580 non-null  int64  
 10  industry_label.value     20580 non-null  object 
 11  ownercode                20580 non-null  object 
 12  ownercode_label.value    20580 non-null  object 
 13  sex                      20580 non-null  int64  
 14  sex_label.value       

In [5]:
df_education.shape

(20580, 43)

In [6]:
df_education.FrmJbGn.max()

30373.0

In [7]:
# Give the abbreviated measure columns descriptive snake_case names.
# errors="raise" makes pandas raise KeyError if any listed column is absent,
# which also happens when this cell is run a second time after the rename.
df_education = df_education.rename(
    columns=dict(
        FrmJbGn="job_gain",
        FrmJbLs="job_loss",
        HirA="hires",
        Sep="separations",
    ),
    errors="raise",
)

In [8]:
# Strip the ".value" suffix from each descriptive "<field>_label.value" column.
# The mapping is generated from the field stems so the thirteen pairs cannot
# drift apart; errors="raise" still fails loudly if an expected column is absent.
df_education = df_education.rename(
    columns={
        f"{stem}_label.value": f"{stem}_label"
        for stem in (
            "periodicity", "seasonadj", "geo_level", "geography",
            "industry", "ownercode", "sex", "agegrp", "race",
            "ethnicity", "education", "firmage", "firmsize",
        )
    },
    errors="raise",
)

In [9]:
df_education

Unnamed: 0,periodicity,periodicity_label,seasonadj,seasonadj_label,geo_level,geo_level_label,geography,geography_label,ind_level,industry,...,job_loss,EarnBeg,Payroll,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sEarnBeg,sPayroll
0,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,359.0,2161.0,,1,1,1,1,1,1,5
1,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,242.0,2514.0,,1,1,1,1,1,1,5
2,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,194.0,2770.0,,1,1,1,1,1,1,5
3,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,102.0,3531.0,,1,1,1,1,1,1,5
4,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,193.0,1529.0,,1,1,1,1,1,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20575,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,928,...,,3139.0,,1,1,-1,-1,-1,1,5
20576,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,928,...,,3563.0,,1,1,-1,-1,-1,1,5
20577,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,928,...,,3894.0,,1,1,-1,-1,-1,1,5
20578,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,928,...,,5058.0,,1,1,-1,-1,-1,1,5


In [30]:
df_education.describe()

Unnamed: 0,geography,ind_level,industry,sex,firmage,firmsize,year,quarter,Emp,hires,...,job_loss,EarnBeg,Payroll,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sEarnBeg,sPayroll
count,20580.0,20580.0,20580.0,20580.0,20580.0,20580.0,20580.0,20580.0,20529.0,20449.0,...,19549.0,20557.0,0.0,20580.0,20580.0,20580.0,20580.0,20580.0,20580.0,20580.0
mean,48.0,3.0,478.22449,1.5,0.0,0.0,2018.142857,2.428571,12215.271031,2248.24231,...,679.702747,4499.461059,,1.009913,1.025462,0.93897,0.914674,0.914674,1.00447,5.0
std,0.0,0.0,199.710734,0.500012,0.0,0.0,1.520867,1.136917,24768.661095,6305.020944,...,1570.128481,3080.976198,,0.198881,0.318124,0.568858,0.472077,0.472077,0.13365,0.0
min,48.0,3.0,111.0,1.0,0.0,0.0,2016.0,1.0,0.0,0.0,...,0.0,101.0,,1.0,1.0,-1.0,-1.0,-1.0,1.0,5.0
25%,48.0,3.0,327.0,1.0,0.0,0.0,2017.0,1.0,1218.0,181.0,...,60.0,2561.0,,1.0,1.0,1.0,1.0,1.0,1.0,5.0
50%,48.0,3.0,481.5,1.5,0.0,0.0,2018.0,2.0,4410.0,574.0,...,211.0,3813.0,,1.0,1.0,1.0,1.0,1.0,1.0,5.0
75%,48.0,3.0,541.0,2.0,0.0,0.0,2019.0,3.0,11793.0,1718.0,...,609.0,5414.0,,1.0,1.0,1.0,1.0,1.0,1.0,5.0
max,48.0,3.0,928.0,2.0,0.0,0.0,2021.0,4.0,282658.0,115464.0,...,36372.0,59720.0,,5.0,5.0,5.0,5.0,5.0,5.0,5.0


In [10]:
df_education.columns

Index(['periodicity', 'periodicity_label', 'seasonadj', 'seasonadj_label',
       'geo_level', 'geo_level_label', 'geography', 'geography_label',
       'ind_level', 'industry', 'industry_label', 'ownercode',
       'ownercode_label', 'sex', 'sex_label', 'agegrp', 'agegrp_label', 'race',
       'race_label', 'ethnicity', 'ethnicity_label', 'education',
       'education_label', 'firmage', 'firmage_label', 'firmsize',
       'firmsize_label', 'year', 'quarter', 'Emp', 'hires', 'separations',
       'job_gain', 'job_loss', 'EarnBeg', 'Payroll', 'sEmp', 'sHirA', 'sSep',
       'sFrmJbGn', 'sFrmJbLs', 'sEarnBeg', 'sPayroll'],
      dtype='object')

In [11]:
df_education.sex_label.value_counts()

Female    10290
Male      10290
Name: sex_label, dtype: int64

In [12]:
df_education.ethnicity_label.value_counts()

All Ethnicities    20580
Name: ethnicity_label, dtype: int64

In [13]:
df_education.agegrp_label.value_counts()

All Ages (14-99)    20580
Name: agegrp_label, dtype: int64

In [14]:
df_education.race_label.value_counts()

All Races    20580
Name: race_label, dtype: int64

In [15]:
df_education.education_label.value_counts()

Bachelor's degree or advanced degree                                 4116
Some college or Associate degree                                     4116
Less than high school                                                4116
High school or equivalent, no college                                4116
Educational attainment not available (workers aged 24 or younger)    4116
Name: education_label, dtype: int64

In [21]:
#Look for nulls
df_education.isna().sum()

periodicity              0
periodicity_label        0
seasonadj                0
seasonadj_label          0
geo_level                0
geo_level_label          0
geography                0
geography_label          0
ind_level                0
industry                 0
industry_label           0
ownercode                0
ownercode_label          0
sex                      0
sex_label                0
agegrp                   0
agegrp_label             0
race                     0
race_label               0
ethnicity                0
ethnicity_label          0
education                0
education_label          0
firmage                  0
firmage_label            0
firmsize                 0
firmsize_label           0
year                     0
quarter                  0
Emp                     51
hires                  131
separations           1156
job_gain              1031
job_loss              1031
EarnBeg                 23
Payroll              20580
sEmp                     0
s

In [33]:
# Payroll is null in every row of this frame (see the null counts above), so it
# carries no information. errors='ignore' keeps the cell safe to re-run: without
# it, a second execution raises KeyError because the column is already gone.
df_education.drop(columns=['Payroll'], inplace=True, errors='ignore')

In [34]:
df_education.isna().sum()

periodicity             0
periodicity_label       0
seasonadj               0
seasonadj_label         0
geo_level               0
geo_level_label         0
geography               0
geography_label         0
ind_level               0
industry                0
industry_label          0
ownercode               0
ownercode_label         0
sex                     0
sex_label               0
agegrp                  0
agegrp_label            0
race                    0
race_label              0
ethnicity               0
ethnicity_label         0
education               0
education_label         0
firmage                 0
firmage_label           0
firmsize                0
firmsize_label          0
year                    0
quarter                 0
Emp                    51
hires                 131
separations          1156
job_gain             1031
job_loss             1031
EarnBeg                23
sEmp                    0
sHirA                   0
sSep                    0
sFrmJbGn    

In [41]:
# Number of distinct industries with at least one row missing job_gain.
df_education.loc[df_education['job_gain'].isna(), 'industry'].nunique()

98

In [43]:
# Rows missing job_gain, tallied per industry code.
df_education.loc[df_education['job_gain'].isna(), 'industry'].value_counts()

482    61
541    10
532    10
531    10
525    10
       ..
336    10
335    10
334    10
333    10
928    10
Name: industry, Length: 98, dtype: int64

In [44]:
# Number of distinct industries with at least one row missing Emp.
df_education.loc[df_education['Emp'].isna(), 'industry'].nunique()

1

In [45]:
# Rows missing Emp, tallied per industry code.
df_education.loc[df_education['Emp'].isna(), 'industry'].value_counts()

482    51
Name: industry, dtype: int64

In [50]:
# Drop every row that still contains a null, mutating df_education in place.
# Rationale: the missing job_gain rows are spread thin - 61 in industry 482
# (Rail) and 10 in each of the other affected industries.
# NOTE(review): separations alone has 1156 nulls, so well over a thousand rows
# are removed here - confirm that loss is acceptable.
df_education.dropna(inplace=True)

In [51]:
df_education.isna().sum()

periodicity          0
periodicity_label    0
seasonadj            0
seasonadj_label      0
geo_level            0
geo_level_label      0
geography            0
geography_label      0
ind_level            0
industry             0
industry_label       0
ownercode            0
ownercode_label      0
sex                  0
sex_label            0
agegrp               0
agegrp_label         0
race                 0
race_label           0
ethnicity            0
ethnicity_label      0
education            0
education_label      0
firmage              0
firmage_label        0
firmsize             0
firmsize_label       0
year                 0
quarter              0
Emp                  0
hires                0
separations          0
job_gain             0
job_loss             0
EarnBeg              0
sEmp                 0
sHirA                0
sSep                 0
sFrmJbGn             0
sFrmJbLs             0
sEarnBeg             0
sPayroll             0
dtype: int64

In [52]:
#Taking a look at the dataset with race data
df_race.head()

Unnamed: 0,periodicity,periodicity_label.value,seasonadj,seasonadj_label.value,geo_level,geo_level_label.value,geography,geography_label.value,ind_level,industry,...,Payroll,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sFrmJbC,sHirAEndReplR,sEarnBeg,sPayroll
0,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
1,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
2,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
3,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
4,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5


In [19]:
df_race.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24696 entries, 0 to 24695
Data columns (total 47 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   periodicity              24696 non-null  object 
 1   periodicity_label.value  24696 non-null  object 
 2   seasonadj                24696 non-null  object 
 3   seasonadj_label.value    24696 non-null  object 
 4   geo_level                24696 non-null  object 
 5   geo_level_label.value    24696 non-null  object 
 6   geography                24696 non-null  int64  
 7   geography_label.value    24696 non-null  object 
 8   ind_level                24696 non-null  int64  
 9   industry                 24696 non-null  int64  
 10  industry_label.value     24696 non-null  object 
 11  ownercode                24696 non-null  object 
 12  ownercode_label.value    24696 non-null  object 
 13  sex                      24696 non-null  int64  
 14  sex_label.value       

In [53]:
df_race.describe()

Unnamed: 0,geography,ind_level,industry,sex,firmage,firmsize,year,quarter,Emp,HirA,...,Payroll,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sFrmJbC,sHirAEndReplR,sEarnBeg,sPayroll
count,24696.0,24696.0,24696.0,24696.0,24696.0,24696.0,24696.0,24696.0,24251.0,23207.0,...,0.0,24696.0,24696.0,24696.0,24696.0,24696.0,24696.0,24696.0,24696.0,24696.0
mean,48.0,3.0,478.22449,0.0,0.0,0.0,2018.142857,2.428571,10340.468228,1980.961348,...,,1.072076,1.241173,1.145287,0.977162,0.977162,0.977162,5.568837,1.059929,5.0
std,0.0,0.0,199.709926,0.0,0.0,0.0,1.520861,1.136912,39185.344938,9175.59024,...,,0.532092,0.952136,1.063726,0.692514,0.692514,0.692514,1.49845,0.485935,0.0
min,48.0,3.0,111.0,0.0,0.0,0.0,2016.0,1.0,0.0,0.0,...,,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,5.0
25%,48.0,3.0,327.0,0.0,0.0,0.0,2017.0,1.0,97.0,18.0,...,,1.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,5.0
50%,48.0,3.0,481.5,0.0,0.0,0.0,2018.0,2.0,429.0,83.0,...,,1.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,5.0
75%,48.0,3.0,541.0,0.0,0.0,0.0,2019.0,3.0,3463.0,593.0,...,,1.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,5.0
max,48.0,3.0,928.0,0.0,0.0,0.0,2021.0,4.0,651822.0,170305.0,...,,5.0,5.0,5.0,5.0,5.0,5.0,6.0,5.0,5.0


In [56]:
df_race.shape

(24696, 47)

In [57]:
df_race.isna().sum()

periodicity                    0
periodicity_label.value        0
seasonadj                      0
seasonadj_label.value          0
geo_level                      0
geo_level_label.value          0
geography                      0
geography_label.value          0
ind_level                      0
industry                       0
industry_label.value           0
ownercode                      0
ownercode_label.value          0
sex                            0
sex_label.value                0
agegrp                         0
agegrp_label.value             0
race                           0
race_label.value               0
ethnicity                      0
ethnicity_label.value          0
education                      0
education_label.value          0
firmage                        0
firmage_label.value            0
firmsize                       0
firmsize_label.value           0
year                           0
quarter                        0
Emp                          445
HirA      

In [60]:
# Dropping Payroll column since it's empty (describe() reports a count of 0).
# errors='ignore' keeps the cell safe to re-run: without it, a second execution
# raises KeyError because the column is already gone.
df_race.drop(columns=['Payroll'], inplace=True, errors='ignore')

In [61]:
# Drop every row that contains a null in any column, mutating df_race in place.
# NOTE(review): the shapes reported around this cell go from 24696 rows to
# 20522 rows, i.e. roughly 17% of the race data is discarded - confirm that
# this is intended rather than imputing or dropping per-measure.
df_race.dropna(inplace=True)

In [63]:
df_race.isna().sum()

periodicity                0
periodicity_label.value    0
seasonadj                  0
seasonadj_label.value      0
geo_level                  0
geo_level_label.value      0
geography                  0
geography_label.value      0
ind_level                  0
industry                   0
industry_label.value       0
ownercode                  0
ownercode_label.value      0
sex                        0
sex_label.value            0
agegrp                     0
agegrp_label.value         0
race                       0
race_label.value           0
ethnicity                  0
ethnicity_label.value      0
education                  0
education_label.value      0
firmage                    0
firmage_label.value        0
firmsize                   0
firmsize_label.value       0
year                       0
quarter                    0
Emp                        0
HirA                       0
Sep                        0
FrmJbGn                    0
FrmJbLs                    0
FrmJbC        

In [66]:
df_race.shape

(20522, 46)

In [68]:
# Give the abbreviated measure columns descriptive snake_case names.
# errors="raise" makes pandas raise KeyError if any listed column is absent,
# which also happens when this cell is run a second time after the rename.
df_race = df_race.rename(
    columns=dict(
        FrmJbGn="job_gain",
        FrmJbLs="job_loss",
        HirA="hires",
        Sep="separations",
    ),
    errors="raise",
)

In [71]:
df_race.columns

Index(['periodicity', 'periodicity_label.value', 'seasonadj',
       'seasonadj_label.value', 'geo_level', 'geo_level_label.value',
       'geography', 'geography_label.value', 'ind_level', 'industry',
       'industry_label.value', 'ownercode', 'ownercode_label.value', 'sex',
       'sex_label.value', 'agegrp', 'agegrp_label.value', 'race',
       'race_label.value', 'ethnicity', 'ethnicity_label.value', 'education',
       'education_label.value', 'firmage', 'firmage_label.value', 'firmsize',
       'firmsize_label.value', 'year', 'quarter', 'Emp', 'hires',
       'separations', 'job_gain', 'job_loss', 'FrmJbC', 'HirAEndReplR',
       'EarnBeg', 'sEmp', 'sHirA', 'sSep', 'sFrmJbGn', 'sFrmJbLs', 'sFrmJbC',
       'sHirAEndReplR', 'sEarnBeg', 'sPayroll'],
      dtype='object')

In [72]:
# Strip the ".value" suffix from each descriptive "<field>_label.value" column.
# The mapping is generated from the field stems so the thirteen pairs cannot
# drift apart; errors="raise" still fails loudly if an expected column is absent.
df_race = df_race.rename(
    columns={
        f"{stem}_label.value": f"{stem}_label"
        for stem in (
            "periodicity", "seasonadj", "geo_level", "geography",
            "industry", "ownercode", "sex", "agegrp", "race",
            "ethnicity", "education", "firmage", "firmsize",
        )
    },
    errors="raise",
)

In [73]:
df_race.columns

Index(['periodicity', 'periodicity_label', 'seasonadj', 'seasonadj_label',
       'geo_level', 'geo_level_label', 'geography', 'geography_label',
       'ind_level', 'industry', 'industry_label', 'ownercode',
       'ownercode_label', 'sex', 'sex_label', 'agegrp', 'agegrp_label', 'race',
       'race_label', 'ethnicity', 'ethnicity_label', 'education',
       'education_label', 'firmage', 'firmage_label', 'firmsize',
       'firmsize_label', 'year', 'quarter', 'Emp', 'hires', 'separations',
       'job_gain', 'job_loss', 'FrmJbC', 'HirAEndReplR', 'EarnBeg', 'sEmp',
       'sHirA', 'sSep', 'sFrmJbGn', 'sFrmJbLs', 'sFrmJbC', 'sHirAEndReplR',
       'sEarnBeg', 'sPayroll'],
      dtype='object')

In [74]:
#Taking a look at the dataset with age data
df_age.head()

Unnamed: 0,periodicity,periodicity_label.value,seasonadj,seasonadj_label.value,geo_level,geo_level_label.value,geography,geography_label.value,ind_level,industry,...,Payroll,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sFrmJbC,sHirAEndReplR,sEarnBeg,sPayroll
0,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
1,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
2,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
3,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5
4,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,,1,1,1,1,1,1,6,1,5


In [75]:
df_age.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32928 entries, 0 to 32927
Data columns (total 47 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   periodicity              32928 non-null  object 
 1   periodicity_label.value  32928 non-null  object 
 2   seasonadj                32928 non-null  object 
 3   seasonadj_label.value    32928 non-null  object 
 4   geo_level                32928 non-null  object 
 5   geo_level_label.value    32928 non-null  object 
 6   geography                32928 non-null  int64  
 7   geography_label.value    32928 non-null  object 
 8   ind_level                32928 non-null  int64  
 9   industry                 32928 non-null  int64  
 10  industry_label.value     32928 non-null  object 
 11  ownercode                32928 non-null  object 
 12  ownercode_label.value    32928 non-null  object 
 13  sex                      32928 non-null  int64  
 14  sex_label.value       

In [77]:
df_age.shape

(32928, 47)

In [78]:
df_age.isna().sum()

periodicity                    0
periodicity_label.value        0
seasonadj                      0
seasonadj_label.value          0
geo_level                      0
geo_level_label.value          0
geography                      0
geography_label.value          0
ind_level                      0
industry                       0
industry_label.value           0
ownercode                      0
ownercode_label.value          0
sex                            0
sex_label.value                0
agegrp                         0
agegrp_label.value             0
race                           0
race_label.value               0
ethnicity                      0
ethnicity_label.value          0
education                      0
education_label.value          0
firmage                        0
firmage_label.value            0
firmsize                       0
firmsize_label.value           0
year                           0
quarter                        0
Emp                          207
HirA      

In [79]:
# Dropping Payroll column since it's empty.
# errors='ignore' keeps the cell safe to re-run: without it, a second execution
# raises KeyError because the column is already gone.
df_age.drop(columns=['Payroll'], inplace=True, errors='ignore')

In [82]:
# Drop every row that contains a null in any column, mutating df_age in place.
# NOTE(review): no post-drop shape is shown for this frame - check how many of
# the 32928 rows survive before relying on it.
df_age.dropna(inplace=True)

In [83]:
df_age.isna().sum()

periodicity                0
periodicity_label.value    0
seasonadj                  0
seasonadj_label.value      0
geo_level                  0
geo_level_label.value      0
geography                  0
geography_label.value      0
ind_level                  0
industry                   0
industry_label.value       0
ownercode                  0
ownercode_label.value      0
sex                        0
sex_label.value            0
agegrp                     0
agegrp_label.value         0
race                       0
race_label.value           0
ethnicity                  0
ethnicity_label.value      0
education                  0
education_label.value      0
firmage                    0
firmage_label.value        0
firmsize                   0
firmsize_label.value       0
year                       0
quarter                    0
Emp                        0
HirA                       0
Sep                        0
FrmJbGn                    0
FrmJbLs                    0
FrmJbC        

In [85]:
# Give the abbreviated measure columns descriptive snake_case names.
# errors="raise" makes pandas raise KeyError if any listed column is absent,
# which also happens when this cell is run a second time after the rename.
df_age = df_age.rename(
    columns=dict(
        FrmJbGn="job_gain",
        FrmJbLs="job_loss",
        HirA="hires",
        Sep="separations",
    ),
    errors="raise",
)

In [86]:
# Strip the ".value" suffix from each descriptive "<field>_label.value" column.
# The mapping is generated from the field stems so the thirteen pairs cannot
# drift apart; errors="raise" still fails loudly if an expected column is absent.
df_age = df_age.rename(
    columns={
        f"{stem}_label.value": f"{stem}_label"
        for stem in (
            "periodicity", "seasonadj", "geo_level", "geography",
            "industry", "ownercode", "sex", "agegrp", "race",
            "ethnicity", "education", "firmage", "firmsize",
        )
    },
    errors="raise",
)

In [87]:
df_age.head()

Unnamed: 0,periodicity,periodicity_label,seasonadj,seasonadj_label,geo_level,geo_level_label,geography,geography_label,ind_level,industry,...,EarnBeg,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sFrmJbC,sHirAEndReplR,sEarnBeg,sPayroll
0,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,971.0,1,1,1,1,1,1,6,1,5
1,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,1376.0,1,1,1,1,1,1,6,1,5
2,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,1867.0,1,1,1,1,1,1,6,1,5
3,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,2333.0,1,1,1,1,1,1,6,1,5
4,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,2719.0,1,1,1,1,1,1,6,1,5


In [88]:
df_education.columns

Index(['periodicity', 'periodicity_label', 'seasonadj', 'seasonadj_label',
       'geo_level', 'geo_level_label', 'geography', 'geography_label',
       'ind_level', 'industry', 'industry_label', 'ownercode',
       'ownercode_label', 'sex', 'sex_label', 'agegrp', 'agegrp_label', 'race',
       'race_label', 'ethnicity', 'ethnicity_label', 'education',
       'education_label', 'firmage', 'firmage_label', 'firmsize',
       'firmsize_label', 'year', 'quarter', 'Emp', 'hires', 'separations',
       'job_gain', 'job_loss', 'EarnBeg', 'sEmp', 'sHirA', 'sSep', 'sFrmJbGn',
       'sFrmJbLs', 'sEarnBeg', 'sPayroll'],
      dtype='object')

# Explore

In [89]:
#Comparing number of employed, hired, separated, jobs gained, and jobs lost for 2020 to focus on pandemic activity

In [91]:
# Keep only the rows whose year is 2020 so that year can be examined on its own,
# then preview the first few rows of the subset.
df_ed_2020 = df_education.loc[df_education['year'].eq(2020)]
df_ed_2020.head()

Unnamed: 0,periodicity,periodicity_label,seasonadj,seasonadj_label,geo_level,geo_level_label,geography,geography_label,ind_level,industry,...,job_gain,job_loss,EarnBeg,sEmp,sHirA,sSep,sFrmJbGn,sFrmJbLs,sEarnBeg,sPayroll
15680,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,411.0,331.0,2565.0,1,1,1,1,1,1,5
15681,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,275.0,236.0,2953.0,1,1,1,1,1,1,5
15682,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,240.0,208.0,3244.0,1,1,1,1,1,1,5
15683,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,134.0,111.0,3995.0,1,1,1,1,1,1,5
15684,Q,Quarterly data,U,Not seasonally adjusted,S,States,48,Texas,3,111,...,357.0,227.0,1711.0,1,1,1,1,1,1,5


In [None]:
df_education['qtr_year'] = df_education.quarter + 