2019 - Employment Permits by Company

In [28]:
import pandas as pd

# Year of the dataset handled by this notebook; it is interpolated into the file paths.
year = 2019

# The raw workbook is read from a folder on my personal computer.
raw_xlsx = f"E:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/raw_data/{year}/companies-issued-with-permits-{year}.xlsx"

# header=1   -> the column labels are taken from the row at 0-based index 1
# skiprows=[2] -> the row at 0-based index 2 is skipped while reading
df = pd.read_excel(raw_xlsx, header=1, skiprows=[2])

In [29]:
# Clean the raw sheet held in `df` and aggregate it to one row per
# (company, month) with the summed 'Total'. Relies on `df` and `year`
# created by the loading cell.

# Removing the first empty column (rebinding instead of mutating in place)
df = df.drop(columns=["Unnamed: 0"])

# Filling the month column.
# Cells are compared as stripped text: both the 'nan' produced by casting a
# missing cell to str and cells that were only whitespace are treated as
# missing, so the forward fill propagates the last real month label over them.
month_text = df['Month'].astype(str).str.strip()
df['Month'] = month_text.mask(month_text.isin(['nan', ''])).ffill()

# Keeping only rows that have a non-blank "Employer Name" and that are not
# "Total for ..." subtotal rows. The explicit copy gives an independent frame
# for the column assignment below.
employer = df['Employer Name']
has_employer = employer.notna() & (employer.str.strip() != '')
is_subtotal = employer.str.contains('Total for', na=False)
df = df[has_employer & ~is_subtotal].copy()

# Normalizing the employer name before grouping: trim, collapse runs of
# whitespace to a single space, then title-case.
# NOTE(review): str.title() also upper-cases the letter after an apostrophe
# (e.g. "Alzheimer'S"); left as is because changing it would alter the
# Company values written to the CSV.
df['Employer Name'] = (
    df['Employer Name']
    .str.strip()
    .str.replace(r'\s+', ' ', regex=True)
    .str.title()
)

# Grouping by employer name and month, using just the total information
# (the next datasets don't have county information).
df = df.groupby(['Employer Name', 'Month'], as_index=False)['Total'].sum()

# Adding the year
df['Year'] = year

# Renaming and ordering columns
df = df.rename(columns={'Employer Name': 'Company'})
df = df[['Year', 'Month', 'Company', 'Total']]

# Sorting months in calendar order.
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# pd.Categorical turns every value outside `categories` into NaN without any
# warning, so fail loudly if the sheet uses a month label we do not expect.
unexpected_months = sorted(set(df['Month']) - set(month_order))
if unexpected_months:
    raise ValueError(f"Unexpected month labels in 'Month' column: {unexpected_months}")

df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)
df = df.sort_values(by=['Year', 'Month', 'Company'])

In [None]:
# The result is saved as .csv because the format is lightweight and easy to
# work with in libraries such as pandas and sqlalchemy.
output_csv = f"E:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/{year}/permits-by-companies-{year}.csv"
df.to_csv(output_csv, index=False)

# Note: I noticed that the data has no values for August.

print(df)

      Year Month                                       Company  Total
3     2019   Jan                            3D4Medical Limited      1
18    2019   Jan                  Abacus Systems Networks Ltd.      1
20    2019   Jan                 Abbeybreaffy Nursing Home Ltd      1
25    2019   Jan  Abbeylands Nursing Home Alzheimer'S Unit Ltd      1
28    2019   Jan                                Abbott Ireland      1
...    ...   ...                                           ...    ...
5953  2019   Dec                       Zalando Ireland Limited      1
5959  2019   Dec                                Zefone Limited      1
5971  2019   Dec                   Zenith Technologies Limited      4
5985  2019   Dec                                    Zhuohui Wu      1
5998  2019   Dec                          Zurich Insurance Plc      1

[6002 rows x 4 columns]
