2025 - Employment Permits by Company

In [8]:
import pandas as pd

# Reporting year; it is interpolated into both the folder and the file name below.
year = 2025

# Location of the raw workbook on the author's personal computer.
raw_xlsx = f"G:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/raw_data/{year}/permits-issued-to-companies-{year}.xlsx"

# Row 0 of the sheet supplies the column names; row 1 is skipped on read.
df = pd.read_excel(raw_xlsx, header=0, skiprows=[1])
print(df)

                                             Unnamed: 0  Jan  Feb  Mar  Apr  \
0                                 IQVIA RDS IRELAND LTD  NaN  NaN  NaN  NaN   
1     '-\tMoran Industrial and Agricultural Solution...  NaN  NaN  NaN  NaN   
2                         19th Hole Hospitality Limited  NaN  1.0  NaN  NaN   
3                       24 Doc (Doctor On Call) Limited  NaN  NaN  NaN  NaN   
4                            24hr Care Services Limited  3.0  1.0  1.0  NaN   
...                                                 ...  ...  ...  ...  ...   
7459                               ZSTS Express Limited  NaN  NaN  NaN  NaN   
7460                      Zucchini's Restaurant Limited  1.0  NaN  NaN  NaN   
7461                       Zurich Insurance Company Ltd  1.0  NaN  1.0  NaN   
7462                        Z-We-Ton (Alandale) Limited  NaN  NaN  NaN  NaN   
7463                          Z-We-Ton (Raheen) Limited  NaN  NaN  NaN  NaN   

      May  Jun  Jul  Aug  Grand Total  
0     NaN  

In [9]:
# Reshape the wide sheet (one column per month) into a tidy table with one row
# per (Year, Month, Company) that has at least one permit.
#
# NOTE: this cell rebinds `df`, so it is not idempotent. Re-run the loading
# cell before re-running this one.

# Calendar order, used both to pick the month columns and to sort them later.
month_order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# The first column holds the company names; give it an explicit name.
df = df.rename(columns={df.columns[0]: "Company"})

# Keep only Company + the twelve months. Months that are absent from the sheet
# are created filled with 0 so that melt always sees all twelve columns.
df = df.reindex(columns=["Company", *month_order], fill_value=0)

# Build the result in a single chain: every step returns a new frame, so no
# column is ever assigned onto a filtered slice of another frame.
df = (
    df.melt(
        id_vars=["Company"],
        value_vars=month_order,
        var_name="Month",
        value_name="Total",
    )
    # Non-numeric cells become NaN, and NaN is treated as "no permits".
    .assign(Total=lambda d: pd.to_numeric(d["Total"], errors="coerce").fillna(0))
    # Drop company/month combinations without any permit.
    .loc[lambda d: d["Total"] > 0]
    .assign(
        Year=year,
        # Normalize names: trim, collapse runs of whitespace, title-case.
        Company=lambda d: (
            d["Company"].str.strip().str.replace(r'\s+', ' ', regex=True).str.title()
        ),
        # Ordered categorical so months sort in calendar order, not alphabetically.
        Month=lambda d: pd.Categorical(d["Month"], categories=month_order, ordered=True),
    )
    .loc[:, ['Year', 'Month', 'Company', 'Total']]
    # Sort by Year, then Month (calendar order), then Company.
    .sort_values(by=["Year", "Month", "Company"])
)

In [None]:
from pathlib import Path

# CSV is used because it is lightweight and easy to work with from libraries
# such as pandas and SQLAlchemy.
out_path = Path(f"G:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/{year}/permits-by-companies-{year}.csv")

# to_csv does not create missing folders, so make sure the per-year output
# directory exists before writing.
out_path.parent.mkdir(parents=True, exist_ok=True)

df.to_csv(out_path, index=False)
print(df)

       Year Month                           Company  Total
4      2025   Jan        24Hr Care Services Limited    3.0
6      2025   Jan           2K Games Dublin Limited    1.0
8      2025   Jan               4D Varghese Limited    1.0
17     2025   Jan  A To Z Innovative Solutions Ltd.    1.0
21     2025   Jan      A&M Interior Glazing Limited    4.0
...     ...   ...                               ...    ...
59698  2025   Aug              Zifo Ireland Limited    1.0
59699  2025   Aug           Zill Consulting Limited    1.0
59704  2025   Aug               Zoetis Belgium S.A.    1.0
59706  2025   Aug              Zs Retailers Limited    1.0
52717  2025   Aug         Áth Trasna Medical Centre    1.0

[10096 rows x 4 columns]
