2025 - Employment Permits by Company

In [8]:
import pandas as pd

# Reporting year; it selects both the input folder and the workbook name.
year = 2025

# The raw workbook lives in a folder on the author's personal computer.
raw_workbook = f"G:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/raw_data/{year}/permits-issued-to-companies-{year}.xlsx"

# header=0: the first row of the sheet supplies the column labels.
df = pd.read_excel(raw_workbook, header=0)
print(df)

                                          Employer Name  January  February  \
0                                 IQVIA RDS IRELAND LTD      NaN       NaN   
1     '-\tMoran Industrial and Agricultural Solution...      NaN       NaN   
2                            12 Tables Eastside Limited      NaN       NaN   
3                         19th Hole Hospitality Limited      NaN       1.0   
4                       24 Doc (Doctor On Call) Limited      NaN       NaN   
...                                                 ...      ...       ...   
8027                      Zucchini's Restaurant Limited      1.0       NaN   
8028                       Zurich Insurance Company Ltd      1.0       NaN   
8029                        Z-We-Ton (Alandale) Limited      NaN       NaN   
8030                          Z-We-Ton (Raheen) Limited      NaN       NaN   
8031                                        Grand Total   3799.0    2993.0   

       March   April     May    June    July  August  September

In [9]:
# Reshape the wide sheet (one column per month) into a tidy long table with
# one row per (Year, Month, Company) and the number of permits in "Total".
# Reads `df` and `year` from the load cell and rebinds `df` to the tidy result,
# so the load cell has to be re-run before this cell is run a second time.

# Calendar-ordered month abbreviations; also the category order used for sorting.
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# Guarantee the first column is named "Company" and abbreviate every other
# header to its first three title-cased letters ("January " -> "Jan").
# str() keeps this working for non-string headers, and relabelling a copy
# leaves the loaded frame untouched until the final rebind below.
permits_wide = df.copy()
permits_wide.columns = [
    "Company" if pos == 0 or label == "Company" else str(label).strip().title()[:3]
    for pos, label in enumerate(df.columns)
]

# Identify the month columns that are actually present in the sheet.
# Months missing from the sheet are not padded with zeros: zero totals are
# filtered out further down, so padded columns could never reach the result.
month_cols = [c for c in permits_wide.columns if c in months]
if not month_cols:
    # Typical cause: the cell was re-run after `df` had already been replaced
    # by the tidy frame, which has no month columns left.
    raise ValueError(
        "No month columns found in df; re-run the cell that loads the raw workbook first."
    )

# Melt the DataFrame to long format
df_melted = permits_wide.melt(
    id_vars=["Company"],
    value_vars=month_cols,
    var_name="Month",
    value_name="Total"
)

# Remove summary rows ("Grand Total" anywhere in the name, or a bare "Total")
# and rows without an employer name, which astype(str) would otherwise turn
# into a company literally called "Nan".
company_names = df_melted["Company"]
is_summary = company_names.astype(str).str.contains(
    r"grand total|^\s*total\s*$", case=False, regex=True
)
# .copy() makes the filtered frame independent, so the column assignments
# below cannot trigger SettingWithCopyWarning.
df_melted = df_melted.loc[company_names.notna() & ~is_summary].copy()

# Convert "Total" to numeric (unparseable values become 0) and keep only
# rows where at least one permit was issued.
df_melted["Total"] = pd.to_numeric(df_melted["Total"], errors="coerce").fillna(0)
df_melted = df_melted.loc[df_melted["Total"] > 0].copy()

# Add "Year" column
df_melted["Year"] = year

# Normalize "Company" names: strip whitespace, replace runs of whitespace
# with a single space, title case
df_melted["Company"] = (
    df_melted["Company"]
    .astype(str)
    .str.strip()
    .str.replace(r"\s+", " ", regex=True)
    .str.title()
)

# An ordered categorical makes the sort follow calendar order instead of
# alphabetical order ("Apr" before "Jan").
df_melted["Month"] = pd.Categorical(df_melted["Month"], categories=months, ordered=True)

# Final DataFrame: sorted by Year, Month, Company with a fixed column order
df = df_melted.sort_values(by=["Year", "Month", "Company"])[["Year", "Month", "Company", "Total"]]

In [10]:
# CSV is used for the output because it is lightweight and easy to work with
# from libraries such as pandas and sqlalchemy.
output_csv = f"G:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/{year}/permits-by-companies-{year}.csv"

# index=False: the row index is not written to the file.
df.to_csv(output_csv, index=False)
print(df)

       Year Month                                          Company  Total
5      2025   Jan                       24Hr Care Services Limited    3.0
7      2025   Jan                          2K Games Dublin Limited    1.0
9      2025   Jan                              4D Varghese Limited    1.0
20     2025   Jan                 A To Z Innovative Solutions Ltd.    1.0
25     2025   Jan                     A&M Interior Glazing Limited    4.0
...     ...   ...                                              ...    ...
72253  2025   Sep                            Zenith Health Limited    1.0
72262  2025   Sep             Zf Digital Solutions Ireland Limited    1.0
72276  2025   Sep  Zinc Processors Ltd T/A Shannonside Galvanising    2.0
72279  2025   Sep                              Zoetis Belgium S.A.    1.0
69810  2025   Sep                            Óstan Gúgán Barra Teo    1.0

[11468 rows x 4 columns]
