2025 - Employment Permit by County

In [1]:
from datetime import datetime

# Use the calendar year in which the notebook is executed.
# NOTE(review): the notebook title names 2025, but this value follows the system
# clock, so a run in a later year targets that later year's files — confirm intent.
year = current_year = datetime.now().year

# Reading the raw data in the folder on my personal computer
import pandas as pd
import os

# Location of the raw workbook for the selected year (absolute path on the author's machine).
raw_data_path = f"G:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/raw_data/{year}/permits-by-county-{year}.xlsx"

# Column labels come from row index 1, and row index 2 is skipped (both 0-indexed).
df = pd.read_excel(raw_data_path, header=1, skiprows=[2])
print(df)

           Unnamed: 0   Issued  Refused  Withdrawn
0   No County Entered      NaN      NaN     4620.0
1              Antrim      NaN      1.0        NaN
2              Carlow    128.0     14.0       76.0
3               Cavan    385.0     40.0      234.0
4               Clare    379.0     54.0      260.0
5                Cork   2130.0    224.0      898.0
6             Donegal    377.0     52.0      178.0
7                Down      1.0      NaN        NaN
8              Dublin  10503.0    997.0     3715.0
9              Galway    765.0     85.0      373.0
10              Kerry    396.0     38.0      198.0
11            Kildare    914.0    139.0      445.0
12           Kilkenny    405.0     33.0      147.0
13              Laois    170.0     50.0       90.0
14            Leitrim     68.0      6.0       26.0
15           Limerick   1164.0    123.0      525.0
16           Longford    101.0     17.0       26.0
17              Louth    348.0     35.0      122.0
18               Mayo    271.0 

In [2]:
# The column layout changed after 2020 and the first column now loads without a
# label (pandas names it "Unnamed: 0"), so it is renamed to "County".
df = df.rename(columns={"Unnamed: 0": "County"})

In [3]:
# In the new layout the year sits above the table instead of in a column,
# so it is added here as a constant column.
df["Year"] = year

# Move "Year" to the front and keep the remaining columns in their current order.
trailing_columns = [name for name in df.columns if name != "Year"]
df = df[["Year", *trailing_columns]]

In [4]:
# Load the counties/provinces reference workbook (absolute path on the author's machine).
counties_reference_path = "G:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/counties_and_provincies.xlsx"
counties_df = pd.read_excel(counties_reference_path)

# Creating the Obs column
def get_obs(county):
    """Classify a county label against the reference table ``counties_df``.

    Parameters
    ----------
    county : object
        Value taken from the "County" column. Strings are compared after
        trimming surrounding whitespace; any other value (e.g. NaN) is
        compared as-is.

    Returns
    -------
    str
        ``"No County Entered"`` when the label is exactly that placeholder,
        ``""`` when the label is listed in ``counties_df["County"]``, and
        ``"Northern Ireland"`` for every other value.
    """
    reference = counties_df["County"]

    if isinstance(county, str):
        # Trim both sides so stray whitespace in either file does not turn a
        # listed county into a false "Northern Ireland" hit.
        county = county.strip()
        reference = reference.astype(str).str.strip()

    if county == "No County Entered":
        return "No County Entered"

    # Anything absent from the reference table is treated as Northern Ireland.
    if (reference == county).any():
        return ""
    return "Northern Ireland"

# Label every row with the classification returned by get_obs for its county.
df["Obs"] = df["County"].map(get_obs)

In [5]:
# This part of the project covers only the counties, so every row that is not
# a listed county is collapsed into a single "Others" row.
#
# df["Obs"] == ""  -> county found in the reference table (Republic of Ireland)
# df["Obs"] != ""  -> "Others" (Northern Ireland or "No County Entered")

# Rows to be merged into "Others"
needs_grouping = df["Obs"].ne("")
df_others = df.loc[needs_grouping].copy().assign(County="Others")

# min_count=1 keeps a total as NaN when every contributing value is missing,
# instead of reporting 0.
df_others_grouped = (
    df_others
    .groupby(["Year", "County"], as_index=False)[["Issued", "Refused", "Withdrawn"]]
    .sum(min_count=1)
)

# Rows for listed counties (Obs == "")
df_main = df.loc[df["Obs"].eq("")].copy()

# Stack the county rows with the aggregated "Others" row, then discard the helper column.
df = pd.concat([df_main, df_others_grouped], ignore_index=True).drop(columns=["Obs"])


In [6]:
# Build the primary key "id_county" as <Year><County>.
# Year and County are both cast to text; County is trimmed at both ends, and any
# whitespace run inside the name becomes "_" in the key only.
year_text = df["Year"].astype(str)
county_text = df["County"].astype(str).str.strip()
df = df.assign(
    Year=year_text,
    County=county_text,
    id_county=year_text + county_text.str.replace(r"\s+", "_", regex=True),
)

# Final column order; names missing from the frame are skipped.
cols = ["id_county", "Year", "County", "Issued", "Refused", "Withdrawn"]
df = df.loc[:, [name for name in cols if name in df.columns]]

In [7]:
# Year-specific output folder; created if missing, left as-is when it already exists.
output_dir = f"G:/My Drive/ESTUDOS DATA SCIENCE/ie-employment-permit/data/{year}"
os.makedirs(output_dir, exist_ok=True)

# Write the table as CSV without the index column.
csv_name = f"permits-issued-by-county-{year}.csv"
csv_path = os.path.join(output_dir, csv_name)
df.to_csv(csv_path, index=False)

print(df)

        id_county  Year     County   Issued  Refused  Withdrawn
0      2025Carlow  2025     Carlow    128.0     14.0       76.0
1       2025Cavan  2025      Cavan    385.0     40.0      234.0
2       2025Clare  2025      Clare    379.0     54.0      260.0
3        2025Cork  2025       Cork   2130.0    224.0      898.0
4     2025Donegal  2025    Donegal    377.0     52.0      178.0
5      2025Dublin  2025     Dublin  10503.0    997.0     3715.0
6      2025Galway  2025     Galway    765.0     85.0      373.0
7       2025Kerry  2025      Kerry    396.0     38.0      198.0
8     2025Kildare  2025    Kildare    914.0    139.0      445.0
9    2025Kilkenny  2025   Kilkenny    405.0     33.0      147.0
10      2025Laois  2025      Laois    170.0     50.0       90.0
11    2025Leitrim  2025    Leitrim     68.0      6.0       26.0
12   2025Limerick  2025   Limerick   1164.0    123.0      525.0
13   2025Longford  2025   Longford    101.0     17.0       26.0
14      2025Louth  2025      Louth    34