In [None]:
# IMPORTANT: The parameters below are set only for running this notebook independently.
# When executing the full Ploomber pipeline, these values will be overridden by the settings in `pipeline.yaml`.
# Any modifications made here will not persist when running the pipeline.


# NOTE(review): YEAR is not referenced by any cell visible in this notebook — confirm it is still needed.
YEAR = 2018
COUNTRY =  'ARM' # Code of the Country; rows of the HR data are filtered on df_hr['country'] == COUNTRY

# Input produced by the upstream task; read with pd.read_excel in the "Staff against Utilization" section.
upstream = {
    "20_activities_programme_stucture": {
        "data": f"../data/processed/{COUNTRY}/activities_programme_stucture.xlsx",
    }
}
# Output files written by the final "Output to Excel" cell (one workbook per key).
product = {
    "data_staff": f"../data/processed/{COUNTRY}/staff_data.xlsx",
    "data_staff_thematic": f"../data/processed/{COUNTRY}/staff_thematic.xlsx",
    "data_hr_count_against_utilized_by_goal_area": f"../data/processed/{COUNTRY}/hr_count_against_utilized_by_goal_area.xlsx",
}


# Raw HR workbook, given relative to PROJ_ROOT (the two are joined further down).
# NOTE(review): the f-prefix has no placeholders and could be dropped.
data_source = f'data/raw/hr-regional-office/Staff data 2024-2016.xlsx'

This notebook analyzes HR data to determine staff distribution by nationality, appointment type, post level, goal area, and thematic area by year. Funds utilization per staff member in the different goal areas and staff turnover are also examined.

In [None]:
from pathlib import Path
import re
import os
import pandas as pd
import numpy as np
import io
from unicef_cpe.utils import *
from unicef_cpe.config import PROJ_ROOT


In [None]:
# Restrict the ECARO country lookups to the configured COUNTRY.
# NOTE(review): `k in COUNTRY` is a substring test while COUNTRY is a str (and a membership
# test if the pipeline passes a list) — confirm which behaviour is intended.
# country_map keeps the helper's own key/value orientation; it is inverted later when the
# 'country' columns are cleaned.
country_map = {k:v for k,v in get_ecaro_countries_mapping(priority=False).items() if k in COUNTRY}
# country_code_map swaps the keys and values of the ('iso', 'code') lookup.
# NOTE(review): it is inverted again when 'country_of_nationality' is cleaned — verify the
# resulting direction against get_ecaro_countries_mapping.
country_code_map = {v:k for k,v in get_ecaro_countries_mapping('iso','code', priority=False).items()  if k in COUNTRY}


## HR Data ##
### Loading and Cleaning the Data

In [None]:
# Location of the raw HR workbook: data_source joined onto PROJ_ROOT.
# NOTE(review): despite the plural name this is a single path.
file_paths = PROJ_ROOT / data_source
# Display the resolved path as the cell output
file_paths

Read in the data

In [None]:
# Load every sheet of the HR workbook into a single DataFrame.
# The workbook is opened once and closed again by the context manager,
# instead of being re-read from disk for every sheet.
frames = []
with pd.ExcelFile(file_paths) as workbook:
    sheets = workbook.sheet_names  # Get the list of sheet names
    for sheet in sheets:
        df = workbook.parse(sheet_name=sheet)
        # Normalise the headers: trimmed, lower-case, spaces replaced by underscores.
        # str() keeps the rename from failing on non-string column labels.
        df = df.rename(columns=lambda x: str(x).strip().lower().replace(' ', '_'))
        # The last 4 characters of the sheet name are stored as the snapshot 'year'
        # (kept as a string, which is what the later year comparisons expect).
        df['year'] = sheet[-4:]
        frames.append(df)

# Concatenate once instead of growing the frame inside the loop.
df_hr = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

print('Shape:', df_hr.shape)

Clean the HR DataFrame

In [None]:
# Harmonise columns whose names differ between the 2016-2024 sheets.

# Country: first non-null value across the three spellings of the column,
# in the order '/country_name', '_country_name', '/country'.
country_fallback = df_hr['org_division_country_name'].combine_first(df_hr['org_division/country'])
country_raw = df_hr['org_division/country_name'].combine_first(country_fallback)
# Invert country_map so its values are replaced by its keys
# (NOTE: check Macedonia naming as various names are used!)
inverted_country_map = {v: k for k, v in country_map.items()}
country_coded = country_raw.replace(inverted_country_map)
# 'Macedonia' is recoded explicitly to 'MKD'
df_hr['country'] = country_coded.replace('Macedonia', 'MKD')

# Start date and staff level: fill gaps in the target column from its alternative name.
for target, fallback in (
    ('appointment_effective_date', 'entry_into_position_date'),
    ('staff_level_code', 'staff_level'),
):
    df_hr[target] = df_hr[target].combine_first(df_hr[fallback])

In [None]:
# Keep only the rows that belong to the configured country
country_mask = df_hr['country'] == COUNTRY
df_hr = df_hr.loc[country_mask].copy()
print('Shape:', df_hr.shape)

Add a column 'nationality' to distinguish between local and foreign staff.

In [None]:
# Clean the values in 'country_of_nationality' in three successive passes
# (NOTE: check Macedonia and BiH naming, various names are used!):
#   1. replace via the inverted country_code_map,
#   2. recode the two Macedonia spellings to 'MKD',
#   3. recode 'Bosnia and Herz' to 'BIH'.
inverted_code_map = {v: k for k, v in country_code_map.items()}
df_hr['country_of_nationality'] = (
    df_hr['country_of_nationality']
    .replace(inverted_code_map)
    .replace({'Macedonia, TFYR': 'MKD', 'TFYR Macedonia': 'MKD'})
    .replace('Bosnia and Herz', 'BIH')
)

In [None]:
# 'local' when the office country equals the staff member's nationality,
# otherwise 'foreign' (a missing nationality never compares equal, so it is 'foreign' too).
is_local = df_hr['country'].eq(df_hr['country_of_nationality'])
df_hr['nationality'] = np.where(is_local, 'local', 'foreign')

In [None]:
# Columns retained for the analysis. 'appointment_effective_date',
# 'appointment_expiry_date' and 'country_of_nationality' are not kept.
to_keep = [
    'index_number',
    'country',
    'year',
    'nationality',
    'appointment_type_name',
    'staff_level_code',
    'post_title',
]

# Select the columns and order the rows by country, then year (both ascending)
df_hr = df_hr.loc[:, to_keep].sort_values(by=['country', 'year'])
print('Shape:', df_hr.shape)

In [None]:
# Distinct (staff_level_code, post_title) pairs of all non-vacant rows.
# NOTE(review): `post_titles` is reassigned by the later "total number of Post Titles" cell
# before it is read anywhere in the visible notebook, so this value appears to be unused.
post_titles = df_hr.query("appointment_type_name != 'Vacant'")[['staff_level_code', 'post_title']].drop_duplicates()

In [None]:
# Thematic-area lookup: each key is a thematic area label and each value is the list of
# post titles classified under it. Titles are matched by exact string equality in
# map_post_to_area, so the spelling here has to match the HR data exactly.
# The 'Child Protection' list is the child-protection titles concatenated with the
# adolescent / child development titles.
post_title_mapping ={
    'Child Protection':     
    ['Chief Child Protection', 'Child Protection Officer', 'Child Protection Specialist'] + ['Adolescent Development Officer', 'Adolescent Development Specialist', 'Child Development Officer', 
     'Early Childhood Development Officer', 'Youth & Adolescent Development Officer', 
     'Education for Development Specialist', 'Communication for Development Officer'],
     'Education': 
    ['Education Officer', 'Education Specialist'],
     'Health and Nutrition': 
    ['Health & Nutrition Officer', 'Health & Nutrition Specialist', 'Health Education Officer', 
     'Health Officer', 'Nutrition Officer', 'HIV/AIDS Officer'],
    'Humanitarian Crises': 
    ['Emergency Officer'], 
    'Social Protection and Inclusion':	
    ['Social & Behavior Change Officer', 'Social & Economic Analysis Specialist', 'Social Policy & Economic Specialist', 
     'Social Policy Officer', 'Social Policy Specialist','Social Services Officer','Social Welfare Officer'],
    'HR, Operation, Administration and Finance': 
    ['Driver', 'Human Resources Assistant', 'Human Resources Associate', 'Human Resources Officer', 
     'Operations Assistant', 'Operations Manager', 'Senior Driver',
     'Accounting & Finance Assistant','Accounting & Finance Associate', 'Administrative & HR Assistant', 
     'Administrative & HR Associate', 
     'Administrative Assistant', 'Administrative Associate', 'Finance Assistant', 'Finance Associate', 
     'Finance/Accounts Assistant', 'Finance/Accounts Associate', 'Finance/HR Assistant', 'Senior Budget Associate', 
     'Senior Programme Budget Associate'],
    'Monitoring, Evaluation, Research':
    ['Monitoring & Evaluation Officer', 'Monitoring & Evaluation Specialist', 'Planning & Monitoring Officer', 
     'Planning Officer', 'Planning, Monitoring& Evaluation Officer', 'Research & Evaluation Officer', 
     'Statistics & Monitoring Officer'],
    'Advocacy and Communication': 
    ['Advocacy and Communications Specialist', 'Communication Assistant', 'Communication Associate', 
     'Communication Officer', 'Communication Specialist', 'Programme Communication Assistant'],
    'Management': 
    ['Chief Field Office', 'Deputy Representative', 'Representative'],
    'Supply and Logistics':
    ['Procurement & Administrative Associate', 'Procurement Associate', 'Procurement Officer'
     , 'Administrative & Supply Assistant', 'Administrative & Supply Associate', 
     'Supply & Logistics Assistant']   
}
# The number of classified Post Titles (sum of the lengths of all lists above)
print('Number of classified Post Titles:', sum(len(values) for values in post_title_mapping.values()))

In [None]:
# Distinct post titles present in the HR data (for comparison with the classified count)
post_titles = pd.unique(df_hr['post_title'])
print('Total number of Post Titles:', len(post_titles))

In [None]:
# Function to map post title to thematic areas
def map_post_to_area(post_title, mapping=None):
    """Return the thematic area whose title list contains ``post_title``.

    Parameters
    ----------
    post_title : object
        Post title to classify; compared by exact membership in each list.
    mapping : dict, optional
        ``{thematic_area: [post titles]}`` lookup. Defaults to the notebook-level
        ``post_title_mapping`` so existing single-argument calls (for example
        ``Series.apply(map_post_to_area)``) keep working.

    Returns
    -------
    The first matching thematic area in the mapping's iteration order, or
    ``None`` when no list contains the title.
    """
    if mapping is None:
        mapping = post_title_mapping
    for area, titles in mapping.items():
        if post_title in titles:
            return area
    return None  # Return None if no match is found

# Classify every post title with map_post_to_area; titles without a match
# (the function returns None for them) are labelled 'Other'.
df_hr['thematic_area'] = df_hr['post_title'].apply(map_post_to_area).fillna('Other')

### Position and post title to goal area mapping

In [None]:


# Inline CSV lookup table: one row per (staff_level_code, post_title) pair with the goal
# area assigned to it. The text is parsed with pd.read_csv below; the quoted row keeps
# the comma inside that post title from being read as a field separator.
to_goal_area_mapping = """staff_level_code,post_title,goal_area
P-5,Representative,Management
G-5,Executive Assistant,Cross Sectoral
NO-2,Communication Officer,Cross Sectoral
NO-1,Programme Officer,Cross Sectoral
G-6,Programme Assistant,Cross Sectoral
NO-2,Programme Officer,Cross Sectoral
G-7,Senior Programme Assistant,Cross Sectoral
NO-3,Health & Nutrition Specialist,Survive and Thrive
NO-2,Nutrition Officer,Survive and Thrive
NO-3,Education Specialist,Learn
G-5,Assistant,Cross Sectoral
P-2,Child Protection Officer,Protection from Violence and Exploitation
NO-3,Child Protection Specialist,Protection from Violence and Exploitation
NO-3,Monitoring & Evaluation Specialist,Development Effectiveness
P-4,Deputy Representative,Management
G-2,Driver,Cross Sectoral
G-6,Administrative & HR Assistant,Cross Sectoral
G-6,Accounting & Finance Assistant,Management
NO-3,Operations Manager,Management
G-6,Information Comm. Technology Assistant,Cross Sectoral
G-3,Senior Driver,Cross Sectoral
NO-1,Education Officer,Learn
G-6,Programme Associate,Cross Sectoral
G-6,Accounting & Finance Associate,Management
G-6,Information Comm. Technology Associate,Cross Sectoral
G-6,Human Resources Associate,Management
G-5,Administrative Assistant,Cross Sectoral
G-5,Programme Communication Assistant,Cross Sectoral
NO-2,Early Childhood Development Officer,Survive and Thrive
NO-2,Planning & Monitoring Officer,Development Effectiveness
NO-1,Adolescent Development Officer,Equitable Chance in Life
G-6,Finance Associate,Management
G-6,Administrative Associate,Cross Sectoral
G-5,Programme Assistant,Cross Sectoral
G-5,Supply & Logistics Assistant,Cross Sectoral
NO-1,Research & Evaluation Officer,Development Effectiveness
NO-3,Social Policy Specialist,Equitable Chance in Life
NO-3,Communication Specialist,Cross Sectoral
G-6,Procurement Associate,Cross Sectoral
NO-1,Communication Officer,Cross Sectoral
NO-1,Social & Behavior Change Officer,Cross Sectoral
NO-2,Education Officer,Learn
NO-2,Emergency Officer,Protection from Violence and Exploitation
P-2,Youth & Adolescent Development Officer,Equitable Chance in Life
NO-2,Communication for Development Officer,Cross Sectoral
NO-2,Youth & Adolescent Development Officer,Equitable Chance in Life
G-4,Administrative Assistant,Cross Sectoral
G-6,Finance/HR Assistant,Management
P-3,Adolescent Development Specialist,Equitable Chance in Life
NO-2,Adolescent Development Officer,Equitable Chance in Life
NO-2,Health & Nutrition Officer,Survive and Thrive
G-5,Communication Assistant,Cross Sectoral
G-6,Communication Associate,Cross Sectoral
NO-3,Adolescent Development Specialist,Equitable Chance in Life
NO-1,Emergency Officer,Protection from Violence and Exploitation
P-3,Advocacy and Communications Specialist,Cross Sectoral
NO-1,Early Childhood Development Officer,Survive and Thrive
NO-1,Health Officer,Survive and Thrive
NO-1,Human Resources Officer,Management
G-7,Senior Programme Budget Associate,Management
G-6,Procurement & Administrative Associate,Cross Sectoral
G-6,Executive Associate,Cross Sectoral
NO-2,Child Protection Officer,Protection from Violence and Exploitation
P-3,Child Protection Specialist,Protection from Violence and Exploitation
G-6,Finance Assistant,Management
NO-1,Communication for Development Officer,Cross Sectoral
NO-1,Child Protection Officer,Protection from Violence and Exploitation
NO-1,Social Policy Officer,Equitable Chance in Life
NO-1,Planning Officer,Development Effectiveness
NO-1,Monitoring & Evaluation Officer,Development Effectiveness
G-5,Operations Assistant,Cross Sectoral
NO-2,Health Officer,Survive and Thrive
NO-2,Social Policy Officer,Equitable Chance in Life
G-6,Field Assistant,Cross Sectoral
NO-1,Project Officer,Cross Sectoral
P-4,Chief Field Office,Management
P-3,Health & Nutrition Specialist,Survive and Thrive
NO-1,Health Education Officer,Survive and Thrive
G-7,Senior Project Assistant,Cross Sectoral
NO-2,Social Welfare Officer,Equitable Chance in Life
P-4,Child Protection Specialist,Protection from Violence and Exploitation
NO-3,Social Policy & Economic Specialist,Equitable Chance in Life
G-6,Finance/Accounts Assistant,Management
NO-2,Statistics & Monitoring Officer,Development Effectiveness
G-7,Senior Project Associate,Cross Sectoral
G-6,Field Associate,Cross Sectoral
G-6,Finance/Accounts Associate,Management
P-4,Chief Child Protection,Protection from Violence and Exploitation
G-6,IT Associate,Cross Sectoral
G-5,Administrative & Supply Assistant,Cross Sectoral
NO-2,Project Officer,Cross Sectoral
G-7,Senior Budget Associate,Management
G-5,Administrative & Supply Associate,Cross Sectoral
NO-1,Health & Nutrition Officer,Survive and Thrive
NO-2,Social Services Officer,Equitable Chance in Life
G-6,Administrative & Supply Associate,Cross Sectoral
NO-2,Social & Behavior Change Officer,Cross Sectoral
G-6,Communication Assistant,Cross Sectoral
G-6,Programmer Assistant,Cross Sectoral
NO-1,Procurement Officer,Cross Sectoral
G-6,Administrative & HR Associate,Management
G-5,Administrative Associate,Cross Sectoral
NO-1,Child Development Officer,Survive and Thrive
NO-1,HIV/AIDS Officer,Survive and Thrive
G-6,Information Technology Associate,Cross Sectoral
NO-2,"Planning, Monitoring& Evaluation Officer",Development Effectiveness
NO-2,Partnerships Officer,Cross Sectoral
NO-3,Monitoring & Evaluation Officer,Development Effectiveness
NO-3,Social & Economic Analysis Specialist,Equitable Chance in Life
NO-3,Education for Development Specialist,Learn
G-4,Programme Assistant,Cross Sectoral
G-5,Administrative & HR Assistant,Management
G-5,Human Resources Assistant,Management
"""
# Parse the CSV text above into a DataFrame (first row is the header)
df_to_goal_area_mapping = pd.read_csv(io.StringIO(to_goal_area_mapping))
# Preview the first rows as the cell output
df_to_goal_area_mapping.head()


In [None]:
# Turn the lookup table into a {(staff_level_code, post_title): goal_area} dict.
# This rebinds `to_goal_area_mapping` from the CSV text to that dict.
goal_area_keys = ['staff_level_code', 'post_title']
to_goal_area_mapping = df_to_goal_area_mapping.set_index(goal_area_keys)['goal_area'].to_dict()

# Look up every staff row by its (level, title) pair
staff_keys = df_hr.set_index(goal_area_keys).index
df_hr['goal_area'] = staff_keys.map(to_goal_area_mapping)

### Staff against Utilization

In [None]:

# Load the programme-structure workbook produced by the upstream task
df_activities_programme_stucture = pd.read_excel(upstream['20_activities_programme_stucture']['data'])

# Inverse of country_map (values become keys), used to recode the 'country' column
inverse_country_map = {value: key for key, value in country_map.items()}

df_activities_programme_stucture = df_activities_programme_stucture.assign(
    # apply inverse mapping
    country_iso=lambda frame: frame['country'].replace(inverse_country_map),
    # convert year to string to map
    year=lambda frame: frame['year'].astype(str),
)

df_activities_programme_stucture.head()

In [None]:
# Number of staff rows per country, year and goal area (rows without a goal area are left out)
staff_with_goal_area = df_hr.dropna(subset=['goal_area'])
df_to_plot = (
    staff_with_goal_area
    .groupby(['country', 'year', 'goal_area'])
    .size()
    .reset_index(name='hr_count')
)
print(df_to_plot.head())

In [None]:
# Total utilized amount keyed by (country_iso, year, goal_area)
utilized_totals = df_activities_programme_stucture.groupby(['country_iso', 'year', 'goal_area'])['utilized'].sum()
activities_programme_stucture_map = utilized_totals.to_dict()

# Attach the utilized total to each (country, year, goal_area) staff count
plot_keys = df_to_plot.set_index(['country', 'year', 'goal_area']).index
df_to_plot['utilized'] = plot_keys.map(activities_programme_stucture_map)

# Utilized amount per staff member; combinations without a utilized value become 0
df_to_plot['mean_utilized'] = df_to_plot['utilized'].div(df_to_plot['hr_count']).fillna(0)

# Snapshot that is exported at the end of the notebook
hr_count_against_utilized_by_goal_area = df_to_plot.copy()
df_to_plot.head()

### Staff Turnover and Appointment Types

To find the staff turnover over ~ 1 year we calculate:

**turnover = [(the number of employees who left within the time period)/(the number of employees in the year)] x 100**

the number of employees who left within the time period: how many index numbers from say 2016 are not present in 2017

the number of employees in the year: how many distinct index numbers in 2016

Staff data are snapshots of employed staff:  31st Aug - 1st Sep for 2024-2018; 30th Sept for 2017 and 30th Oct for 2016

In [None]:
# Function that calculates the number of staff that work in a given year and that have left from the index_number
def left_total(country, year, df=None):
    """Count the staff of ``country`` in ``year`` and those absent the following year.

    Parameters
    ----------
    country : value compared against the 'country' column.
    year : value compared against the 'year' column; must be convertible with
        ``int()``. The following year is built with the same type as ``year``,
        so both '2016' -> '2017' and 2016 -> 2017 work.
    df : pandas.DataFrame, optional
        Frame with 'country', 'year' and 'index_number' columns. Defaults to the
        notebook-level ``df_hr`` so existing two-argument calls are unchanged.

    Returns
    -------
    tuple ``(left, total, index_list)``:
        ``total`` is the number of distinct index numbers in ``year``, ``left`` the
        number of those not present in the following year, and ``index_list`` the
        index numbers that left (in no particular order).
    """
    if df is None:
        df = df_hr
    following_year = type(year)(int(year) + 1)
    in_country = df['country'] == country
    country_this_year = set(df.loc[in_country & (df['year'] == year), 'index_number'])
    country_next_year = set(df.loc[in_country & (df['year'] == following_year), 'index_number'])
    # Compute the set difference once and derive both the count and the list from it
    leavers = country_this_year - country_next_year
    return len(leavers), len(country_this_year), list(leavers)

def count_local(country, year, index_list, df=None):
    """Count 'local' and 'foreign' staff among the given index numbers.

    Parameters
    ----------
    country, year : values compared against the 'country' and 'year' columns.
    index_list : list-like of index numbers to keep (passed to ``Series.isin``).
    df : pandas.DataFrame, optional
        Frame with 'country', 'year', 'index_number' and 'nationality' columns.
        Defaults to the notebook-level ``df_hr`` so existing three-argument calls
        are unchanged.

    Returns
    -------
    tuple ``(local_count, foreign_count)``; a category that does not occur counts as 0.
    """
    if df is None:
        df = df_hr
    selected = df.loc[
        (df['country'] == country)
        & (df['year'] == year)
        & df['index_number'].isin(index_list)
    ]
    nationality_counts = selected['nationality'].value_counts()

    # Get the counts for 'local' and 'foreign'
    return nationality_counts.get('local', 0), nationality_counts.get('foreign', 0)


In [None]:
# Restrict the HR data to the configured country before computing turnover.
# (The same filter is applied right after the column clean-up, so this repeats it.)
country_mask = df_hr['country'] == COUNTRY
df_hr = df_hr.loc[country_mask].copy()
print('Shape:', df_hr.shape)

In [None]:
# Snapshot years in ascending order. Sorting explicitly makes the "skip the final
# year" slice below independent of the row order of df_hr.
years = np.sort(df_hr['year'].unique())
countries = df_hr['country'].unique()

print(years)
print(countries)

turnover_columns = ['country', 'year', 'turnover', 'local_turnover', 'foreign_turnover']
turnover_records = []
total_index_list = [] # indices of staff that have left

# Turnover for the final snapshot year can't be calculated (there is no following
# year to compare with), so it is skipped
for country in countries:
    for year in years[:-1]:
        left, total, index_list = left_total(country, year)
        if total == 0:
            # No staff recorded for this country/year: turnover is undefined, so no row is emitted
            continue
        local_count, foreign_count = count_local(country, year, index_list)
        turnover_records.append({
            'country': country,
            'year': year,
            'turnover': 100*(left/total),
            'local_turnover': 100*(local_count/total),
            'foreign_turnover': 100*(foreign_count/total),
        })
        total_index_list.extend(index_list)

# Build the frame once from the collected records; passing the column list keeps the
# expected columns even when there is only one snapshot year and no record was produced
df_turnover = pd.DataFrame(turnover_records, columns=turnover_columns)
df_turnover = df_turnover.sort_values(by=['country', 'year'], ascending=True)

print(df_turnover.head())

Add contract types (appointment_type_name), nationality, staff level (staff_level_code) and combine with df_turnover. 

In [None]:
def pivot_table(df, col_name):
    """Count the values of ``col_name`` per country and year in wide format.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'country', 'year' and ``col_name`` columns. The counts are taken
        from this argument (not from a notebook-level frame).
    col_name : str
        Column whose distinct values become the output columns.

    Returns
    -------
    pandas.DataFrame with one row per (country, year), the columns 'country' and
    'year', and one column per distinct value of ``col_name`` holding its count
    (0 where the value does not occur).
    """
    # Naming the count column explicitly avoids relying on the default name that
    # value_counts() produces, which depends on the pandas version
    staff = df.groupby(['country', 'year'])[col_name].value_counts().reset_index(name='count')
    wide = staff.pivot_table(index=['country', 'year'],
                             columns=col_name,
                             values='count',
                             fill_value=0).reset_index()
    return wide

In [None]:
# 'TA' is treated as the same appointment type as 'Temporary Appt.'
df_hr['appointment_type_name'] = df_hr['appointment_type_name'].replace('TA', 'Temporary Appt.')

# One count table per categorical column: appointment type, nationality, staff level
df_staff = pivot_table(df_hr, 'appointment_type_name')
df_nationality = pivot_table(df_hr, 'nationality')
df_level = pivot_table(df_hr, 'staff_level_code')

# Left-join the other tables (and the turnover figures) onto the appointment-type
# counts, one after another, matching on country and year
to_merge = [df_nationality, df_level, df_turnover]
for extra in to_merge:
    df_staff = df_staff.merge(extra, how='left', on=['country', 'year'])

#### Post Titles of Staff Who Left (Turnover)

Look at which types of staff have left within a year, to see if there is a pattern. `df_turnover_positions` stores the country and post title of each staff member who left within a year (this data is not exported to Excel).

In [None]:
# First occurrence of each index_number in df_hr supplies the details of a leaver
df_hr_unique = df_hr.drop_duplicates(subset='index_number')
leaver_details = df_hr_unique[['index_number', 'country', 'post_title']]

# One row per entry of total_index_list, with country and post title joined on 'index_number'
df_turnover_positions = pd.DataFrame({'index_number': total_index_list}).merge(
    leaver_details, on='index_number', how='left')

# Occurrences of each post title among the leavers, with explicit column names
df_post_title_counts = (
    df_turnover_positions['post_title']
    .value_counts()
    .reset_index()
    .set_axis(['post_title', 'count'], axis=1)
)

### Number of Emergency Officers


ARM: 1 in 2024

AZE: 1 in 2023 and 2024 (same person)

BIH: 1 from 2020 to 2024 (same person)

GEO: 1 in 2020

### Staff by Thematic Area

Create a mapping of Post Titles to thematic areas:
- Child Development 
- Child Protection
- Climate Change and Environmental Sustainability 
- Education
- Gender Equality 
- Health and Nutrition 
- HIV/AIDS
- Humanitarian Crises 
- Social Protection and Inclusion
- WASH

Other areas:
- Information Technology
- HR and Operation
- Administration and Finance
- Monitoring
- Communication
- Management (P-4 and P-5)
- Supply and Logistics

Combine:

Child Development = Early Childhood Development + Adolescent Development

Health and Nutrition = Health and Nutrition + Health + Nutrition

In [None]:
# Count the staff in each thematic area per country and year.
# The count column is named explicitly so the result does not depend on the default
# column name that value_counts() produces (which differs between pandas versions).
df_agg = df_hr.groupby(['country', 'year'])['thematic_area'].value_counts().reset_index(name='staff_number')

# Calculate the total staff counts per country and year
staff_counts = df_agg.groupby(['country', 'year'])['staff_number'].sum().reset_index()
staff_counts = staff_counts.rename(columns={'staff_number': 'total_staff'})

# Merge total staff counts back to the per-area counts
df_agg = df_agg.merge(staff_counts, on=['country', 'year'])

# Calculate the proportion of staff (in percent) and list the largest areas first within each year
df_agg['staff_proportion'] = 100*(df_agg['staff_number'] / df_agg['total_staff'])
df_agg = df_agg.sort_values(by=['country', 'year', 'staff_proportion'], ascending=[True, True, False])

### Output to Excel

In [None]:
# Write each result to the Excel file configured for it in `product`.
# The parent directory is created for every output path (not only for 'data_staff'),
# so the export also works when the products are configured in different folders.
outputs = {
    'data_staff': df_staff,
    'data_staff_thematic': df_agg,
    'data_hr_count_against_utilized_by_goal_area': hr_count_against_utilized_by_goal_area,
}

for product_key, frame in outputs.items():
    output_path = Path(product[product_key])
    output_path.parent.mkdir(parents=True, exist_ok=True)  # Create missing directories
    frame.to_excel(product[product_key], index=False)