In [None]:
# IMPORTANT: The parameters below are set only for running this notebook independently.
# When executing the full Ploomber pipeline, these values will be overridden by the settings in `pipeline.yaml`.
# Any modifications made here will not persist when running the pipeline.

# Country whose processed data folder is read from and written to.
COUNTRY = "ARM"

# Input files produced upstream. The paths are derived from COUNTRY so that changing
# the country above switches inputs and output together.
# The "50_parttners_to_type" key is looked up verbatim when the workbook is loaded,
# so its spelling must stay in sync with that lookup.
upstream = {
    "50_parttners_to_type": {
        "data": f"../data/processed/{COUNTRY}/unsdcf-partner_type_mapping.xlsx",
        "data2": f"../data/processed/{COUNTRY}/unsdcf-framework-with-partner-types.xlsx",
    }
}

# Output workbook written at the end of the notebook.
product = {
    "data": f"../data/processed/{COUNTRY}/agencies_SDG_and_outcomes.xlsx",
}
data_source = None

This notebook uses UNSDCF data to determine agency funding by SDG (Sustainable Development Goal) area and to count outputs/outcomes per agency.

In [None]:
import pandas as pd
import numpy as np
import re
from openpyxl import load_workbook
from pathlib import Path
from unicef_cpe.config import PROJ_ROOT
from unicef_cpe.utils import *
import unicef_cpe
from unicef_cpe.plotting import SDG_goals

In [None]:
# Keep only the mapping entries whose key passes the `in COUNTRY` test.
# NOTE(review): when COUNTRY is a plain string this is a substring test, not equality;
# when it is a list it is a membership test.
country_map = {
    key: value
    for key, value in get_ecaro_countries_mapping(priority=False).items()
    if key in COUNTRY
}
# Inverse lookup (value -> key), used to recode the `country` column.
country_code_map = dict((value, key) for key, value in country_map.items())

In [None]:
# Load the classified framework workbook and normalise its headers:
# lower-case, with spaces replaced by underscores.
df_agencies = (
    pd.read_excel(upstream['50_parttners_to_type']['data2'])
    .rename(columns=lambda header: header.lower().replace(' ', '_'))
)
print('Shape:', df_agencies.shape)

In [None]:
# Columns retained for the analysis ('outcome_code' and 'output_code' are currently left out).
to_keep = ['country', 'plan_name', 'outcome', 'output', 'sdg_goals',
           'contributing_partners', 'implementation_partners', 'agencies','agency_abbreviations',
           'contributing_partners_partner_category','contributing_partners_partner_sub_category',
           'implementation_partners_partner_category', 'implementation_partners_partner_sub_category', 
           'total_required_resources', 'total_available_resources', 'total_expenditure_resources',
           '2021_expenditure', '2022_expenditure', '2023_expenditure', '2024_expenditure'
           ]

# Build the trimmed frame with `assign`/`rename`, both of which return new objects.
# Assigning into (or renaming in place on) the result of `df[to_keep]` operates on a
# slice of the loaded frame and triggers pandas' SettingWithCopyWarning here and in
# every later cell that adds a column.
df_agencies = (
    df_agencies[to_keep]
    # Recode the country values via the inverse country mapping.
    .assign(country=lambda frame: frame['country'].replace(country_code_map))
    # Shorter, consistent column names for downstream use.
    .rename(columns={'agencies': 'agencies_full_name', 'total_expenditure_resources':'total_expenditure',
                     'total_required_resources': 'total_required', 'total_available_resources': 'total_available',
                     'contributing_partners_partner_category' : 'contributing_partners_category',
                     'implementation_partners_partner_category' : 'implementation_partners_category'
                     })
)
print('Shape:', df_agencies.shape)

**Handling rows with multiple SDGs or agencies, and rows with an unknown SDG:**

In [None]:
def _label_sdg_codes(codes):
    """Collapse the list of SDG numbers found in one row into a single label.

    Returns 'Unknown' when no number was found, 'Multiple' when more than one
    was found, and the number itself (as a string) when exactly one was found.
    """
    if not codes:
        return 'Unknown'
    if len(codes) > 1:
        return 'Multiple'
    return codes[0]


# Extract the stand-alone numbers 1-17 from the sdg_goals text.
# Missing cells are converted to '' first: `.str.findall` returns NaN for them,
# and NaN is truthy, so the previous `', '.join(x) if x` raised TypeError on any
# row without SDG text instead of labelling it 'Unknown'.
df_agencies['sdg_goal_codes'] = (
    df_agencies['sdg_goals']
    .fillna('')
    .astype(str)
    .str.findall(r'(\b[1-9]\b|\b1[0-7]\b)')  # Finding numbers 1-17
    .apply(_label_sdg_codes)
)

# Map SDG goal names from the SDG_goals dictionary for consistency;
# labels with no entry in SDG_goals become NaN.
df_agencies['SDG'] = df_agencies['sdg_goal_codes'].map(SDG_goals)

In [None]:
# Function to assign cases with multiple agencies:
# If multiple agencies: 1. 'Multiple (incl. UNICEF)' or 2. 'Multiple (excl. UNICEF)'
def determine_agency_status(agency_abbr):
    """Collapse a delimited agency list into a single label.

    Args:
        agency_abbr: Agency abbreviation(s) for one row; several agencies are
            separated by ',' or ';'. Non-string values (e.g. NaN/None from an
            empty cell) are accepted.

    Returns:
        - the input unchanged when it is not a string or contains no delimiter;
        - the single agency name when delimiters are present but only one
          non-blank name remains (e.g. a trailing ';');
        - 'Multiple (incl. UNICEF)' / 'Multiple (excl. UNICEF)' when several
          names are listed, depending on whether 'UNICEF' appears in the text.
    """
    # Empty cells arrive as NaN/None; `',' in nan` would raise TypeError.
    if not isinstance(agency_abbr, str):
        return agency_abbr

    if ',' not in agency_abbr and ';' not in agency_abbr:
        return agency_abbr

    # Stray or repeated delimiters produce blank entries; ignore them so that a
    # single agency followed by a delimiter is not reported as "Multiple".
    names = [part.strip() for part in agency_abbr.replace(';', ',').split(',')]
    names = [part for part in names if part]
    if not names:
        return agency_abbr
    if len(names) == 1:
        return names[0]

    if 'UNICEF' in agency_abbr:
        return 'Multiple (incl. UNICEF)'
    return 'Multiple (excl. UNICEF)'

# Add an 'agencies' column that labels rows listing multiple agencies
# (1. incl. UNICEF or 2. excl. UNICEF); other rows keep their abbreviation.
df_agencies['agencies'] = df_agencies['agency_abbreviations'].map(determine_agency_status)

**Counting outputs/outcomes per agency:**

- If multiple agencies are listed for an outcome/output, the outcome/output is counted once for each of them (for example, if Outcome 1.1 lists UNICEF and WHO, it is counted for both UNICEF and WHO).

In [None]:
# Number of distinct outcomes and outputs in the framework
# (a missing value counts as one distinct entry).
print('Outcome count:', df_agencies['outcome'].nunique(dropna=False))
print('Output count:', df_agencies['output'].nunique(dropna=False))

In [None]:
# Independent working copy with only the columns needed for per-agency counting.
df_out = df_agencies.loc[:, ['country', 'outcome', 'output', 'agency_abbreviations']].copy()

In [None]:
# Treat ';' and ',' as the same delimiter, split each cell into a list of agency
# names, and give every agency its own row.
df_out = df_out.assign(
    agency_abbreviations=df_out['agency_abbreviations'].str.replace(';', ',').str.split(',')
).explode('agency_abbreviations')
print('Before', df_out.shape)

# Repeated delimiters leave blank entries behind: trim whitespace, drop the blanks,
# then remove rows that became identical after the split.
df_out = df_out.assign(agency_abbreviations=df_out['agency_abbreviations'].str.strip())
df_out = df_out.loc[df_out['agency_abbreviations'] != ''].drop_duplicates()
print('After', df_out.shape)

In [None]:
def count_instances(df, name):
    """Count the distinct rows of `df` per (country, agency_abbreviations) pair.

    Args:
        df: Frame containing 'country', 'agency_abbreviations' and the column(s)
            whose distinct combinations should be counted.
        name: Name of the resulting count column.

    Returns:
        A frame with columns 'country', 'agency_abbreviations' and `name`.
    """
    # Duplicates are removed first so each distinct row contributes once.
    unique_rows = df.drop_duplicates()
    group_sizes = unique_rows.groupby(['country', 'agency_abbreviations']).size()
    return group_sizes.reset_index(name=name)

In [None]:
# Distinct outcomes and distinct outputs per (country, agency).
df_outcome = count_instances(df_out[['country', 'agency_abbreviations', 'outcome']], 'Outcome Count')
df_output = count_instances(df_out[['country', 'agency_abbreviations', 'output']], 'Output Count')

# Join the two counts on their shared columns, then reshape to long format
# with one row per (country, agency, count type).
df_out_count = pd.merge(df_outcome, df_output, how='left')
df_count = pd.melt(
    df_out_count,
    id_vars=['country', 'agency_abbreviations'],
    value_vars=['Outcome Count', 'Output Count'],
    var_name='type',
    value_name='count',
)
# NOTE(review): the write step at the end of the notebook saves df_agencies only;
# confirm whether df_count is meant to be persisted as well.



**Write to Spreadsheet**

In [None]:
# Resolve the product path, create its folder if needed, and write the
# agency-level table without the index column.
output_path = Path(product['data'])
output_path.parent.mkdir(parents=True, exist_ok=True)
df_agencies.to_excel(output_path, index=False)