In [None]:
# Parameters cell.
# IMPORTANT: the values below are defaults used only when this notebook is run on its own.
# When the full Ploomber pipeline is executed, they are overridden by the settings in `pipeline.yaml`,
# so edits made here do not affect pipeline runs.
upstream = None  # no upstream dependencies are declared here
COUNTRY =  'ARM' # Country code; used to look up the country name and to build the output path
product = {'data': f'../data/processed/{COUNTRY}/partnership_agreements.xlsx'}  # Output location of the final data product (stored under the 'data' key)
data_source = 'data/raw/etools-datamart/Full Partner Agreement List 20240903.json'  # Raw JSON export; joined onto PROJ_ROOT when it is loaded


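This notebook extracts partnership agreement data from a JSON file, keeps the agreements of the selected country, adds derived duration and classification columns, and saves the result as an Excel file.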


In [None]:
import json
import pandas as pd
from pathlib import Path

from unicef_cpe.config import PROJ_ROOT
import unicef_cpe as cpe

# Other settings: lookup tables restricted to the selected country.
# Keys are compared with `==` rather than `in`: COUNTRY is a single string, so `k in COUNTRY`
# would be a substring test and could also match any key that is merely part of the code.
country_map = {
    code: name
    for code, name in cpe.utils.get_ecaro_countries_mapping(priority=False).items()
    if code == COUNTRY
}
# Reverse lookup (mapping value -> mapping key), used to derive the country code column.
country_code_map = {name: code for code, name in country_map.items()}

In [None]:
# Resolve the raw export relative to the project root and parse it as JSON.
file_path = Path(PROJ_ROOT, data_source)
data = json.loads(file_path.read_text(encoding='utf8'))

# Flatten the records stored under the top-level 'results' key into a table.
df = pd.json_normalize(data, record_path='results')

In [None]:
# Keep only the rows whose country name matches the selected country code.
target_country_name = country_map.get(COUNTRY)
mask = df['country_name'] == target_country_name
df = df.loc[mask].copy()

In [None]:
# Keep only agreements whose status is one of the listed values.
# `.copy()` detaches the result from the unfiltered frame so that the column
# assignments in later cells do not trigger SettingWithCopyWarning.
df = df.query("status in ['ended', 'signed', 'suspended', 'terminated']").copy()

In [None]:
# Preview the filtered table; a bare last expression uses the notebook's rich table display.
df.head()

In [None]:
# Convert the agreement date columns to datetimes; unparseable values become NaT.
date_columns = ['start', 'end']
parsed_dates = {name: pd.to_datetime(df[name], errors='coerce') for name in date_columns}
for name, values in parsed_dates.items():
    df[name] = values

In [None]:
# Agreement duration in days (missing when either date is NaT).
df['duration'] = (df['end'] - df['start']).dt.days

# Sort by start date so that the first row per vendor is its earliest agreement.
# A stable sort keeps agreements sharing a start date in their incoming order,
# which makes the new/renewal classification below reproducible.
df = df.sort_values(by='start', kind='mergesort')

df['start_year'] = df['start'].dt.year

# Classify agreements as 'long' (more than 365 days) or 'short'.
# Rows with an unknown duration are left missing instead of being labelled 'short'
# (a plain `x > 365` comparison is False for NaN).
df['agreement_length'] = (
    df['duration']
    .gt(365)
    .map({True: 'long', False: 'short'})
    .where(df['duration'].notna())
)

# Classify agreements as 'new' or 'renewal': the first occurrence of each vendor number
# (in start-date order) is 'new', every later occurrence is a 'renewal'.
df['agreement_type'] = (
    df.duplicated(subset='vendor_number', keep='first')
    .map({True: 'renewal', False: 'new'})
)

# Translate the country name into its code using the reverse lookup table.
df['country_code'] = df['country_name'].replace(country_code_map)

In [None]:
# Create the output folder if it does not exist yet (e.g. when the notebook is run
# on its own for a country that has no processed data so far), then write the product.
Path(product['data']).parent.mkdir(parents=True, exist_ok=True)
df.to_excel(product['data'], index=False)

In [None]:
################################################################################################################################################################################################