# Appendix - Code Assisting Data Preparation

## README

TBC-------------

## 0 Initial Run

Run the following cell every time you start a new kernel to configure related parameters.

In [None]:
from pathlib import Path
import sys

# Resolve the project folders relative to where the notebook is executed.
curr_root = Path().resolve()    # current working directory (not the file itself)
repo_root = curr_root.parent    # repository root, assumed to be one level above the working directory
data_root = repo_root / "data"  # path for saving the data
src_root = repo_root / "src"    # path for other sources

# Add src to the system path to import custom functions. The membership check
# keeps sys.path free of duplicate entries when this cell is re-run in the
# same kernel.
if str(src_root) not in sys.path:
    sys.path.append(str(src_root))


## 1 Data Download

### 1.1 NO2 Data Download 

In this section, NO2 pollution data from [Google Earth Engine Sentinel 5P](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_NRTI_L3_NO2) is downloaded for both Ethiopia and Iraq at the country level.

Based on the related literature and on data quality, we decided to use **NO2_column_number_density** as the proxy for the NO2 concentration level.

#### 1) Custom Functions

Custom function to generate the list of dates covering the desired time period of NO2 data.

In [None]:
import pandas as pd
from typing import List

import ee
# Log into Google Earth Engine and start the client session.
# On first use, each user has to authenticate with their own Earth Engine account.
# NOTE(review): recent earthengine-api releases may require a Cloud project,
# e.g. ee.Initialize(project=...) — confirm against the installed version.
ee.Authenticate()
ee.Initialize()

# Function: generate desired time period of NO2 data  
def specific_date(start_date: str, end_date: str, time_resolution: str = 'D') -> List[str]:
    """
    Build the list of dates between two dates at a given time resolution.

    Parameters:
    - start_date: str
        First date of the period, format: 'YYYY-MM-DD'.
    - end_date: str
        Last date of the period, format: 'YYYY-MM-DD'.
    - time_resolution: str
        Frequency alias passed to pandas.date_range (e.g., 'D' for daily,
        'W' for weekly). Default is 'D'.

    Return:
    - list: One 'YYYY-MM-DD' string per timestamp generated by
        pandas.date_range, in chronological order. The list is empty when
        no timestamp falls inside the period.
    """
    timestamps = pd.date_range(start_date, end_date, freq=time_resolution)
    # Format each timestamp individually instead of formatting the whole index
    return [stamp.strftime('%Y-%m-%d') for stamp in timestamps]


Submit export tasks to Earth Engine; the exported files are saved to Google Drive.

In [None]:
# Function: download NO2 data
def download_no2_country(country_name: str, dates: list):
    """
    Request NO2 data exports from Earth Engine for a specified country.

    Every pair of consecutive entries in `dates` defines one export window:
    the images inside the window are averaged into a single image, which is
    then exported to Google Drive.

    Parameters:
    - country_name: str
        Name of the target country. Must match the 'country_na' field of the
        'USDOS/LSIB_SIMPLE/2017' boundary dataset used by Earth Engine.
    - dates: list
        Ordered list of window boundaries, format: 'YYYY-MM-DD'
        (e.g., the output of specific_date). N dates yield N-1 export tasks;
        with fewer than two dates no task is submitted.

    Return:
    - None. Sends a/multiple request(s) to Earth Engine to initiate data download.
        Exported files are saved under a folder named 'NO2_<country_name>' in Google Drive.
        Each exported file is named using the starting date of its window.
        A failed submission is reported with a printed message and does not
        stop the remaining submissions.
    """

    # A single date cannot form a (start, end) window
    if len(dates) < 2:
        print(f'{country_name}: At least two dates are needed to define an export window, no task submitted.')
        return

    countries = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017')
    country = countries.filter(ee.Filter.eq('country_na', country_name)).geometry()

    # The collection and the selected band are the same for every window,
    # so they are built once outside the loop.
    # NOTE(review): the notebook text names 'NO2_column_number_density' as the
    # chosen proxy, while the tropospheric band is selected here — confirm
    # which one is intended.
    no2_collection = (ee.ImageCollection('COPERNICUS/S5P/NRTI/L3_NO2')
        .select('tropospheric_NO2_column_number_density'))

    # Walk over consecutive (start, end) pairs of the boundary list
    for date_start, date_end in zip(dates, dates[1:]):

        # filterDate keeps images from date_start (inclusive) up to date_end (exclusive)
        no2 = no2_collection.filterDate(date_start, date_end).mean()

        task = ee.batch.Export.image.toDrive(
            image=no2,
            description=f'{country_name}_NO2_{date_start}_{date_end}',
            folder=f'NO2_{country_name}',
            fileNamePrefix=f'{country_name}_NO2_{date_start}',
            region=country,
            scale=1000,
            maxPixels=1e13
        )

        # Broad catch on purpose: one failed submission should not abort the
        # remaining windows of a long batch.
        try:
            task.start()
        except Exception as e:
            print(f'{country_name}: Failed to submit the export task for {date_start}: {e}')
        else:
            print(f'{country_name}: The export task for {date_start} is ongoing, please check the results in Google Drive.')

#### 2) Call and Download Data

In [None]:
# Daily boundaries with both endpoints included: 365 days (2023) + 366 days (2024)
# + 2025-01-01, which closes the last daily window starting on 2024-12-31.
dates = specific_date('2023-01-01', '2025-01-01')
len(dates) # 732 boundary dates -> 731 daily export windows per country

# Download Ethiopia Data
download_no2_country('Ethiopia', dates)

# Download Iraq Data
download_no2_country('Iraq', dates)

## 2 Generate Meshes

Generate meshes, from 2023-01-01 to 2024-12-31, one mesh for each day.

In [None]:
import shutil
from datetime import datetime, timedelta
import fiona

# Source mesh files; each one is copied once per day below
mesh_addis = data_root / "mesh-grid" / "grid_addis_ababa.gpkg"
mesh_baghdad = data_root / "mesh-grid" / "grid_baghdad.gpkg"

# First layer listed in each GeoPackage.
# Not used in this cell — presumably consumed by later cells.
lyr_addis_name = fiona.listlayers(mesh_addis)[0]
lyr_baghdad_name = fiona.listlayers(mesh_baghdad)[0]

# start and end date (both get a mesh)
start_date = datetime.strptime("2023-01-01", "%Y-%m-%d")
end_date = datetime.strptime("2024-12-31", "%Y-%m-%d")

# Output folders for the daily copies
addis_meshes_path = data_root / 'addis-empty-mesh-data'
baghdad_meshes_path = data_root / 'baghdad-empty-mesh-data'

addis_meshes_path.mkdir(exist_ok=True)
baghdad_meshes_path.mkdir(exist_ok=True)

delta = end_date - start_date
days_count = delta.days + 1  # +1 so that end_date itself is included

# One entry per city:
# (label used in the log message, file name prefix, source mesh, output folder)
mesh_jobs = (
    ("Addis Ababa", "addis-ababa", mesh_addis, addis_meshes_path),
    ("Baghdad", "baghdad", mesh_baghdad, baghdad_meshes_path),
)

# The same copy routine serves both cities, so it is written only once
for city_label, file_prefix, source_mesh, output_dir in mesh_jobs:
    for day_offset in range(days_count):
        date_str = (start_date + timedelta(days=day_offset)).strftime("%Y-%m-%d")
        # Resulting file name: <prefix>-YYYY-MM-DD.gpkg
        shutil.copy(source_mesh, output_dir / f"{file_prefix}-{date_str}.gpkg")

    print(f"Complete Generating meshes for {city_label}!")


Complete Generating meshes for Addis Ababa!
Complete Generating meshes for Baghdad!
