In [5]:
import io, sys, os, datetime, requests, json
from collections import defaultdict
import numpy as np
import pandas as pd
import xarray as xr
import netCDF4
import boto3
import ee

In [6]:
# Start the Earth Engine client session for this kernel.
# ee.Authenticate() is left commented out; it is only needed when no stored
# Earth Engine credentials exist on this machine (run it once, then re-comment).
#ee.Authenticate()
ee.Initialize()

In [20]:
# Per-pollutant metadata, keyed by the short species code used in file names.
#
# Fields:
#   name          - human-readable pollutant name
#   molar_mass    - only present for gases (presumably g/mol - TODO confirm)
#   cams_unit     - unit of the CAMS variable; the 'kg/kg' species are the ones
#                   the exceedance cell converts using pressure and temperature
#   who_threshold - limit compared against concentration * 1e9 in the
#                   exceedance cell (i.e. ug/m^3 when the concentration is kg/m^3)
#   eac4_varname  - variable name read from the EAC4 NetCDF file
#
# Key order matters: the exceedance cell iterates over the keys in this order.
SPECIES_INFO = {
    'no2': dict(
        name='nitrogen dioxide',
        molar_mass=46.0055,
        cams_unit='kg/kg',
        who_threshold=25.0,
        eac4_varname='no2',
    ),
    'so2': dict(
        name='sulfur dioxide',
        molar_mass=64.066,
        cams_unit='kg/kg',
        who_threshold=40.0,
        eac4_varname='so2',
    ),
    # The ozone threshold is based on an 8-hour average, not a 24-hour one.
    # NOTE(review): an earlier comment here described averaging the 9am, noon
    # and 3pm steps into a 9-hour average, but the exceedance cell takes the
    # daily maximum of 8-hour bin means - confirm which method is intended.
    'o3': dict(
        name='ozone',
        molar_mass=48.0,
        cams_unit='kg/kg',
        who_threshold=100.0,
        eac4_varname='go3',
    ),
    'pm25': dict(
        name='fine particulate matter',
        cams_unit='kg/m^3',
        who_threshold=15.0,
        eac4_varname='pm2p5',
    ),
    'pm10': dict(
        name='coarse particulate matter',
        cams_unit='kg/m^3',
        who_threshold=45.0,
        eac4_varname='pm10',
    ),
    'co': dict(
        name='carbon monoxide',
        molar_mass=28.01,
        cams_unit='kg/kg',
        who_threshold=4000.0,
        eac4_varname='co',
    ),
}

In [4]:
# Load the AWS key pair from a local JSON file (keys 'Access Key' and
# 'Secret Key') and build the S3 client used for downloads in later cells.
with open('aws_creds.json', 'r') as credfile:
    aws_keys = json.load(credfile)

ACCESS_KEY = aws_keys['Access Key']
SECRET_KEY = aws_keys['Secret Key']

s3client = boto3.client(
    's3',
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)

In [3]:
# Open the 2020 pressure/temperature NetCDF file. It provides the 'msl' and
# 't2m' variables of the kind used when converting gas mixing ratios to
# concentrations.
pressuretemp = xr.open_dataset('data/pressure_temp_2020.nc')

In [7]:
# Default analysis year and the S3 bucket holding the input data.
# NOTE: the exceedance-day loop further down rebinds both `year` and `bucket`,
# so after that cell runs `year` holds the last year it processed.
year = 2020
bucket = 'cities-cities4forests'

In [43]:
# Create raster of exceedance days for ANY pollutant.
#
# For every year, each pollutant is turned into a daily 0/1 "threshold met or
# exceeded" flag; a day counts as an exceedance day if at least one pollutant
# is flagged, and the number of such days per grid cell is written as a GeoTIFF.
SAVE_SINGLE_POLLUTANT_EXCEEDANCEDAY_RASTERS = False


def _daily_exceedance_flags(conc, species):
    """Return a 0/1 DataArray flagging days at or above the species threshold.

    conc: DataArray with a `time` dimension. It is scaled by 1e9 before being
        compared with SPECIES_INFO[species]['who_threshold'], so it is expected
        to be in kg/m^3 (units of the inputs are not visible here - TODO confirm).
    species: key into SPECIES_INFO.
    """
    if species == 'o3':
        # Ozone: mean over consecutive 8-hour bins, then the daily maximum of
        # those bin means.
        conc_1D = conc.resample(time='8H').mean().resample(time='1D').max()
    else:
        # Everything else: plain daily mean.
        conc_1D = conc.resample(time='1D').mean()
    return (conc_1D * 10**9 >= SPECIES_INFO[species]['who_threshold']) * 1


def _write_geotiff(data_array, path):
    """Write `data_array` to `path` as an EPSG:4326 raster.

    NOTE(review): the `.rio` accessor only exists once `rioxarray` has been
    imported, and no such import is visible in this notebook's import cell -
    confirm it is imported before this cell runs.
    """
    raster = data_array.rio.set_spatial_dims(x_dim='longitude', y_dim='latitude')
    raster.rio.write_crs("epsg:4326", inplace=True)
    raster.rio.to_raster(path)


bucket = 'cities-cities4forests'

for year in [2010, 2015, 2020]:
    exceedances = {}
    # Pressure/temperature must cover the same year as the pollutant data:
    # xarray arithmetic inner-joins on coordinates, so mixing years would
    # silently produce empty results for the gas species.
    # Assumes a file following the 2020 naming pattern exists for every year;
    # a missing file raises instead of producing mislabelled output.
    with xr.open_dataset('data/pressure_temp_{}.nc'.format(year)) as pressuretemp_year:
        for species in SPECIES_INFO.keys():
            varname = SPECIES_INFO[species]['eac4_varname']
            # Input files are per species AND per year (previously the year
            # was hard-coded to 2020, so every output year reused 2020 data).
            local_filename = 'cams-eac4_{}_sfc_{}.nc'.format(species, year)
            if not os.path.exists(local_filename):
                s3client.download_file(
                    bucket,
                    'data/air_pollution/cams/' + local_filename,
                    local_filename,
                )
            with xr.open_dataset(local_filename) as nc_file:
                conc = nc_file[varname]
                if SPECIES_INFO[species]['cams_unit'] == 'kg/kg':
                    # Mixing ratio times air density p / (R_specific * T),
                    # with 287.058 as the specific gas constant of dry air.
                    conc = conc * pressuretemp_year['msl'] / (287.058 * pressuretemp_year['t2m'])
                if conc.sizes['time'] == 0:
                    raise ValueError(
                        'No overlapping time steps for {} in {}: check that the '
                        'pollutant and pressure/temperature files cover the same '
                        'period'.format(species, year)
                    )
                # Materialise the flags before the file is closed.
                exc_days = _daily_exceedance_flags(conc, species).load()
            exceedances[species] = exc_days
            if SAVE_SINGLE_POLLUTANT_EXCEEDANCEDAY_RASTERS:
                # NOTE(review): this writes the per-day flag stack (one value
                # per day), not a per-cell day count - confirm that is intended.
                _write_geotiff(
                    exc_days,
                    r"cams-eac4-exceedancedays_{0}_{1}.tiff".format(species, year),
                )

    # Build a new array for the total instead of `+=` on the first species'
    # flags, which would overwrite that entry of `exceedances` in place.
    exc_sum = sum(exceedances.values())
    result = ((exc_sum > 0) * 1).sum(dim='time')

    _write_geotiff(result, r"cams-eac4-exceedancedays_{0}_{1}.tiff".format('any', year))