This notebook extracts Sentinel 5P air pollution metrics via the Earth Engine Python API.

In [1]:
from make_dataset import Entry
import ee
ee.Initialize()
import pandas as pd
import geopandas as gpd
import os
import sys
sys.path.append("../GEE_Zonal/src")
from gee_helpers import gpd_to_gee, authenticateGoogleDrive
import geojson
from shapely.geometry import Polygon, MultiPolygon
from glob import glob
from gee_tools import ZonalStats

In [2]:
# Reload imported modules automatically before each cell runs, so edits to the
# local helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [39]:
# Root folder for the Serbia inputs and outputs. The drive needs an explicit
# separator: os.path.join("x:", "data") gives the drive-relative path "x:data"
# on Windows, which resolves against the current directory of drive X:.
data_dir = os.path.join("x:" + os.sep, "data", "serbia")
wp_dir = os.path.join(data_dir, "wp")
out_dir = os.path.join(data_dir, "output")
# makedirs creates missing parent folders and is a no-op when out_dir exists.
os.makedirs(out_dir, exist_ok=True)

In [4]:
# Load the urban extents shapefile from the output folder.
urban_extents_path = os.path.join(out_dir, "urban_extents.shp")
urban_extents = gpd.read_file(urban_extents_path)

In [5]:
# Order the extents by the "Pop" column, largest value first.
urban_extents = urban_extents.sort_values(by="Pop", ascending=False)

In [6]:
# Show which attribute columns the extents table provides.
urban_extents.columns

Index(['group', 'ID', 'Pop', 'city', 'county', 'state', 'geometry'], dtype='object')

In [7]:
# Take the first row to try the workflow on a single extent.
row = urban_extents.iloc[0]

In [9]:
def makePollutionData(row):
    """Build the air pollution data objects for one urban extent.

    Args:
        row: Record (for example a GeoDataFrame row) exposing the keys
            'geometry', 'group', 'ID' and 'city'. The geometry must be a
            shapely Polygon or MultiPolygon.

    Returns:
        Whatever ``Entry.createAirPollutionDF()`` returns for this extent.

    Raises:
        TypeError: If the geometry is neither a Polygon nor a MultiPolygon.
    """
    geom = row['geometry']
    # Pick the matching Earth Engine constructor up front. The explicit else
    # replaces the UnboundLocalError that the bare if/elif produced for any
    # other geometry type (Point, GeometryCollection, None, ...).
    if isinstance(geom, Polygon):
        make_ee_geometry = ee.Geometry.Polygon
    elif isinstance(geom, MultiPolygon):
        make_ee_geometry = ee.Geometry.MultiPolygon
    else:
        raise TypeError(
            f"Unsupported geometry type {type(geom).__name__!r}; "
            "expected Polygon or MultiPolygon"
        )
    shpJSON = geojson.Feature(geometry=geom)
    ee_poly = make_ee_geometry(shpJSON['geometry']['coordinates'])
    ee_feature = ee.FeatureCollection(ee_poly)
    entry = Entry(aoi=ee_feature, index=row['group'], ID=row['ID'], city=row["city"])
    return entry.createAirPollutionDF()

In [10]:
# Build the pollution data objects for the single row selected earlier.
data_pollution = makePollutionData(row)

In [11]:
# Ask every returned object to report its current status.
for pollution_item in data_pollution:
    pollution_item.reportRunTime()

Status is Ready, hasn't started
Status is Ready, hasn't started
Status is Ready, hasn't started
Status is Ready, hasn't started


In [7]:
# Index the extents by "ID" so they can be selected with .loc.
# NOTE: not re-runnable - once "ID" is the index it is no longer a column,
# so running this cell a second time raises KeyError.
urban_extents = urban_extents.set_index("ID")

In [36]:
# "ID" values of the urban extents selected for processing.
ids = [522, 502, 573, 549, 579, 477, 558, 499, 518, 582] #522

In [37]:
# Subset the extents to the selected IDs; the copy keeps later edits to
# `cities` from touching `urban_extents`.
cities = urban_extents.loc[ids].copy()

In [38]:
# "ID" now lives in the index; expose it as a regular column again because
# the rows are read with row['ID'] further down.
cities["ID"] = cities.index

In [31]:
# Build the pollution data objects for every selected city.
pollution_results = [
    makePollutionData(city_row)
    for _, city_row in cities.iterrows()
]

Get data from EE (the exported CSV files are read back from Google Drive)

In [23]:
import io

In [12]:
from pydrive.drive import GoogleDrive
from pydrive.auth import GoogleAuth

# PyDrive authentication handler. The browser sign-in recorded further down
# appears with the first Drive request, not here.
gauth = GoogleAuth()

In [13]:
# Google Drive client bound to the auth handler; used for all file queries below.
drive = GoogleDrive(gauth)

In [14]:
# Look up the Drive folder named 'serbia' (non-trashed folders only).
folders = drive.ListFile({'q': "title = 'serbia' and trashed=false and mimeType = 'application/vnd.google-apps.folder'"}).GetList()
if not folders:
    # Indexing an empty result with [0] would only give an opaque IndexError.
    raise FileNotFoundError("No Google Drive folder titled 'serbia' was found")
# If several folders share the title, the first match is used.
folder = folders[0]

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=439204002005-8hf6m3mir3vnrol3phs9o5ki8g28kocv.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.


In [15]:
# Keep the Drive folder ID; the file queries below filter on it.
folder_id = folder['id']

In [16]:
# Show the folder ID.
folder_id

'1SNOP6VrPDqwrORHdioqmVyzTEouabm51'

In [17]:
# List every non-trashed CSV file whose parent is the folder.
csv_query = f"'{folder_id}' in parents and trashed=false and mimeType = 'text/csv'"
file_list = drive.ListFile({'q': csv_query}).GetList()

In [63]:
# Print the title and ID of each listed file. The loop variable `one` keeps
# pointing at the last file afterwards, and the following cells reuse it.
for one in file_list:
    print(f"title: {one['title']}, id: {one['id']}")

title: 45_co.csv, id: 16nQIEte6MK-IMXgmY7KAs7-4hyyfCjRU
title: 45_so2.csv, id: 1w-1RYYmVfdZFu89ZDarVFRXEdjzJmeHC
title: 45_ai.csv, id: 1mRB4dFAwGsMA6_eHKImeFX42LeO-JEuY
title: 45_no2.csv, id: 1cgFAM1P7yjhcsr5DFACmPVgEuSq7oQzr


In [25]:
# Download the last listed file as text and parse it, leaving out the
# '.geo' column.
s = one.GetContentString()
c = pd.read_csv(io.StringIO(s)).drop(columns='.geo')

In [30]:
# The variable name is the part of the file title between the last "_" and
# the last ".", e.g. "45_no2.csv" -> "no2".
b = os.path.basename(one['title'])
name_start = b.rfind("_") + 1
name_end = b.rfind(".")
var_name = b[name_start:name_end]
# Statistics passed on to processResponseFromEE.
stats = ['sum', 'mean', 'max', 'p75', 'p95']

In [32]:
# NOTE(review): `entry` is first assigned in a later cell, so a fresh
# top-to-bottom run fails here with the NameError recorded in this cell's output.
df = entry.processResponseFromEE(io.StringIO(s), var_name, stats, asdate=False)

NameError: name 'entry' is not defined

In [66]:
def mergeDFs(df_admin, df_list):
    """Combine the per-variable tables of one area and attach its admin record.

    Args:
        df_admin: DataFrame of descriptive attributes with an "index" column.
        df_list: Iterable of DataFrames that each have "index" and "date"
            columns; they are outer-merged one after another on those keys.

    Returns:
        DataFrame: ``df_admin`` outer-merged on "index" with the combined
        tables. When ``df_list`` is empty, a copy of ``df_admin`` is returned.
    """
    frames = list(df_list)
    if not frames:
        # The original loop never bound df_merge in this case and failed with
        # UnboundLocalError; with nothing to merge, the admin record is the result.
        return df_admin.copy()
    df_merge = frames[0]
    for df in frames[1:]:
        df_merge = df_merge.merge(df, on=["index", "date"], how="outer")
    return df_admin.merge(df_merge, on=["index"], how="outer")

In [67]:
# Use the first selected city to walk through the read-back steps once.
row = cities.iloc[0]

In [68]:
# Entry carrying only the identifying attributes of the city (no area of
# interest is passed: aoi=None).
entry = Entry(aoi=None, index=row['group'], ID=row['ID'], city=row["city"])

In [69]:
# One-row table of the entry's attributes. reset_index turns the entry index
# into a regular "index" column, which is the key mergeDFs joins on.
df_admin = pd.DataFrame(entry.kwargs, index=[entry.idx]).reset_index()

In [70]:
# Inspect the admin record.
df_admin

Unnamed: 0,index,ID,city
0,45,522,New Belgrade Urban Municipality


In [71]:
# CSV files in the folder that belong to this entry.
entry_query = f"'{folder_id}' in parents and title contains '{entry.idx}' and trashed=false and mimeType = 'text/csv'"
# Drive's `contains` is a prefix match on the title, so index 4 would also
# return the files of index 45. Keep only titles that start with "<idx>_".
file_list = [
    drive_file
    for drive_file in drive.ListFile({'q': entry_query}).GetList()
    if os.path.basename(drive_file['title']).startswith(f"{entry.idx}_")
]

In [74]:
# Parse every CSV found for the entry and merge the tables with the admin
# record. Nothing is assigned when no file was found.
if file_list:
    dfs = []
    for file in file_list:
        b = os.path.basename(file['title'])
        # Variable name: text between the last "_" and the last ".".
        name_start = b.rfind("_") + 1
        name_end = b.rfind(".")
        var_name = b[name_start:name_end]
        s = file.GetContentString()
        df = entry.processResponseFromEE(io.StringIO(s), var_name, stats, asdate=False)
        dfs += [df]
    aoi_df = mergeDFs(df_admin, dfs)

In [83]:
# Collects one merged DataFrame per city.
res = []

In [84]:
# Read back the exported CSV files of every selected city and merge them.
# Start from an empty list so re-running this cell does not append duplicates.
res = []
for idx, row in cities.iterrows():
    print(f"processing {idx}")
    entry = Entry(aoi=None, index=row['group'], ID=row['ID'], city=row["city"])
    # reset_index exposes the entry index as the "index" column mergeDFs joins on.
    df_admin = pd.DataFrame(entry.kwargs, index=[entry.idx]).reset_index()
    entry_query = f"'{folder_id}' in parents and title contains '{entry.idx}' and trashed=false and mimeType = 'text/csv'"
    # Drive's `contains` is a prefix match on the title, so index 4 would also
    # return the files of index 45. Keep only titles that start with "<idx>_".
    file_list = [
        drive_file
        for drive_file in drive.ListFile({'q': entry_query}).GetList()
        if os.path.basename(drive_file['title']).startswith(f"{entry.idx}_")
    ]
    if not file_list:
        # No CSV found for this city; it is left out of the results.
        continue
    dfs = []
    for file in file_list:
        b = os.path.basename(file['title'])
        # Variable name: text between the last "_" and the last ".".
        var_name = b[b.rfind("_")+1:b.rfind(".")]
        s = file.GetContentString()
        df = entry.processResponseFromEE(io.StringIO(s), var_name, stats, asdate=False)
        dfs.append(df)
    aoi_df = mergeDFs(df_admin, dfs)
    res.append(aoi_df)

processing 522
processing 502
processing 573
processing 549
processing 579
processing 477
processing 558
processing 499
processing 518
processing 582


In [86]:
# Stack the per-city tables into one DataFrame. pd.concat raises ValueError
# when `res` is empty.
res_all = pd.concat(res)

In [88]:
# to_csv does not create missing folders, so make sure the "pollution"
# subfolder exists before writing the combined table.
pollution_dir = os.path.join(out_dir, "pollution")
os.makedirs(pollution_dir, exist_ok=True)
res_all.to_csv(os.path.join(pollution_dir, "Cities_Pollution.csv"), index=False)