In [1]:
import ee
#ee.Authenticate()
ee.Initialize()

In [2]:
#Import required packages
import geemap
import pandas as pd
import numpy as np
import time
import glob

In [3]:
# Collect every shapefile found under gridextents_shponly/.
airsheds = glob.glob("gridextents_shponly/*.shp")
# The glob result above is replaced straight away: only this single hard-coded
# shapefile path is left in `airsheds` after the cell runs.
airsheds=['bishek/grids_bishkek.shp']

In [4]:
# Chennai airshed box is already uploaded as a feature on GEE.
#chennai_box = ee.FeatureCollection("projects/ee-saikrishnadammalapati/assets/chennai-box")
#aoi = chennai_box.geometry()

def get_aoi(airshed_shp):
    """Load a shapefile through geemap and return it together with its geometry.

    Parameters
    ----------
    airshed_shp
        Shapefile path handed to ``geemap.shp_to_ee``.

    Returns
    -------
    tuple
        ``(airshed_box, aoi)``: the object returned by ``geemap.shp_to_ee`` and
        the result of calling ``.geometry()`` on that object.
    """
    feature_collection = geemap.shp_to_ee(airshed_shp)
    return feature_collection, feature_collection.geometry()

# The following is a feature at all India level.
#admin2 = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level1")
#india = admin2.filter(ee.Filter.eq('ADM0_NAME', 'India'))

**maskClouds** masks out cloudy pixels: only pixels whose "cloud_fraction" is less than 0.1 are kept, and every other pixel is masked. This function is taken from [Ujaval Gandhi's GEE code](https://code.earthengine.google.co.in/0f1259deeb86530cee552817a05e2031).

In [5]:
def maskClouds(image):
    """Return ``image`` with every pixel masked unless its 'cloud_fraction' is below 0.1.

    The mask is built by selecting the 'cloud_fraction' band and comparing it
    with ``lt(0.1)``; it is then applied to the whole image via ``updateMask``.
    """
    return image.updateMask(image.select('cloud_fraction').lt(0.1))

# clip_image function clips the satellite image to our given area of interest (Chennai airshed box in our case)
# https://gis.stackexchange.com/questions/302760/gee-imagecollection-map-with-multiple-input-function
def clip_image(roi):
    """Build a one-argument function that clips the image it receives to ``roi``.

    ``ImageCollection.map`` only passes the image to its callback, so the
    region is captured in a closure here.
    """
    return lambda image: image.clip(roi)

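The satellite images are acquired from the [**COPERNICUS/S5P/OFFL/L3_HCHO**](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_HCHO) collection. (This notebook appears to have been written first for the corresponding NO2 product, [**COPERNICUS/S5P/OFFL/L3_NO2**](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_NO2); the NO2 post-processing cells further down still reflect that.) The product provides several bands; the download code below uses `tropospheric_HCHO_column_number_density`, and `cloud_fraction` is selected only so that cloudy pixels can be masked.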

In [28]:
# Paths of the HCHO GeoTIFFs currently present in HCHO_tifs/.
# A name assigned at cell (module) level is already global, so no `global`
# declaration is needed here.
tifs = glob.glob("HCHO_tifs/*.tif")

In [22]:
def _fortnight_windows(year, month):
    """Return the two half-month windows of ``month`` as ISO date strings.

    Each window is a ``(start, end_exclusive, last_day)`` tuple: ``start`` is the
    first day of the window, ``end_exclusive`` is the day after the window ends
    (suitable for an end-exclusive date filter) and ``last_day`` is the final
    day covered, for display. The second window runs to the real end of the
    month, so leap-year Februaries are covered.
    """
    from datetime import date, timedelta  # local import: keeps the cell self-contained

    first = date(year, month, 1)
    sixteenth = date(year, month, 16)
    # First day of the following month; December rolls over into the next year.
    next_first = date(year + month // 12, month % 12 + 1, 1)
    one_day = timedelta(days=1)
    return [
        (first.isoformat(), sixteenth.isoformat(), (sixteenth - one_day).isoformat()),
        (sixteenth.isoformat(), next_first.isoformat(), (next_first - one_day).isoformat()),
    ]


def download_tifs(year):
    """Export half-month mean HCHO rasters and zonal-mean CSVs for the Bishkek grid.

    For every month of ``year`` the 'COPERNICUS/S5P/OFFL/L3_HCHO' collection is
    filtered to two windows (1st-15th and 16th-end of month), cloud-masked with
    ``maskClouds``, clipped to the airshed geometry and averaged. Each window
    produces one GeoTIFF export under 'bishek/HCHO_tifs/' and one CSV of zonal
    means under 'bishek/HCHO_csvs/', both named after the window's start date.

    Parameters
    ----------
    year : int (or anything ``int()`` accepts)
        Calendar year to process. For 2022 only January-July are processed
        (cutoff kept from the original code; presumably the data available when
        the notebook was written - TODO confirm).

    Returns
    -------
    None
        Progress and the elapsed time are printed.
    """
    year = int(year)
    airshed_box, aoi = get_aoi('bishek/grids_bishkek.shp')
    airshed_name = 'bishkek'
    tic = time.perf_counter()

    last_month = 7 if year == 2022 else 12

    # Only the HCHO column band and the cloud fraction (needed for masking) are
    # selected. The collection does not depend on the month, so build it once.
    collection = ee.ImageCollection('COPERNICUS/S5P/OFFL/L3_HCHO').select(
        ['tropospheric_HCHO_column_number_density', 'cloud_fraction'])

    for month in range(1, last_month + 1):
        print(month)

        for startDate, endExclusive, lastDay in _fortnight_windows(year, month):
            # ee.Filter.date excludes its end date, so the day *after* the window
            # is passed; otherwise the 15th and the month's last day are dropped.
            filtered = (collection
                        .filter(ee.Filter.date(startDate, endExclusive))
                        .filter(ee.Filter.bounds(aoi)))

            # Mask cloudy pixels, keep only the HCHO band, clip to the airshed.
            cloudMasked = filtered.map(maskClouds).select('tropospheric_HCHO_column_number_density')
            clipped_images = cloudMasked.map(clip_image(aoi))

            # Half-month mean composite.
            image = clipped_images.mean()

            # Export the composite as GeoTIFF.
            geemap.ee_export_image(image,
                                   filename='bishek/HCHO_tifs/'+airshed_name+'_15dayavg_'+'hcho_'+startDate+'.tif',
                                   scale=30,
                                   region=aoi, file_per_band=True)

            # Zonal means of every image in the window (one band per image via
            # toBands) for the features of the airshed shapefile, saved as CSV.
            geemap.zonal_statistics(clipped_images.toBands(), airshed_box,
                                    'bishek/HCHO_csvs/'+airshed_name+'_15dayavg'+'_hcho_'+startDate+'.csv',
                                    statistics_type='MEAN', scale=30)

            # Progress line shows the inclusive window that was processed.
            print(startDate+'xx'+lastDay)

    toc = time.perf_counter()
    print('Time taken {} seconds'.format(toc-tic))

In [23]:
from joblib import Parallel, delayed
import multiprocessing as mp
from multiprocessing.pool import ThreadPool

In [24]:
airsheds[19:20]

[]

In [25]:
# Run one download per year concurrently, with one worker thread per year.
years = [2021, 2022]
with ThreadPool(processes=len(years)) as pool:
    # map() blocks until every year has finished; leaving the with-block then
    # shuts the pool down instead of leaving idle worker threads in the kernel.
    results = pool.map(download_tifs, years)
results

1
Generating URL ...
1
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/f9ae641ec0aacdf08d448a4291478a45-64fdfb362dba4123a2f55ea9887adbdc:getPixels
Please wait ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/d729349ec4669885a76a29e76ce4ffbf-f88d797ab9dece895125fbf888c43c1b:getPixels
Please wait ...
Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_tifs
Computing statistics ...
Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_tifs
Computing statistics ...
Generating URL ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/4fe0145ae98eae424f1555fe9ba6b68c-870743aee1b8f721c2a0b50d5d48fdc5:getFeatures
Please wait ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/9fdf3426a29da457e83e22efeb5d3afb-9b4ec18f92b81b8

Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_tifs
Computing statistics ...
Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_tifs
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/ef9ce151474d59e2d160acce335401c9-34bd2ebddbc147efa71d3bd212822c4b:getFeatures
Please wait ...
Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_csvs\bishkek_15dayavg_hcho_2022-04-01.csv
2022-04-01xx2022-04-15
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/c57bd3010c366daf178f22ea92725a0f-5145b7988b87286e6f6fc0cdbcfb2afd:getPixels
Please wait ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/eb2b9c56c5266cd08b82cbca6db43bb6-a47043d7fef0a7b420ed8ce2438ee7ac:getFeatures
Please wait ...
Data downloaded to D:\Projects\UrbanEmissions

Generating URL ...
Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_csvs\bishkek_15dayavg_hcho_2022-07-01.csv
2022-07-01xx2022-07-15
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/3c07346dce9b3ff86a69bbb6801de775-931dedcd59d8cf00d58210fe73b8bf50:getFeatures
Please wait ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/5c983055058b1cfeab277d8ae4782e07-d6d4a0054fa3d7d6bcd38a79853adac8:getPixels
Please wait ...
Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_csvs\bishkek_15dayavg_hcho_2021-07-01.csv
2021-07-01xx2021-07-15
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/162c03b00e214a546a41f93be65ade61-bc723c0a17bfb481da5f5bac5573750e:getPixels
Please wait ...
Data downloaded to D:\Projects\UrbanEmissions Info\bishek\HCHO_tifs
Computing statistics ...
Data dow

[None, None]

Ujaval Gandhi used "image.projection().nominalScale().getInfo()" as scale while exporting images. I'm not getting satisfactory results with that scale (huge pixels). So I used a scale of 30 and it gave me intuitively good results. Need help with tuning this parameter.

In [11]:

## Following functions are useful when we download zonal statistics from GEE. 
def prep_dataset(csv_file,metric_name,separator,date_pos=0):
    """Turn a wide zonal-statistics CSV into a date-indexed series of values.

    The CSV is transposed so that every column becomes a row, the last 13 rows
    are dropped, and a date is parsed out of each remaining column name.

    Parameters
    ----------
    csv_file
        Path or file-like object accepted by ``pd.read_csv``. The renaming step
        assumes the CSV holds a single data row.
    metric_name : str
        Name given to the value column of the result.
    separator : str
        Separator used to split each column name into tokens.
    date_pos : int, default 0
        Position of the date token within the split column name.

    Date tokens are interpreted as follows:
      * starting with 'A': 'A' + 4-digit year + day of year;
      * longer than 7 characters: YYYYMMDD (any trailing characters are kept);
      * otherwise: YYYYMM, mapped to the first day of that month.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'date', with one column ``metric_name`` holding the mean of
        all values that share a date.

    Raises
    ------
    ValueError
        If no rows are left after dropping the trailing 13 rows.
    """
    raw = pd.read_csv(csv_file)
    # One row per CSV column; the trailing 13 rows are discarded (presumably the
    # shapefile attribute columns that the export appends - TODO confirm).
    df = raw.T.reset_index()[:-13]
    df = df.drop_duplicates()
    if df.empty:
        raise ValueError('no date columns left after dropping the trailing 13 columns')

    df['date']=df['index'].str.split(separator).str[date_pos]

    tokens = df['date']
    first_token = tokens.iloc[0]
    # The token format is probed on the row labelled 4, as before; inputs with
    # fewer rows fall back to the last row instead of raising KeyError.
    probe_token = tokens.get(4, default=tokens.iloc[-1])

    if first_token[:1]=='A':
        df['year'] = df['date'].str[1:5]
        df['day'] = df['date'].str[5:]
        # year * 1000 + day-of-year gives YYYYDDD, parsed with %Y%j
        df['date'] = pd.to_datetime(df['year'].astype(int) * 1000 + df['day'].astype(int), format='%Y%j')
        df = df.drop(['year','day'],axis=1)
    elif len(probe_token)>7:
        df['date']=df['date'].str[:4]+"-"+df['date'].str[4:6]+"-"+df['date'].str[6:]
    else:
        df['date']=df['date'].str[:4]+"-"+df['date'].str[4:6]+"-"+"01"

    df = df.drop(['index'],axis=1)

    df['date'] = pd.to_datetime(df['date'])
    df.columns = [metric_name,'date']

    # Average values that fall on the same date and index the result by date.
    # (The former `fillna(np.NaN)` replaced NaN with NaN - a no-op - and
    # `np.NaN` no longer exists in NumPy 2, so the call is dropped.)
    df = df.groupby('date')[metric_name].mean().reset_index()
    df = df.set_index('date')
    return df

In [25]:
## Preparing dataset
#global chennai_no2_df
#chennai_no2_df = prep_dataset('Chennai_NO2_2020-01-01_2020-01-31.csv','tropospheric_NO2_column_number_density','_')
#df = chennai_no2_df.resample('SMS').mean()
#df.index += pd.Timedelta(14, 'd')

In [9]:
import glob
# Pick up only the per-window exports, whose names contain "<start>xx<end>"
# (e.g. CLOUDCHECK_2022-01-01xx2022-01-15_NO2.csv). The broader "CLOUD*.csv"
# pattern also matches the aggregated CLOUDCHECK_Chennai_2021_NO2.csv that this
# notebook writes, which would then be fed back in as input on a re-run.
# sorted() makes the processing order deterministic.
result = sorted(glob.glob("CLOUD*xx*_NO2.csv"))

In [12]:
# Build one table of semi-monthly means from all per-window CSVs.
metric = 'tropospheric_NO2_column_number_density'

# Collect the per-file frames and concatenate once: DataFrame.append was removed
# in pandas 2.0, and growing a frame inside a loop copies it on every pass.
frames = []
for csv_path in result:
    semi_monthly = prep_dataset(csv_path, metric, '_').resample('SMS').mean()
    frames.append(semi_monthly.reset_index())

if frames:
    master = pd.concat(frames)
else:
    # No input files: keep the empty two-column table the cell used to produce.
    master = pd.DataFrame(columns=['date', metric])

master = master.reset_index(drop=True).sort_values(by='date')
master.columns=['start_date', metric]

In [13]:
master.dropna().to_csv('CLOUDCHECK_Chennai_2021_NO2.csv',index=False)

In [111]:
master.dropna()

Unnamed: 0,start_date,tropospheric_NO2_column_number_density
1,2021-01-01,2.1e-05
2,2021-01-15,5.2e-05
3,2021-02-01,5.8e-05
4,2021-02-15,6e-05
6,2021-03-01,4.2e-05
7,2021-03-15,4.7e-05
8,2021-04-01,4.2e-05
9,2021-04-15,4.2e-05
10,2021-05-01,2.5e-05
11,2021-05-15,2.8e-05


In [10]:
result

['CLOUDCHECK_2022-01-01xx2022-01-15_NO2.csv',
 'CLOUDCHECK_2022-01-16xx2022-01-31_NO2.csv',
 'CLOUDCHECK_2022-02-01xx2022-02-15_NO2.csv',
 'CLOUDCHECK_2022-02-16xx2022-02-28_NO2.csv',
 'CLOUDCHECK_2022-03-01xx2022-03-15_NO2.csv',
 'CLOUDCHECK_2022-03-16xx2022-03-31_NO2.csv',
 'CLOUDCHECK_2022-04-01xx2022-04-15_NO2.csv',
 'CLOUDCHECK_2022-04-16xx2022-04-30_NO2.csv',
 'CLOUDCHECK_2022-05-01xx2022-05-15_NO2.csv',
 'CLOUDCHECK_2022-05-16xx2022-05-31_NO2.csv',
 'CLOUDCHECK_2022-06-01xx2022-06-15_NO2.csv',
 'CLOUDCHECK_2022-06-16xx2022-06-30_NO2.csv']

In [None]:
# NOTE(review): `img` is not assigned in any visible cell of this notebook and
# this cell has no execution count, so it looks like a leftover. Unless `img`
# is created somewhere not shown here, running it will fail - confirm before keeping.
img.close()

In [14]:
import geemap

In [21]:
print(geemap.__version__)

0.13.4


In [33]:
import glob
tifss = glob.glob("HCHO_tifs/*.tif")

In [34]:
# City prefix of every tif: the file name up to its first '15' (the start of
# "15dayavg"), e.g. 'jalna_'. Separators are normalised first so the directory
# part is stripped whether glob returned '\\' or '/' paths; indexing the result
# of split('\\') failed with IndexError on '/'-separated paths.
cities = [tif.replace('\\', '/').rsplit('/', 1)[-1].split('15')[0] for tif in tifss]

In [35]:
df = pd.DataFrame(cities,columns=['city'])

In [36]:
df.value_counts()[-15:]#.to_csv('value_coints.csv')

city         
jalna_           60
jalgaon_         60
jalandhar_       60
jaipur_          60
jabalpur_        60
hyderabad_       60
hubli_           60
haldia_          60
gwalior_         60
guwahati_        60
gulburga_        60
gorakhpur_       60
gaya_            60
vizianagaram_    60
lucknow_         59
dtype: int64

In [None]:
#2022
#-- Pathankot only 01-01 tif got made.
#-- mumbai - first both tifs in 01 not made.
#-- Latur -- only 01-01 and 06-16 got made.
#-- Nalbari only 4 tifs frmo 01 and 02 got made.
#--Indore -- one tif 01-01 is not made.
#-- Delhi -- one tif 01-01 is not made.

In [37]:
# Single-window run for the Lucknow airshed: 1-15 January 2020.
# Forward slashes keep the path usable on every OS; the name parsing below only
# relies on the '_' and '.' characters.
airshed_shp_temp = 'gridextents_shponly/grids_lucknow.shp'
airshed_box, aoi = get_aoi(airshed_shp_temp)
airshed_name = airshed_shp_temp.split('_')[2].split('.')[0]
collection = ee.ImageCollection('COPERNICUS/S5P/OFFL/L3_HCHO').select(['tropospheric_HCHO_column_number_density', 'cloud_fraction'])

startDate = '2020-01-01'
# ee.Filter.date excludes its end date, so the 16th is passed to include 15 January.
endDate = '2020-01-16'

filtered = collection.filter(ee.Filter.date(startDate, endDate)).filter(ee.Filter.bounds(aoi))
# Mask cloudy pixels, keep only the HCHO band and clip every image to the airshed.
cloudMasked = filtered.map(maskClouds).select('tropospheric_HCHO_column_number_density')
clipped_images = cloudMasked.map(clip_image(aoi))
# Half-month mean composite.
image = clipped_images.mean()
# Export the composite as GeoTIFF.
geemap.ee_export_image(image, filename='HCHO_tifs/'+airshed_name+'_15dayavg_'+'hcho_'+startDate+'.tif',
                               scale=30,
                               region=aoi, file_per_band=True)
# Zonal means of every image in the window (one band per image via toBands) for the airshed features, saved as CSV.
geemap.zonal_statistics(clipped_images.toBands(), airshed_box,
                        'HCHO_csvs/'+airshed_name+'_15dayavg'+'_hcho_'+startDate+'.csv', statistics_type='MEAN', scale=30)

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/6dcc1cf1045ea9e2a162cad9848bbd91-0050abdda8c47d193bd67d11baf28ddc:getPixels
Please wait ...
Data downloaded to D:\Projects\UrbanEmissions Info\HCHO_tifs
Computing statistics ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/3520aaca0c9c2b4d200b48a4b1b4207e-46a8555c101932da5fb94318c79dbeec:getFeatures
Please wait ...
Data downloaded to D:\Projects\UrbanEmissions Info\HCHO_csvs\lucknow_15dayavg_hcho_2020-01-01.csv
