# Google Earth Engine

In [1]:
import os, datetime
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [2]:
import eeconvert

In [3]:
from src import read_data, process_data, split_df
# Read the train and test tables, then stack them with the train rows first.
data, data_test = (
    read_data(csv_path)
    for csv_path in ('data/train_dataset_train_2.csv', 'data/test_dataset_test_2.csv')
)
data_cat = pd.concat([data, data_test], axis=0)

In [4]:
import ee
# Interactive authorization: prompts for a verification code and saves the
# resulting token (see the cell output). Clear that output before sharing.
ee.Authenticate()

Enter verification code:  <redacted>



Successfully saved authorization token.


In [5]:
# Open the Earth Engine session; presumably uses the token saved by ee.Authenticate().
ee.Initialize()

In [16]:
def get_series(collection, features, band='ndvi', scale=30, drive_folder=None):
    """Build a per-feature time series of one band over an image collection.

    Every image of ``collection`` is reduced over each feature's geometry with
    a median reducer. The resulting ``date -> value`` pairs (dates formatted
    as ``YYYY/MM/dd``) are stored as properties on the feature.

    Based on
    https://stackoverflow.com/questions/47633088/get-results-in-an-earth-engine-python-script

    Parameters
    ----------
    collection : ee.ImageCollection
        Images to sample; must expose a band named ``band.upper()``.
    features : ee.FeatureCollection
        Features whose geometries are sampled.
    band : str
        Band name. It is upper-cased before being used as the property key
        read back from each reduced image.
    scale : number
        Forwarded to ``reduceRegion`` as the scale argument.
    drive_folder : str or None
        When ``None`` the result is fetched synchronously with ``getInfo()``.
        Otherwise a CSV table export to this Google Drive folder is started.

    Returns
    -------
    pandas.DataFrame or ee.batch.Task
        Without ``drive_folder``: one row per feature built from the feature
        properties. With ``drive_folder``: the started export task, so that
        callers can poll its status (previously nothing was returned).
    """
    band_key = band.upper()

    def feature_series(feature):
        def reduce_image(img):
            # Median of the image over the feature's geometry, tagged with the image date.
            stats = img.reduceRegion(ee.Reducer.median(), feature.geometry(), scale)
            return feature.set(stats).set('date', img.date().format("YYYY/MM/dd"))

        per_image = collection.map(reduce_image)
        pairs = per_image.reduceColumns(ee.Reducer.toList(2), ['date', band_key]).get('list')
        # Flatten [[date, value], ...] into [date, value, ...] so that
        # ee.Dictionary reads it as alternating keys and values.
        return feature.set(ee.Dictionary(ee.List(pairs).flatten()))

    features_mapped = features.map(feature_series)
    if drive_folder is None:
        result = features_mapped.getInfo()
        return pd.DataFrame([item['properties'] for item in result['features']])

    task = ee.batch.Export.table.toDrive(features_mapped, folder=drive_folder, fileFormat='csv')
    task.start()
    return task

## landsat

In [7]:
# WRS-2 path/row grid shapefile; download instructions:
# https://mgimond.github.io/ArcGIS_tutorials/Download_landsat.htm
grid = gpd.read_file('data/grid/wrs2.shp')
# Reproject the points to the grid's CRS before joining, as the MODIS and
# Sentinel cells do; a spatial join across different CRSs matches nothing reliably.
data_grid = data_cat.to_crs(grid.crs).sjoin(grid[['PATH', 'ROW', 'geometry']], how='left')
# One frame per (PATH, ROW) tile; a point can fall into more than one tile.
gb = data_grid.groupby(['PATH', 'ROW'])
tiles = [gb.get_group(x) for x in gb.groups]

In [8]:
# Query the Landsat 8 8-day EVI collection tile by tile; one result frame per tile.
out = []
for tile in tqdm(tiles):
    features = eeconvert.gdfToFc(tile.to_crs(4326)[['id', 'geometry']])
    collection = (
        ee.ImageCollection('LANDSAT/LC08/C01/T1_8DAY_EVI')
        .filterBounds(features)
        .filterDate('2021-04-15', '2021-09-01')
        .select('EVI')
    )
    tile_series = get_series(collection, features, band='evi', scale=30)
    out.append(tile_series)

  0%|          | 0/102 [00:00<?, ?it/s]

In [9]:
# Stack the per-tile frames. An id can appear in more than one tile frame,
# so duplicates are collapsed by taking the per-column maximum for each id.
data_landsat = pd.concat(out)
# Keep 'id' as the index: the label lookup below must match real ids, not row
# positions (dropping the index only worked when ids were exactly 0..n-1).
data_landsat = data_landsat.groupby('id').max()
# Reorder rows to follow data_cat and sort the date columns chronologically
# (the 'YYYY/MM/dd' keys sort correctly as strings).
data_landsat = data_landsat.loc[data_cat['id'], sorted(data_landsat.columns)]

In [11]:
# Rows follow data_cat (train first, then test), so split at the train row count.
n_train = data.shape[0]
data_landsat.iloc[:n_train].to_csv('data/train_dataset_landsat_evi.csv', index=False)
data_landsat.iloc[n_train:].to_csv('data/test_dataset_landsat_evi.csv', index=False)

## modis

In [12]:
# NOTE(review): this link is the same one used for the Landsat grid; confirm
# the actual source of the MODIS sinusoidal grid shapefile.
# https://mgimond.github.io/ArcGIS_tutorials/Download_landsat.htm
grid = gpd.read_file('data/grid/modis_sinusoidal_grid_world.shp')
# Attach the MODIS tile indices (h, v) to every point, in the grid's CRS.
data_grid = (
    data_cat
    .to_crs(grid.crs)
    .sjoin(grid[['h', 'v', 'geometry']], how='left')
)
gb = data_grid.groupby(['h', 'v'])
# One frame per (h, v) tile.
tiles = [tile_rows for _, tile_rows in gb]

In [15]:
# Cap every request at 100 rows: larger tiles are broken up with split_df,
# smaller ones are kept as they are.
tiles_ = [
    piece
    for tile in tiles
    for piece in (split_df(tile, 100) if tile.shape[0] > 100 else [tile])
]
tiles = tiles_

In [16]:
# Query the MODIS EVI collection chunk by chunk; one result frame per chunk.
out = []
for tile in tqdm(tiles):
    features = eeconvert.gdfToFc(tile.to_crs(4326)[['id', 'geometry']])
    collection = (
        ee.ImageCollection('MODIS/MOD09GA_006_EVI')
        .filterBounds(features)
        .filterDate('2021-04-15', '2021-09-01')
        .select('EVI')
    )
    tile_series = get_series(collection, features, band='evi', scale=250)
    out.append(tile_series)

  0%|          | 0/73 [00:00<?, ?it/s]

In [17]:
# Stack the per-chunk result frames into a single table.
data_modis = pd.concat(out)

In [18]:
# Collapse repeated ids with the per-column maximum and fill missing values with 0.
# 'id' stays as the index so that the label lookup below matches real ids
# rather than row positions (dropping it only worked when ids were 0..n-1).
data_modis = data_modis.groupby('id').max().fillna(0)
# Reorder rows to follow data_cat and sort the date columns chronologically.
data_modis = data_modis.loc[data_cat['id'], sorted(data_modis.columns)]

In [22]:
# Rows follow data_cat (train first, then test), so split at the train row count.
n_train = data.shape[0]
data_modis.iloc[:n_train].to_csv('data/train_dataset_modis_evi.csv', index=False)
data_modis.iloc[n_train:].to_csv('data/test_dataset_modis_evi.csv', index=False)

## sentinel

In [8]:
def set_sentinel_ndvi(image):
    """Return ``image`` with an extra 'NDVI' band, (B8 - B4) / (B8 + B4)."""
    nir, red = image.select('B8'), image.select('B4')
    difference = nir.subtract(red)
    total = nir.add(red)
    return image.addBands(difference.divide(total).rename('NDVI'))

def set_sentinel_evi(image):
    """Return ``image`` with an extra 'EVI' band computed from B8, B4 and B2."""
    source_bands = {'NIR': 'B8', 'RED': 'B4', 'BLUE': 'B2'}
    # Each band is divided by 10000 before entering the formula
    # (presumably converting stored integers to reflectance; TODO confirm).
    scaled = {
        label: image.select(band_id).divide(10000)
        for label, band_id in source_bands.items()
    }
    formula = '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))'
    evi = image.expression(formula, scaled).rename('EVI')
    return image.addBands(evi)

In [9]:
# Sentinel-2 tile index shapefile:
# https://github.com/justinelliotmeyers/Sentinel-2-Shapefile-Index
grid = gpd.read_file('data/grid/sentinel_2_index_shapefile.shp')
# Attach the tile name to every point, in the grid's CRS.
data_grid = (
    data_cat
    .to_crs(grid.crs)
    .sjoin(grid[['Name', 'geometry']], how='left')
)
gb = data_grid.groupby('Name')
# One frame per tile name.
tiles = [tile_rows for _, tile_rows in gb]

In [10]:
# Cap every request at 100 rows: larger tiles are broken up with split_df,
# smaller ones are kept as they are.
tiles_ = [
    piece
    for tile in tiles
    for piece in (split_df(tile, 100) if tile.shape[0] > 100 else [tile])
]
tiles = tiles_

In [17]:
# Sentinel-2 collection with the derived NDVI and EVI bands attached to every image.
collection = (
    ee.ImageCollection('COPERNICUS/S2')
    .map(set_sentinel_ndvi)
    .map(set_sentinel_evi)
)

In [18]:
# get_series is called with drive_folder here, so each chunk is exported as a
# table to the 'sentinel_evi' Drive folder instead of being fetched inline;
# whatever get_series returns in that mode is collected in `out`.
out = []
for tile in tqdm(tiles):
    features = eeconvert.gdfToFc(tile.to_crs(4326)[['id', 'geometry']])
    collection_ = (
        collection
        .filterBounds(features)
        .filterDate('2021-04-15', '2021-09-01')
        .select('EVI')
    )
    out.append(
        get_series(collection_, features, band='evi', scale=10, drive_folder='sentinel_evi')
    )

  0%|          | 0/187 [00:00<?, ?it/s]

In [283]:
# Load the CSV files found in the local 'sentinel_evi' folder (presumably the
# Drive exports downloaded by hand; confirm). Entries that are not *.csv, such
# as .ipynb_checkpoints, are skipped so that read_csv does not fail on them.
# The listing is sorted so the load order is deterministic.
out = []
for file in sorted(os.listdir('sentinel_evi')):
    if not file.endswith('.csv'):
        continue
    out.append(pd.read_csv(os.path.join('sentinel_evi', file)))

In [284]:
# Stack the frames read from the exported CSV files into a single table.
data_sentinel = pd.concat(out)

In [291]:
# Earlier attempt, kept for reference: identical to the next cell except that
# data_next starts at column 58 instead of 59.
# data_prev = data_sentinel.iloc[:,1:56]
# data_prev.columns = pd.to_datetime(data_prev.columns)
# data_next = data_sentinel.iloc[:,58:]
# data_next.columns = pd.to_datetime(data_next.columns) - pd.Timedelta(days=1)
# data_prev.update(data_next)
# data_prev['id'] = data_sentinel['id']

In [294]:
# NOTE(review): the positional ranges 1:56 and 59: depend on the column order
# of the exported CSVs. Presumably 1:56 holds the first run of date columns
# and 59: a second run whose dates are one day later; confirm against the files.
# Explicit copies keep the edits below from writing through to data_sentinel
# and avoid SettingWithCopyWarning on the 'id' assignment.
data_prev = data_sentinel.iloc[:, 1:56].copy()
data_prev.columns = pd.to_datetime(data_prev.columns)
data_next = data_sentinel.iloc[:, 59:].copy()
# Shift the second run back by one day so that its labels can line up with data_prev.
data_next.columns = pd.to_datetime(data_next.columns) - pd.Timedelta(days=1)
# In-place merge: non-NaN values of data_next overwrite data_prev wherever the
# index and column labels match.
data_prev.update(data_next)
data_prev['id'] = data_sentinel['id']

In [296]:
# Collapse repeated ids with the per-column maximum and fill missing values with 0.
# 'id' stays as the index so that the label lookup below matches real ids
# rather than row positions (dropping it only worked when ids were 0..n-1).
data_sentinel = data_prev.groupby('id').max().fillna(0)
# Reorder rows to follow data_cat and sort the date columns chronologically.
data_sentinel = data_sentinel.loc[data_cat['id'], sorted(data_sentinel.columns)]

In [298]:
# Rows follow data_cat (train first, then test), so split at the train row count.
n_train = data.shape[0]
data_sentinel.iloc[:n_train].to_csv('data/train_dataset_sentinel_evi.csv', index=False)
data_sentinel.iloc[n_train:].to_csv('data/test_dataset_sentinel_evi.csv', index=False)