# Google Earth Engine

In [1]:
import os, datetime
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import geojson
import shapely
import shapely.geometry

In [2]:
import eeconvert

In [3]:
# Wildcard import from the project-local `src` package. `read_data` (used
# below) and `split_df` (used in later cells) are not defined or imported
# anywhere else in this notebook, so they presumably come from here.
# NOTE(review): prefer explicit names (e.g. `from src import read_data, split_df`)
# once it is confirmed that nothing else relies on the wildcard.
from src import *
# Train and test tables. Later cells read their `id` and `geometry` columns and
# call `.sjoin` / `.to_crs` on their concatenation, so these are presumably
# GeoDataFrames - TODO confirm against `read_data`.
data = read_data('data/train_dataset_train_2.csv')
data_test = read_data('data/test_dataset_test_2.csv')

In [4]:
# Train rows first, test rows second: the export cells rely on this order when
# they split the results back with `iloc` at `data.shape[0]`.
data_cat = pd.concat([data, data_test])

In [5]:
import ee
# Interactive authentication: as the saved output of this cell shows, it
# prompts for a verification code and then stores an authorization token.
# NOTE(review): clear this cell's output before sharing the notebook so the
# verification code is not published.
ee.Authenticate()

Enter verification code:  <redacted>



Successfully saved authorization token.


In [6]:
# Initialise the Earth Engine client library for this kernel session; run
# before any of the `ee.*` calls in the cells below.
ee.Initialize()

## Landsat

In [7]:
def get_series(collection, features, scale=30):
    """Build a per-feature NDVI time series table from an Earth Engine collection.

    For every feature, each image of ``collection`` is reduced to its median
    over the feature's geometry. The ``(date, NDVI)`` pairs are then written
    onto the feature as properties keyed by the image date (``YYYY/MM/dd``).
    The whole computation is fetched with a single ``getInfo()`` call.

    Adapted from
    https://stackoverflow.com/questions/47633088/get-results-in-an-earth-engine-python-script

    Parameters
    ----------
    collection : ee.ImageCollection
        Images to sample; must expose an ``NDVI`` band.
    features : ee.FeatureCollection
        Features whose geometries are sampled.
    scale : float, default 30
        Value passed as the ``scale`` argument of ``reduceRegion``
        (previously hard-coded to 30).

    Returns
    -------
    pandas.DataFrame
        One row per returned feature, one column per feature property
        (the properties uploaded with the feature plus one column per date).
    """
    def _feature_series(feature):
        def _image_median(img):
            # Median of the image bands over this feature, tagged with the image date.
            stats = img.reduceRegion(ee.Reducer.median(), feature.geometry(), scale)
            return feature.set(stats).set('date', img.date().format("YYYY/MM/dd"))

        series = collection.map(_image_median)

        # [[date, ndvi], ...] -> flat [date, ndvi, date, ndvi, ...] -> {date: ndvi}
        pairs = series.reduceColumns(ee.Reducer.toList(2), ['date', 'NDVI']).get('list')
        return feature.set(ee.Dictionary(ee.List(pairs).flatten()))

    result = features.map(_feature_series).getInfo()
    return pd.DataFrame([item['properties'] for item in result['features']])

In [8]:
# Landsat WRS-2 path/row grid; see
# https://mgimond.github.io/ArcGIS_tutorials/Download_landsat.htm
grid = gpd.read_file('grid/wrs2.shp')
# Left spatial join: attach the PATH/ROW of the matching grid cell(s) to every
# row of `data_cat`.
# NOTE(review): unlike the MODIS and Sentinel cells, `data_cat` is not
# reprojected to `grid.crs` first - confirm both layers share a CRS.
data_grid = data_cat.sjoin(
    grid[['PATH', 'ROW', 'geometry']],
    how='left',
)
# One frame per (PATH, ROW) key so Earth Engine is queried scene by scene.
gb = data_grid.groupby(['PATH', 'ROW'])
tiles = [gb.get_group(key) for key in gb.groups]

In [9]:
# Fetch the NDVI series tile by tile; `out` collects one DataFrame per tile.
# NOTE(review): 'LANDSAT/LC08/C01/...' is a Collection 1 asset id - confirm it
# is still served by Earth Engine before re-running.
out = []
for tile in tqdm(tiles):
    # Reproject to EPSG:4326 before upload, as the MODIS and Sentinel loops do,
    # so the uploaded geometries do not depend on the CRS of `data_cat`.
    features = eeconvert.gdfToFc(tile.to_crs(4326)[['id', 'geometry']])
    collection = (
        ee.ImageCollection('LANDSAT/LC08/C01/T1_8DAY_NDVI')
        .filterBounds(features)
        .filterDate('2021-04-15', '2021-09-01')
        .select('NDVI')
    )
    out.append(get_series(collection, features))

  0%|          | 0/102 [00:00<?, ?it/s]

In [24]:
# Stack the per-tile results and collapse them to one row per id (column-wise
# maximum, in case an id was returned for more than one tile).
data_landsat = pd.concat(out).groupby('id').max()
# `id` is deliberately kept as the index: the `.loc` lookup below must select
# by id. Resetting the index first would turn it into a lookup by row position,
# which is only right when the ids happen to be exactly 0..n-1.
# Rows come out in `data_cat` order, columns (dates) in ascending order.
data_landsat = data_landsat.loc[data_cat['id'], sorted(data_landsat.columns)]

In [32]:
# `data_landsat` follows the row order of `data_cat` (train, then test), so the
# first len(data) rows are the training part and the remainder the test part.
data_landsat.iloc[:len(data)].to_csv('data/train_dataset_landsat.csv', index=False)
data_landsat.iloc[len(data):].to_csv('data/test_dataset_landsat.csv', index=False)

## MODIS

In [None]:
# Bare expression: evaluates to a handle on the MODIS NDVI collection queried
# in the loop further down. The cell has no execution count in the saved
# notebook, and nothing is assigned here.
ee.ImageCollection("MODIS/MOD09GA_006_NDVI")

In [39]:
# MODIS sinusoidal tile grid (h/v indices).
# NOTE(review): the link kept below was copied from the Landsat cell and points
# at a Landsat download tutorial - replace it with the source of this shapefile.
# https://mgimond.github.io/ArcGIS_tutorials/Download_landsat.htm
grid = gpd.read_file('grid/modis_sinusoidal_grid_world.shp')
# Reproject the samples into the grid's CRS, then left-join the h/v indices of
# the matching grid cell(s) onto every row.
data_grid = data_cat.to_crs(grid.crs).sjoin(
    grid[['h', 'v', 'geometry']],
    how='left',
)
# One frame per (h, v) key.
gb = data_grid.groupby(['h', 'v'])
tiles = [gb.get_group(key) for key in gb.groups]

In [54]:
def get_series(collection, features, scale=250):
    """Build a per-feature NDVI time series table from an Earth Engine collection.

    For every feature, each image of ``collection`` is reduced to its median
    over the feature's geometry. The ``(date, NDVI)`` pairs are then written
    onto the feature as properties keyed by the image date (``YYYY/MM/dd``).
    The whole computation is fetched with a single ``getInfo()`` call.

    Adapted from
    https://stackoverflow.com/questions/47633088/get-results-in-an-earth-engine-python-script

    Parameters
    ----------
    collection : ee.ImageCollection
        Images to sample; must expose an ``NDVI`` band.
    features : ee.FeatureCollection
        Features whose geometries are sampled.
    scale : float, default 250
        Value passed as the ``scale`` argument of ``reduceRegion``
        (previously hard-coded to 250).

    Returns
    -------
    pandas.DataFrame
        One row per returned feature, one column per feature property
        (the properties uploaded with the feature plus one column per date).
    """
    def _feature_series(feature):
        def _image_median(img):
            # Median of the image bands over this feature, tagged with the image date.
            stats = img.reduceRegion(ee.Reducer.median(), feature.geometry(), scale=scale)
            return feature.set(stats).set('date', img.date().format("YYYY/MM/dd"))

        series = collection.map(_image_median)

        # [[date, ndvi], ...] -> flat [date, ndvi, date, ndvi, ...] -> {date: ndvi}
        pairs = series.reduceColumns(ee.Reducer.toList(2), ['date', 'NDVI']).get('list')
        return feature.set(ee.Dictionary(ee.List(pairs).flatten()))

    result = features.map(_feature_series).getInfo()
    return pd.DataFrame([item['properties'] for item in result['features']])

In [90]:
# Break tiles with more than 100 rows into pieces via `split_df(tile, 100)`;
# smaller tiles are passed through unchanged.
tiles_ = []
for tile in tiles:
    tiles_.extend(split_df(tile, 100) if tile.shape[0] > 100 else [tile])

In [91]:
# Replace the per-grid-cell tiles with the size-limited list built in the
# previous cell. NOTE(review): re-running the previous cell after this one
# splits the already split list again.
tiles = tiles_

In [93]:
# Fetch the MODIS NDVI series tile by tile; `out` collects one DataFrame per tile.
out = []
for tile in tqdm(tiles):
    # Tiles are in the grid CRS after the join, so go back to EPSG:4326 for upload.
    features = eeconvert.gdfToFc(tile.to_crs(4326)[['id', 'geometry']])
    collection = (
        ee.ImageCollection('MODIS/MOD09GA_006_NDVI')
        .filterBounds(features)
        .filterDate('2021-04-15', '2021-09-01')
        .select('NDVI')
    )
    out.append(get_series(collection, features))

  0%|          | 0/85 [00:00<?, ?it/s]

In [98]:
# Stack the per-tile DataFrames collected in `out` into one long table
# (row order follows tile order, not `data_cat`).
data_modis = pd.concat(out)

In [99]:
# Collapse to one row per id (column-wise maximum, in case an id was returned
# for more than one tile) and treat dates without a value as 0.
# `id` is deliberately kept as the index: the `.loc` lookup below must select
# by id. Resetting the index first would turn it into a lookup by row position,
# which is only right when the ids happen to be exactly 0..n-1.
data_modis = data_modis.groupby('id').max().fillna(0)
# Rows in `data_cat` order, columns (dates) in ascending order.
data_modis = data_modis.loc[data_cat['id'], sorted(data_modis.columns)]

In [101]:
# `data_modis` follows the row order of `data_cat` (train, then test), so the
# first len(data) rows are the training part and the remainder the test part.
data_modis.iloc[:len(data)].to_csv('data/train_dataset_modis.csv', index=False)
data_modis.iloc[len(data):].to_csv('data/test_dataset_modis.csv', index=False)

## Sentinel-2

In [7]:
# Bare expression: evaluates to a handle on the Sentinel-2 collection queried
# in the loop further down (the cell output is just the object repr).
# Nothing is assigned here.
ee.ImageCollection('COPERNICUS/S2')

<ee.imagecollection.ImageCollection at 0x1ef68715a30>

In [14]:
# Sentinel-2 tile index shapefile; see
# https://github.com/justinelliotmeyers/Sentinel-2-Shapefile-Index
grid = gpd.read_file('grid/sentinel_2_index_shapefile.shp')
# Reproject the samples into the grid's CRS, then left-join the tile `Name` of
# the matching grid cell(s) onto every row.
data_grid = data_cat.to_crs(grid.crs).sjoin(
    grid[['Name', 'geometry']],
    how='left',
)
# One frame per tile name.
gb = data_grid.groupby('Name')
tiles = [gb.get_group(key) for key in gb.groups]

In [15]:
# Break tiles with more than 100 rows into pieces via `split_df(tile, 50)`;
# smaller tiles are passed through unchanged.
# NOTE(review): the threshold (100) and the value handed to `split_df` (50)
# differ, whereas the MODIS cell uses 100 for both - confirm this is intended.
tiles_ = []
for tile in tiles:
    tiles_.extend(split_df(tile, 50) if tile.shape[0] > 100 else [tile])

In [16]:
# Replace the per-grid-cell tiles with the size-limited list built in the
# previous cell. NOTE(review): re-running the previous cell after this one
# splits the already split list again.
tiles = tiles_

In [17]:
def get_series(collection, features, scale=10):
    """Build a per-feature NDVI time series table from a Sentinel-2 collection.

    NDVI is computed per image as ``(B8 - B4) / (B8 + B4)``. For every feature,
    each NDVI image is reduced to its median over the feature's geometry. The
    ``(date, NDVI)`` pairs are then written onto the feature as properties
    keyed by the image date (``YYYY/MM/dd``). The whole computation is fetched
    with a single ``getInfo()`` call.

    Adapted from
    https://stackoverflow.com/questions/47633088/get-results-in-an-earth-engine-python-script

    Parameters
    ----------
    collection : ee.ImageCollection
        Images to sample; must expose the ``B4`` and ``B8`` bands.
    features : ee.FeatureCollection
        Features whose geometries are sampled.
    scale : float, default 10
        Value passed as the ``scale`` argument of ``reduceRegion``
        (previously hard-coded to 10).

    Returns
    -------
    pandas.DataFrame
        One row per returned feature, one column per feature property
        (the properties uploaded with the feature plus one column per date).
    """
    def _feature_series(feature):
        def _image_median(img):
            red = img.select('B4')
            nir = img.select('B8')
            # The band is renamed so the column reducer below can find it as 'NDVI'.
            ndvi = nir.subtract(red).divide(nir.add(red)).rename(['NDVI'])
            stats = ndvi.reduceRegion(ee.Reducer.median(), feature.geometry(), scale=scale)
            return feature.set(stats).set('date', img.date().format("YYYY/MM/dd"))

        series = collection.map(_image_median)

        # [[date, ndvi], ...] -> flat [date, ndvi, date, ndvi, ...] -> {date: ndvi}
        pairs = series.reduceColumns(ee.Reducer.toList(2), ['date', 'NDVI']).get('list')
        return feature.set(ee.Dictionary(ee.List(pairs).flatten()))

    result = features.map(_feature_series).getInfo()
    return pd.DataFrame([item['properties'] for item in result['features']])

In [None]:
# Fetch the Sentinel-2 NDVI series tile by tile; `out` collects one DataFrame per tile.
out = []
for tile in tqdm(tiles):
    # Tiles are in the grid CRS after the join, so go back to EPSG:4326 for upload.
    features = eeconvert.gdfToFc(tile.to_crs(4326)[['id', 'geometry']])
    # No band selection here: `get_series` picks B4/B8 itself to derive NDVI.
    collection = (
        ee.ImageCollection('COPERNICUS/S2')
        .filterBounds(features)
        .filterDate('2021-04-15', '2021-09-01')
    )
    out.append(get_series(collection, features))

  0%|          | 0/259 [00:00<?, ?it/s]

In [None]:
# Stack the per-tile DataFrames collected in `out` into one long table
# (row order follows tile order, not `data_cat`).
data_sentinel = pd.concat(out)

In [None]:
# Collapse to one row per id (column-wise maximum, in case an id was returned
# for more than one tile) and treat dates without a value as 0.
# `id` is deliberately kept as the index: the `.loc` lookup below must select
# by id. Resetting the index first would turn it into a lookup by row position,
# which is only right when the ids happen to be exactly 0..n-1.
data_sentinel = data_sentinel.groupby('id').max().fillna(0)
# Rows in `data_cat` order, columns (dates) in ascending order.
data_sentinel = data_sentinel.loc[data_cat['id'], sorted(data_sentinel.columns)]

In [None]:
# `data_sentinel` follows the row order of `data_cat` (train, then test), so the
# first len(data) rows are the training part and the remainder the test part.
data_sentinel.iloc[:len(data)].to_csv('data/train_dataset_sentinel.csv', index=False)
data_sentinel.iloc[len(data):].to_csv('data/test_dataset_sentinel.csv', index=False)