In [11]:
import ee
import datetime
import os
import itertools
import sys
import urllib.request

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import geemap

import subprocess
from subprocess import PIPE

In [12]:
# Initialize the Earth Engine client library; the ee.* calls in the cells
# below are issued through this session.
# NOTE(review): presumably relies on credentials already stored on this
# machine (no ee.Authenticate() call is visible here) — confirm.
ee.Initialize()

##### Define basic parameters

In [21]:
# Region being processed: Chinese display name and the ASCII name that is
# embedded in asset ids and output file names.
region_cn, region_en = '华东', 'huadong'

In [22]:
# Asset folder that holds the per-period sample collections read in the loop below
sample_path = 'users/rlyon2/China_built_classification/00_sample_ext_img'

# Grid features of the selected region, loaded from the boundary asset folder
grid_asset_id = f"users/wangjinzhulala/China_built_up/01_Boundary_shp/Grid_{region_en}"
grid_shp = ee.FeatureCollection(grid_asset_id)

In [23]:
# define the gee-asset path for exporting
# NOTE(review): this is the same value as sample_path and it is not referenced
# in the cells visible here — confirm whether it is still needed.
export_path = 'users/rlyon2/China_built_classification/00_sample_ext_img'

In [24]:
# Three-year periods: first years 1990, 1993, ..., 2017 paired with the
# last years 1992, 1995, ..., 2019.
year_start = list(range(1990, 2018, 3))
year_end = list(range(1992, 2020, 3))

# Label every period as "<first year>_<last year>", e.g. '1990_1992'
year_name = [f'{first}_{last}' for first, last in zip(year_start, year_end)]

In [25]:
# Display the period labels to check that start and end years were paired as intended
year_name

['1990_1992',
 '1993_1995',
 '1996_1998',
 '1999_2001',
 '2002_2004',
 '2005_2007',
 '2008_2010',
 '2011_2013',
 '2014_2016',
 '2017_2019']

##### Using grid to extract sample points

In [26]:
# Join condition: a pair of features matches when their geometries intersect.
# Both sides are compared on the '.geo' field, with maxError set to 1.
intersect_args = {
    'leftField': '.geo',
    'rightField': '.geo',
    'maxError': 1,
}
spatialFilter = ee.Filter.intersects(**intersect_args)

In [27]:
# Save-all join: the matches found for a feature are stored on it under the
# 'sample_pts' property (read back as a list in the sampling loop).
saveAllJoin = ee.Join.saveAll(matchesKey='sample_pts')

In [28]:
# For every period: pick one sample point per grid cell, balance the picked
# points between built and non-built to form the hold-out set, treat all
# remaining sample points as the training set, and download both as CSV.

# The inverted join does not depend on the period, so build it once.
invertedJoin = ee.Join.inverted()

# urlretrieve does not create missing folders; make sure the target exists.
os.makedirs('./Data', exist_ok=True)

for year in year_name:

    # get sample
    sample_pt = ee.FeatureCollection(f"{sample_path}/Control_sample_ext_img_{region_en}_{year}")

    #_____________________________1: select one point from each grid________________________
    # Attach to each grid feature the list of sample points that intersect it.
    intersectJoined = saveAllJoin.apply(grid_shp, sample_pt, spatialFilter)

    # Keep the first matched point of each grid feature. The explicit
    # ee.Feature cast makes the mapped function return a typed feature
    # instead of an untyped computed object.
    choose_one = intersectJoined.map(
        lambda fe: ee.Feature(ee.List(fe.get('sample_pts')).get(0))
    )

    # balance the size of choose_one sample [built = non-built]
    bulit = choose_one.filterMetadata('Built', 'equals', 1)
    n_built = bulit.size().getInfo()

    # Sorting by the 'random' column is what makes the subset random:
    # limit() without a sort property simply keeps the first n features,
    # which would leave the random column unused.
    non_bulit = choose_one.filterMetadata('Built', 'equals', 0)\
                          .randomColumn()\
                          .limit(n_built, 'random')

    # merge to get the hold_out sample
    hold_out = bulit.merge(non_bulit)

    #_____________________________2: exclude the selected points________________________

    # The inverted join keeps only the sample points that do NOT intersect
    # any hold-out point, i.e. everything that was not selected above.
    hold_in = invertedJoin.apply(sample_pt, hold_out, spatialFilter)

    #_____________________________3: download the csv to local drive________________________
    # create the download urls
    url_hold_out = hold_out.getDownloadURL('csv')
    url_hold_in  = hold_in.getDownloadURL('csv')

    # download the csv to local drive
    name_hold_out = f'Grid_select_{region_en}_{year}.csv'
    name_hold_in  = f'Training_sample_{region_en}_{year}.csv'

    urllib.request.urlretrieve(url_hold_out, f'./Data/{name_hold_out}')
    urllib.request.urlretrieve(url_hold_in, f'./Data/{name_hold_in}')

    # print out the process
    print(f'{region_en}_{year} downloaded!')

huadong_1990_1992 downloaded!
huadong_1993_1995 downloaded!
huadong_1996_1998 downloaded!
huadong_1999_2001 downloaded!
huadong_2002_2004 downloaded!
huadong_2005_2007 downloaded!
huadong_2008_2010 downloaded!
huadong_2011_2013 downloaded!
huadong_2014_2016 downloaded!
huadong_2017_2019 downloaded!
