In [1]:
import ee
import datetime
import os
import itertools
import sys
import re

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import geemap

import subprocess
from subprocess import PIPE

In [2]:
# Initialize the Earth Engine client before any ee objects are built in the cells below.
ee.Initialize()

In [3]:
# Earth Engine asset folder that holds the per-period control-point collections.
sample_path = "users/wangjinzhulala/China_built_up/03_sample_train_test_split"

In [4]:
# Three-year windows labelled '<start>_<end>', from 1990_1992 through 2017_2019.
year_span = ['{}_{}'.format(start, start + 2) for start in range(1990, 2020, 3)]

In [5]:
# Region under study: Chinese label and the ASCII name used in asset and file names.
region_cn, region_en = '西南', 'xinan'

### Step_1: Prepare input classification_img, control_sample, and sample_size

In [6]:
# Load the untouched control points, one FeatureCollection per period.
# Per the original note, they were divided by a random column with a random state of 101.
Sample = []
for year in year_span:
    Sample.append(ee.FeatureCollection(f'{sample_path}/Grid_select_{region_en}_{year}'))

In [7]:
# Input-band combinations to evaluate; only the uncommented entry is used.
in_band_selection = [
    # 'Spectrum',
    # 'Spectrum_Normalize',
    # 'Spectrum_Normalize_Fourier',
    # 'Spectrum_Normalize_Fourier_Terrain',
    'Spectrum_Normalize_Fourier_Terrain_Meterology',
]

In [8]:
# Import the classified images.
# Each period has ten images named '<bands>_<region>_<period>_<seed>' (seed 0-9);
# they are summed per pixel so a threshold can later be applied to the total.

img_path = 'users/rlyon2/China_built_classification/01_primary_classifications'

Classified_Landsat_1990_2019 = {}

for in_band in in_band_selection:
    for period_idx, year in enumerate(year_span):

        img_name = f'{in_band}_{region_en}_{year}'

        # Sum of the ten seed-specific classification images for this period.
        seed_images = [ee.Image(f'{img_path}/{img_name}_{seed}') for seed in range(10)]
        preliminary_classification = ee.ImageCollection(seed_images).sum()

        # Control points for the same period, plus their count fetched from the server.
        test_sample = Sample[period_idx]
        sample_count = test_sample.size().getInfo()

        # NOTE(review): the key is the period only, so if in_band_selection ever holds
        # more than one entry, later band sets overwrite earlier ones.
        Classified_Landsat_1990_2019[f'{year}'] = (preliminary_classification,
                                                   test_sample,
                                                   sample_count)

In [9]:
# One row per period: summed classification image, control points, and control-point count.
input_columns = ['classification_img','sample_pt','sample_size']
Input_df = pd.DataFrame(Classified_Landsat_1990_2019).transpose()
Input_df.columns = input_columns
Input_df

Unnamed: 0,classification_img,sample_pt,sample_size
1990_1992,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
1993_1995,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
1996_1998,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
1999_2001,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",628
2002_2004,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
2005_2007,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
2008_2010,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
2011_2013,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
2014_2016,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634
2017_2019,"ee.Image({\n ""functionInvocationValue"": {\n ...","ee.FeatureCollection({\n ""functionInvocationV...",634


### Step_2: Loop through each threshold and extract the classification value to the samples for later comparison

In [10]:
# Maps (period, threshold) -> [control points carrying the thresholded class, control-point count].
Threshold_acc = {}

for idx, row in Input_df.iterrows():

    # Read the row by column label. Integer keys on a string-labelled Series only work
    # through pandas' deprecated positional fallback.
    img = row['classification_img']
    sample = row['sample_pt']
    sample_size = row['sample_size']

    for threshold in range(10):

        # A pixel is labelled 1 when the summed classification exceeds the threshold.
        # The band is renamed so it can be compared against the 'Built' property later.
        img_threshold = img.gt(threshold).rename('classification')

        # Sample the thresholded classification at each control point, keeping the
        # reference 'Built' property alongside the new 'classification' value.
        # NOTE(review): sample_size counts all control points; confirm sampleRegions
        # returns a feature for every one of them, since it is used as the denominator.
        sample_with_value = img_threshold.sampleRegions(collection = sample,
                                                        properties = ['Built'],
                                                        scale      = 30)

        # Store the lazily evaluated result together with the sample count.
        Threshold_acc[(idx, threshold)] = [sample_with_value, sample_size]

In [11]:
# Reshape the {(period, threshold): [samples, count]} dict into a long table,
# one row per (period, threshold) pair.
Threshold_acc_df = pd.DataFrame(Threshold_acc).T.reset_index()
Threshold_acc_df.columns = ['Year','Threshold','Feature','Sample_size']

### Step_3: Calculate the number of correctly classified points, then get the accuracy

In [12]:
# Display the per-period, per-threshold table.
Threshold_acc_df

Unnamed: 0,Year,Threshold,Feature,Sample_size
0,1990_1992,0,"ee.FeatureCollection({\n ""functionInvocationV...",634
1,1990_1992,1,"ee.FeatureCollection({\n ""functionInvocationV...",634
2,1990_1992,2,"ee.FeatureCollection({\n ""functionInvocationV...",634
3,1990_1992,3,"ee.FeatureCollection({\n ""functionInvocationV...",634
4,1990_1992,4,"ee.FeatureCollection({\n ""functionInvocationV...",634
...,...,...,...,...
95,2017_2019,5,"ee.FeatureCollection({\n ""functionInvocationV...",634
96,2017_2019,6,"ee.FeatureCollection({\n ""functionInvocationV...",634
97,2017_2019,7,"ee.FeatureCollection({\n ""functionInvocationV...",634
98,2017_2019,8,"ee.FeatureCollection({\n ""functionInvocationV...",634


In [14]:
correct_num_list = []

# A point is correctly classified when its 'Built' property equals its 'classification'
# property. The filter is the same for every row, so it is built once.
is_correct = ee.Filter.equals('Built', None, 'classification')

for idx, row in Threshold_acc_df.iterrows():

    # Read the row by column label. Integer keys on a string-labelled Series only work
    # through pandas' deprecated positional fallback.
    correct_num = row['Feature'].filter(is_correct).size().getInfo()
    correct_num_list.append(correct_num)

    print(f"Compute the correct classified points number of {row['Year']}_{row['Threshold']} finished")

EEException: Collection.loadTable: Collection asset 'users/wangjinzhulala/China_built_up/03_sample_train_test_split/Grid_select_xinan_1990_1992' not found.

In [None]:
# Attach the correct-point counts, express accuracy as a percentage of the sample size,
# and make the period categorical.
Threshold_acc_df = Threshold_acc_df.assign(
    Correct_num=correct_num_list,
    Accuracy=lambda df: df['Correct_num'] / df['Sample_size'] * 100,
    Year=lambda df: df['Year'].astype('category'),
)

In [None]:
# Save the accuracy table to disk, creating the output folder first so that
# to_csv does not fail when ./Result is missing.
os.makedirs('./Result', exist_ok=True)
Threshold_acc_df.to_csv(f'./Result/Threshold_acc_df_{region_en}.csv',index=False)

# Read the table back from the saved file.
Threshold_acc_df = pd.read_csv(f'./Result/Threshold_acc_df_{region_en}.csv')

# Make threshold and period categorical so they can be used as plot axes and hues.
Threshold_acc_df['Threshold'] = Threshold_acc_df['Threshold'].astype('category')
Threshold_acc_df['Year'] = Threshold_acc_df['Year'].astype('category')

##### Observe that the best threshold is 4

In [None]:
# Distribution of accuracy across periods for each threshold.
fig, ax = plt.subplots(figsize=(10,6))
sns.boxplot(x='Threshold', y='Accuracy', data=Threshold_acc_df, ax=ax)

In [None]:
# Accuracy against threshold, one line per period.
fig, ax = plt.subplots(figsize=(12,8))
sns.lineplot(x='Threshold',
             y='Accuracy',
             hue='Year',
             data=Threshold_acc_df,
             ax=ax)

##### Compare with original accuracy

In [None]:
# Import the original accuracy for comparison: keep only the rows for the full band set
# with 100 trees, and only the period and accuracy columns.
Original_acc = pd.read_csv(f'../Sub_Process_6_Before_classification_Feature_selection/Result/Grid_acc_tree_{region_en}.csv')
is_reference_run = ((Original_acc['In_Bands']=='Spectrum_Normalize_Fourier_Terrain_Meterology')&
                    (Original_acc['Tree']==100))

# Select rows and columns in one .loc call and take a copy, so the assignments below
# write to an independent frame instead of a slice (avoids SettingWithCopyWarning).
Original_acc = Original_acc.loc[is_reference_run, ['Year','Accuracy']].copy()

# Scale to percent so it matches the Accuracy column of the threshold table.
Original_acc['Accuracy'] = Original_acc['Accuracy'] * 100
Original_acc['Region'] = region_en
Original_acc['Type'] = 'Original'

In [None]:
# Accuracy per period at threshold 4. The rows and columns are selected in one .loc call
# and copied, so the assignments below write to an independent frame instead of a slice
# (avoids SettingWithCopyWarning).
Threshold_acc_df_4 = Threshold_acc_df.loc[Threshold_acc_df['Threshold']==4, ['Year','Accuracy']].copy()
Threshold_acc_df_4['Region'] = region_en
Threshold_acc_df_4['Type'] = 'Ten_folds_check'

In [None]:
# Draw both accuracy series on one labelled axes so they can be told apart.
# The labels match the 'Type' values assigned to each frame.
fig, ax = plt.subplots()
sns.lineplot(x='Year', y='Accuracy', data=Threshold_acc_df_4, label='Ten_folds_check', ax=ax)
sns.lineplot(x='Year', y='Accuracy', data=Original_acc, label='Original', ax=ax)
ax.set(xlabel='Year', ylabel='Accuracy (%)')
ax.legend(title='Type');