In [7]:
import ee
import geemap
import json
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Earth Engine session setup: authenticate first, then initialise the client
# against the Cloud project named below.
EE_PROJECT = 'ee-yuxinchen118'

ee.Authenticate()
ee.Initialize(project=EE_PROJECT)

In [8]:
# Outer ring of the study-area polygon as [lon, lat] vertices; the first
# vertex is repeated at the end to close the ring.
coords = [[
    [7.365302030814007, 45.99791113635203],
    [7.365302030814007, 45.890003655794075],
    [7.564429228079632, 45.890003655794075],
    [7.564429228079632, 45.99791113635203],
    [7.365302030814007, 45.99791113635203],
]]

geometry = ee.Geometry.Polygon(coords)
center = geometry.centroid()
# getInfo() pulls the centroid to the client so it can be used in plain
# Python arithmetic below.
center_coords = center.getInfo()['coordinates']
x_center, y_center = center_coords[0], center_coords[1]

# 25 km x 20 km window around the centroid, expressed in degrees:
# 111.32 km per degree, with the longitude span widened by 1 / cos(latitude).
lat_degree = 20 / 111.32
lon_degree = 25 / (111.32 * math.cos(math.radians(y_center)))

half_lon = lon_degree / 2
half_lat = lat_degree / 2
west, south = x_center - half_lon, y_center - half_lat
east, north = x_center + half_lon, y_center + half_lat

# Rectangle corners are given as [west, south, east, north].
Aosta_region = ee.Geometry.Rectangle([west, south, east, north])

# Display colour (hex) for every land-cover class name.
color_scheme = dict(
    water='#419BDF',
    trees='#397D49',
    grass='#88B053',
    shrub='#DFC35A',
    bare='#A59B8F',
    snowice='#B39FE1',
)

# Class names in class-ID order; class_values holds the matching IDs 0..5 and
# palette the matching colours, so index i lines up across all three lists.
classes = ['water', 'trees', 'grass', 'shrub', 'bare', 'snowice']
class_values = list(range(len(classes)))
palette = list(map(color_scheme.__getitem__, classes))

In [9]:
# Original landcover codes to keep, listed in the order in which the new
# class IDs are handed out.
desired_order = [0, 1, 2, 5, 7, 8]

# Original landcover code -> new contiguous class ID (0 to 5).
value_to_new_class = {original: i for i, original in enumerate(desired_order)}

# Load the labelled polygons and keep only the desired landcover classes.
# .copy() detaches the filtered frame from the raw one, so adding a column
# below does not write into a slice (avoids pandas' SettingWithCopyWarning).
df = pd.read_csv('Aosta_Feature_Collection.csv')
df = df[df['landcover'].isin(desired_order)].copy()

# Attach the new class ID. Its numeric order is exactly the position of the
# code in desired_order, so it doubles as the sort key.
df['class'] = df['landcover'].map(value_to_new_class)

# Stable sort: rows keep their CSV order within each class, which makes the
# resulting row order reproducible from run to run.
df = df.sort_values(by='class', kind='stable')

# Convert the labelled rows to an Earth Engine FeatureCollection.
# Each row's '.geo' column holds a GeoJSON string; 'class' holds the new
# class ID (0 to 5). The per-row geometry is built inline rather than bound to
# a variable called `geometry`, so the study-area `geometry` defined earlier
# in the notebook is not overwritten.
training_features = [
    ee.Feature(ee.Geometry(json.loads(geo_json)), {'class': int(class_id)})
    for geo_json, class_id in zip(df['.geo'], df['class'])
]

polygon_fc = ee.FeatureCollection(training_features)

In [10]:
# Sentinel-2 surface-reflectance scene used for training.
# The HARMONIZED collection is used instead of the deprecated COPERNICUS/S2_SR:
# scenes processed with baseline 04.00+ (from 2022-01-25, which includes this
# 2022-10-06 scene) carry a +1000 DN offset in S2_SR, which biases every
# normalised-difference index below. The harmonized collection removes that
# offset and keeps the same scene IDs.
image_id = "20221006T102949_20221006T103655_T32TLR"
sentinel_image = ee.Image("COPERNICUS/S2_SR_HARMONIZED/" + image_id)

# Select the key bands and restrict the scene to the study area.
selected_bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12']
image = sentinel_image.select(selected_bands).clip(Aosta_region)

# Spectral indices; normalizedDifference([a, b]) computes (a - b) / (a + b).
ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
ndwi = image.normalizedDifference(['B3', 'B8']).rename('NDWI')
ndbi = image.normalizedDifference(['B11', 'B8']).rename('NDBI')
bsi = image.expression(
    '((B11 + B4) - (B8 + B2)) / ((B11 + B4) + (B8 + B2))',
    {band: image.select(band) for band in ('B11', 'B4', 'B8', 'B2')}
).rename('BSI')

# Feature stack: the ten reflectance bands followed by the four indices.
# all_bands lists the band names in that same order.
composite = image.addBands([ndvi, ndwi, ndbi, bsi])
all_bands = selected_bands + ['NDVI', 'NDWI', 'NDBI', 'BSI']

print("准备训练数据...")

# Sample the composite at every pixel inside the training polygons
# (parameters tuned for a large region).
sampled_pixels = composite.sampleRegions(
    collection=polygon_fc,
    properties=['class'],
    scale=10,         # keep the 10 m resolution
    geometries=True,
    tileScale=8       # larger tileScale to cope with the large region
)

# Cap every class at 1000 training pixels.
# NOTE(review): limit() keeps the first 1000 features of the filtered
# collection rather than a random draw, and yields fewer than 1000 when a
# class has fewer samples -- confirm this is the intended sampling scheme.
training_pixels = ee.FeatureCollection([])
for i in range(6):
    class_pixels = sampled_pixels.filter(ee.Filter.eq('class', i)).limit(1000)
    training_pixels = training_pixels.merge(class_pixels)

# Report the per-class sample counts. One aggregate_histogram request replaces
# six separate size().getInfo() round trips. Histogram keys come back as
# strings, so they are normalised to int class IDs before the lookup.
class_histogram = training_pixels.aggregate_histogram('class').getInfo()
class_counts = {
    int(float(key)): int(value) for key, value in class_histogram.items()
}
for i in range(6):
    count = class_counts.get(i, 0)
    print(f"Class {i} pixel samples: {count}")

# Split into training and validation data (7:3 ratio) using a seeded
# 'random' column: values below 0.7 go to training, the rest to validation.
with_random = training_pixels.randomColumn(columnName='random', seed=42)

in_training = ee.Filter.lt('random', 0.7)
in_validation = ee.Filter.gte('random', 0.7)
train_data = with_random.filter(in_training)
val_data = with_random.filter(in_validation)

# Each size is fetched from the server right before its line is printed.
for label, subset in (("Training pixels:", train_data),
                      ("Validation pixels:", val_data)):
    print(label, subset.size().getInfo())

准备训练数据...
Class 0 pixel samples: 1000
Class 1 pixel samples: 1000
Class 2 pixel samples: 1000
Class 3 pixel samples: 1000
Class 4 pixel samples: 1000
Class 5 pixel samples: 1000
Training pixels: 4225
Validation pixels: 1775


In [11]:
print("\n开始导出CSV文件...")


def _drive_csv_export(collection, name):
    """Create (but do not start) a Google Drive CSV export task.

    `name` is used both as the task description and as the file name prefix.
    """
    return ee.batch.Export.table.toDrive(
        collection=collection,
        description=name,
        fileNamePrefix=name,
        fileFormat='CSV',
        folder='EarthEngine_Exports'  # folder name in Google Drive; can be changed
    )


# One export task for the training split and one for the validation split.
train_export_task = _drive_csv_export(train_data, 'Aosta_pixels_training_data')
val_export_task = _drive_csv_export(val_data, 'Aosta_pixels_validation_data')

# Start the export tasks, announcing each one before it is started.
for announcement, task in (("启动训练数据导出任务...", train_export_task),
                           ("启动验证数据导出任务...", val_export_task)):
    print(announcement)
    task.start()

for message in ("导出任务已启动！",
                "请到您的Google Drive查看导出进度和结果文件。",
                "文件将保存在 'EarthEngine_Exports' 文件夹中。"):
    print(message)


开始导出CSV文件...
启动训练数据导出任务...
启动验证数据导出任务...
导出任务已启动！
请到您的Google Drive查看导出进度和结果文件。
文件将保存在 'EarthEngine_Exports' 文件夹中。
