In [1]:
import ee
import geemap
import json
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

ee.Authenticate()
ee.Initialize(project='ee-yuxinchen118')

In [2]:
# Define center coordinates
center_lat = 43.5
center_lon = 11.0

# Convert kilometers to degrees (more accurate)
km_to_deg_lat = 1 / 111.32
km_to_deg_lon = 1 / (111.32 * math.cos(math.radians(center_lat)))

# Create a 25 km × 20 km rectangle (25 km east-west, 20 km north-south)
lat_extent = 20 * km_to_deg_lat
lon_extent = 25 * km_to_deg_lon

# Define the study area as a rectangle centered at the given point
tuscany_region = ee.Geometry.Rectangle([
    center_lon - lon_extent / 2,
    center_lat - lat_extent / 2,
    center_lon + lon_extent / 2,
    center_lat + lat_extent / 2
])

In [3]:
# Define the desired landcover class order
desired_order = [1, 2, 4, 5, 6]

# Map original landcover values to new class IDs (0 to 4)
value_to_new_class = {original: i for i, original in enumerate(desired_order)}

# Load the CSV file
df = pd.read_csv('Tuscany_Feature_Collection.csv')
df = df[df['landcover'].isin(desired_order)]
df['class'] = df['landcover'].map(value_to_new_class)

# 限制样本数量但保持每类足够的样本
max_samples_per_class = 1000  # 增加每类样本数量
df_sampled = df.groupby('class').apply(lambda x: x.sample(min(len(x), max_samples_per_class))).reset_index(drop=True)

# 转换为Earth Engine FeatureCollection
features = []
for _, row in df_sampled.iterrows():
    try:
        geom = json.loads(row['.geo'])
        ee_geom = ee.Geometry(geom)
        cls = int(row['class'])
        features.append(ee.Feature(ee_geom, {'class': cls}))
    except:
        continue

polygon_fc = ee.FeatureCollection(features)


  df_sampled = df.groupby('class').apply(lambda x: x.sample(min(len(x), max_samples_per_class))).reset_index(drop=True)


In [6]:
# 加载Sentinel-2影像
image_id = "20220418T100601_20220418T101300_T32TPP"
sentinel_image = ee.Image("COPERNICUS/S2_SR/" + image_id)

# 选择关键波段
selected_bands = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12'] 
image = sentinel_image.select(selected_bands).clip(tuscany_region)

ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
ndwi = image.normalizedDifference(['B3', 'B8']).rename('NDWI')
ndbi = image.normalizedDifference(['B11', 'B8']).rename('NDBI')
bsi = image.expression(
    '((B11 + B4) - (B8 + B2)) / ((B11 + B4) + (B8 + B2))', {
        'B11': image.select('B11'),
        'B4': image.select('B4'),
        'B8': image.select('B8'),
        'B2': image.select('B2')
    }
).rename('BSI')

composite = image.addBands([ndvi, ndwi, ndbi, bsi])
all_bands = selected_bands + ['NDVI', 'NDWI', 'NDBI', 'BSI']

print("准备训练数据...")

# 从polygon中采样像素（优化参数以处理大区域）
sampled_pixels = composite.sampleRegions(
    collection=polygon_fc,
    properties=['class'],
    scale=10,  # 保持10m分辨率
    geometries=True ,
    tileScale=8  # 增加tileScale以处理大区域
)

# 每个类别固定使用1000个训练样本（无论原始样本量多少）
training_pixels = ee.FeatureCollection([])
for i in range(5):
    class_pixels = sampled_pixels.filter(ee.Filter.eq('class', i)).limit(1000)  # 每类固定1000个像素
    training_pixels = training_pixels.merge(class_pixels)

# 输出样本数量
for i in range(5):
    count = training_pixels.filter(ee.Filter.eq('class', i)).size().getInfo()
    print(f"Class {i} pixel samples: {count}")

# 分割训练和验证数据 (7:3比例)
with_random = training_pixels.randomColumn('random', 42)
train_data = with_random.filter(ee.Filter.lt('random', 0.7))
val_data = with_random.filter(ee.Filter.gte('random', 0.7))

print("Training pixels:", train_data.size().getInfo())  
print("Validation pixels:", val_data.size().getInfo()) 


准备训练数据...
Class 0 pixel samples: 1000
Class 1 pixel samples: 1000
Class 2 pixel samples: 1000
Class 3 pixel samples: 1000
Class 4 pixel samples: 1000
Training pixels: 3530
Validation pixels: 1470


In [7]:
print("\n开始导出CSV文件...")

# 导出训练数据为CSV
train_export_task = ee.batch.Export.table.toDrive(
    collection=train_data,
    description='Tuscany_pixels_training_data',
    fileNamePrefix='Tuscany_pixels_training_data',
    fileFormat='CSV',
    folder='EarthEngine_Exports'  # Google Drive中的文件夹名称，可以修改
)

# 导出验证数据为CSV
val_export_task = ee.batch.Export.table.toDrive(
    collection=val_data,
    description='Tuscany_pixels_validation_data',
    fileNamePrefix='Tuscany_pixels_validation_data',
    fileFormat='CSV',
    folder='EarthEngine_Exports'  # Google Drive中的文件夹名称，可以修改
)

# 启动导出任务
print("启动训练数据导出任务...")
train_export_task.start()

print("启动验证数据导出任务...")
val_export_task.start()

print("导出任务已启动！")
print("请到您的Google Drive查看导出进度和结果文件。")
print("文件将保存在 'EarthEngine_Exports' 文件夹中。")


开始导出CSV文件...
启动训练数据导出任务...
启动验证数据导出任务...
导出任务已启动！
请到您的Google Drive查看导出进度和结果文件。
文件将保存在 'EarthEngine_Exports' 文件夹中。
