# 导入必要的库

In [2]:
import datetime
import getpass
import json
import os
import subprocess
import sys
from typing import Optional

import pandas as pd
import requests

# 基础设置

In [None]:
# ============================================================================
# User configuration — edit the values below
# ============================================================================
# Start date (YYYY-MM-DD)
startDate = '2025-01-01'
# End date (YYYY-MM-DD)
endDate = '2025-06-30'

# Satellite collection to query (e.g. 'SENTINEL-2', 'SENTINEL-3', 'LANDSAT-ETM-C2-L2')
satellite = 'SENTINEL-2'

# Substrings the product name must contain
# (for Sentinel-2 this can be used to select a tile or a product level)
contains_list = ['L1C', 'RTN']

# Path to the GeoJSON file describing the region of interest (ROI)
# A polygon can be drawn and exported at https://geojson.io/
roi_geojson = 'mapdc.geojson'

# Directory where products and quicklooks are saved
output_dir = 'F:/LakeTH/DataBur/'

# Copernicus Data Space account (https://dataspace.copernicus.eu/).
# Credentials must never be stored in the notebook itself: they are read from the
# CDSE_EMAIL / CDSE_PASSWORD environment variables, and the user is prompted
# interactively when a variable is unset or empty.
# NOTE(review): a password was previously committed in this cell in plain text;
# treat it as compromised and change it.
email = os.environ.get('CDSE_EMAIL') or input('哥白尼数据中心账号 (邮箱): ')
password = os.environ.get('CDSE_PASSWORD') or getpass.getpass('哥白尼数据中心密码: ')
# ============================================================================

# Make sure the output directory exists
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    print(f"创建目录: {output_dir}")

# 处理ROI地理信息

In [4]:
# Read the ROI polygon from the GeoJSON file and format its outer ring as the
# "lon lat, lon lat, ..." string used inside the catalogue's POLYGON((...)) filter.
try:
    # GeoJSON is UTF-8 by specification; do not rely on the platform default encoding
    with open(roi_geojson, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)
    # Outer ring of the first feature's geometry
    coordinates = geojson_data['features'][0]['geometry']['coordinates'][0]

    # Convert the list of points into the query string format
    coordinates_str = ', '.join([f"{coord[0]} {coord[1]}" for coord in coordinates])

    # Make sure the ring is closed (first point repeated at the end)
    if coordinates[0] != coordinates[-1]:
        coordinates_str += f", {coordinates[0][0]} {coordinates[0][1]}"

    print("ROI坐标处理完成。")
except FileNotFoundError:
    print(f"错误: GeoJSON 文件未找到: {roi_geojson}")
    sys.exit()
except json.JSONDecodeError as e:
    # The file exists but is not valid JSON
    print(f"错误: GeoJSON 文件不是有效的 JSON: {roi_geojson}。错误详情: {e}")
    sys.exit()
except (KeyError, IndexError, TypeError) as e:
    # TypeError covers geometries whose coordinates are not nested like a polygon
    # (e.g. a Point, where coordinates[0] is a number and cannot be indexed)
    print(f"错误: GeoJSON 文件格式不正确。请确保它包含有效的多边形坐标。错误详情: {e}")
    sys.exit()

ROI坐标处理完成。


# 生成检索链接

In [5]:
# Catalogue OData endpoint; the $filter expression is appended to it
base_prefix = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="

# -- Filter clauses --
filters = []

# 1. Product-name substrings: when contains_list is non-empty, every entry must
#    appear in the name, so the contains() terms are AND-ed inside one pair of parentheses
if contains_list:
    name_filters = ["contains(Name,'{}')".format(s) for s in contains_list]
    filters.append("(" + " and ".join(name_filters) + ")")

# 2. Satellite collection, 3. spatial intersection with the ROI polygon,
# 4. acquisition start time strictly inside the configured date range
filters += [
    "Collection/Name eq '{}'".format(satellite),
    "OData.CSC.Intersects(area=geography'SRID=4326;POLYGON(({}))')".format(coordinates_str),
    "ContentDate/Start gt {0}T00:00:00.000Z and ContentDate/Start lt {1}T23:59:59.999Z".format(startDate, endDate),
]

# -- Query options --
# Maximum number of results requested
search_lim = "&$top=1000"
# Expand the Assets attribute (used later to obtain quicklook links)
expand_assets = "&$expand=Assets"

# -- Final URL: all filter clauses joined with ' and ', followed by the query options --
request_url = base_prefix + " and ".join(filters) + search_lim + expand_assets

print("检索URL已生成:")
print(request_url)

检索URL已生成:
https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=(contains(Name,'L1C') and contains(Name,'RTN')) and Collection/Name eq 'SENTINEL-2' and OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((102.68959377414325 24.8874461373789, 102.68959377414325 24.83731300905565, 102.72877059111312 24.83731300905565, 102.72877059111312 24.8874461373789, 102.68959377414325 24.8874461373789))') and ContentDate/Start gt 2025-01-01T00:00:00.000Z and ContentDate/Start lt 2025-06-30T23:59:59.999Z&$top=1000&$expand=Assets


# 执行检索并显示结果

In [6]:
print("\n正在向服务器发送检索请求...")
try:
    # A timeout prevents the cell from hanging forever on an unresponsive server
    response = requests.get(request_url, timeout=60)
    response.raise_for_status()  # raises on HTTP error statuses (e.g. 404, 500)
    JSON = response.json()

    if 'value' not in JSON or not JSON['value']:
        print('未查询到任何数据，请检查您的检索条件。')
        sys.exit()

    df = pd.DataFrame.from_dict(JSON['value'])
    print(f'查询成功！共找到 {len(df)} 条数据。')

    # Preview a few key columns; columns absent from the response are skipped
    # instead of raising KeyError
    columns_to_print = ['Id', 'Name', 'ContentDate', 'S3Path']
    print("\n数据检索结果预览:")
    print(df[[col for col in columns_to_print if col in df.columns]].head())

# The JSON handler comes first: in recent requests versions the error raised by
# response.json() is also a RequestException and would otherwise be reported
# as a network failure.
except json.JSONDecodeError:
    print("无法解析服务器返回的JSON数据，请检查API状态或URL。")
    sys.exit()
except requests.exceptions.RequestException as e:
    print(f"网络请求失败: {e}")
    sys.exit()



正在向服务器发送检索请求...
查询成功！共找到 41 条数据。

数据检索结果预览:
                                     Id  \
0  ccf6c6a3-509e-4c16-bfa7-7a70bb8ad82e   
1  83868a84-a9a1-41db-9f05-1457f73815ca   
2  f54e972f-7fad-4e1d-89e1-7a3bbe1fb18f   
3  5899b701-80cb-44d3-b2f0-18b5148a0baa   
4  6850f858-a139-413b-9304-32f77afa6add   

                                                Name  \
0  S2B_MSIL1C_20250520T033539_N0511_R061_T48RTN_2...   
1  S2C_MSIL1C_20250415T033601_N0511_R061_T48RTN_2...   
2  S2B_MSIL1C_20250609T033539_N0511_R061_T48RTN_2...   
3  S2A_MSIL1C_20250626T034201_N0511_R061_T48RTN_2...   
4  S2B_MSIL1C_20250510T033539_N0511_R061_T48RTN_2...   

                                         ContentDate  \
0  {'Start': '2025-05-20T03:35:39.024000Z', 'End'...   
1  {'Start': '2025-04-15T03:36:01.024000Z', 'End'...   
2  {'Start': '2025-06-09T03:35:39.024000Z', 'End'...   
3  {'Start': '2025-06-26T03:42:01.024000Z', 'End'...   
4  {'Start': '2025-05-10T03:35:39.024000Z', 'End'...   

                      

# 下载快视图 (Quicklooks)

In [7]:
print("\n开始下载快视图...")
quicklook_dir = os.path.join(output_dir, 'quicklooks')
if not os.path.exists(quicklook_dir):
    os.makedirs(quicklook_dir)
    print(f"创建快视图目录: {quicklook_dir}")

# Extract the quicklook download links
try:
    # For each product take the DownloadLink of the first asset whose Type is 'QUICKLOOK'
    df['QuicklookUrl'] = df['Assets'].apply(lambda assets: next((asset['DownloadLink'] for asset in assets if asset.get('Type') == 'QUICKLOOK'), None))

    quicklook_downloads = df[df['QuicklookUrl'].notna()]

    if quicklook_downloads.empty:
        print("未在检索结果中找到可用的快视图。")
    else:
        for index, row in quicklook_downloads.iterrows():
            quicklook_url = row['QuicklookUrl']
            # Name the quicklook file after the product for readability
            file_name = f"{row['Name']}_quicklook.jpg"
            save_path = os.path.join(quicklook_dir, file_name)
            # Download to a temporary name first so an interrupted transfer never
            # leaves a truncated file that a later run would skip as "already exists"
            partial_path = save_path + '.part'

            if os.path.exists(save_path):
                print(f"快视图已存在，跳过: {file_name}")
                continue

            try:
                print(f"正在下载快视图: {file_name}")
                # The context manager releases the connection; the timeout avoids hanging
                with requests.get(quicklook_url, stream=True, timeout=60) as ql_response:
                    ql_response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in ql_response.iter_content(chunk_size=8192):
                            f.write(chunk)
                # Only a completely written file gets the final name
                os.replace(partial_path, save_path)
                print(f"快视图下载成功: {file_name}")
            except requests.exceptions.RequestException as e:
                print(f"下载快视图失败: {file_name}. 错误: {e}")
                # Discard whatever was written before the failure
                if os.path.exists(partial_path):
                    os.remove(partial_path)

except KeyError:
    print("错误: 'Assets' 字段不存在，无法获取快视图。请确保检索URL中包含 '&$expand=Assets'。")


开始下载快视图...
快视图已存在，跳过: S2B_MSIL1C_20250520T033539_N0511_R061_T48RTN_20250520T054401.SAFE_quicklook.jpg
快视图已存在，跳过: S2C_MSIL1C_20250415T033601_N0511_R061_T48RTN_20250415T071417.SAFE_quicklook.jpg
快视图已存在，跳过: S2B_MSIL1C_20250609T033539_N0511_R061_T48RTN_20250609T054504.SAFE_quicklook.jpg
快视图已存在，跳过: S2A_MSIL1C_20250626T034201_N0511_R061_T48RTN_20250626T060016.SAFE_quicklook.jpg
快视图已存在，跳过: S2B_MSIL1C_20250510T033539_N0511_R061_T48RTN_20250510T054318.SAFE_quicklook.jpg
快视图已存在，跳过: S2B_MSIL1C_20250420T033539_N0511_R061_T48RTN_20250420T054304.SAFE_quicklook.jpg
快视图已存在，跳过: S2C_MSIL1C_20250405T033601_N0511_R061_T48RTN_20250405T071222.SAFE_quicklook.jpg
快视图已存在，跳过: S2C_MSIL1C_20250604T033601_N0511_R061_T48RTN_20250604T070922.SAFE_quicklook.jpg
快视图已存在，跳过: S2A_MSIL1C_20250606T034201_N0511_R061_T48RTN_20250606T060459.SAFE_quicklook.jpg
快视图已存在，跳过: S2A_MSIL1C_20250407T034201_N0511_R061_T48RTN_20250407T071654.SAFE_quicklook.jpg
快视图已存在，跳过: S2C_MSIL1C_20250525T033551_N0511_R061_T48RTN_20250525T055818.SAFE_q

# 获取访问令牌 (Access Token)

In [8]:
def get_access_token(username: str, password: str, timeout: float = 30) -> Optional[str]:
    """
    Request an access token from the Copernicus Data Space identity service
    using the password grant.

    Args:
        username: Account user name (e-mail address).
        password: Account password.
        timeout: Seconds to wait for the identity service before giving up.

    Returns:
        The access token string, or None when the request fails or the
        response does not contain an "access_token" field. Failures are
        reported with print() rather than raised.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
            timeout=timeout,  # never block forever on an unresponsive server
        )
        r.raise_for_status()
        access_token = r.json()["access_token"]
    except requests.exceptions.RequestException as e:
        print(f"获取 Access Token 失败: {e}")
        # HTTP errors carry the server response, which usually explains the rejection
        response = getattr(e, 'response', None)
        if response is not None:
            print(f"服务器响应: {response.text}")
        return None
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, or was JSON without an "access_token" entry
        print(f"获取 Access Token 失败: 服务器响应无法解析 ({e!r})")
        return None

    # Report success only once a token has actually been extracted
    print("成功获取新的 Access Token。")
    return access_token

# 下载完整的卫星数据

In [None]:
print("\n开始下载完整的卫星数据产品...")

# TLS certificate verification stays enabled by default because the request carries
# the bearer token. Set this to True only if the local wget build cannot validate
# the server certificate.
allow_insecure_tls = False

data_id_list = df.Id.tolist()
data_name_list = df.Name.tolist()

for data_id, data_name in zip(data_id_list, data_name_list):
    output_path = os.path.join(output_dir, data_name + '.zip')
    # wget writes to a temporary name; the final name appears only after a complete
    # download, so a failed transfer is never mistaken for a finished product later
    partial_path = output_path + '.part'

    if os.path.exists(output_path):
        print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 文件已存在，跳过: {data_name}")
        continue

    # Fetch a fresh token before every download so it does not expire mid-run
    # (the original note states a token lifetime of 10 minutes)
    access_token = get_access_token(email, password)
    if not access_token:
        print("无法获取Access Token，跳过本次下载。")
        continue

    # Build the wget command
    download_url = f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products({data_id})/$value"
    command = ['wget', '--header', f'Authorization: Bearer {access_token}']
    if allow_insecure_tls:
        command.append('--no-check-certificate')
    command += ['-O', partial_path, download_url]

    try:
        print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 开始下载: {data_name}")
        # Output is not captured, so wget's live progress is shown on the console
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print("错误: 'wget' 命令未找到。请确保您已安装wget并将其添加到了系统环境变量中。")
        sys.exit()
    except subprocess.CalledProcessError:
        # wget has already printed its own error details; drop the incomplete file
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"\n[{datetime.datetime.now().strftime('%H:%M:%S')}] 下载失败: {data_name}")
    else:
        # Promote the finished download to its final name
        os.replace(partial_path, output_path)
        print(f"\n[{datetime.datetime.now().strftime('%H:%M:%S')}] 下载成功: {data_name}")

print("\n所有任务完成。")



开始下载完整的卫星数据产品...
[16:25:44] 文件已存在，跳过: S2B_MSIL1C_20250520T033539_N0511_R061_T48RTN_20250520T054401.SAFE
成功获取新的 Access Token。
[16:25:45] 开始下载: S2C_MSIL1C_20250415T033601_N0511_R061_T48RTN_20250415T071417.SAFE

[16:28:28] 下载成功: S2C_MSIL1C_20250415T033601_N0511_R061_T48RTN_20250415T071417.SAFE
成功获取新的 Access Token。
[16:28:29] 开始下载: S2B_MSIL1C_20250609T033539_N0511_R061_T48RTN_20250609T054504.SAFE
