# 导入必要的库

In [8]:
import datetime
import getpass
import json
import os
import subprocess
import sys
from typing import Optional

import pandas as pd
import requests

# 基础设置

In [2]:
# 1 Acquisition date window (YYYY-MM-DD); both values are spliced into the OData time filter
startDate='2023-09-25'
endDate  ='2023-10-14'

# 2 Satellite collection to query
satellite='SENTINEL-3'

# 3 Substring that product names must contain; for Sentinel-3 this selects the EFR products
contains_str='EFR'

# 4 Region of interest as a GeoJSON file; one can be drawn at https://geojson.io/#map=5.12/34.13/122.8
roi_geojson='mapdc.geojson'

# 5 Directory that downloaded data is written to
output_dir='F:/LakeTH/DataBur/'

# 6 Copernicus Data Space account (https://dataspace.copernicus.eu/).
#   Credentials must never be stored in the notebook: they are taken from the
#   CDSE_EMAIL / CDSE_PASSWORD environment variables, and the user is prompted
#   (password input hidden) only when a variable is missing or empty.
email = os.environ.get("CDSE_EMAIL") or input("Copernicus Data Space 账号(邮箱): ")
password = os.environ.get("CDSE_PASSWORD") or getpass.getpass("Copernicus Data Space 密码: ")

# Make sure the output directory exists; the message is printed only when it is created here
if not os.path.exists(output_dir):
    os.makedirs(output_dir, exist_ok=True)
    print(f"创建目录: {output_dir}")

# 处理ROI地理信息

In [3]:
# Read the ROI polygon from the GeoJSON file and format its ring as the
# "lon lat, lon lat, ..." string that is later placed inside POLYGON((...)).
# Only the first ring of the first feature is used.
try:
    # Open explicitly as UTF-8 instead of relying on the platform default
    # encoding, which differs between operating systems and locales.
    with open(roi_geojson, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)
    coordinates = geojson_data['features'][0]['geometry']['coordinates'][0]

    # Join the vertices into the query string: "x y, x y, ..."
    coordinates_str = ', '.join(f"{coord[0]} {coord[1]}" for coord in coordinates)

    # The ring must be closed: repeat the first vertex when the file omits it
    if coordinates[0] != coordinates[-1]:
        coordinates_str += f", {coordinates[0][0]} {coordinates[0][1]}"

    print("ROI坐标处理完成。")
except FileNotFoundError:
    print(f"错误: GeoJSON 文件未找到: {roi_geojson}")
    sys.exit()
except json.JSONDecodeError as e:
    # The file exists but is not parseable JSON
    print(f"错误: GeoJSON 文件不是有效的 JSON: {roi_geojson}。错误详情: {e}")
    sys.exit()
except (KeyError, IndexError, TypeError) as e:
    # TypeError covers geometries whose 'coordinates' are not nested lists
    # (e.g. a Point), which cannot be iterated as a ring of vertices.
    print(f"错误: GeoJSON 文件格式不正确。请确保它包含有效的多边形坐标。错误详情: {e}")
    sys.exit()

ROI坐标处理完成。


# 生成检索链接

In [4]:
# Base endpoint of the catalogue; the filter expression is appended to it
base_prefix="https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="

# Individual filter conditions
str_in_name=f"contains(Name,'{contains_str}')"
collection=f"Collection/Name eq '{satellite}'"
roi=f"OData.CSC.Intersects(area=geography'SRID=4326;POLYGON(({coordinates_str}))') "
# Both bounds are strict and taken at 00:00:00 UTC, so products that start on
# endDate itself are NOT matched; move endDate one day later to include them.
time_range=f"ContentDate/Start gt {startDate}T00:00:00.000Z and ContentDate/Start lt {endDate}T00:00:00.000Z"

# Query options: result limit, and inline expansion of each product's Assets
# (the quicklook cell reads the 'Assets' column, so the expansion is required)
search_lim="&$top=1000"
expand_assets="&$expand=Assets"

# Final query URL; the filter conditions must be joined with " and "
request_url=base_prefix+" and ".join([str_in_name, collection, roi, time_range])+search_lim+expand_assets

# The request itself is intentionally not sent here. This cell used to fire an
# unguarded copy of the query (no status check, no timeout, KeyError when the
# response has no 'value'); the cell that follows performs the same query with
# error handling and defines `df`, and the download cell builds the id/name
# lists from that `df`.

# 执行检索并显示结果

In [5]:
# Send the catalogue query built in `request_url` and load the hits into `df`.
print("\n正在向服务器发送检索请求...")
try:
    # A timeout keeps the cell from hanging forever on a stalled connection
    response = requests.get(request_url, timeout=60)
    response.raise_for_status()  # raises for HTTP error statuses such as 404 or 500
    JSON = response.json()

    if 'value' not in JSON or not JSON['value']:
        print('未查询到任何数据，请检查您的检索条件。')
        sys.exit()

    df = pd.DataFrame.from_dict(JSON['value'])
    print(f'查询成功！共找到 {len(df)} 条数据。')

    # Preview a few key columns of the result
    columns_to_print = ['Id', 'Name', 'ContentDate', 'S3Path']
    print("\n数据检索结果预览:")
    print(df[columns_to_print].head())

# The JSON handler must come first: in recent requests releases the error
# raised by response.json() is also a RequestException, so with the handlers
# in the opposite order it would be reported as a network failure.
except json.JSONDecodeError:
    print("无法解析服务器返回的JSON数据，请检查API状态或URL。")
    sys.exit()
except requests.exceptions.RequestException as e:
    print(f"网络请求失败: {e}")
    sys.exit()



正在向服务器发送检索请求...
查询成功！共找到 40 条数据。

数据检索结果预览:
                                     Id  \
0  6cd7898e-2410-3e08-9ee3-28f651c9c8e5   
1  55e11c8e-4536-3c20-acf7-cee882d5fef5   
2  cfdd0f5e-7c37-339e-8cdc-690cfae09a70   
3  e389e0b3-b096-36ba-9f08-7fe713c66bbb   
4  493379f2-520f-3fb4-b16d-6b62043da177   

                                                Name  \
0  S3A_OL_1_EFR____20231009T033125_20231009T03342...   
1  S3A_OL_1_EFR____20231013T032740_20231013T03304...   
2  S3A_OL_1_EFR____20230925T025359_20230925T02565...   
3  S3A_OL_1_EFR____20231002T031242_20231002T03154...   
4  S3B_OL_1_EFR____20231008T031839_20231008T03213...   

                                         ContentDate  \
0  {'Start': '2023-10-09T03:31:25.163277Z', 'End'...   
1  {'Start': '2023-10-13T03:27:40.162155Z', 'End'...   
2  {'Start': '2023-09-25T02:53:59.396142Z', 'End'...   
3  {'Start': '2023-10-02T03:12:42.354396Z', 'End'...   
4  {'Start': '2023-10-08T03:18:39.197551Z', 'End'...   

                      

# 下载快视图

In [6]:
# Download the quicklook image of every product in `df` into <output_dir>/quicklooks.
print("\n开始下载快视图...")
quicklook_dir = os.path.join(output_dir, 'quicklooks')
if not os.path.exists(quicklook_dir):
    os.makedirs(quicklook_dir)
    print(f"创建快视图目录: {quicklook_dir}")


def _first_quicklook_link(assets):
    """Return the DownloadLink of the first asset whose Type is 'QUICKLOOK', else None."""
    # A product without expanded assets may carry a non-list value (e.g. NaN)
    if not isinstance(assets, list):
        return None
    for asset in assets:
        if asset.get('Type') == 'QUICKLOOK':
            return asset.get('DownloadLink')
    return None


# Extract the quicklook download links
try:
    df['QuicklookUrl'] = df['Assets'].apply(_first_quicklook_link)

    quicklook_downloads = df[df['QuicklookUrl'].notna()]

    if quicklook_downloads.empty:
        print("未在检索结果中找到可用的快视图。")
    else:
        for _, row in quicklook_downloads.iterrows():
            quicklook_url = row['QuicklookUrl']
            # Name the file after the product so it is easy to recognise
            file_name = f"{row['Name']}_quicklook.jpg"
            save_path = os.path.join(quicklook_dir, file_name)
            # Data is streamed into a temporary file and renamed only once the
            # download is complete; an interrupted transfer therefore never
            # leaves a truncated .jpg that the "already exists" check would skip.
            partial_path = save_path + '.part'

            if os.path.exists(save_path):
                print(f"快视图已存在，跳过: {file_name}")
                continue

            try:
                print(f"正在下载快视图: {file_name}")
                # The context manager releases the streamed connection in every case
                with requests.get(quicklook_url, stream=True, timeout=60) as ql_response:
                    ql_response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in ql_response.iter_content(chunk_size=8192):
                            f.write(chunk)
                os.replace(partial_path, save_path)
                print(f"快视图下载成功: {file_name}")
            except requests.exceptions.RequestException as e:
                # Drop whatever was written before the failure
                if os.path.exists(partial_path):
                    os.remove(partial_path)
                print(f"下载快视图失败: {file_name}. 错误: {e}")

except KeyError:
    print("错误: 'Assets' 字段不存在，无法获取快视图。请确保检索URL中包含 '&$expand=Assets'。")


开始下载快视图...
正在下载快视图: S3A_OL_1_EFR____20231009T033125_20231009T033425_20240919T215613_0179_104_175_2520_MAR_R_NT_004.SEN3_quicklook.jpg
快视图下载成功: S3A_OL_1_EFR____20231009T033125_20231009T033425_20240919T215613_0179_104_175_2520_MAR_R_NT_004.SEN3_quicklook.jpg
正在下载快视图: S3A_OL_1_EFR____20231013T032740_20231013T033040_20240919T220947_0179_104_232_2520_MAR_R_NT_004.SEN3_quicklook.jpg
快视图下载成功: S3A_OL_1_EFR____20231013T032740_20231013T033040_20240919T220947_0179_104_232_2520_MAR_R_NT_004.SEN3_quicklook.jpg
正在下载快视图: S3A_OL_1_EFR____20230925T025359_20230925T025659_20240919T191511_0179_103_360_2520_MAR_R_NT_004.SEN3_quicklook.jpg
快视图下载成功: S3A_OL_1_EFR____20230925T025359_20230925T025659_20240919T191511_0179_103_360_2520_MAR_R_NT_004.SEN3_quicklook.jpg
正在下载快视图: S3A_OL_1_EFR____20231002T031242_20231002T031542_20240919T211337_0179_104_075_2520_MAR_R_NT_004.SEN3_quicklook.jpg
快视图下载成功: S3A_OL_1_EFR____20231002T031242_20231002T031542_20240919T211337_0179_104_075_2520_MAR_R_NT_004.SEN3_quicklook.jpg
正在下载

# 获取访问令牌

In [9]:
def get_access_token(username: str, password: str) -> Optional[str]:
    """
    Request an access token from the Copernicus Data Space identity service.

    Sends a password-grant request for the public ``cdse-public`` client.

    Args:
        username: Account name (e-mail address) of the Copernicus Data Space user.
        password: Password of that account.

    Returns:
        The access token string, or ``None`` when the request fails, the
        response is not valid JSON, or it carries no ``access_token`` field.
        Failures are reported on stdout and never raised.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
            timeout=30,  # do not hang forever on a stalled connection
        )
        r.raise_for_status()
        payload = r.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers an unparseable body on requests versions whose
        # JSON error is not a RequestException.
        print(f"获取 Access Token 失败: {e}")
        response = getattr(e, 'response', None)
        if response is not None:
            print(f"服务器响应: {response.text}")
        return None

    token = payload.get("access_token") if isinstance(payload, dict) else None
    if not token:
        print("获取 Access Token 失败: 服务器响应中缺少 access_token 字段。")
        return None

    # Report success only once the token has actually been extracted
    print("成功获取新的 Access Token。")
    return token

# 下载完整影像

In [None]:
# Download every product listed in `df` as <output_dir>/<product name>.zip using wget.
print("\n开始下载完整的卫星数据产品...")

data_id_list = df.Id.tolist()
data_name_list = df.Name.tolist()

for data_id, data_name in zip(data_id_list, data_name_list):
    output_path = os.path.join(output_dir, data_name + '.zip')
    # wget creates its -O target even when the transfer fails, so it writes to
    # a temporary name; only a completed download is renamed to the final
    # .zip. A failed run therefore never leaves a partial file that the
    # "already exists" check below would skip on the next run.
    partial_path = output_path + '.part'

    if os.path.exists(output_path):
        print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 文件已存在，跳过: {data_name}")
        continue

    # Fetch a fresh token before every download so that it cannot expire
    # between files (the original note gives a token lifetime of 10 minutes)
    access_token = get_access_token(email, password)
    if not access_token:
        print("无法获取Access Token，跳过本次下载。")
        continue

    # Build the wget command. Certificate checking stays enabled: the request
    # carries a bearer token, which must not be sent over an unverified
    # TLS connection.
    download_url = f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products({data_id})/$value"
    command = [
        'wget',
        '--header', f'Authorization: Bearer {access_token}',
        '-O', partial_path,
        download_url
    ]

    try:
        print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 开始下载: {data_name}")
        # Output is not captured, so wget's live progress bar, speed and
        # remaining time are shown directly
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print("错误: 'wget' 命令未找到。请确保您已安装wget并将其添加到了系统环境变量中。")
        sys.exit()
    except subprocess.CalledProcessError:
        # wget has already printed its own error; remove the incomplete file
        # and record the failure
        if os.path.exists(partial_path):
            os.remove(partial_path)
        print(f"\n[{datetime.datetime.now().strftime('%H:%M:%S')}] 下载失败: {data_name}")
    else:
        os.replace(partial_path, output_path)
        print(f"\n[{datetime.datetime.now().strftime('%H:%M:%S')}] 下载成功: {data_name}")

print("\n所有任务完成。")



开始下载完整的卫星数据产品...
成功获取新的 Access Token。
[21:25:08] 开始下载: S3A_OL_1_EFR____20231009T033125_20231009T033425_20240919T215613_0179_104_175_2520_MAR_R_NT_004.SEN3

[21:25:59] 下载成功: S3A_OL_1_EFR____20231009T033125_20231009T033425_20240919T215613_0179_104_175_2520_MAR_R_NT_004.SEN3
成功获取新的 Access Token。
[21:26:00] 开始下载: S3A_OL_1_EFR____20231013T032740_20231013T033040_20240919T220947_0179_104_232_2520_MAR_R_NT_004.SEN3
