In [1]:
import os
from datetime import datetime, timedelta
from urllib.parse import quote_plus

from intake import open_catalog
import pandas as pd
import numpy as np
from pystac_client import Client as psc
import stackstac
import rasterio.features
from pyproj import CRS
from dotenv import load_dotenv
from dask.distributed import Client, LocalCluster, Variable
import dask
import dask_geopandas as dg
from sqlalchemy import create_engine, text

# Load the variables of a local .env file into the process environment.
# Later cells read PROJECT_PATH, SCHEDULER_IP and the DB_* settings through os.getenv.
load_dotenv()

True

In [2]:
# To attach a worker to the remote scheduler, run this in a shell:
#   dask worker tcp://172.20.12.11:8786 --nthreads=12

In [3]:
# STAC API endpoint opened by find_image_stac.
URL = "https://earth-search.aws.element84.com/v1"

# Collection identifiers. Only collect_amazon is used by the STAC search in this
# notebook; the other two are not referenced in the visible cells.
collection = "SENTINEL-2"
collect_amazon = "sentinel-2-l2a"
collect_EE = "COPERNICUS/S2_SR_HARMONIZED"

# Date window referenced by the (currently commented-out) date-range query.
date_start = datetime(year=2023, month=12, day=1)
date_end = datetime(year=2023, month=12, day=31)

# Intake catalogue file and the name of the catalogue entry / database table to read.
catalog_path = "{}/data_reference_feux.yaml".format(os.getenv("PROJECT_PATH"))
table_source = "sentinel_surfaces_detectees"

In [4]:
def getDaskClient(local=False):
    """Return a Dask distributed client.

    Parameters
    ----------
    local : bool, default False
        When True, start a new LocalCluster (2 workers x 8 threads, 20GB per
        worker) and return a client connected to it.

    Returns
    -------
    Client
        With ``local=False``: the module-level ``client`` if one already exists
        and still has a scheduler handle; otherwise a new client connected to
        the address in the SCHEDULER_IP environment variable, falling back to
        the default scheduler address when that variable is unset or empty.
    """
    default_scheduler = "172.20.12.11:8786"

    if local:
        # Start a local cluster: 2 worker processes with 8 threads each.
        cluster = LocalCluster(n_workers=2,threads_per_worker=8,silence_logs='DEBUG',memory_limit='20GB', timeout="60s",heartbeat_interval="10s")
        return Client(cluster)

    # Look the global up explicitly. Reading a bare `client` here would fail:
    # a name assigned anywhere in the function is local, so the read would
    # raise UnboundLocalError before any assignment happened.
    existing = globals().get('client')
    if existing is not None and getattr(existing, 'scheduler', None) is not None:
        return existing

    # No reusable client: connect to the configured scheduler, or the default one.
    return Client(os.getenv("SCHEDULER_IP") or default_scheduler)

# Connect to the remote scheduler (local=False); the bare name on the last line
# displays the client summary as the cell output.
client = getDaskClient(local=False)
client

0,1
Connection method: Direct,
Dashboard: http://172.20.12.11:8787/status,

0,1
Comm: tcp://172.28.0.5:8786,Workers: 0
Dashboard: http://172.28.0.5:8787/status,Total threads: 0
Started: 1 day ago,Total memory: 0 B


In [5]:
# Alternative query, kept for reference: every surface detected between date_start and date_end.
#sql = f"""SELECT *
#FROM feux_cq.{table_source} si
#WHERE si.date_ >= '{pd.to_datetime(date_start).strftime('%Y-%m-%d')}' AND si.date_ <= '{pd.to_datetime(date_end).strftime('%Y-%m-%d')}'
#"""

# Active query: a single surface, selected by its identifier.
sql = f"""SELECT *
FROM feux_cq.{table_source} si
WHERE si.surface_id_h3 = 'L2A_T58KDB_20231224_8f9f5e5593a6982'
"""

# Read the table through the intake catalogue entry named after table_source.
catalog = open_catalog(catalog_path)
dataCatalog = getattr(catalog, table_source)(sql_expr=sql)
data = dataCatalog.read()

# Keep only the calendar date of each detection.
data['date_'] = pd.to_datetime(data['date_'], format='%Y-%m-%d').dt.date

# Reproject to EPSG:4326, split multi-part geometries into one row per part and
# renumber the rows. index_parts is passed explicitly so geopandas does not emit
# its warning about the changing default; the index is rebuilt right after, so
# the resulting frame is the same as before.
data_cloud = (
    data.to_crs(epsg=4326)
    .explode(index_parts=False)
    .reset_index(drop=True)
)

  data_cloud = data_cloud.explode()


In [6]:
def find_image_stac(bbox, dates):
    """Search the STAC API for scenes over ``bbox`` and stack them.

    Parameters
    ----------
    bbox : sequence
        Bounding box whose first four items are passed both to the STAC search
        and to ``stackstac.stack`` as ``bounds_latlon``.
    dates : str
        Value forwarded as the ``datetime`` filter of the STAC search.

    Returns
    -------
    tuple
        ``(None, None)`` when the search returns no item; otherwise
        ``(scl_dataset, sentinel_stack)`` where ``sentinel_stack`` is the stacked
        array with ``x``/``y`` renamed to ``lon``/``lat`` and ``scl_dataset`` is
        its ``scl`` band converted to a dataset.
    """
    stac_api = psc.open(URL)
    query = stac_api.search(collections=[collect_amazon], bbox=bbox, datetime=dates)

    print(f"{query.matched()} scenes Sentinel-2 L2A trouvées dans l'intervalle temporel")
    scenes = query.item_collection()

    # Nothing to stack: let the caller decide how to handle the gap.
    if len(scenes) == 0:
        return None, None

    latlon_bounds = [bbox[0], bbox[1], bbox[2], bbox[3]]
    # Default GDAL settings plus HTTP retry options.
    gdal_settings = stackstac.DEFAULT_GDAL_ENV.updated(
        {'GDAL_HTTP_MAX_RETRY': 3,
         'GDAL_HTTP_RETRY_DELAY': 5,
        })
    stacked = stackstac.stack(
        scenes,
        bounds_latlon=latlon_bounds,
        gdal_env=gdal_settings,
        epsg=4326,
    )
    sentinel_stack = stacked.rename({'x': 'lon', 'y': 'lat'})

    scl_dataset = sentinel_stack.sel(band=["scl"]).to_dataset(dim='band')
    return scl_dataset, sentinel_stack

In [9]:
def _cloud_fraction(scl_data, cloud_classes):
    """Return the share of pixels of ``scl_data`` whose value is in ``cloud_classes``."""
    if scl_data.size == 0:
        return np.nan
    return np.sum(np.isin(scl_data, cloud_classes)) / scl_data.size


def _row_cloud_coverage(date_value, geometry, cloud_classes):
    """Return the cloud fraction over the bounds of ``geometry`` on ``date_value``, or NaN."""
    date_ = pd.to_datetime(date_value, format='%Y-%m-%d').date()
    bbox = geometry.bounds
    datemin = date_.strftime('%Y-%m-%d')
    datemax = (date_ + timedelta(days=1)).strftime('%Y-%m-%d')
    dates = f'{datemin}/{datemax}'

    _, sentinel_stack = find_image_stac(bbox, dates)

    if sentinel_stack is None or 'time' not in sentinel_stack.coords:
        return np.nan

    time_index = sentinel_stack.coords['time'].values
    if len(time_index) == 0:
        return np.nan

    # Only the first time step of the stack is evaluated.
    scl_data = sentinel_stack.sel(time=time_index[0], band="scl").values
    return _cloud_fraction(scl_data, cloud_classes)


def process_partition(partition, cloud_classes=(3, 8, 9, 10, 11)):
    """Add a ``cloud_coverage`` column to a partition of detected surfaces.

    For every row, the scene stack covering the bounds of ``geometry`` from
    ``date_`` to the following day is fetched with ``find_image_stac`` and the
    fraction of ``scl`` pixels belonging to ``cloud_classes`` is computed.

    Parameters
    ----------
    partition : DataFrame-like
        Must provide the columns ``date_`` and ``geometry`` (objects exposing
        ``.bounds``). It is not modified.
    cloud_classes : sequence of int, default (3, 8, 9, 10, 11)
        ``scl`` values counted as cloud-contaminated.
        NOTE(review): presumably the Sentinel-2 L2A scene-classification codes
        for cloud shadow, clouds, cirrus and snow/ice; confirm against the SCL legend.

    Returns
    -------
    DataFrame-like
        A copy of ``partition`` with a float ``cloud_coverage`` column; NaN
        where no scene or no time step was found. An empty partition yields an
        empty result that still carries the column.
    """
    # Evaluate every row: a partition may hold several surfaces.
    coverages = [
        _row_cloud_coverage(date_value, geometry, cloud_classes)
        for date_value, geometry in zip(partition['date_'], partition['geometry'])
    ]

    # Work on a copy so the caller's frame never gains the column as a side effect.
    result = partition.copy()
    result['cloud_coverage'] = np.asarray(coverages, dtype=float)
    return result

# Smoke test outside Dask. A copy is passed so this call cannot add a
# cloud_coverage column to data_cloud itself, which would make the later merge
# produce suffixed duplicate columns.
df = process_partition(data_cloud.copy())


1 scenes Sentinel-2 L2A trouvées dans l'intervalle temporel


  times = pd.to_datetime(


KeyboardInterrupt: 

In [None]:

# Compute cloud coverage for data_cloud in chunks of `limit` rows, each chunk
# being spread over Dask partitions and processed by process_partition.
nb_line = len(data_cloud)
offset = 0
limit = 12

results = []

# Start from a frame without any previous cloud_coverage column so the cell can
# be re-run without creating suffixed duplicates.
surfaces = data_cloud.drop(columns=['cloud_coverage'], errors='ignore')

# Empty template describing the output of process_partition for Dask.
meta = surfaces.iloc[:0].copy()
meta['cloud_coverage'] = np.nan

if offset >= 0 and limit > 0:
    while offset < nb_line:
        upper_bound = min(offset + limit, nb_line)
        gdf_chunk = surfaces.iloc[offset:upper_bound]

        # One partition per row so every surface is handled by its own task.
        dask_gdf = dg.from_geopandas(gdf_chunk, npartitions=len(gdf_chunk))

        result = dask_gdf.map_partitions(process_partition, meta=meta)

        result_computed = result.compute()
        results.append(result_computed)

        offset += limit

if results:
    result_combined = pd.concat(results)
    # Align on the row index rather than on geometry: identical geometries would
    # otherwise multiply rows in the merge.
    data_cloud = surfaces.join(result_combined['cloud_coverage'])
else:
    # Nothing was processed (empty input or invalid offset/limit): pd.concat
    # would raise on an empty list, so fall back to an all-NaN column.
    result_combined = meta
    data_cloud = surfaces.assign(cloud_coverage=np.nan)

data_cloud.head()

In [None]:
# Keep one row per surface. drop_duplicates returns a new frame, so data_cloud
# itself is left untouched and the cell can be re-run safely.
# NOTE(review): only the first row of each surface_id_h3 is kept, so for a
# surface split into several parts only that part's coverage is considered;
# confirm this is intended.
final = data_cloud.drop_duplicates(subset=['surface_id_h3'])

# Surfaces with a strictly positive cloud coverage (NaN rows are excluded by the comparison).
upper_zero = final[final['cloud_coverage'] > 0].reset_index(drop=True)
surface_id_h3 = upper_zero['surface_id_h3']

In [17]:
# Delete from the source table every surface listed in surface_id_h3.
# User name and password are percent-encoded so that special characters
# (e.g. '@', ':' or '/') cannot corrupt the connection URL.
conex = create_engine(
    f'postgresql://{quote_plus(os.getenv("DB_USER", ""))}:{quote_plus(os.getenv("DB_PWD", ""))}'
    f'@{os.getenv("DB_HOST")}:{os.getenv("DB_PORT")}/{os.getenv("DB_WORKSPACE")}'
)
# table_source is a notebook constant; the surface id is always sent as a bound parameter.
sql_query = f"DELETE FROM feux_cq.{table_source} WHERE surface_id_h3 = :surface_id"

delete_params = [{'surface_id': surface_id} for surface_id in surface_id_h3]

# Skip the transaction entirely when there is nothing to delete; otherwise send
# all deletions in a single batched execute inside one transaction.
if delete_params:
    with conex.begin() as conn:
        conn.execute(text(sql_query), delete_params)