In [1]:
from intake import open_catalog
import pandas as pd
import numpy as np
from pystac_client import Client as psc
import stackstac
import rasterio.features
from pyproj import CRS
from dotenv import load_dotenv
import os
from datetime import datetime, timedelta
from dask.distributed import Client, LocalCluster, Variable
import dask
import dask_geopandas as dg

load_dotenv()

True

In [2]:
#### dask worker tcp://172.20.12.11:8786 --nthreads=12

In [3]:
URL = "https://earth-search.aws.element84.com/v1"

collection = "SENTINEL-2"
collect_amazon ='sentinel-2-l2a'
collect_EE = 'COPERNICUS/S2_SR_HARMONIZED'

date_start=datetime(2024,1,1)
date_end=datetime(2024,5,31)

catalog_path = f'{os.getenv("PROJECT_PATH")}/data_reference_feux.yaml'
table_source="sentinel_surfaces_detectees"

In [4]:
def getDaskClient(local=False):

    def createClient(schedulerIp="172.20.12.11:8786"):
        client = Client(schedulerIp)
        return client

    if local :
        # Démarrer un cluster local avec 4 cœurs
        cluster = LocalCluster(n_workers=2,threads_per_worker=8,silence_logs='DEBUG',memory_limit='20GB', timeout="60s",heartbeat_interval="10s")

        client = Client(cluster)
        return client


    if 'client' in globals():   
        if client.scheduler != None:
        # La variable client existe dans l'espace de noms global
            return client
        else:
            schedulerIp = os.getenv("SCHEDULER_IP")
            if schedulerIp:
                client = createClient(schedulerIp)
            else:
                client = createClient(schedulerIp)
                
    else:
        schedulerIp = os.getenv("SCHEDULER_IP")
        if schedulerIp:
            client = createClient(schedulerIp)
        else:
            client = createClient()

    return client

client = getDaskClient(local=False)
client


+---------+--------+-----------+---------+
| Package | Client | Scheduler | Workers |
+---------+--------+-----------+---------+
| tornado | 6.4    | 6.4.1     | None    |
+---------+--------+-----------+---------+


0,1
Connection method: Direct,
Dashboard: http://172.20.12.11:8787/status,

0,1
Comm: tcp://172.28.0.6:8786,Workers: 0
Dashboard: http://172.28.0.6:8787/status,Total threads: 0
Started: 3 days ago,Total memory: 0 B


In [5]:
sql = f"""SELECT *
FROM feux_cq.{table_source} si
WHERE si.date_ >= '{pd.to_datetime(date_start).strftime('%Y-%m-%d')}' AND si.date_ <= '{pd.to_datetime(date_end).strftime('%Y-%m-%d')}'
"""

catalog = open_catalog(catalog_path)
dataCatalog = getattr(catalog, table_source)(sql_expr=sql)
data = dataCatalog.read()

data['date_'] = pd.to_datetime(data['date_'], format='%Y-%m-%d').dt.date
data_cloud = data.to_crs(epsg=4326)

data_cloud = data_cloud.explode()
data_cloud=data_cloud.reset_index(drop=True)
#data_cloud=data_cloud[:1000]

#data_cloud=data_cloud[data_cloud['surface_id_h3']=='L2A_T58KDC_20240225_8f9f5ac2ec85073']

  data_cloud = data_cloud.explode()


In [6]:
def find_image_stac(bbox, dates):
    import stackstac
    from pystac_client import Client as psc

    client = psc.open(URL)
    search = client.search(
        collections=[collect_amazon],
        bbox=bbox,
        datetime=dates)

    print(f"{search.matched()} scenes Sentinel-2 L2A trouvées dans l'intervalle temporel")
    items = search.item_collection()
    
    if len(items) == 0:
        return None, None
    
    sentinel_stack = stackstac.stack(
        items,
        bounds_latlon=[bbox[0], bbox[1], bbox[2], bbox[3]],
        gdal_env=stackstac.DEFAULT_GDAL_ENV.updated(
            {'GDAL_HTTP_MAX_RETRY': 3,
             'GDAL_HTTP_RETRY_DELAY': 5,
            }),
        epsg=4326
    ).rename({'x': 'lon', 'y': 'lat'})
    
    data_indices = sentinel_stack.sel(band=["scl"]).to_dataset(dim='band')
    return data_indices, sentinel_stack

In [7]:
def process_partition(partition):
    row = partition.iloc[0]

    date_ = pd.to_datetime(row['date_'], format='%Y-%m-%d').date()
    print("date réelle", date_)
    bbox = row["geometry"].bounds
    datemin = date_.strftime('%Y-%m-%d')
    datemax = (date_ + timedelta(days=1)).strftime('%Y-%m-%d')
    dates = f'{datemin}/{datemax}'

    data_indices, sentinel_stack = find_image_stac(bbox, dates)
    if sentinel_stack is not None and 'time' in sentinel_stack.coords:
        time_index = sentinel_stack.coords['time'].values
        selected_time_index = time_index[0]
        print("date récupérée", selected_time_index)

        scl_data = sentinel_stack.sel(time=selected_time_index, band="scl").values
        #print(scl_data)

        if hasattr(sentinel_stack, 'transform'):
            transform = sentinel_stack.transform
            polygon_mask = rasterio.features.geometry_mask([row.geometry], out_shape=scl_data.shape, transform=transform, invert=True)
            cloud_classes = [3, 8, 9]  
            masked_scl_data = np.where(polygon_mask, scl_data, np.nan) 

            cloud_mask = np.isin(masked_scl_data, cloud_classes)
            
            cloud_coverage = np.nansum(cloud_mask) / masked_scl_data.size
        else:
            cloud_coverage = np.nan
    else:
        cloud_coverage = np.nan

    partition['cloud_coverage'] = cloud_coverage
    return partition

nb_line = len(data_cloud)
offset = 0
limit = 12

results = []

if offset >= 0 and limit > 0:
    while offset < nb_line:
        upper_bound = min(offset + limit, nb_line)
        gdf_chunk = data_cloud.iloc[offset:upper_bound]

        dask_gdf = dg.from_geopandas(gdf_chunk, npartitions=20)
        
        meta = gdf_chunk.iloc[:0].copy()
        meta['cloud_coverage'] = np.nan

        result = dask_gdf.map_partitions(process_partition, meta=meta)

        result_computed = result.compute()
        results.append(result_computed)

        offset += limit

result_combined = pd.concat(results)
data_cloud = data_cloud.merge(result_combined[['geometry', 'cloud_coverage']], on='geometry', how='left')

print(data_cloud)

                                 nom       province      commune   surface  \
0     SENTINEL2A_20240323_L2A_T58KDB  Province Nord         POYA  1.630962   
1     SENTINEL2A_20240323_L2A_T58KFA   Province Sud   BOULOUPARI  2.780932   
2     SENTINEL2A_20240323_L2A_T58KFA   Province Sud   BOULOUPARI  4.061437   
3     SENTINEL2A_20240323_L2A_T58KFA   Province Sud   BOULOUPARI  1.120328   
4     SENTINEL2A_20240522_L2A_T58KEB   Province Sud       LA FOA  2.771176   
...                              ...            ...          ...       ...   
7296  SENTINEL2A_20240323_L2A_T58KDB  Province Nord    POUEMBOUT  1.240771   
7297  SENTINEL2A_20240323_L2A_T58KDB  Province Nord    POUEMBOUT  1.490941   
7298  SENTINEL2A_20240415_L2A_T58KDC  Province Nord  KAALA GOMEN  1.190895   
7299  SENTINEL2A_20240323_L2A_T58KDB  Province Nord    POUEMBOUT  3.872484   
7300  SENTINEL2A_20240323_L2A_T58KDB  Province Nord         POYA  2.071275   

           date_                        surface_id_h3 qualifica

In [8]:
final=data_cloud
final.drop_duplicates(subset=['surface_id_h3'], inplace=True)

upper_zero=final[final['cloud_coverage']>0 ]
upper_zero=upper_zero.reset_index(drop=True)
surface_id_h3=upper_zero['surface_id_h3']

In [9]:
upper_zero

Unnamed: 0,nom,province,commune,surface,date_,surface_id_h3,qualification,geometry,cloud_coverage
0,SENTINEL2A_20240323_L2A_T58KFA,Province Sud,BOULOUPARI,1.060285,2024-03-23,L2A_T58KFA_20240323_8f9f51ca9c59498,,"POLYGON ((166.10489 -21.89071, 166.10499 -21.8...",0.136029
1,SENTINEL2B_20240101_L2A_T58KDC,Province Nord,KOUMAC,2.011438,2024-01-01,L2A_T58KDC_20240101_8f9f5ac01b124ae,,"POLYGON ((164.25886 -20.52161, 164.25905 -20.5...",0.014706
2,SENTINEL2A_20240106_L2A_T58KDC,Province Nord,KOUMAC,217.015062,2024-01-06,L2A_T58KDC_20240106_8f9f5ad0b8f1c02,,"POLYGON ((164.33722 -20.58843, 164.33741 -20.5...",0.000038
3,SENTINEL2B_20240207_L2A_T58KFA,Province Sud,PAITA,1.970505,2024-02-07,L2A_T58KFA_20240207_8f9f51d1eb33d1d,,"POLYGON ((166.24665 -22.04163, 166.24675 -22.0...",0.008242
4,SENTINEL2A_20240323_L2A_T58KEB,Province Nord,KOUAOUA,1.400537,2024-03-23,L2A_T58KEB_20240323_8f9f5148a2e2b95,,"POLYGON ((165.84442 -21.44280, 165.84461 -21.4...",0.064516
...,...,...,...,...,...,...,...,...,...
297,SENTINEL2B_20240530_L2A_T58KCC,Province Nord,POUM,1.311024,2024-05-30,L2A_T58KCC_20240530_8f9f5a546244123,,"POLYGON ((164.03435 -20.28921, 164.03445 -20.2...",0.466667
298,SENTINEL2A_20240225_L2A_T58KDC,Province Nord,OUEGOA,1.721631,2024-02-25,L2A_T58KDC_20240225_8f9e2d34b873170,,"POLYGON ((164.39449 -20.29832, 164.39468 -20.2...",0.037143
299,SENTINEL2B_20240111_L2A_T58KDC,Province Nord,OUEGOA,2.051834,2024-01-11,L2A_T58KDC_20240111_8f9f5ad914ae301,,"POLYGON ((164.45790 -20.40741, 164.45847 -20.4...",0.104218
300,SENTINEL2B_20240111_L2A_T58KDC,Province Nord,KAALA GOMEN,1.320876,2024-01-11,L2A_T58KDC_20240111_8f9f5e6f52e0a60,,"POLYGON ((164.43089 -20.78945, 164.43128 -20.7...",0.016667


In [10]:
from sqlalchemy import create_engine, text
conex = create_engine(f'postgresql://{os.getenv("DB_USER")}:{os.getenv("DB_PWD")}@{os.getenv("DB_HOST")}:{os.getenv("DB_PORT")}/{os.getenv("DB_WORKSPACE")}')
sql_query = f"DELETE FROM feux_cq.{table_source} WHERE surface_id_h3 = :surface_id"

with conex.begin() as conn:
    for surface_id in surface_id_h3:
        conn.execute(text(sql_query), {'surface_id': surface_id})