### Script pour le controle des données brutes issues de la chaine INSIGHT
### Etude de conformité des fichiers

In [None]:
%%writefile ".env"

PROJECT_ID="feux_cq"

COMMUN_PATH = "N:/"
PATH_INFOCENTRE_APP = ${COMMUN_PATH}Informatique/SIG/Application/Jupyterhub/
PATH_ETUDE = ${COMMUN_PATH}Informatique/SIG/Donnees/Oeil/Traitement_Donnees/SURFACES_BRULEES_SENTINEL/2022/220502_PreparationFeu2020/
PROJECT_PATH =${COMMUN_PATH}Informatique/SIG/Application/Jupyterhub/projets/catalogFiles/
DATA_CATALOG_DIR = ${PATH_INFOCENTRE_APP}projets/catalogFiles/ 
DATA_OUTPUT_DIR = ${PROJECT_PATH}output/
SIG_DATA_PATH = ${COMMUN_PATH}Informatique/SIG/Donnees/
DB_USER="jfnguyenvansoc"
DB_PWD="oeil"
DB_HOST="172.20.12.13"
DB_PORT=5432

DB_WORKSPACE="oeil_traitement"
DB_REF="oeil_reference"
DB_EXT="data_externe"
DB_SCHEMA = "feux_cq"
DB_SCHEMA_REF = "feux"

In [2]:
import logging
import os
from dotenv import load_dotenv
from intake import open_catalog
import pandas as pd
import numpy as np
from shapely import wkt
import shapely
from sqlalchemy import create_engine
from shapely.geometry import shape
from shapely.ops import unary_union
import geopandas as gpd
from datetime import datetime
import networkx as nx
from dotenv import find_dotenv

env_file = find_dotenv("N:\Informatique\SIG\Etudes\2023\2309_QC_feux\Travail\Scripts\CQ_sentinel_surfaces_brulees\Controle\.env")
load_dotenv()

## Create log file 
logging.basicConfig(filename='N:/Informatique/SIG/Etudes/2023/2309_QC_feux/Travail/Scripts/CQ_sentinel_surfaces_brulees/Controle/{:%Y-%m-%d}_Controle_data_brute.log'.format(datetime.now()),filemode='w', encoding='utf-8', level=logging.INFO, force=True, format='%(message)s')

##open yaml file to get raw buned area table
date_start=pd.to_datetime("2023-12-01", format="%Y-%m-%d")
date_end=pd.to_datetime("2023-12-31", format="%Y-%m-%d")

table_source="surfaces_brulees_brute_control"
catalog_path = f'{os.getenv("PROJECT_PATH")}/Fire_Detection_Data_Quality.yaml'

sql = f"""SELECT *
FROM feux_cq.{table_source} si
WHERE si.date_ >= '{pd.to_datetime(date_start).strftime('%Y-%m-%d')}' AND si.date_ <= '{pd.to_datetime(date_end).strftime('%Y-%m-%d')}'
"""

catalog = open_catalog(catalog_path)
dataCatalog = getattr(catalog, table_source)(sql_expr=sql)
surfdetect_control = dataCatalog.read()

## Début des contrôles

In [4]:
def get_all_elements_in_list_of_lists(list):
    count = 0
    for element in list:
        count += len(element)
    return count

### Check if tile's name are differents than the 15 use in NC

In [5]:
logging.info('------------------------------------------------')
logging.info('Check if unknown tiles are found in data')
logging.info('------------------------------------------------')

## liste des 15 tuiles présentes sur la GT et les iles 
list_tiles=['L2A_T58KCC','L2A_T58KCD','L2A_T58KDB','L2A_T58KDC','L2A_T58KEA','L2A_T58KEB','L2A_T58KEC',
            'L2A_T58KFA','L2A_T58KFB','L2A_T58KFC','L2A_T58KGA','L2A_T58KGB','L2A_T58KGC','L2A_T58KGV','L2A_T58KHB']

## prendre que le numéro de la dalle pour les traitements
surfdetect_control['nom_court'] = [x[-10:] for x in surfdetect_control['nom']]

error_tile=[]

for i in range(len(surfdetect_control)):
    if surfdetect_control['nom_court'][i] in list_tiles:
         continue
    else:
        error_tile.append(surfdetect_control.iloc[i]) ## on regarde si d'autres tuiles sont présentes dans le jeu de donnée

if not error_tile:
    print('Aucune tuiles détectées en dehors des 15 tuiles utilisées par l algorithme')
    logging.info('Not unkonw tiles found in data - continue process')
    len_error_tile=0
else:
    error_tile=pd.concat(error_tile,axis=1, ignore_index=True).T
    print('Attention des tuiles en dehors des tuiles abituelles sont présentes dans le jeux de donnée')
    logging.error('%s !! WARNING !! Unknow tile found in data - check surface_id_h3 number above or in code !! :', error_tile['surface_id_h3']) ## on alimente le log si présence de tuiles autres
    len_error_tile=len(error_tile)

logging.info('Number of tiles detected outside the 15 tiles', len_error_tile)

Aucune tuiles détectées en dehors des 15 tuiles utilisées par l algorithme


--- Logging error ---
Traceback (most recent call last):
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 1110, in emit
    msg = self.format(record)
          ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 953, in format
    return fmt.format(record)
           ^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 687, in format
    record.message = record.getMessage()
                     ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 377, in getMessage
    msg = msg % self.args
          ~~~~^~~~~~~~~~~
TypeError: not all arguments converted during string formatting
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\oriane.bruyere\AppData\Local

### Check if tile occurence is equal to zero

In [6]:
### Check if tile occurence is equal to zero
## traitement à prendre avec des pincettes car selon la résolution temporelle des données des tuiles peuvent manquer 

logging.info('------------------------------------------------')
logging.info('Check if tile occurence')
logging.info('------------------------------------------------')

df_dalle=pd.DataFrame({"date_":surfdetect_control.date_,"dalle_names":surfdetect_control.nom_court})
## prendre que le numéro de la dalle
df_dalle['dalle_names'] = [x[-10:] for x in df_dalle['dalle_names']]
df_dalle=df_dalle.set_index(df_dalle['date_'])
df_dalle = df_dalle.sort_index()

occurrences_dalles = df_dalle["dalle_names"].value_counts()
print(occurrences_dalles)

logging.info('%s !! Tile occurence - check tile name above or in code !! :', occurrences_dalles)

dalle_names
L2A_T58KDC    1538
L2A_T58KDB     739
L2A_T58KEB     694
L2A_T58KFB     324
L2A_T58KFA     260
L2A_T58KGB     106
L2A_T58KCC      46
L2A_T58KGA      44
L2A_T58KCD      38
L2A_T58KHB      22
L2A_T58KEA      22
L2A_T58KEC      20
L2A_T58KGV       9
L2A_T58KGC       2
L2A_T58KFC       1
Name: count, dtype: int64


### Check if burned area are under 1ha

In [7]:
## Check if burned area are under 1ha

logging.info('------------------------------------------------')
logging.info('Check if burned areas are not under 1 ha')
logging.info('------------------------------------------------')

error_surface=[]

for i in range(len(surfdetect_control)):
    if surfdetect_control['surface'][i] >= 1:
         continue
    else:
        error_surface.append(surfdetect_control.iloc[i])

if not error_surface:
    print('empty_list')
    logging.info('No surface found under 1ha - continue process')
    len_error_surface=0
else:
    error_surface=pd.concat(error_surface,axis=1, ignore_index=True).T
    print('list if not empty')
    logging.info('%s !! WARNING !! burned areas are found under 1 ha - check ogc_fid number above or in code !! :', error_surface['surface_id_h3'])
    len_error_surface=len(error_surface)

logging.info('Number polygons under 1ha ', len_error_surface)

empty_list


--- Logging error ---
Traceback (most recent call last):
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 1110, in emit
    msg = self.format(record)
          ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 953, in format
    return fmt.format(record)
           ^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 687, in format
    record.message = record.getMessage()
                     ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 377, in getMessage
    msg = msg % self.args
          ~~~~^~~~~~~~~~~
TypeError: not all arguments converted during string formatting
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\oriane.bruyere\AppData\Local

### Check if geometries have errors

In [8]:
## Check if burned area have wrong geometries

logging.info('------------------------------------------------')
logging.info('Check if burned areas have wrong geometries')
logging.info('------------------------------------------------')

is_valid = surfdetect_control.geometry.is_valid
invalid_geometries = surfdetect_control[~is_valid]
len_invalid_geometries=len(invalid_geometries)

print("Multipolygones with geometries errors :",invalid_geometries)
logging.error('%s !! WARNING !! burned areas geometries have errors - check surface_id_h3 above or in code !! :', invalid_geometries.surface_id_h3)

surfdetect_control.geometry = surfdetect_control.geometry.buffer(0) # correct the geometry

## check if the correction is applied
is_valid_after_correction = surfdetect_control.geometry.is_valid
invalid_geometries_after_correction = surfdetect_control[~is_valid_after_correction]
print("Check if geometry correction were applied :", invalid_geometries_after_correction)
logging.error('%s !! WARNING !! a geometry correction were applied - if empty continue process : ',invalid_geometries_after_correction)

Multipolygones with geometries errors :                                  nom       province      commune     surface  \
76    SENTINEL2B_20231202_L2A_T58KCC  Province Nord         POUM  209.907261   
294   SENTINEL2B_20231222_L2A_T58KDC  Province Nord       OUEGOA   30.526195   
504   SENTINEL2B_20231222_L2A_T58KDC  Province Nord  KAALA GOMEN  189.183834   
983   SENTINEL2A_20231227_L2A_T58KDC  Province Nord         POUM  396.083292   
1229  SENTINEL2B_20231202_L2A_T58KDC  Province Nord         POUM  220.987127   
1466  SENTINEL2B_20231202_L2A_T58KCC  Province Nord         POUM  209.907261   
2915  SENTINEL2B_20231222_L2A_T58KDC  Province Nord       OUEGOA   30.526195   
3118  SENTINEL2B_20231222_L2A_T58KDC  Province Nord  KAALA GOMEN  189.183834   
3128  SENTINEL2B_20231222_L2A_T58KCC  Province Nord         POUM  167.711583   
3494  SENTINEL2B_20231229_L2A_T58KEB  Province Nord         POYA   44.806425   

                                               geometry       date_  \
76    MU

#### Check if burned areas are detected outside date interval set at the begening

In [9]:
## Check if burned areas are detected outside date interval set at the begening

logging.info('------------------------------------------------')
logging.info('%s Check if burned areas are included into the following date interval', )
logging.info('------------------------------------------------')

error_date=[]

for i in range(len(surfdetect_control)):
    current_date = pd.to_datetime(surfdetect_control['date_'][i])
    
    if date_start <= current_date <= date_end:
        continue
    else:
        error_date.append(surfdetect_control.iloc[i])

if not error_date:
    print('empty_list')
    logging.info('Burned areas are included into date range - continue process')
    len_outside_date=0
else:
    error_date=pd.concat(error_date,axis=1, ignore_index=True).T
    print('list if not empty')
    logging.error('%s !! WARNING !! burned areas are found outside date range - check surface_id_h3 above or in code !! :', error_date['surface_id_h3'])
    len_outside_date=len(error_date)

logging.info('Number polygons outiside the date range ', len_outside_date)

empty_list


--- Logging error ---
Traceback (most recent call last):
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 1110, in emit
    msg = self.format(record)
          ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 953, in format
    return fmt.format(record)
           ^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 687, in format
    record.message = record.getMessage()
                     ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\oriane.bruyere\AppData\Local\miniconda3\envs\gis311\Lib\logging\__init__.py", line 377, in getMessage
    msg = msg % self.args
          ~~~~^~~~~~~~~~~
TypeError: not all arguments converted during string formatting
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\oriane.bruyere\AppData\Local

In [10]:
## create an id
surfdetect_control['ID'] = range(1, len(surfdetect_control)+1,1)

### Check if identical geometries are observed at a same date

In [11]:
#check if identical geometries are observed at a same date
len_surfdetect_control_before=len(surfdetect_control)
def compare_geometries(gdf):
    geometry_date_dict = {}

    for index, row in gdf.iterrows():
        geometry =  row['geometry']
        date = row['date_']

        # Convertir la géométrie en une forme Shapely
        geom_shape = shape(geometry)
        key = (geom_shape, date)

        # Ajouter la paire (forme, date) au dictionnaire
        if key not in geometry_date_dict:
            geometry_date_dict[key] = []

        geometry_date_dict[key].append(index)
    identique_geom=[]
    
    # identifier les géométries avec la même forme à la même date = doublon
    for key, indices in geometry_date_dict.items():
        if len(indices) > 1:
            identique_geom.append(indices)
            print(f"Géométries identiques à la même date : {indices}")
    
    return(identique_geom)

surfdetect_control['geometry']=shapely.wkt.loads(shapely.wkt.dumps(surfdetect_control['geometry'], rounding_precision=2)) ## change number of decimal for geometry
identique_geom=compare_geometries(surfdetect_control) 

logging.info('------------------------------------------------')
logging.info('%s Check if 2 identical geometries shape are observed at a same date', )
logging.info('------------------------------------------------')

if not identique_geom:
    print('empty_list')
    logging.info('NO identical geometries found at the same date - continue process')
    len_identique_geom=0
else:
    print('list if not empty')
    len_identique_geom=len(identique_geom)
    #logging.error('%s !! WARNING !! same geometries at a same date were found - check ogc_fid number above or in code !! :', identique_geom.index)
    
    ### if identical geometries are found : check if they have the name tile name
    for sublist in identique_geom:
        nom_values = [surfdetect_control.loc[i, 'nom_court'] for i in sublist]
        index_values=[surfdetect_control.loc[i, 'ID'] for i in sublist]
        if all(value == nom_values[0] for value in nom_values):
            print('nom de thuile identique')
            logging.error('%s !! WARNING !! same geometries at a same date were found on a same tile: check above for informations',index_values)
            
            ## we keep the first geom et delete the others
            if len(sublist) == 2: 
                surfdetect_control=surfdetect_control.drop(sublist[1])
                print('delete index : ',sublist[1])
                logging.error('%s !! DELETE !! geometry deleted : check above for informations',sublist[1])
            else:
                surfdetect_control=surfdetect_control.drop(sublist[1:])
                print('delete index :',sublist[1:])
                logging.error('%s !! DELETE !! geometry deleted : check above for informations',sublist[1:])
        else:
            print('nom de thuile différente')
            logging.error('%s !! WARNING !! same geometries at a same date were found on different tiles : check above for informations',index_values)
            
            ## we keep the first geom et delete the others
            if len(sublist) == 2: 
                surfdetect_control=surfdetect_control.drop(sublist[1])
                print('delete index : ',sublist[1])
                logging.error('%s !! DELETE !! geometry deleted : check above for informations',sublist[1])
            else:
                surfdetect_control=surfdetect_control.drop(sublist[1:])
                print('delete index :',sublist[1:])
                logging.error('%s !! DELETE !! geometry deleted : check above for informations',sublist[1:])

surfdetect_control=surfdetect_control.reset_index(drop=True)
surfdetect_control['ID'] = range(1, len(surfdetect_control)+1,1)

Géométries identiques à la même date : [9, 549]
Géométries identiques à la même date : [10, 74]
Géométries identiques à la même date : [13, 1391]
Géométries identiques à la même date : [14, 510]
Géométries identiques à la même date : [15, 502]
Géométries identiques à la même date : [16, 506]
Géométries identiques à la même date : [17, 512]
Géométries identiques à la même date : [18, 513]
Géométries identiques à la même date : [19, 1523]
Géométries identiques à la même date : [20, 524]
Géométries identiques à la même date : [21, 525]
Géométries identiques à la même date : [22, 517]
Géométries identiques à la même date : [23, 527]
Géométries identiques à la même date : [25, 532]
Géométries identiques à la même date : [26, 533]
Géométries identiques à la même date : [27, 534]
Géométries identiques à la même date : [28, 535]
Géométries identiques à la même date : [29, 538]
Géométries identiques à la même date : [30, 543]
Géométries identiques à la même date : [31, 544]
Géométries identique

## Overlapping polygons for a same date

### Create intersected group of polygons at a same date

In [12]:
def find_intersecting_id(row, gdf):
    # Filtrer le GeoDataFrame pour les polygones à la même date
    gdf_same_date = gdf[gdf['date_'] == row['date_']]
    
    # Utiliser `sindex` pour une recherche spatiale plus efficace parmi ceux de la même date
    possible_matches_index = list(gdf_same_date.sindex.intersection(row['geometry'].bounds))
    possible_matches = gdf_same_date.iloc[possible_matches_index]
    precise_matches = possible_matches[possible_matches.geometry.intersects(row['geometry'])]
    
    intersecting_ids = precise_matches['ID'].tolist()
    intersecting_ids = [id_ for id_ in intersecting_ids if id_ != row['ID']]
    
    return intersecting_ids

G = nx.Graph()
for index, row in surfdetect_control.iterrows():
    intersecting_ids = find_intersecting_id(row, surfdetect_control)
    for id_ in intersecting_ids:
        G.add_edge(row['ID'], id_)

groupes = list(nx.connected_components(G))
for groupe_id, groupe in enumerate(groupes):
    for id_ in groupe:
        surfdetect_control.loc[surfdetect_control['ID'] == id_, 'groupe_id'] = groupe_id

### Check if overlapping geometries are around tile's limit

In [13]:
tile_stl2 = catalog.tile_sentinel2_line_UTM.read() ## ouvirr la table des limites des tuiles Sentinel2
tile_stl2=tile_stl2.set_index(tile_stl2['Name']) ## mettre en index le nom de la tuile

group_ids = surfdetect_control['groupe_id'].unique()
delete_index_tiles_limits=[]

logging.info('------------------------------------------------')
logging.info('%s Check if overlapping geometries at a same date are observed close to tile limits ', )
logging.info('------------------------------------------------')

for group_id in group_ids:
    # Sélectionnez les données pour le groupe en cours
    group_data = surfdetect_control[surfdetect_control['groupe_id'] == group_id]
    group_data['nom_court']= [x[-5:] for x in group_data['nom_court']]
    if group_data.shape[0] >= 2 :

        for i in range(len(group_data)):
            if group_data.iloc[i]['geometry'].buffer(20).intersects(tile_stl2['geometry'].loc[group_data.iloc[i]['nom_court']]):
                print("tile touche",group_data.iloc[i]["ID"]-1)
                surfdetect_control.drop(group_data.iloc[i]["ID"]-1,inplace=True)
                logging.error('%s !! WARNING !! Intersection between geometry and tile limits were found : check above for informations',group_data.iloc[i]["ID"]-1)
                delete_index_tiles_limits.append(group_data.iloc[i]["ID"]-1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

tile touche 117
tile touche 134
tile touche 135
tile touche 143


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

tile touche 755
tile touche 756
tile touche 762


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

tile touche 1406
tile touche 1412
tile touche 1401
tile touche 2049
tile touche 2050
tile touche 2051
tile touche 2060


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

tile touche 1998
tile touche 2313
tile touche 2694
tile touche 2687


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

tile touche 2691
tile touche 2814
tile touche 3225
tile touche 3195


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

tile touche 2954
tile touche 2977


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

### Check if overlapping geometries are from differents tiles

In [14]:
group_ids = surfdetect_control['groupe_id'].unique()
delete_index_same_tiles_geom=[]

logging.info('------------------------------------------------')
logging.info('%s Check if overlapping geometries at a same date are observed from a same tile ', )
logging.info('------------------------------------------------')

for group_id in group_ids:
    
    group_data = surfdetect_control[surfdetect_control['groupe_id'] == group_id]
    group_data['nom_court']= [x[-5:] for x in group_data['nom_court']]
    if group_data.shape[0] >= 2 :
        if all(v == group_data['nom_court'].iloc[0] for v in group_data['nom_court']):
            print(f"les noms des tuiles sont identiques.", group_data)
            logging.error('%s !! WARNING !! overlapping geometries at a same date were found on a same tile : check above for informations',group_data)
        
        else: ## we merge geometries overlapping from differents tiles
            print(f"les noms des tuiles sont différents")
            nouveau_polygone = unary_union(group_data['geometry'])
            surfdetect_control.at[group_data.iloc[0]["ID"]-1,'geometry'] = nouveau_polygone
            surfdetect_control.drop(group_data.iloc[1:]["ID"]-1,inplace=True)
            delete_index_same_tiles_geom.append(group_data.iloc[i]["ID"]-1)


les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont identiques.                                nom           province commune   surface  \
21  SENTINEL2A_20231201_L2A_T58KGB  Province des Iles   LIFOU  1.508727   
22  SENTINEL2A_20231201_L2A_T58KGB  Province des Iles   LIFOU  1.878418   

                                             geometry       date_  \
21  POLYGON ((540642.940 341959.510, 540682.920 34...  2023-12-01   
22  POLYGON ((540414.580 341708.200, 540424.570 34...  2023-12-01   

                          surface_id_h3 nom_court  ID  groupe_id  
21  L2A_T58KGB_20231201_8f9f562d04cc823     58KGB  22        3.0  
22  L2A_T58KGB_20231201_8f9f562d07a0c23     58KGB  23        3.0  
les noms des tuiles sont différents
les noms des tuiles sont identiques.                                 nom           province commune   surface  \
41   SENTINEL2A_20231221_L2A_T58KGB  Province des Iles   LIFOU  1.98819

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont dif

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

les noms des tuiles sont différents


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

les noms des tuiles sont identiques.                                 nom       province commune    surface  \
238  SENTINEL2B_20231222_L2A_T58KDC  Province Nord  OUEGOA   1.121057   
240  SENTINEL2B_20231222_L2A_T58KDC  Province Nord  OUEGOA  56.132955   

                                              geometry       date_  \
238  POLYGON ((231923.630 434788.090, 231933.630 43...  2023-12-22   
240  POLYGON ((232312.680 434960.750, 232322.680 43...  2023-12-22   

                           surface_id_h3 nom_court   ID  groupe_id  
238  L2A_T58KDC_20231222_8f9e2d264a184e5     58KDC  239       74.0  
240  L2A_T58KDC_20231222_8f9e2d264840c66     58KDC  241       74.0  
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont identiques.                                 nom       province commune    surface  \
273  SENTINEL2B_20231222_L2A_T58KDC  Province Nord  OUEGOA   3.563218   
277  SENTINEL2B_20231222_L2A_T58KDC  Province Nord  OUEGOA  72.315618 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

les noms des tuiles sont identiques.                                 nom       province commune    surface  \
731  SENTINEL2A_20231214_L2A_T58KDB  Province Nord    KONE   1.490987   
738  SENTINEL2A_20231214_L2A_T58KDB  Province Nord    KONE  19.753059   

                                              geometry       date_  \
731  POLYGON ((294445.010 353729.510, 294485.020 35...  2023-12-14   
738  POLYGON ((294175.110 353697.770, 294235.130 35...  2023-12-14   

                           surface_id_h3 nom_court   ID  groupe_id  
731  L2A_T58KDB_20231214_8f9f5e554b0010a     58KDB  732      111.0  
738  L2A_T58KDB_20231214_8f9f5e5734dcca5     58KDB  739      111.0  
les noms des tuiles sont différents
les noms des tuiles sont identiques.                                 nom       province commune    surface  \
855  SENTINEL2A_20231224_L2A_T58KDB  Province Nord    KONE   1.010656   
859  SENTINEL2A_20231224_L2A_T58KDB  Province Nord    KONE  33.521727   

                                

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont identiques.                                  nom       province commune   surface  \
1435  SENTINEL2B_20231202_L2A_T58KDB  Province Nord    KONE  2.571614   
1449  SENTINEL2B_20231202_L2A_T58KDB  Province Nord    KONE  1.530961   

                                               geometry       date_  \
1435  POLYGON ((271829.590 345561.720, 271849.590 34...  2023-12-02   
1449  POLYGON ((271958.850 345682.600, 271998.860 34...  2023-12-02   

                         

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont dif

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont identiques.                                  nom       province commune   surface  \
2577  SENTINEL2B_20231219_L2A_T58KEB  Province Nord  CANALA  3.591241   
2579  SENTINEL2B_20231219_L2A_T58KEB  Province Nord  CANALA  4.411518   

                                               geometry       date_  \
2577  POLYGON ((396073.510 292680.430, 396093.520 29...  2023-12-19   
2579  POLYGON ((396384.010 292612.410, 396394.010 29...  2023-12-19   

                            surface_id_h3 nom_court    ID  groupe_id  
2577  L2A_T58KEB_20231219_8f9f5158bb0e8e4     58KEB  2578      261.0  
2579  L2A_T58KEB_20231219_8f9f51589498412     58KEB  2580      261.0  
les noms des tuiles sont différents
les noms des tuiles sont différents
les noms des tuiles sont identiques.                                  nom       province    commune   surface  \
2761  SENTINEL2B_202312

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

In [15]:
surfdetect_control=surfdetect_control.reset_index(drop=True)
surfdetect_control['ID'] = range(1, len(surfdetect_control)+1,1)

In [16]:
# Check if the geometry is a Polygon 
for i in range(len(surfdetect_control)):
    if surfdetect_control['geometry'].geom_type[i] == 'Polygon':
        surfdetect_control.at[i, 'geometry'] = MultiPolygon([surfdetect_control['geometry'][i]])
        #print(surfdetect_control)
    else:
        print("La géométrie n'est pas de type Polygon.")

La géométrie n'est pas de type Polygon.
La géométrie n'est pas de type Polygon.
La géométrie n'est pas de type Polygon.
La géométrie n'est pas de type Polygon.
La géométrie n'est pas de type Polygon.
La géométrie n'est pas de type Polygon.


In [17]:
surfdetect_control['surface'] = surfdetect_control['geometry'].area/10000

## FINALISATION : Intégration des données dans la table "sentinel_surfaces_detectees"

In [18]:
gdf = gpd.GeoDataFrame(surfdetect_control.copy(), crs=surfdetect_control.crs, geometry=surfdetect_control['geometry'])
gdf=gdf[["nom", "province", "commune", "surface", "geometry","date_","surface_id_h3"]]

In [19]:
gdf

Unnamed: 0,nom,province,commune,surface,geometry,date_,surface_id_h3
0,SENTINEL2A_20231227_L2A_T58KDB,Province Nord,KONE,1.120721,"MULTIPOLYGON (((288038.120 346646.040, 288048....",2023-12-27,L2A_T58KDB_20231227_8f9f5e56e621226
1,SENTINEL2B_20231202_L2A_T58KDC,Province Nord,HIENGHENE,1.030828,"MULTIPOLYGON (((267380.610 393280.720, 267410....",2023-12-02,L2A_T58KDC_20231202_8f9e2db55b52410
2,SENTINEL2A_20231227_L2A_T58KDB,Province Nord,KONE,2.301435,"MULTIPOLYGON (((286677.140 346727.350, 286697....",2023-12-27,L2A_T58KDB_20231227_8f9f5e56a848852
3,SENTINEL2B_20231202_L2A_T58KDB,Province Nord,KONE,2.011358,"MULTIPOLYGON (((292386.750 354906.750, 292396....",2023-12-02,L2A_T58KDB_20231202_8f9f5e55545a5ac
4,SENTINEL2B_20231202_L2A_T58KDC,Province Nord,OUEGOA,1.190977,"MULTIPOLYGON (((234044.220 413592.280, 234104....",2023-12-02,L2A_T58KDC_20231202_8f9f5ad8125299a
...,...,...,...,...,...,...,...
2940,SENTINEL2B_20231212_L2A_T58KDC,Province Nord,POUEBO,2.442276,"MULTIPOLYGON (((239841.390 432689.240, 239851....",2023-12-12,L2A_T58KDC_20231212_8f9e2d343d72493
2941,SENTINEL2A_20231224_L2A_T58KFA,Province Sud,PAITA,1.070347,"MULTIPOLYGON (((425549.430 238672.660, 425559....",2023-12-24,L2A_T58KFA_20231224_8f9f51d1c1b1628
2942,SENTINEL2A_20231227_L2A_T58KDB,Province Nord,KONE,1.430889,"MULTIPOLYGON (((286246.560 346794.620, 286286....",2023-12-27,L2A_T58KDB_20231227_8f9f5e56aa31472
2943,SENTINEL2A_20231227_L2A_T58KDC,Province Nord,OUEGOA,1.160969,"MULTIPOLYGON (((256280.230 401882.450, 256290....",2023-12-27,L2A_T58KDC_20231227_8f9e2da71ce9768


In [20]:
from sqlalchemy import text
table = "sentinel_surfaces_detectees"

conex = create_engine(f'postgresql://{os.getenv("DB_USER")}:{os.getenv("DB_PWD")}@{os.getenv("DB_HOST")}:{os.getenv("DB_PORT")}/{os.getenv("DB_WORKSPACE")}')
gdf.to_postgis(table, conex,schema=os.getenv("DB_SCHEMA"),if_exists='append')