In [1]:
### This notebook:
###  [x] loads the grid of 100 m cells
###  [x] selects only the cells listed in the train/val/test splits and stores them as `selected_grid`
###  [x] loads the TLM file, merges the classes and gives them English names
###  [ ] cuts the TLM with the selected 100x100 m cells

In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd 


In [2]:
# Input and output locations used throughout the notebook.
# NOTE(review): these are absolute paths on one machine; consider a configurable base directory.
split_dir = '/home/valerie/Projects/Alps_LCC/data/split/'
split_names = ['train', 'val', 'test']

grid_path = '/home/valerie/data/rocky_tlm/footprint/aoiGrid100m.shp'
# One "<split>_dataset.csv" per split, in train / val / test order.
csv_paths = [split_dir + split_name + '_dataset.csv' for split_name in split_names]
tlm_path = '/home/valerie/data/ace_Xiaolong/mask_shp/Label.shp'
ace_label_path = '/home/valerie/data/ace_Xiaolong/mask_shp/Label_cut.shp'

In [3]:
# Read every split CSV (one DataFrame per path in csv_paths) and stack the
# rows into a single table with a fresh 0..n-1 index.
split_frames = [pd.read_csv(csv_path) for csv_path in csv_paths]
tile_ids = pd.concat(split_frames, ignore_index=True)

# Preview the first rows together with the total number of rows.
tile_ids.head(), len(tile_ids)

(                   rbg                  dem                   mask  mainclass
 0  26193_11033_rgb.tif  26193_11033_dem.tif  26193_11033_label.tif        9.0
 1  26455_11059_rgb.tif  26455_11059_dem.tif  26455_11059_label.tif        1.0
 2  25602_11133_rgb.tif  25602_11133_dem.tif  25602_11133_label.tif       10.0
 3  25750_11179_rgb.tif  25750_11179_dem.tif  25750_11179_label.tif        1.0
 4  25659_11101_rgb.tif  25659_11101_dem.tif  25659_11101_label.tif        1.0,
 229535)

In [4]:
# The first CSV column holds the RGB tile file names; removing the
# '_rgb.tif' suffix leaves the bare tile ID used to match grid cells.
rgb_file_names = tile_ids.iloc[:, 0]
id_list = [file_name.replace('_rgb.tif', '') for file_name in rgb_file_names]
id_list[:10]

['26193_11033',
 '26455_11059',
 '25602_11133',
 '25750_11179',
 '25659_11101',
 '25725_10905',
 '26370_11461',
 '26420_10961',
 '26429_11456',
 '25899_10953']

In [6]:
# Read the grid shapefile at grid_path into a GeoDataFrame.
grid = gpd.read_file(grid_path)

In [7]:
# Preview the first grid rows together with the total number of cells.
grid.head(),len(grid)

(            ID                                           geometry
 0  25733_10799  POLYGON ((2573300.000 1079900.000, 2573300.000...
 1  25734_10797  POLYGON ((2573400.000 1079700.000, 2573400.000...
 2  25734_10798  POLYGON ((2573400.000 1079800.000, 2573400.000...
 3  25734_10799  POLYGON ((2573400.000 1079900.000, 2573400.000...
 4  25735_10793  POLYGON ((2573500.000 1079300.000, 2573500.000...,
 398921)

In [8]:
# Keep only the grid cells whose ID appears in the tile ID list built from
# the split CSVs, then preview the result and its size.
is_in_splits = grid['ID'].isin(id_list)
selected_grid = grid[is_in_splits]
selected_grid.head(), len(selected_grid)

(            ID                                           geometry
 1  25734_10797  POLYGON ((2573400.000 1079700.000, 2573400.000...
 2  25734_10798  POLYGON ((2573400.000 1079800.000, 2573400.000...
 3  25734_10799  POLYGON ((2573400.000 1079900.000, 2573400.000...
 4  25735_10793  POLYGON ((2573500.000 1079300.000, 2573500.000...
 5  25735_10794  POLYGON ((2573500.000 1079400.000, 2573500.000...,
 229535)

In [41]:
# Dissolve all selected cell geometries into one geometry.
footprint = selected_grid['geometry'].unary_union

# Wrap the dissolved geometry in a one-row GeoDataFrame.
# NOTE(review): the CRS is hard-coded to EPSG:2056 rather than taken from
# selected_grid — confirm the grid file really uses that CRS.
merged_grid = gpd.GeoDataFrame({'geometry': [footprint]}, crs='epsg:2056')

# Write the footprint as a Shapefile into the current working directory.
merged_grid.to_file('merged_grid.shp', driver='ESRI Shapefile')

In [5]:
# Load the TLM label shapefile, then print its columns and the distinct raw
# OBJEKTART_ categories that the class mapping has to cover.
tlm = gpd.read_file(tlm_path)
raw_categories = tlm['OBJEKTART_'].unique()
print(tlm.columns, raw_categories)


Index(['UUID', 'DATUM_AEND', 'DATUM_ERST', 'ERSTELL_J', 'ERSTELL_M',
       'REVISION_J', 'REVISION_M', 'GRUND_AEND', 'HERKUNFT', 'HERKUNFT_J',
       'HERKUNFT_M', 'OBJEKTART', 'REVISION_Q', 'UUID_2', 'DATUM_AE_1',
       'DATUM_ER_1', 'ERSTELL_J_', 'ERSTELL_M_', 'REVISION_1', 'REVISION_2',
       'GRUND_AE_1', 'HERKUNFT_2', 'HERKUNFT_1', 'HERKUNFT_3', 'OBJEKTART_',
       'REVISION_3', 'CLASS', 'geometry'],
      dtype='object') ['Gletscher' 'Schneefeld Toteis' 'Gebueschwald' 'Fels' 'Felsbloecke' nan
 'Lockergestein' 'Stehende Gewaesser' 'Fels locker' 'Feuchtgebiet'
 'Lockergestein locker' 'Wald' 'Wald offen' 'Gehoelzflaeche'
 'Felsbloecke locker' 'Fliessgewaesser']


In [7]:
# Original (German) category labels grouped by the English class they are
# merged into. Group and member order are chosen so the flattened lookup
# below lists its keys glacier-first, water-last.
labels_by_class = {
    'glacier': ['Gletscher', 'Schneefeld Toteis'],
    'forest': ['Gebueschwald', 'Wald', 'Wald offen', 'Gehoelzflaeche'],
    'bedrock': ['Fels'],
    'bedrock with grass': ['Fels locker'],
    'large blocks': ['Felsbloecke'],
    'large blocks with grass': ['Felsbloecke locker'],
    'scree': ['Lockergestein'],
    'scree with grass': ['Lockergestein locker'],
    'water': ['Stehende Gewaesser', 'Feuchtgebiet', 'Fliessgewaesser'],
}

# Flat old-label -> new-label lookup. Missing (NaN) labels get no entry.
class_mapping = {
    old_label: new_label
    for new_label, old_labels in labels_by_class.items()
    for old_label in old_labels
}

# Translate both label columns through class_mapping; values that have no
# entry in the mapping (including missing values) are left as they are.
for label_column in ('OBJEKTART', 'OBJEKTART_'):
    tlm[label_column] = tlm[label_column].replace(class_mapping)

# Final label: take OBJEKTART where it is present, otherwise fall back to
# the value in OBJEKTART_.
primary_label = tlm['OBJEKTART']
fallback_label = tlm['OBJEKTART_']
tlm['class'] = primary_label.fillna(fallback_label)


In [8]:
# Write the relabelled table to tlm_path, the same path tlm was read from.
# NOTE(review): this overwrites the input shapefile in place, so a later
# re-run of the notebook loads the already-modified data — confirm intended.
tlm.to_file(tlm_path)

In [90]:
# Attribute columns that are not needed in the exported label file.
unused_columns = [
    'UUID', 'DATUM_AEND', 'DATUM_ERST', 'ERSTELL_J', 'ERSTELL_M',
    'REVISION_J', 'REVISION_M', 'GRUND_AEND', 'HERKUNFT', 'HERKUNFT_J',
    'HERKUNFT_M', 'OBJEKTART', 'REVISION_Q', 'UUID_2', 'DATUM_AE_1',
    'DATUM_ER_1', 'ERSTELL_J_', 'ERSTELL_M_', 'REVISION_1', 'REVISION_2',
    'GRUND_AE_1', 'HERKUNFT_2', 'HERKUNFT_1', 'HERKUNFT_3', 'OBJEKTART_',
    'REVISION_3',
]

# Drop them into a new frame (tlm itself is left untouched) and show what remains.
# NOTE(review): 'CLASS' is not in the drop list, so it is kept next to the new
# 'class' column; names differing only by case may collide when written to a
# shapefile — verify the saved fields.
clean_tlm = tlm.drop(columns=unused_columns)
print(clean_tlm.columns)

# Write the reduced table to ace_label_path.
clean_tlm.to_file(ace_label_path)


Index(['CLASS', 'geometry', 'class'], dtype='object')


In [93]:
# List the distinct values of the merged 'class' column as a sanity check.
clean_tlm['class'].unique()

array(['scree', 'bedrock', 'forest', 'glacier', 'water',
       'scree with grass', 'bedrock with grass', 'large blocks',
       'large blocks with grass'], dtype=object)