# Prepare simplified signatures for upload

In [51]:
import dask_geopandas
import geopandas
import pyogrio

In [43]:
signatures = pyogrio.read_dataframe('urbangrammar_samba/spatial_signatures/signatures/signatures_combined_levels_simplified.gpkg')

In [44]:
signatures

Unnamed: 0,kmeans10gb,level2,signature_type,geometry
0,0,0.0,0_0,"POLYGON ((62219.999 798499.999, 62109.999 7985..."
1,0,0.0,0_0,"POLYGON ((63507.682 796515.168, 63471.096 7965..."
2,0,0.0,0_0,"POLYGON ((65953.174 802246.171, 65523.864 8023..."
3,0,0.0,0_0,"POLYGON ((67297.740 803435.799, 67220.290 8034..."
4,0,0.0,0_0,"POLYGON ((75760.000 852669.999, 75699.999 8527..."
...,...,...,...,...
96699,9,8.0,9_8,"POLYGON ((323321.005 463795.415, 323236.741 46..."
96700,9,8.0,9_8,"POLYGON ((325929.840 1008792.060, 325890.989 1..."
96701,9,8.0,9_8,"POLYGON ((337804.769 1013422.582, 337547.855 1..."
96702,9,8.0,9_8,"POLYGON ((422304.270 1147826.989, 422296.000 1..."


In [45]:
types = {
    "0_0": "Countryside agriculture",
    "1_0": "Accessible suburbia",
    "3_0": "Open sprawl",
    "4_0": "Wild countryside",
    "5_0": "Warehouse/Park land",
    "6_0": "Gridded residential quarters",
    "7_0": "Urban buffer",
    "8_0": "Disconnected suburbia",
    "2_0": "Dense residential neighbourhoods",
    "2_1": "Connected residential neighbourhoods",
    "2_2": "Dense urban neighbourhoods",
    "9_0": "Local urbanity",
    "9_1": "Concentrated urbanity",
    "9_2": "Regional urbanity",
    "9_4": "Metropolitan urbanity",
    "9_5": "Hyper concentrated urbanity",
    "9_3": "outlier",
    "9_6": "outlier",
    "9_7": "outlier",
    "9_8": "outlier",
}

In [46]:
signatures["type"] = signatures.signature_type.map(types)
signatures["id"] = range(len(signatures))

In [47]:
signatures.head()

Unnamed: 0,kmeans10gb,level2,signature_type,geometry,type,id
0,0,0.0,0_0,"POLYGON ((62219.999 798499.999, 62109.999 7985...",Countryside agriculture,0
1,0,0.0,0_0,"POLYGON ((63507.682 796515.168, 63471.096 7965...",Countryside agriculture,1
2,0,0.0,0_0,"POLYGON ((65953.174 802246.171, 65523.864 8023...",Countryside agriculture,2
3,0,0.0,0_0,"POLYGON ((67297.740 803435.799, 67220.290 8034...",Countryside agriculture,3
4,0,0.0,0_0,"POLYGON ((75760.000 852669.999, 75699.999 8527...",Countryside agriculture,4


In [48]:
signatures = signatures[["id", "type", "geometry"]]

In [49]:
signatures

Unnamed: 0,id,type,geometry
0,0,Countryside agriculture,"POLYGON ((62219.999 798499.999, 62109.999 7985..."
1,1,Countryside agriculture,"POLYGON ((63507.682 796515.168, 63471.096 7965..."
2,2,Countryside agriculture,"POLYGON ((65953.174 802246.171, 65523.864 8023..."
3,3,Countryside agriculture,"POLYGON ((67297.740 803435.799, 67220.290 8034..."
4,4,Countryside agriculture,"POLYGON ((75760.000 852669.999, 75699.999 8527..."
...,...,...,...
96699,96699,outlier,"POLYGON ((323321.005 463795.415, 323236.741 46..."
96700,96700,outlier,"POLYGON ((325929.840 1008792.060, 325890.989 1..."
96701,96701,outlier,"POLYGON ((337804.769 1013422.582, 337547.855 1..."
96702,96702,outlier,"POLYGON ((422304.270 1147826.989, 422296.000 1..."


## Cleanup sliver geometries

Remove artifacts caused by floating point imprecision in dissolving of ET cells into signature geometries.

In [54]:
from shapely.geometry import Polygon

def fix(gdf):
    new = []
    for poly in gdf.geometry:
        new.append(Polygon(shell=poly.exterior, holes=[i for i in poly.interiors if Polygon(i).area > 1]))
    return geopandas.GeoSeries(new, index=gdf.index, crs=gdf.crs)


signatures["geometry"] = fix(signatures.geometry)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [56]:
signatures.to_file("spatial_signatures_GB_simplified.gpkg", driver="GPKG")