In [None]:
from dask.distributed import Client, LocalCluster
from dask import delayed, dataframe as dd
import dask

import pandas as pd
import dask.dataframe as dd
import geopandas as gpd
import math
import numpy as np

import momepy
from scripts.tessellation import enclosed_tessellation

import matplotlib.pyplot as plt
import geopandas as gpd

In [None]:
# Integer CRS code handed to set_crs()/to_crs() further down the notebook.
local_crs = 3414
# Study-area name; every ./out/{place}/... path is built from it.
place = "singapore"

In [None]:
# Local Dask cluster: 8 workers, 2 threads each.
# NOTE(review): dask documents memory_limit as a per-worker limit — confirm
# that '100GB' for each of 8 workers matches the host.
Daskcluster = LocalCluster(
    n_workers=8,
    threads_per_worker=2,
    memory_limit='100GB',
)

client = Client(Daskcluster)

# Last expression of the cell: displays the client summary.
client

In [None]:
# Inputs are Parquet files read with geopandas from ./out/{place}/.

# Streets are exploded right after loading, so multi-part geometries become
# one row per part.
streets_loaded = gpd.read_parquet(f"./out/{place}/streets.pq")
streets = streets_loaded.explode()

buildings = gpd.read_parquet(f"./out/{place}/buildings_raw.pq")

study_area = gpd.read_parquet(f"./out/{place}/study_area.pq")

water = gpd.read_parquet(f"./out/{place}/water.pq")

In [None]:
# Split multi-part water geometries into single parts and take the boundary
# (outline) of each part.
water_outlines = water.explode().geometry.boundary

# One row per outline with a fresh 0..n-1 index. The CRS of `water` is passed
# explicitly: building the frame from a bare list of shapely geometries would
# otherwise leave the result without any CRS.
water_bodies_boundaries = gpd.GeoDataFrame(
    {'geometry': list(water_outlines)},
    crs=water.crs,
)

In [None]:
# Display cell: shows the water-boundary frame for visual inspection only.
water_bodies_boundaries

In [None]:
# study_area = gpd.GeoDataFrame(pd.concat( [study_area, streets_buffer], ignore_index=True)).dissolve()

In [None]:
# # Create a figure and axis
# fig, ax = plt.subplots(figsize=(100, 100))

# # Plot study_area in green on the same axis
# study_area.plot(ax=ax, color='green')

# # Plot streets in blue on the same axis
# streets.plot(ax=ax, color='blue')

# # Plot buildings in red on the same axis
# buildings.plot(ax=ax, color='red')

# outlines.plot(ax=ax, color='black')

# # Show the plot
# plt.show()

In [None]:
# Barriers that delimit the enclosures: street segments plus water outlines.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; like append, it keeps the original index labels.
barriers = pd.concat([streets, water_bodies_boundaries])

enclosures = momepy.enclosures(barriers, limit=study_area, clip=True)

In [None]:
# Build the tessellation of `buildings` inside the enclosures; only the
# resulting `.tessellation` frame of the momepy object is kept.
tessellation = momepy.Tessellation(
    buildings,
    unique_id='uID',
    enclosures=enclosures,
    use_dask=True,
).tessellation

# Debugging aid: a second name bound to the same object (an alias, not a copy).
tessellation_copy = tessellation

In [None]:
# Debugging aid: rebind `tessellation` to the object saved as
# `tessellation_copy` so the cells below can be re-run from the raw result.
tessellation = tessellation_copy

In [None]:
# Persist the raw tessellation so the expensive step above need not be re-run.
raw_tessellation_path = f"./out/{place}/tessellation_raw.pq"
tessellation.to_parquet(raw_tessellation_path)

In [None]:
# Reload the raw tessellation from disk.
raw_tessellation_path = f"./out/{place}/tessellation_raw.pq"
tessellation = gpd.read_parquet(raw_tessellation_path)

In [None]:
# Drop the "eID" column; only the remaining columns are used below.
tessellation = tessellation.drop(columns="eID")

# Preview only: the result of dropna() is displayed, not assigned, so
# `tessellation` still contains any rows with missing values after this cell.
tessellation.dropna()

In [None]:
# Cut the water polygons out of the tessellation cells.
tessellation = gpd.overlay(tessellation, water, how="difference")

In [None]:
# Remove every row that has a missing value in any column.
tessellation = tessellation.dropna(axis=0, how="any")

In [None]:
# Split multi-part cells into one row per polygon and renumber rows 0..n-1.
# ignore_index=True replaces reset_index() followed by dropping the
# auto-named "level_0"/"level_1" columns: those names exist only when
# explode() returns an unnamed two-level index, so the old form raises
# KeyError when explode() returns a single-level index.
tessellation = tessellation.explode(ignore_index=True)

In [None]:
@dask.delayed
def find_largest(group, building):
    """Pick the tessellation cell that represents one building.

    Decorated with ``dask.delayed``: calling it builds a lazy task that is
    evaluated later by ``dask.compute``.

    Parameters
    ----------
    group : GeoDataFrame
        Tessellation rows sharing one ``uID``.
    building : GeoDataFrame
        Building row(s) for the same ``uID``. Its CRS is set to the
        notebook-level ``local_crs`` before the spatial join.

    Returns
    -------
    GeoDataFrame
        ``group`` itself when it has a single row. Otherwise the rows of
        ``group`` that intersect ``building``, reduced to the one with the
        largest area when there are several. Empty when no row intersects.
        The result always has the columns of ``group``.
    """
    if len(group) == 1:
        return gpd.GeoDataFrame(group)

    # The join is used only to learn which rows touch the building. Its own
    # output suffixes column names shared by both inputs (such as "uID") and
    # adds "index_right", so the rows are taken from `group` to keep the
    # schema identical to the single-row branch.
    hits = gpd.sjoin(group, building.set_crs(local_crs), predicate='intersects')
    candidates = group.loc[hits.index.unique()]

    # Zero or one candidate: nothing to choose between, and idxmax() would
    # raise on an empty selection.
    if len(candidates) <= 1:
        return gpd.GeoDataFrame(candidates)

    return gpd.GeoDataFrame(candidates.loc[[candidates.area.idxmax()]])
        

In [None]:
# One lazy find_largest task per uID: the tessellation rows of that uID
# together with the building rows carrying the same uID.
delayed_obj = [
    find_largest(cells, buildings.loc[buildings["uID"] == building_id])
    for building_id, cells in tessellation.groupby("uID")
]

# Evaluate all tasks at once; the result is a tuple with one frame per task.
objects = dask.compute(*delayed_obj)

In [None]:
# Stack the per-uID results into one frame, keep the CRS of the previous
# tessellation, and renumber rows 0..n-1.
selected_cells = pd.concat(list(objects))
tessellation = gpd.GeoDataFrame(selected_cells, crs=tessellation.crs)
tessellation = tessellation.reset_index(drop=True)

In [None]:
# Keep only cells that match a building; columns coming from `buildings` that
# clash with existing names get the suffix "right".
# NOTE(review): DataFrame.join matches tessellation["uID"] against the *index*
# of `buildings`, not its "uID" column — confirm the index holds the uIDs.
tessellation = tessellation.join(
    buildings,
    on='uID',
    how='inner',
    rsuffix="right",
)

In [None]:
# Consecutive ids 0..n-1 in the current row order of the tessellation.
tessellation = tessellation.assign(new_uID=range(len(tessellation)))

In [None]:
# Inner merge on "uID": keeps only buildings that have a tessellation cell and
# brings the cell's columns (including "new_uID") onto each building row.
buildings = pd.merge(
    buildings,
    tessellation,
    how='inner',
    left_on='uID',
    right_on='uID',
)

In [None]:
# Replace the old ids with the consecutive ones in both frames.
for frame in (buildings, tessellation):
    frame["uID"] = frame["new_uID"]

In [None]:
# Remove the helper columns left over from the join/merge and restore the
# building geometry column to its plain name.
buildings = (
    buildings
    .drop(columns=["geometry_y", "geometryright", "uIDright", "new_uID"])
    .rename(columns={"geometry_x": "geometry"})
)

In [None]:
# Re-wrap as a GeoDataFrame and stamp it with the tessellation's CRS
# (allow_override=True replaces any CRS already set, without reprojecting).
buildings = gpd.GeoDataFrame(buildings)
buildings = buildings.set_crs(tessellation.crs, allow_override=True)

In [None]:
# Order buildings by ascending uID (index labels are left as they are).
buildings = buildings.sort_values("uID")

In [None]:
# Final tessellation: ordered by uID, helper columns removed, reprojected to
# local_crs, rows renumbered 0..n-1.
tessellation = (
    tessellation
    .sort_values(by='uID', ascending=True)
    .drop(columns=["geometryright", "uIDright", "new_uID"])
    .to_crs(local_crs)
    .reset_index(drop=True)
)

In [None]:
# Tear down Dask in dependency order: disconnect the client first, then stop
# the local cluster (scheduler and workers). Closing the cluster first leaves
# the client pointing at a scheduler that no longer exists.
client.close()
Daskcluster.close()

In [None]:
# Show the building with the highest uID. idxmax() returns an index *label*,
# so the row is looked up with .loc; .iloc would treat the label as a
# position, which is a different row once the frame has been sorted.
buildings.loc[buildings["uID"].idxmax()]

In [None]:
# Write the final tessellation.
tessellation_path = f"./out/{place}/tessellation.pq"
tessellation.to_parquet(tessellation_path)

In [None]:
# Write the final buildings.
buildings_path = f"./out/{place}/buildings.pq"
buildings.to_parquet(buildings_path)