In [1]:
from dask.distributed import Client, LocalCluster
import dask_geopandas as dask_geopandas
import dask
from dask.distributed import as_completed
import dask.dataframe as dd
import dask.bag as db
import geopandas
import pygeos
import numpy as np
import pandas as pd
import momepy

import os

from sqlalchemy import create_engine
from libpysal.weights import Queen

from momepy_utils import street_profile



In [2]:
# Number of local worker processes; also sizes the first batch of submitted tasks.
workers = 2

# One single-threaded process per worker.
client = Client(
    LocalCluster(
        n_workers=workers,
        threads_per_worker=1,
    )
)
client

0,1
Client  Scheduler: tcp://127.0.0.1:43981  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 2  Cores: 2  Memory: 84.28 GB


In [3]:
# Cross-chunk index table; `measure` looks it up with `cross_chunk.loc[chunk_id]`.
cross_chunk = pd.read_parquet(
    '../../urbangrammar_samba/spatial_signatures/cross-chunk_indices.pq'
)
# chunks = geopandas.read_parquet('../../urbangrammar_samba/spatial_signatures/local_auth_chunks.pq')

In [4]:
# Database credentials come from the environment so they never live in the notebook.
# `os.environ.get` yields None for an unset variable, which ends up in the URL as
# the text "None" -- NOTE(review): set all four variables before using the URL.
user = os.environ.get('DB_USER')
pwd = os.environ.get('DB_PWD')
host = os.environ.get('DB_HOST')
port = os.environ.get('DB_PORT')

# Use the canonical "postgresql" dialect name: the legacy "postgres" alias is
# rejected by SQLAlchemy 1.4 and later, while "postgresql" works on every version.
db_connection_url = f"postgresql+psycopg2://{user}:{pwd}@{host}:{port}/built_env"

In [4]:
def measure(chunk_id):
    """Measure contextual morphometric characters for one chunk of cells.

    Reads the buildings, tessellation and existing cell measurements of
    ``chunk_id``, extends them with the rows of other chunks listed for this
    chunk in the module-level ``cross_chunk`` table, builds Queen contiguity
    weights on the tessellation and derives the columns ``mtbAli``,
    ``mtbNDi``, ``mtcWNe``, ``mdcAre`` and ``ltcWRE``. Rows whose ``uID``
    belongs to the original chunk are then written back to
    ``morphometrics/cells/cells_{chunk_id}.pq`` (without geometry columns).

    Parameters
    ----------
    chunk_id
        Chunk identifier used in the file names and as the key into
        ``cross_chunk``.

    Returns
    -------
    str
        A confirmation message naming the processed chunk.

    Notes
    -----
    NOTE(review): the function reads ``cells_{chunk}.pq`` of other chunks and
    overwrites ``cells_{chunk_id}.pq``. When several chunks are processed in
    parallel, one task may read a file another task is rewriting -- confirm
    this is acceptable for the data at hand.
    """
    base = "../../urbangrammar_samba/spatial_signatures"

    blg = geopandas.read_parquet(f"{base}/buildings/blg_{chunk_id}.pq")
    tess = geopandas.read_parquet(f"{base}/tessellation/tess_{chunk_id}.pq")
    # Remember which cells belong to this chunk; only those are saved at the end.
    orig_ids = tess.uID.copy()

    # expand across the chunk boundary
    additional_tess = []
    additional_blg = []
    add_tess_data = []
    # `Series.items` replaces `Series.iteritems`, which pandas 2.0 removed.
    for chunk, inds in cross_chunk.loc[chunk_id].indices.items():
        add_tess = geopandas.read_parquet(f"{base}/tessellation/tess_{chunk}.pq").iloc[inds]
        additional_tess.append(add_tess)
        add_blg = geopandas.read_parquet(f"{base}/buildings/blg_{chunk}.pq")
        additional_blg.append(add_blg[add_blg.uID.isin(add_tess.uID)])
        tess_data = pd.read_parquet(f"{base}/morphometrics/cells/cells_{chunk}.pq")
        add_tess_data.append(tess_data[tess_data.uID.isin(add_tess.uID)])

    # The chunk's own data goes last, after the rows borrowed from other chunks.
    add_tess_data.append(pd.read_parquet(f"{base}/morphometrics/cells/cells_{chunk_id}.pq"))
    additional_tess.append(tess)
    tess = pd.concat(additional_tess)
    additional_blg.append(blg)
    blg = pd.concat(additional_blg)

    # Distinct geometry column names let both geometries coexist after the merge.
    blg = blg.rename_geometry('buildings')
    tess = tess.rename_geometry('tessellation')

    df = tess.merge(blg, on='uID', how='left')
    df = df.merge(pd.concat(add_tess_data).drop(columns='enclosureID'), on='uID', how='left')

    # make 3d geometry 2d
    coords = pygeos.get_coordinates(df.tessellation.values.data)
    counts = pygeos.get_num_coordinates(df.tessellation.values.data)
    df['tessellation'] = geopandas.GeoSeries([pygeos.polygons(c) for c in np.split(coords, np.cumsum(counts)[:-1])], crs=df.tessellation.crs)

    w = Queen.from_dataframe(df, geom_col='tessellation')

    # Positional id of every row, used to index `w` and `.iloc` in the helpers below.
    # Always accessed as df['ix']: attribute access `df.ix` collides with the
    # `.ix` indexer that DataFrames had before pandas 1.0.
    df['ix'] = range(len(df))

    def alignment(x, orientation):
        """Mean absolute difference of `orientation` between row x and its neighbours."""
        orientations = df[orientation].iloc[w.neighbors[x]]
        return abs(orientations - df[orientation].iloc[x]).mean()

    df['mtbAli'] = df['ix'].apply(alignment, args=('stbOri',))

    def neighbor_distance(x):
        """Mean distance from the building of row x to its neighbours' buildings."""
        geom = df.buildings.iloc[x]
        # Cells without a building (left merge) have no distance to measure.
        if geom is None:
            return np.nan
        return df.buildings.iloc[w.neighbors[x]].distance(geom).mean()

    df['mtbNDi'] = df['ix'].apply(neighbor_distance)

    # Number of neighbours divided by the cell perimeter.
    cardinalities = w.cardinalities
    df['mtcWNe'] = df['ix'].map(cardinalities) / df.tessellation.length

    def area_covered(x, area):
        """Sum of `area` over row x and its neighbours."""
        neighbours = [x]
        neighbours += w.neighbors[x]

        return df[area].iloc[neighbours].sum()

    df['mdcAre'] = df['ix'].apply(area_covered, args=('sdcAre',))

    w3 = momepy.sw_high(k=3, weights=w)

    # define adjacency list from libpysal
#     adj_list = w.to_adjlist(remove_symmetric=True)
#     adj_list["distance"] = (
#         df.buildings.iloc[adj_list.focal]
#         .reset_index(drop=True)
#         .distance(df.buildings.iloc[adj_list.neighbor].reset_index(drop=True))
#     )
#     adj_list = adj_list.set_index(['focal', 'neighbor'])


#     def mean_interbuilding_distance(x):
#         neighbours = [x]
#         neighbours += w3.neighbors[x]
#         return adj_list.distance.loc[neighbours, neighbours].mean()


#     df['ltbIBD'] = df.ix.apply(mean_interbuilding_distance)

    def weighted_reached_enclosures(x, area, enclosure_id):
        """Count of distinct enclosures within `w3` of row x, divided by the summed area."""
        neighbours = [x]
        neighbours += w3.neighbors[x]

        vicinity = df[[area, enclosure_id]].iloc[neighbours]

        return vicinity[enclosure_id].unique().shape[0] / vicinity[area].sum()

    df['ltcWRE'] = df['ix'].apply(weighted_reached_enclosures, args=('sdcAre', 'enclosureID'))

    # Save only the cells of the original chunk, without the geometry columns.
    df[df.uID.isin(orig_ids)].drop(columns=['buildings', 'tessellation']).to_parquet(f"{base}/morphometrics/cells/cells_{chunk_id}.pq")

    # Street-network measurements below are currently disabled.
#     chunk_area = chunks.geometry.iloc[chunk_id].buffer(5000)
#     engine = create_engine(db_connection_url)
#     sql = f"SELECT * FROM openroads_200803_topological WHERE ST_Intersects(geometry, ST_GeomFromText('{chunk_area.wkt}',27700))"
#     streets = geopandas.read_postgis(sql, engine, geom_col='geometry')

#     sp = street_profile(streets, blg)
#     streets['sdsSPW'] = sp[0]
#     streets['sdsSWD'] = sp[1]
#     streets['sdsSPO'] = sp[2]

#     streets['sdsLen'] = streets.length
#     streets['sssLin'] = momepy.Linearity(streets).series

#     G = momepy.gdf_to_nx(streets)
#     G = momepy.node_degree(G)
#     G = momepy.subgraph(
#         G,
#         radius=5,
#         meshedness=True,
#         cds_length=False,
#         mode="sum",
#         degree="degree",
#         length="mm_len",
#         mean_node_degree=False,
#         proportion={0: True, 3: True, 4: True},
#         cyclomatic=False,
#         edge_node_ratio=False,
#         gamma=False,
#         local_closeness=True,
#         closeness_weight="mm_len",
#         verbose=False
#     )
#     G = momepy.cds_length(G, radius=3, name="ldsCDL", verbose=False)
#     G = momepy.clustering(G, name="xcnSCl")
#     G = momepy.mean_node_dist(G, name="mtdMDi", verbose=False)

#     nodes, edges, sw = momepy.nx_to_gdf(G, spatial_weights=True)

#     edges_w3 = momepy.sw_high(k=3, gdf=edges)

#     edges["ldsMSL"] = momepy.SegmentsLength(edges, spatial_weights=edges_w3, mean=True, verbose=False).series

#     nodes_w5 = momepy.sw_high(k=5, weights=sw)

#     nodes["lddNDe"] = momepy.NodeDensity(nodes, edges, nodes_w5, verbose=False).series

#     nodes["linWID"] = momepy.NodeDensity(nodes, edges, nodes_w5, weighted=True, node_degree="degree", verbose=False).series

#     edges.to_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/edges/edges_{chunk_id}.pq")
#     nodes.to_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/nodes/nodes_{chunk_id}.pq")
    return f"Chunk {chunk_id} processed sucessfully."

In [5]:
# Keep `workers` tasks in flight: seed one task per worker, then submit the
# next chunk each time a running one finishes.
inputs = iter(range(103))
futures = [client.submit(measure, next(inputs)) for _ in range(workers)]
ac = as_completed(futures)
for finished_future in ac:
    # Queue the next chunk (if any remain) before reporting the finished one.
    upcoming = next(inputs, None)
    if upcoming is not None:
        ac.add(client.submit(measure, upcoming))
    print(finished_future.result())



Chunk 1 processed sucessfully.




KilledWorker: ('measure-4e6d87048769328b885c402c338c5262', <Worker 'tcp://127.0.0.1:44995', name: 0, memory: 0, processing: 1>)

In [10]:
# Close the client created in the setup cell.
client.close()

In [8]:
# NOTE(review): this cell sits after `client.close()` in the file, but its
# execution count (In [8] vs In [10]) shows it was run before the close;
# on a top-to-bottom run it would act on an already closed client -- confirm
# whether this cell is still needed.
client.restart()

0,1
Client  Scheduler: tcp://127.0.0.1:35867  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 3  Cores: 3  Memory: 84.28 GB


In [5]:
%%time
# Process chunk 0 directly in the notebook kernel and time the call.
measure(0)

 There are 3 disconnected components.

This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



CPU times: user 9min 45s, sys: 38.5 s, total: 10min 23s
Wall time: 9min 23s


'Chunk 0 processed sucessfully.'

In [None]:
# Process every chunk one after another in the notebook kernel.
for chunk_id in range(103):
    measure(chunk_id)