In [1]:
import numpy as np
import pandas as pd
import geopandas
import libpysal
import scipy

from dask.distributed import Client, LocalCluster, as_completed

In [2]:
# Number of dask worker processes; also reused later as the number of
# tasks kept in flight at once.
workers = 8

# One thread per worker process, so each task runs in its own process.
cluster = LocalCluster(n_workers=workers, threads_per_worker=1)
client = Client(cluster)
client

0,1
Client  Scheduler: tcp://127.0.0.1:39469  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 8  Cores: 8  Memory: 84.28 GB


In [3]:
# Lookup table of cells that have to be borrowed from other chunks;
# measure() reads it as `cross_chunk.loc[chunk_id].indices`.
cross_chunk_path = '../../urbangrammar_samba/spatial_signatures/cross-chunk_indices.pq'
cross_chunk = pd.read_parquet(cross_chunk_path)
# chunks = geopandas.read_parquet('../../urbangrammar_samba/spatial_signatures/local_auth_chunks.pq')

# Database settings below are only needed by the (currently disabled)
# street-network part of measure().
# user = os.environ.get('DB_USER')
# pwd = os.environ.get('DB_PWD')
# host = os.environ.get('DB_HOST')
# port = os.environ.get('DB_PORT')

# db_connection_url = f"postgres+psycopg2://{user}:{pwd}@{host}:{port}/built_env"

In [4]:
def measure(chunk_id):
    """Measure contextual morphometric characters for one chunk of cells.

    The cells of ``chunk_id`` are read from parquet and extended with the
    neighbouring cells from other chunks listed in the module-level
    ``cross_chunk`` table. Characters are computed on the combined frame using
    the two spatial weights matrices stored for the chunk (``w_{chunk_id}.npz``
    and ``w3_{chunk_id}.npz``). Only the chunk's own cells (``keep == True``)
    are written back, to the same parquet file they were read from.

    Columns added: ``mtbAli``, ``mtbNDi``, ``mtcWNe``, ``mdcAre``, ``ltcWRE``,
    ``ltbIBD``, ``ltcRea`` and ``ltcAre``.

    NOTE(review): the input file is overwritten in place while other chunks
    read the same file to borrow neighbouring cells; when chunks are processed
    in parallel a neighbour may be read mid-write - confirm this is safe.

    Parameters
    ----------
    chunk_id
        Chunk identifier used in the file names and as the key into
        ``cross_chunk`` (this notebook passes integers).

    Returns
    -------
    str
        Status message naming the processed chunk.
    """
    # load cells of a chunk
    cells = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq")
    cells['keep'] = True

    # add neighbouring cells from other chunks
    cross_chunk_cells = []

    # Series.items() is used instead of iteritems(), which pandas 2.0 removed
    for chunk, inds in cross_chunk.loc[chunk_id].indices.items():
        add_cells = geopandas.read_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk}.pq").iloc[inds]
        add_cells['keep'] = False
        cross_chunk_cells.append(add_cells)

    # A single concat replaces DataFrame.append (removed in pandas 2.0) and
    # keeps the same row order: own cells first, then borrowed cells. It also
    # works when there are no borrowed cells at all.
    df = pd.concat([cells] + cross_chunk_cells, ignore_index=True)

    # read W
    w = libpysal.weights.WSP(scipy.sparse.load_npz(f"../../urbangrammar_samba/spatial_signatures/weights/w_{chunk_id}.npz")).to_W()

    # alignment: mean absolute difference between the orientation of a cell
    # and the orientations of its neighbours in w
    def alignment(x, orientation='stbOri'):
        orientations = df[orientation].iloc[w.neighbors[x]]
        return abs(orientations - df[orientation].iloc[x]).mean()

    df['mtbAli'] = [alignment(x) for x in range(len(df))]

    # mean neighbour distance
    def neighbor_distance(x):
        geom = df.buildings.iloc[x]
        # cells without a building geometry get no value
        if geom is None:
            return np.nan
        return df.buildings.iloc[w.neighbors[x]].distance(geom).mean()

    df['mtbNDi'] = [neighbor_distance(x) for x in range(len(df))]

    # weighted neighbours: number of neighbours divided by tessellation perimeter
    df['mtcWNe'] = pd.Series([w.cardinalities[x] for x in range(len(df))], index=df.index) / df.tessellation.length

    # area covered by neighbours (the cell itself included)
    def area_covered(x, area='sdcAre'):
        neighbours = [x, *w.neighbors[x]]
        return df[area].iloc[neighbours].sum()

    df['mdcAre'] = [area_covered(x) for x in range(len(df))]

    # read W3 here
    w3 = libpysal.weights.WSP(scipy.sparse.load_npz(f"../../urbangrammar_samba/spatial_signatures/weights/w3_{chunk_id}.npz")).to_W()

    # weighted reached enclosures: unique enclosures within the w3
    # neighbourhood (cell included) divided by the summed area
    def weighted_reached_enclosures(x, area='sdcAre', enclosure_id='enclosureID'):
        neighbours = [x, *w3.neighbors[x]]
        vicinity = df[[area, enclosure_id]].iloc[neighbours]
        return vicinity[enclosure_id].unique().shape[0] / vicinity[area].sum()

    df['ltcWRE'] = [weighted_reached_enclosures(x) for x in range(len(df))]

    # mean interbuilding distance - it takes ages
    # define adjacency list from libpysal
    adj_list = w.to_adjlist(remove_symmetric=True)
    # both sides are re-indexed 0..n-1 so distance() pairs rows positionally
    adj_list["distance"] = (
        df.buildings.iloc[adj_list.focal]
        .reset_index(drop=True)
        .distance(df.buildings.iloc[adj_list.neighbor].reset_index(drop=True))
    )
    adj_list = adj_list.set_index(['focal', 'neighbor'])

    def mean_interbuilding_distance(x):
        neighbours = [x, *w3.neighbors[x]]
        # mean distance over the w-adjacent pairs whose both ends fall
        # within the w3 neighbourhood of x
        return adj_list.distance.loc[neighbours, neighbours].mean()

    df['ltbIBD'] = [mean_interbuilding_distance(x) for x in range(len(df))]

    # Reached neighbors and area on 3 topological steps on tessellation
    df['ltcRea'] = [w3.cardinalities[i] for i in range(len(df))]
    df['ltcAre'] = [df.sdcAre.iloc[w3.neighbors[i]].sum() for i in range(len(df))]

    # persist only this chunk's own cells; borrowed neighbours are dropped
    df[df['keep']].drop(columns=['keep']).to_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/cells/cells_{chunk_id}.pq")

    # --- street-network measurements, currently disabled -------------------
#     chunk_area = chunks.geometry.iloc[chunk_id].buffer(5000)
#     engine = create_engine(db_connection_url)
#     sql = f"SELECT * FROM openroads_200803_topological WHERE ST_Intersects(geometry, ST_GeomFromText('{chunk_area.wkt}',27700))"
#     streets = geopandas.read_postgis(sql, engine, geom_col='geometry')
    
#     sp = street_profile(streets, blg)
#     streets['sdsSPW'] = sp[0]
#     streets['sdsSWD'] = sp[1]
#     streets['sdsSPO'] = sp[2]
    
#     streets['sdsLen'] = streets.length
#     streets['sssLin'] = momepy.Linearity(streets).series
    
#     G = momepy.gdf_to_nx(streets)
#     G = momepy.node_degree(G)
#     G = momepy.subgraph(
#         G,
#         radius=5,
#         meshedness=True,
#         cds_length=False,
#         mode="sum",
#         degree="degree",
#         length="mm_len",
#         mean_node_degree=False,
#         proportion={0: True, 3: True, 4: True},
#         cyclomatic=False,
#         edge_node_ratio=False,
#         gamma=False,
#         local_closeness=True,
#         closeness_weight="mm_len",
#         verbose=False
#     )
#     G = momepy.cds_length(G, radius=3, name="ldsCDL", verbose=False)
#     G = momepy.clustering(G, name="xcnSCl")
#     G = momepy.mean_node_dist(G, name="mtdMDi", verbose=False)
    
#     nodes, edges, sw = momepy.nx_to_gdf(G, spatial_weights=True)
    
#     edges_w3 = momepy.sw_high(k=3, gdf=edges)
    
#     edges["ldsMSL"] = momepy.SegmentsLength(edges, spatial_weights=edges_w3, mean=True, verbose=False).series
    
#     nodes_w5 = momepy.sw_high(k=5, weights=sw)
    
#     nodes["lddNDe"] = momepy.NodeDensity(nodes, edges, nodes_w5, verbose=False).series
    
#     nodes["linWID"] = momepy.NodeDensity(nodes, edges, nodes_w5, weighted=True, node_degree="degree", verbose=False).series
    
#     edges.to_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/edges/edges_{chunk_id}.pq")
#     nodes.to_parquet(f"../../urbangrammar_samba/spatial_signatures/morphometrics/nodes/nodes_{chunk_id}.pq")


    return f"Chunk {chunk_id} processed sucessfully."

In [5]:
# Chunks still to be processed. Only `workers` tasks are kept in flight at a
# time; a new one is submitted each time a running one finishes.
inputs = iter(range(28, 103))

# Seed the pool with the first batch. zip() stops as soon as range(workers)
# is exhausted, so this also works when fewer chunks remain than workers
# (calling next(inputs) once per worker would raise StopIteration there).
futures = [client.submit(measure, chunk_id) for _, chunk_id in zip(range(workers), inputs)]
ac = as_completed(futures)
for finished_future in ac:
    # top the pool up with the next chunk, if any are left, before reporting
    next_chunk = next(inputs, None)
    if next_chunk is not None:
        ac.add(client.submit(measure, next_chunk))
    # .result() re-raises any exception raised inside measure()
    print(finished_future.result())

Chunk 28 processed sucessfully.
Chunk 34 processed sucessfully.
Chunk 29 processed sucessfully.
Chunk 31 processed sucessfully.
Chunk 33 processed sucessfully.
Chunk 35 processed sucessfully.
Chunk 30 processed sucessfully.
Chunk 36 processed sucessfully.
Chunk 39 processed sucessfully.
Chunk 37 processed sucessfully.
Chunk 41 processed sucessfully.
Chunk 43 processed sucessfully.
Chunk 32 processed sucessfully.
Chunk 42 processed sucessfully.
Chunk 44 processed sucessfully.
Chunk 47 processed sucessfully.
Chunk 45 processed sucessfully.
Chunk 46 processed sucessfully.
Chunk 48 processed sucessfully.
Chunk 38 processed sucessfully.
Chunk 49 processed sucessfully.
Chunk 51 processed sucessfully.
Chunk 50 processed sucessfully.
Chunk 52 processed sucessfully.
Chunk 40 processed sucessfully.
Chunk 55 processed sucessfully.
Chunk 53 processed sucessfully.
Chunk 54 processed sucessfully.
Chunk 56 processed sucessfully.
Chunk 57 processed sucessfully.
Chunk 58 processed sucessfully.
Chunk 60

In [6]:
import tracemalloc

In [7]:
%%time
tracemalloc.start()

ret = measure(26)

current, peak = tracemalloc.get_traced_memory()
print(f"Current memory usage is {current / 10**6}MB; Peak was {peak / 10**6}MB")
tracemalloc.stop()

 There are 4 disconnected components.
 There are 2 islands with ids: 157186, 164512.
 There are 4 disconnected components.
 There are 2 islands with ids: 157186, 164512.
  "{} islands in this weights matrix. Conversion to an "

This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



Current memory usage is 157.117712MB; Peak was 1419.861516MB
CPU times: user 4h 2min 5s, sys: 6min 51s, total: 4h 8min 56s
Wall time: 3h 47min 37s


In [8]:
# Standalone cleanup cell: stop tracemalloc tracing.
tracemalloc.stop()

Current memory usage is 56.681588MB; Peak was 1160.209484MB
CPU times: user 11min 40s, sys: 53.6 s, total: 12min 34s
Wall time: 11min 15s
    
    Excluding IBD
    

Current memory usage is 38.199543MB; Peak was 1145.271618MB
CPU times: user 1h 41min 3s, sys: 7.36 s, total: 1h 41min 11s
Wall time: 1h 41min 16s

    Including IBD

In [8]:
# Close the dask client created at the top of the notebook.
client.close()