In [122]:
'''


A script to extract useful information from topoJSON files. 



Author: D. Ellis 2021
'''
None
# !conda install --channel conda-forge geopandas --yes
# !npm i topojson-client
# ! pip install js2py


In [123]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

filename = 'data/lad_boundaries_2020.json'
lsoadata = 'data/lsoa2011_lad2020.csv'# https://bothness.github.io/census-atlas/data/lsoa2011_lad2020.csv

In [124]:
gdf = gpd.GeoDataFrame.from_file(filename) # lad boundaries 2020
gdf.drop('id', axis=1,inplace=True)
gdf.head()


Unnamed: 0,AREACD,AREANM,geometry
0,E06000060,Buckinghamshire,"POLYGON ((-0.84276 51.54472, -0.84704 51.54522..."
1,E06000001,Hartlepool,"POLYGON ((-1.24097 54.72322, -1.22496 54.71480..."
2,E06000002,Middlesbrough,"MULTIPOLYGON (((-1.19432 54.57436, -1.19059 54..."
3,E06000003,Redcar and Cleveland,"POLYGON ((-0.79431 54.55844, -0.79555 54.55489..."
4,E06000004,Stockton-on-Tees,"POLYGON ((-1.25063 54.62535, -1.24567 54.62601..."


In [125]:
''' Bounding Boxes '''
gdf = gdf.merge(gdf.geometry.bounds,left_index=True,right_index=True)

gdf.head()

Unnamed: 0,AREACD,AREANM,geometry,minx,miny,maxx,maxy
0,E06000060,Buckinghamshire,"POLYGON ((-0.84276 51.54472, -0.84704 51.54522...",-1.140764,51.485807,-0.476706,52.081553
1,E06000001,Hartlepool,"POLYGON ((-1.24097 54.72322, -1.22496 54.71480...",-1.380934,54.621971,-1.164091,54.727176
2,E06000002,Middlesbrough,"MULTIPOLYGON (((-1.19432 54.57436, -1.19059 54...",-1.285556,54.501089,-1.137313,54.590699
3,E06000003,Redcar and Cleveland,"POLYGON ((-0.79431 54.55844, -0.79555 54.55489...",-1.200944,54.487805,-0.794311,54.644085
4,E06000004,Stockton-on-Tees,"POLYGON ((-1.25063 54.62535, -1.24567 54.62601...",-1.452295,54.464206,-1.158707,54.64524


In [126]:
''' Centroid '''
centroids = gdf.centroid
gdf['lat'] = centroids.y
gdf['lon'] = centroids.x

gdf.head()

Unnamed: 0,AREACD,AREANM,geometry,minx,miny,maxx,maxy,lat,lon
0,E06000060,Buckinghamshire,"POLYGON ((-0.84276 51.54472, -0.84704 51.54522...",-1.140764,51.485807,-0.476706,52.081553,51.785322,-0.812105
1,E06000001,Hartlepool,"POLYGON ((-1.24097 54.72322, -1.22496 54.71480...",-1.380934,54.621971,-1.164091,54.727176,54.669478,-1.259154
2,E06000002,Middlesbrough,"MULTIPOLYGON (((-1.19432 54.57436, -1.19059 54...",-1.285556,54.501089,-1.137313,54.590699,54.541991,-1.222434
3,E06000003,Redcar and Cleveland,"POLYGON ((-0.79431 54.55844, -0.79555 54.55489...",-1.200944,54.487805,-0.794311,54.644085,54.551701,-1.02091
4,E06000004,Stockton-on-Tees,"POLYGON ((-1.25063 54.62535, -1.24567 54.62601...",-1.452295,54.464206,-1.158707,54.64524,54.561628,-1.332462


In [127]:
'''
Lets find all neigbouring areas and overlaps
'''

'\nLets find all neigbouring areas and overlaps\n'

In [128]:
# expandMultiPoly
df = gdf.explode(index_parts=True)

df.geometry = df.buffer(0)

names = gdf.AREANM.values
output = dict(zip(names,[[]]*len(names)))

for index, row in df.iterrows():  
    
    
    neighbors = np.array(df[df.geometry.touches(row['geometry'])].AREANM)
    overlap = np.array(df[df.geometry.overlaps(row['geometry'])].AREANM)
    neighbors = set(np.union1d(neighbors, overlap))
 
    output[row.AREANM]= list((set(output[row.AREANM]) | neighbors )- set([row.AREANM]))



gdf['neighbors']  = [output[x] for x in gdf.AREANM]
gdf.head()


TypeError: explode() got an unexpected keyword argument 'index_parts'

In [129]:
# import js2py, json

# JSONdata = json.load(open(filename,'r'))
# TJC = js2py.require('topojson-client')
# features = TJC.feature(JSONdata,'LA2020EW')['features']




In [130]:
# rollup

df = pd.read_csv(lsoadata)

gdf['children'] = [','.join(df[df.parent == i].code.values) for i in gdf.AREACD]
gdf.head()

Unnamed: 0,AREACD,AREANM,geometry,minx,miny,maxx,maxy,lat,lon,children
0,E06000060,Buckinghamshire,"POLYGON ((-0.84276 51.54472, -0.84704 51.54522...",-1.140764,51.485807,-0.476706,52.081553,51.785322,-0.812105,"E01017627,E01017628,E01017629,E01017630,E01017..."
1,E06000001,Hartlepool,"POLYGON ((-1.24097 54.72322, -1.22496 54.71480...",-1.380934,54.621971,-1.164091,54.727176,54.669478,-1.259154,"E01011949,E01011950,E01011951,E01011952,E01011..."
2,E06000002,Middlesbrough,"MULTIPOLYGON (((-1.19432 54.57436, -1.19059 54...",-1.285556,54.501089,-1.137313,54.590699,54.541991,-1.222434,"E01012007,E01012008,E01012009,E01012010,E01012..."
3,E06000003,Redcar and Cleveland,"POLYGON ((-0.79431 54.55844, -0.79555 54.55489...",-1.200944,54.487805,-0.794311,54.644085,54.551701,-1.02091,"E01012095,E01012096,E01012097,E01012098,E01012..."
4,E06000004,Stockton-on-Tees,"POLYGON ((-1.25063 54.62535, -1.24567 54.62601...",-1.452295,54.464206,-1.158707,54.64524,54.561628,-1.332462,"E01012187,E01012188,E01012189,E01012190,E01012..."


In [131]:
''' Remove unwanted columns and write to file '''
unwanted = ['geometry']

for i in unwanted:
    try:gdf.drop(i,axis=1,inplace=True)
    except:None
        
        
    
gdf.round(5).to_csv('ladb_20.csv',index=False)

In [132]:
'Fi'


'Fi'

In [133]:
gdf.round(5).sort_values('children')

Unnamed: 0,AREACD,AREANM,minx,miny,maxx,maxy,lat,lon,children
281,E09000001,City of London,-0.11383,51.50784,-0.07283,51.52145,51.51476,-0.09194,"E01000001,E01000002,E01000003,E01000005,E01032..."
282,E09000002,Barking and Dagenham,0.06699,51.51221,0.19025,51.59893,51.54541,0.13371,"E01000006,E01000007,E01000008,E01000009,E01000..."
283,E09000003,Barnet,-0.30445,51.55520,-0.12915,51.67014,51.61598,-0.20984,"E01000115,E01000116,E01000117,E01000118,E01000..."
284,E09000004,Bexley,0.07541,51.40849,0.21772,51.51304,51.45894,0.14070,"E01000325,E01000326,E01000327,E01000328,E01000..."
285,E09000005,Brent,-0.33564,51.52789,-0.19154,51.60042,51.55859,-0.26779,"E01000471,E01000472,E01000473,E01000474,E01000..."
...,...,...,...,...,...,...,...,...,...
330,W06000019,Blaenau Gwent,-3.31017,51.68128,-3.10602,51.82543,51.76678,-3.19375,"W01001435,W01001436,W01001437,W01001438,W01001..."
331,W06000020,Torfaen,-3.14384,51.60817,-2.95888,51.79622,51.69869,-3.05360,"W01001482,W01001483,W01001484,W01001485,W01001..."
332,W06000021,Monmouthshire,-3.15737,51.55405,-2.65011,51.98311,51.76610,-2.87243,"W01001542,W01001543,W01001545,W01001546,W01001..."
333,W06000022,Newport,-3.12410,51.50190,-2.80291,51.64943,51.57931,-2.96581,"W01001600,W01001601,W01001602,W01001603,W01001..."


## Lets do the same for the sub boundaries. 

In [137]:
# minor = pd.DataFrame(df['code name'.split()].values, columns = ['AREACD','AREANM'])
# minor.head()

# json.dump(dict(zip(df.code,df.name)),open('lsoa11_20.json','w'))
df.set_index('code').T.to_json()

'{"E01000001":{"name":"City of London 001A","parent":"E09000001"},"E01000002":{"name":"City of London 001B","parent":"E09000001"},"E01000003":{"name":"City of London 001C","parent":"E09000001"},"E01000005":{"name":"City of London 001E","parent":"E09000001"},"E01000006":{"name":"Barking and Dagenham 016A","parent":"E09000002"},"E01000007":{"name":"Barking and Dagenham 015A","parent":"E09000002"},"E01000008":{"name":"Barking and Dagenham 015B","parent":"E09000002"},"E01000009":{"name":"Barking and Dagenham 016B","parent":"E09000002"},"E01000010":{"name":"Barking and Dagenham 015C","parent":"E09000002"},"E01000011":{"name":"Barking and Dagenham 016C","parent":"E09000002"},"E01000012":{"name":"Barking and Dagenham 015D","parent":"E09000002"},"E01000013":{"name":"Barking and Dagenham 013A","parent":"E09000002"},"E01000014":{"name":"Barking and Dagenham 013B","parent":"E09000002"},"E01000015":{"name":"Barking and Dagenham 009A","parent":"E09000002"},"E01000016":{"name":"Barking and Dagenham 