In [1]:
import os
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

In [None]:
# Read the December 2019 local-authority-district boundary shapefile and keep
# only the district code, district name and geometry, under shorter names.
lad_shapefile = os.path.join(
    "gis",
    'Local_Authority_Districts_(December_2019)_Boundaries_UK_BFC/Local_Authority_Districts__December_2019__Boundaries_UK_BFC.shp',
)
ltlas_gdf = (
    gpd.GeoDataFrame.from_file(lad_shapefile)[['lad19cd', 'lad19nm', 'geometry']]
    .rename(columns={"lad19cd": "district_id", "lad19nm": "district_name"})
)

# Report how many distinct district names exist overall, and how many belong
# to rows whose district id starts with 'E'.
gis_district_names = ltlas_gdf['district_name'].unique()
gis_english_names = ltlas_gdf.loc[
    ltlas_gdf['district_id'].str.startswith('E'), 'district_name'
].unique()
print("# of districts in the official GIS data (Dec 2019): %s" % len(gis_district_names))
print("# of districts in England in the official GIS data (Dec 2019): %s" % len(gis_english_names))

In [None]:
# Give 'City of London' and 'Isles of Scilly' the id of another district so
# that the dissolve below merges each of them into that district's feature.
# NOTE(review): presumably this mirrors combined areas in the Covid data
# (the notebook later asserts names/ids against it) — confirm.
ltlas_gdf.loc[ltlas_gdf['district_name'] == 'City of London', 'district_id'] = 'E09000012'
ltlas_gdf.loc[ltlas_gdf['district_name'] == 'Isles of Scilly', 'district_id'] = 'E06000052'

# One feature per district_id; reset_index turns district_id back into a column.
ltlas_gdf = ltlas_gdf.dissolve(by='district_id').reset_index()

# Compute the centroids once and reuse them for both coordinate columns
# instead of recomputing them for x and again for y.
# NOTE(review): the values are in whatever CRS the layer uses; if that CRS is
# projected they are not degrees of longitude/latitude — confirm.
district_centroids = ltlas_gdf['geometry'].centroid
ltlas_gdf.loc[:, 'district_lon'] = district_centroids.x
ltlas_gdf.loc[:, 'district_lat'] = district_centroids.y

# The merged features need a name that covers both original districts.
ltlas_gdf.loc[ltlas_gdf['district_id'] == 'E09000012', 'district_name'] = 'Hackney and City of London'
ltlas_gdf.loc[ltlas_gdf['district_id'] == 'E06000052', 'district_name'] = 'Cornwall and Isles of Scilly'

# Persist the merged layer, then preview it as the cell output.
ltlas_gdf.to_file(os.path.join("gis", "lad19.geojson"), driver="GeoJSON", index=False)
ltlas_gdf.head()

In [None]:
# Load the Covid case table and report how many distinct areas it contains,
# overall (by name) and among rows whose area code starts with 'E' (by code).
cases = pd.read_csv(os.path.join("covid", 'cases.csv'))

covid_area_names = cases['areaName'].unique()
covid_english_codes = cases.loc[cases['areaCode'].str.startswith('E'), 'areaCode'].unique()

print("# of districts in the Covid data: %s" % len(covid_area_names))
print("# of districts in England in the Covid data: %s" % len(covid_english_codes))

In [5]:
# Every district name in the GIS layer must also occur in the Covid data.
# The failure message lists the offending names so a mismatch can be diagnosed.
gis_names_missing_from_covid = set(pd.unique(ltlas_gdf['district_name'])).difference(cases['areaName'].unique())
assert not gis_names_missing_from_covid, (
    "District names in the GIS data but not in the Covid data: %s"
    % sorted(map(str, gis_names_missing_from_covid))
)

In [6]:
# Every district id in the GIS layer must also occur among the Covid area codes.
# The failure message lists the offending ids so a mismatch can be diagnosed.
gis_ids_missing_from_covid = set(pd.unique(ltlas_gdf['district_id'])).difference(cases['areaCode'].unique())
assert not gis_ids_missing_from_covid, (
    "District ids in the GIS data but not in the Covid data: %s"
    % sorted(map(str, gis_ids_missing_from_covid))
)

In [None]:
# Reached only if the alignment assertions in the preceding cells passed.
print("GIS data are aligned to the Covid geodata.")

In [None]:
# Build a point layer from the stored centroid coordinates, carrying the
# district id and name along as attributes.
centroid_points = gpd.points_from_xy(ltlas_gdf['district_lon'], ltlas_gdf['district_lat'])
centroids = gpd.GeoDataFrame(
    ltlas_gdf[['district_id', 'district_name']],
    geometry=centroid_points,
)

# Draw the district polygons coloured by name, overlay the centroids as black
# dots on the same axes, and write the figure to disk.
fig, ax = plt.subplots(figsize=(20, 20))
ltlas_gdf.plot(column="district_name", ax=ax)
centroids.plot(ax=ax, marker='o', color='black', markersize=10)
plt.savefig(os.path.join("gis", "district_centroids.png"), dpi=300)