In [1]:
import geopandas as gpd
import osmnx as ox
from shapely.geometry import Polygon, MultiPolygon

# Load the Shapefile
shapefile_path = '../Lower layer Super Output Areas (December 2001) Boundaries EW BFC/LSOA_Dec_2001_EW_BFC_2022_-3090597439272179543/LSOA_2001_EW_BFC_V2.shp'
lsoa_gdf = gpd.read_file(shapefile_path)

# Ensure the GeoDataFrame has the correct coordinate reference system (CRS)
lsoa_gdf = lsoa_gdf.to_crs(epsg=4326)


In [2]:
lsoa_gdf.head()

Unnamed: 0,LSOA01CD,LSOA01NM,LSOA01NMW,GlobalID,geometry
0,E01000001,City of London 001A,City of London 001A,cc30fabd-068b-4308-8f6b-f7c3b5a70bd9,"POLYGON ((-0.09667 51.52027, -0.09666 51.52025..."
1,E01000002,City of London 001B,City of London 001B,14c8ee1b-d6f0-4afe-a05c-14a69530e38e,"POLYGON ((-0.08969 51.52069, -0.08973 51.52057..."
2,E01000003,City of London 001C,City of London 001C,c6b8af00-b5da-4582-8019-3ccc9c294e50,"POLYGON ((-0.09653 51.52295, -0.09647 51.52282..."
3,E01000004,City of London 001D,City of London 001D,7f890d5b-77d9-49b7-b355-605c03f3e2e5,"POLYGON ((-0.07891 51.52041, -0.07910 51.51971..."
4,E01000005,City of London 001E,City of London 001E,c0158609-c7de-4fed-92bd-611ed8284dce,"POLYGON ((-0.07571 51.51575, -0.07542 51.51555..."


In [3]:
import pandas as pd

participants = pd.read_csv('../LSOA_participants_unique.csv', index_col=None)
LSOA_use = list(participants['LSOA_code'].unique())
participants.head()

Unnamed: 0.1,Unnamed: 0,LSOA_code,id,area,LSOA11NM,TCITY15CD,TCITY15NM,FID
0,1,E01000001,3930876,London,City of London 001A,J01000055,London,1.0
1,2,E01000001,4650325,London,City of London 001A,J01000055,London,1.0
2,3,E01000001,3118216,London,City of London 001A,J01000055,London,1.0
3,4,E01000001,2126547,London,City of London 001A,J01000055,London,1.0
4,5,E01000001,5323908,London,City of London 001A,J01000055,London,1.0


In [4]:
LSOA_london = list(participants[participants['TCITY15NM']=="London"]["LSOA_code"].unique())
len(LSOA_london)

2936

In [5]:
london = lsoa_gdf[lsoa_gdf['LSOA01CD'].isin(LSOA_london)]

In [6]:
london.shape

(2936, 5)

In [7]:
london['area'] = london['geometry'].to_crs(epsg=3857).area
london = london[['LSOA01CD','LSOA01NM','geometry','area']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [8]:
london.head()

Unnamed: 0,LSOA01CD,LSOA01NM,geometry,area
0,E01000001,City of London 001A,"POLYGON ((-0.09667 51.52027, -0.09666 51.52025...",334981.0
1,E01000002,City of London 001B,"POLYGON ((-0.08969 51.52069, -0.08973 51.52057...",589198.6
2,E01000003,City of London 001C,"POLYGON ((-0.09653 51.52295, -0.09647 51.52282...",152351.3
3,E01000004,City of London 001D,"POLYGON ((-0.07891 51.52041, -0.07910 51.51971...",5908310.0
4,E01000005,City of London 001E,"POLYGON ((-0.07571 51.51575, -0.07542 51.51555...",488903.8


In [9]:
# Define the function to get historical OSM data from ohsome API
def get_historical_poi_num(geometry, date="2009-01-01", filter_='type:node'):
    base_url = "https://api.ohsome.org/v1/elements/geometry"
    
    data = {
        "bpolys": geometry,
        "time": date,
        "filter": filter_
    }
    
    response = requests.post(base_url, data=data)
    
    if response.status_code == 200:
        features = response.json()['features']
        num_features = len(features)
            
        return num_features
    else:
        print(f"Error fetching data: {response.status_code}")
        print(response.json())
        return None


def polygon_to_coord_list(polygon):
    if isinstance(polygon, Polygon):
        coords = list(polygon.exterior.coords)
        coord_list = ",".join([f"{x},{y}" for x, y in coords])
        return [coord_list]
    elif isinstance(polygon, MultiPolygon):
        coord_list = []
        for poly in polygon.geoms:
            coords = list(poly.exterior.coords)
            coord_list.append(",".join([f"{x},{y}" for x, y in coords]))
        return coord_list
    else:
        raise TypeError("Unsupported geometry type")

In [10]:
filters = [
# alcohol
"geometry:point and (amenity in (bar,pub) or shop in (alcohol,beverages,wine,convenience,department_store,general,mall,supermarket))",
# smoke
"geometry:point and shop in (kiosk,e-cigarette,tobacco,convenience,department_store,general,mall,supermarket)"
# medication (clinics, cosmetics only, perfume, medical supply ... excluded)
# "amenity=pharmacy",
# "shop=chemist", "shop=herbalist", "shop=nutrition_supplements",
]

In [11]:
london.shape

(2936, 4)

In [12]:
for y in ['2019','2014','2012','2010']:
    london_old = pd.read_csv('london_substance_'+y+'_old.csv')
    merged = london.merge(london_old[['LSOA01CD', 'alcohol', 'tobacco']], on='LSOA01CD', how='left')
    merged.to_csv('london_substance_'+y+'.csv')

In [15]:
var_names = ['alcohol','tobacco']

In [16]:
import requests
import re

from tqdm import tqdm
from shapely import wkt

for y in ['2019','2014','2012','2010']:
    london = pd.read_csv('london_substance_'+y+'.csv')
    london['geometry'] = london['geometry'].apply(wkt.loads)
    for i, row in tqdm(london.iterrows(), total=london.shape[0]):
        if row.isna().any() or (-1 in row.values):
            geometry_list = polygon_to_coord_list(row['geometry'])
            for j in range(len(filters)):
                if pd.isna(row[var_names[j]]) or (row[var_names[j]] == -1):
                    try:
                        total_num = 0
                        for geometry in geometry_list:
                            num = get_historical_poi_num(geometry, y+'-01-01', filters[j])
                            total_num += num
                        london.at[i, var_names[j]] = total_num
                    except Exception as e:
                        print(f"Error fetching POI data for LSOA {row['LSOA01CD']}: {e}")
                        london.at[i, var_names[j]] = -1

            # Save the updated DataFrame to the CSV file
            if i % 10 == 0:
                london.to_csv('london_substance_'+y+'.csv', index=False)
    london.to_csv('london_substance_'+y+'.csv', index=False)

100%|██████████| 2936/2936 [02:06<00:00, 23.22it/s]
100%|██████████| 2936/2936 [02:06<00:00, 23.25it/s]
100%|██████████| 2936/2936 [02:10<00:00, 22.47it/s]
100%|██████████| 2936/2936 [02:11<00:00, 22.35it/s]


In [16]:
stats = []
for y in ['2009','2010','2012','2014','2019']:
    london = pd.read_csv('london_substance_'+y+'.csv')
    year_stats = []
    for v in var_names:
        year_stats.append(london[v].sum())
    stats.append(year_stats)
stats = pd.DataFrame(stats)
stats.columns = var_names
    

In [17]:
stats.index = ['2009','2010','2012','2014','2019']

In [18]:
stats

# results should delete:
# amenity_biergarten, shop_brewing_supplies, shop_cannabis

Unnamed: 0,alcohol,tobacco
2009,1788.0,281.0
2010,2818.0,627.0
2012,3362.0,958.0
2014,3248.0,1178.0
2019,3599.0,1854.0


In [18]:
# backup function

# import json
# from shapely.geometry import Polygon, MultiPolygon

# def polygon_to_geojson_feature_collection(polygon, region_id='Region'):
#     features = []

#     def format_coordinates(coords):
#         return [[list(coord) for coord in coords]]

#     if isinstance(polygon, Polygon):
#         coordinates = format_coordinates(polygon.exterior.coords)
#         feature = {
#             "type": "Feature",
#             "properties": {"id": f"{region_id} 1"},
#             "geometry": {
#                 "type": "Polygon",
#                 "coordinates": coordinates
#             }
#         }
#         features.append(feature)

#     elif isinstance(polygon, MultiPolygon):
#         for idx, poly in enumerate(polygon.geoms):
#             coordinates = format_coordinates(poly.exterior.coords)
#             feature = {
#                 "type": "Feature",
#                 "properties": {"id": f"{region_id} {idx + 1}"},
#                 "geometry": {
#                     "type": "Polygon",
#                     "coordinates": coordinates
#                 }
#             }
#             features.append(feature)
#     else:
#         raise TypeError("Unsupported geometry type")

#     feature_collection = {
#         "type": "FeatureCollection",
#         "features": features
#     }
#     return feature_collection


{'type': 'FeatureCollection', 'features': [{'type': 'Feature', 'properties': {'id': 'Region 1'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[-1.5704498471484643, 54.923952175382496], [-1.5704497077420054, 54.92395207963483], [-1.5701740091293421, 54.92406343507078], [-1.570067292381269, 54.92410507449073], [-1.5699574872283233, 54.92414791875118], [-1.5698547810752077, 54.92418799326264], [-1.569747889753728, 54.924229700081426], [-1.5697289359590474, 54.92423709530048], [-1.5696262192179127, 54.92428016742882], [-1.569531771414466, 54.92431977124733], [-1.5694002422589792, 54.924374924008276], [-1.5692680481964985, 54.92443035554523], [-1.5690005287472784, 54.92455891469998], [-1.568879389802446, 54.9246171303187], [-1.5688792645069296, 54.92461702832883], [-1.5688746180357085, 54.92461321871992], [-1.5688275856568914, 54.924574662304586], [-1.5682360406280935, 54.92408970825292], [-1.5679862770907975, 54.92390418146634], [-1.567864526165285, 54.92381374343603], [-1.567676365597