In [1]:
# Imports
import geopandas as gpd
from src.data_retrieval import init_osm
from src.preprocessing import load_roads, load_railways, load_water_polygons, get_local_crs, reproject_all, build_water_edges, prepare_water_geometries
from src.prepare_pois import load_pois_with_green, prepare_pois, assign_pois_to_blocks
from src.process_blocks import construct_blocks, filter_water_blocks, filter_small_blocks, filter_irregular_blocks, remove_false_water_blocks
from src.plotting import plot_blocks, plot_blocks_with_suspicious

Setup

In [2]:
# City to process. Swap which of the two assignments below is commented out
# to run the notebook for the other city.
# CITY_NAME = "Copenhagen"
CITY_NAME = "Gdansk"

# Initialize OSM: `osm` is the handle passed to the load_* helpers below,
# `boundary` is later used to choose the local CRS.
osm, boundary = init_osm(CITY_NAME)

In [3]:
# Load every raw layer from the same OSM handle. These are reprojected to a
# local CRS in a later cell.
# NOTE(review): the pandas chained-assignment warning in this cell's output
# points at a `prepare_geodataframe(` call, i.e. code outside this notebook
# (presumably the OSM reader dependency) — confirm before trying to fix it here.
roads = load_roads(osm)
railways = load_railways(osm)
water_polygons = load_water_polygons(osm)
pois = load_pois_with_green(osm)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  edges, nodes = prepare_geodataframe(


Loaded 33140 regular POIs
Loaded 251 green-space features
Total combined POIs: 33391


In [4]:
# Pick the city-local projected CRS from the boundary and report it
local_epsg = get_local_crs(boundary)
print(f"Using local CRS: EPSG:{local_epsg}")

# Reproject every layer in one call, keyed by layer name
layers = reproject_all(
    dict(
        boundary=boundary,
        roads=roads,
        railways=railways,
        water_polygons=water_polygons,
        pois=pois,
    ),
    local_epsg,
)

# Rebind the working names to their reprojected versions.
# Because the names are rebound, re-running this cell on its own feeds the
# already-reprojected layers back in; re-run the loading cell first.
boundary, roads, railways, water_polygons, pois = (
    layers[key]
    for key in ("boundary", "roads", "railways", "water_polygons", "pois")
)

Using local CRS: EPSG:32634


In [5]:
# Change water geometries.
# Read the input from the reprojected layer kept in `layers` instead of
# rebinding `water_polygons` from itself, so re-running this cell does not
# apply prepare_water_geometries to its own previous output. On a fresh
# top-to-bottom run both names refer to the same reprojected layer.
water_polygons = prepare_water_geometries(layers["water_polygons"])

# Build water edges after reprojection.
# NOTE(review): as far as this notebook shows, the edges are built from the
# reprojected layer in `layers`, not from the prepared `water_polygons`
# above — confirm that this is intentional.
water_edges = build_water_edges(layers["water_polygons"])

In [6]:
# Prepare POIs.
# Read the input from the reprojected layer kept in `layers` rather than from
# `pois` itself, so re-running this cell does not re-process its own output.
# On a fresh top-to-bottom run both names refer to the same reprojected layer.
pois = prepare_pois(pois=layers["pois"])

Number of POIs after cleaning: 33368
Assigned categories for 19019 POIs (57.0% coverage)
Number of POIs after categorization: 33368


Block construction

In [7]:
# Create initial blocks from the three line layers: roads, railways and the
# water edges built in the water-preparation cell.
initial_blocks = construct_blocks(roads, railways, water_edges)

In [8]:
# Plot initial blocks
# plot_blocks(blocks=initial_blocks, city_name=CITY_NAME, title="initial blocks", save_path="report/figures")

In [9]:
# Sanity check: non-null count per column of the initial blocks.
initial_blocks.count()

geometry    10127
dtype: int64

In [10]:
# Sanity check: show the CRS attached to the constructed blocks so it can be
# compared with the local EPSG printed by the reprojection cell.
print(initial_blocks.crs)

EPSG:32634


In [11]:
# Sanity check: distinct geometry types present in the roads layer.
roads.geometry.type.unique()


array(['LineString'], dtype=object)

In [12]:
# Sanity check: distinct geometry types present in the railways layer.
railways.geometry.type.unique()


array(['LineString'], dtype=object)

In [13]:
# Sanity check: number of water features per geometry type (counts rather
# than just the distinct types, so any mix of types is visible).
water_polygons.geometry.type.value_counts()


Polygon         1085
MultiPolygon      15
Name: count, dtype: int64

In [14]:
# Sanity check: distinct geometry types present in the water edges.
water_edges.geometry.type.unique()


array(['LineString'], dtype=object)

In [15]:
# Sanity check: distinct geometry types present in the prepared POIs.
pois.geometry.type.unique()

array(['Point'], dtype=object)

In [16]:
# Filter the initial blocks against the prepared water polygons and keep the
# result under a new name so the unfiltered blocks stay available.
blocks_no_water = filter_water_blocks(initial_blocks, water_polygons)

# Number of blocks left after water filtering.
print(len(blocks_no_water))

8703


In [17]:
# plot_blocks(blocks=blocks_no_water, city_name=CITY_NAME, title="blocks after water filtering", save_path="report/figures")

In [18]:
# Handle small blocks with the helper's default settings. Per the helper's
# own log output it works iteratively, merging blocks that fall below a
# threshold until none are left.
blocks_no_small = filter_small_blocks(blocks_no_water)

# Number of blocks left after small-block filtering.
print(len(blocks_no_small))


Iteration 1:
Threshold = 559.05
Small blocks found = 2176
 Merged 2176 small blocks.
 Remaining blocks = 6527

Iteration 2:
Threshold = 559.05
Small blocks found = 0
No small blocks left
6527


In [19]:
# plot_blocks(blocks=blocks_no_small, city_name=CITY_NAME, title="blocks after small filtering", save_path="report/figures")

In [20]:
# Quantile cut-offs for flagging suspected false-water blocks, named here so
# they are easy to find and tune.
# NOTE(review): judging by the helper's log message ("area>…, compactness<…"),
# a block is flagged when its area is above the area quantile and its
# compactness is below the compactness quantile — confirm against the helper.
FALSE_WATER_AREA_QUANTILE = 0.999
FALSE_WATER_COMPACTNESS_QUANTILE = 0.03

# Returns the cleaned blocks plus the blocks that were flagged as suspicious
# (the latter are used by the diagnostic plot below).
blocks_cleaned, suspicious = remove_false_water_blocks(
    blocks_no_small,
    area_quantile=FALSE_WATER_AREA_QUANTILE,
    compactness_quantile=FALSE_WATER_COMPACTNESS_QUANTILE,
)

Removed 1 suspected false-water blocks (area>100%, compactness<3%).


In [21]:
# plot_blocks_with_suspicious(blocks=blocks_cleaned, suspicious=suspicious, city_name=CITY_NAME, title="false water blocks", save_path="report/figures")

In [22]:
# Filter irregular blocks with the helper's default settings.
# NOTE(review): this call emits repeated pandas SettingWithCopyWarning
# messages (see output); they presumably originate inside
# filter_irregular_blocks, which assigns to a slice of a DataFrame — confirm
# and fix there (e.g. take an explicit .copy() or assign via .loc).
blocks_no_irregular = filter_irregular_blocks(blocks_cleaned)

# Number of blocks left after irregular-block filtering.
print(len(blocks_no_irregular))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

6003


In [23]:
# plot_blocks(blocks=blocks_no_irregular, city_name=CITY_NAME, title="blocks after irregular filtering", save_path="report/figures")

In [24]:
# Final block set: a plain alias of the last filtering stage (same object,
# no copy is made).
blocks = blocks_no_irregular

In [25]:
# plot_blocks(blocks=blocks, city_name=CITY_NAME, title="final blocks", save_path="report/figures")

Assign POIs to blocks

In [26]:
# Attach the prepared POIs to the final blocks.
blocks_with_pois = assign_pois_to_blocks(pois, blocks)

# Printed length allows a check against the block count reported by the
# helper's own log line.
print(len(blocks_with_pois))

POIs assigned to 2286 blocks (out of 6003)
6003
