In [None]:
import sys
import os
from pathlib import Path

# Work out which directory this code runs from. `__file__` exists only when
# the code is executed as a script/module; inside a notebook kernel it is
# undefined, so the current working directory is used instead.
if '__file__' in globals():
    current_dir = Path(os.path.abspath(__file__)).parent
else:
    current_dir = Path(os.getcwd())

# Directory one level above the current one.
parent_dir = current_dir.parent

# Put the parent directory on the import path. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate entry each time.
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))


In [None]:
import geopandas as gpd
import requests
import os
from shapely.geometry import Polygon, LineString, MultiLineString
import io
import osmnx as ox
from shapely import Point
from mapclassify import greedy
import momepy
import shapely
from shapely.ops import split, snap, unary_union


In [None]:
# Run configuration read by the cells below.
local_crs = 27700  # EPSG code of the projected CRS used for buffering and for projecting the rail graph
place = "test"  # name of the sub-folder under ../output/ that receives every file written here
country = "UK"  # picks ../input/{country}_study_area.shp; when None, `place` is used in the file name instead
lat, lng = 55.86421405612109, -4.251846930489373  # centre point, interpreted as EPSG:4326 coordinates
crs = 4326  # EPSG code; NOTE(review): not referenced by name in the visible cells, which hard-code 4326
radius = 1  # buffered as (radius + 5) * 1000 in local_crs units — presumably km converted to m, confirm

In [None]:
# Centre point packed as a (latitude, longitude) tuple.
latlng = lat, lng

In [None]:
# Create this place's output folder (including missing parents).
# `exist_ok=True` turns the call into a no-op when the folder already exists
# and removes the gap between a separate existence check and the creation.
os.makedirs(f'../output/{place}', exist_ok=True)

## Part 1: Find limits of cells to download

In [None]:
# Build the download extent: take the centre point (EPSG:4326), project it to
# the local CRS, buffer it by (radius + 5) * 1000 CRS units, and project the
# resulting geometry back to EPSG:4326.
buffer = (
    gpd.GeoDataFrame(index=[0], geometry=[Point(lng, lat)], crs=4326)
    .to_crs(local_crs)
    .buffer((radius + 5) * 1000)
    .to_crs(4326)
)

In [None]:
# Show the buffered extent on an interactive map for a visual check.
buffer.explore()

In [None]:
# Fetch the OSM features tagged natural=water that fall inside the buffered extent.
# The polygon is read through `buffer.geometry[0]` (the same access the rail
# download uses) because `buffer` starts out as a GeoSeries and is later
# rebound to a GeoDataFrame; plain `buffer[0]` only works on the former, so
# the cell used to fail when re-run after that rebinding.
water_gdf = ox.geometries_from_polygon(buffer.geometry[0], tags={"natural": "water"})
# Keep areal features only; any non-polygon geometries are discarded.
water_gdf = water_gdf[water_gdf.geometry.type.isin(['Polygon', 'MultiPolygon'])]

In [None]:
# Collect every column label except 'geometry' ...
columns_to_drop = list(filter(lambda col: col != 'geometry', water_gdf.columns))

# ... remove those columns and renumber the rows with a fresh default index.
water_gdf = water_gdf.drop(columns=columns_to_drop).reset_index(drop=True)

In [None]:
# Write the geometry-only water polygons to this place's output folder.
water_gdf.to_parquet(f"../output/{place}/water.pq")

In [None]:
# Wrap the buffer geometry in a GeoDataFrame, the form passed to gpd.overlay
# and gpd.sjoin further down. The geometry is read through `.geometry`, which
# exists on both GeoSeries and GeoDataFrame, so the cell gives the same result
# on its first run and on any re-run after `buffer` has already been wrapped.
buffer = gpd.GeoDataFrame(geometry=buffer.geometry, crs=4326)

In [None]:
# Load the study-area shapefile: the file is named after `country` when one is
# configured, otherwise after `place`.
# `is None` is the identity test for None; `== None` goes through `__eq__`.
if country is None:
    world_poly = gpd.read_file(f"../input/{place}_study_area.shp")
else:
    world_poly = gpd.read_file(f"../input/{country}_study_area.shp")

In [None]:
# Study area = (loaded polygons ∩ buffered extent) minus the water polygons.
study_area = gpd.overlay(
    gpd.overlay(world_poly, buffer, how='intersection'),
    water_gdf,
    how='difference',
)

In [None]:
# Load the joint buildings layer from the local input folder.
# NOTE(review): the path suggests an Ordnance Survey extract kept out of version control — confirm.
downloaded_gdf = gpd.read_parquet("../input/gitignore/os-buildings/joint_buildings.pq")

In [None]:
# Reproject the buildings to EPSG:4326 and keep only those intersecting the
# buffered extent (inner spatial join against the buffer GeoDataFrame).
# `predicate=` is the current keyword for the spatial test; the older `op=`
# spelling is deprecated in geopandas and rejected by recent releases.
downloaded_gdf = gpd.sjoin(downloaded_gdf.to_crs(4326), buffer, how="inner", predicate="intersects")

In [None]:
# Quick static plot of the buildings retained by the spatial join.
downloaded_gdf.plot()

In [None]:
# Load the merged roads layer from the local input folder.
# NOTE(review): the path suggests an Ordnance Survey extract kept out of version control — confirm.
streets = gpd.read_parquet("../input/gitignore/os-roads/merged_roads.parquet")

In [None]:
# Reproject the roads to EPSG:4326, the CRS of the buffer they are joined against.
streets = streets.to_crs(epsg=4326)

In [None]:
# Every column label other than 'geometry' is marked for removal ...
columns_to_drop = [label for label in streets.columns if label != 'geometry']

# ... leaving a geometry-only roads layer.
streets = streets.drop(columns_to_drop, axis=1)

In [None]:
# Keep only the roads that intersect the buffered extent (inner spatial join).
# `predicate=` is the current keyword for the spatial test; the older `op=`
# spelling is deprecated in geopandas and rejected by recent releases.
streets = gpd.sjoin(streets, buffer, how="inner", predicate="intersects")

In [None]:
# streets.explore()

In [None]:
# Remove the 'index_right' bookkeeping column left behind by the spatial join.
streets = streets.drop(columns=["index_right"])

In [None]:
# Split multi-part geometries so each row holds a single-part geometry.
# NOTE(review): explode() is called with default arguments; the shape of the
# resulting index may depend on the installed geopandas version — confirm.
streets = streets.explode()

In [None]:

# Helper that flattens 3D street lines to 2D; it is applied to `streets` below.

def convert_geometry(geom):
    """Return ``geom`` without its Z coordinate when it is a 3D LineString.

    Shapely reports ``geom_type == 'LineString'`` for 2D and 3D lines alike
    (it never returns the string ``'LineStringZ'``), so the presence of a
    third dimension is detected through ``has_z``.

    Parameters
    ----------
    geom : shapely geometry
        Geometry to inspect.

    Returns
    -------
    shapely geometry
        A new 2D ``LineString`` built from the x/y of every vertex when
        ``geom`` is a LineString with Z values; otherwise ``geom`` itself,
        unchanged.
    """
    if geom.geom_type == 'LineString' and geom.has_z:
        # Slice each vertex down to its first two ordinates (x, y).
        return LineString([coord[:2] for coord in geom.coords])
    # Any other geometry (2D lines, other geometry types) passes through untouched.
    return geom

# Run convert_geometry over every street geometry and write the results back
# into the 'geometry' column.
streets['geometry'] = streets['geometry'].apply(convert_geometry)

In [None]:
# Write the buildings retained by the spatial join to this place's output folder.
downloaded_gdf.to_parquet(f"../output/{place}/buildings_raw.pq")

In [None]:
# Write the clipped, exploded street geometries to this place's output folder.
streets.to_parquet(f"../output/{place}/streets_raw.pq")

In [None]:
# Download the OSM network matching railway~rail (tunnels excluded) inside the
# buffered extent, then project the graph to the local CRS.
osm_graph = ox.graph.graph_from_polygon(
    buffer.geometry[0],
    network_type='all',
    custom_filter='["railway"~"rail"]["tunnel"!="yes"]',
)
osm_graph = ox.projection.project_graph(osm_graph, to_crs=local_crs)

# Turn the undirected version of the graph into a GeoDataFrame of edges only,
# asking osmnx to fill in geometries for edges that lack one.
rail = ox.graph_to_gdfs(
    ox.get_undirected(osm_graph),
    nodes=False,
    edges=True,
    node_geometry=False,
    fill_edge_geometry=True,
)

# Keep just the geometry column and renumber the rows.
columns_to_drop = [col for col in rail.columns if col != 'geometry']
rail = rail.drop(columns=columns_to_drop).reset_index(drop=True)

# Flatten the edge geometries into a list of single LineStrings.
rail_strings = []
for geometry in rail.geometry:
    if geometry.geom_type == 'LineString':
        rail_strings.append(geometry)
    elif geometry.geom_type == 'MultiLineString':
        # Shapely 2.x multi-part geometries are not iterable; their parts are
        # exposed through `.geoms` (iterating the geometry raises TypeError).
        rail_strings.extend(geometry.geoms)

collection = shapely.GeometryCollection(rail_strings)  # combine to a single object
noded = shapely.node(collection)  # add missing nodes
# NOTE(review): `noded` is one geometry, so this GeoDataFrame appears to hold a
# single row — confirm that downstream code expects that.
rail_noded_gdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(noded), crs=local_crs)

In [None]:
# Show the noded rail geometry on an interactive map for a visual check.
rail_noded_gdf.explore()

In [None]:
# Write the noded rail geometry (in the local CRS) to this place's output folder.
rail_noded_gdf.to_parquet(f"../output/{place}/rail_raw.pq")

In [None]:
# Write the clipped, water-free study area to this place's output folder.
study_area.to_parquet(f"../output/{place}/study_area.pq")

In [None]:
# Quick static plot of the final study area.
study_area.plot()